Skip to content

Commit

Permalink
spiders: move Stateful and OAI to common module
Browse files Browse the repository at this point in the history
Signed-off-by: Szymon Łopaciuk <[email protected]>
  • Loading branch information
szymonlopaciuk committed Jan 16, 2018
1 parent a96f3c4 commit 6b7d886
Show file tree
Hide file tree
Showing 25 changed files with 53 additions and 30 deletions.
8 changes: 0 additions & 8 deletions hepcrawl/spiders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,3 @@
# more details.

from __future__ import absolute_import, division, print_function

from scrapy import Spider


class StatefulSpider(Spider):
    """Spider base class whose instances carry a ``state`` dictionary.

    Every instance receives its own empty ``state`` dict; all positional
    and keyword arguments are then forwarded unchanged to the next
    initializer in the method resolution order.
    """

    def __init__(self, *args, **kwargs):
        # Create the attribute before delegating, so it already exists
        # while the parent initializer runs.
        self.state = dict()
        super(StatefulSpider, self).__init__(*args, **kwargs)
2 changes: 1 addition & 1 deletion hepcrawl/spiders/alpha_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from scrapy import Request
from scrapy.spiders import CrawlSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..items import HEPRecord
from ..loaders import HEPLoader
from ..utils import (
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/aps_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from scrapy import Request

from . import StatefulSpider
from .common import StatefulSpider
from ..items import HEPRecord
from ..loaders import HEPLoader
from ..utils import (
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/arxiv_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from scrapy import Request, Selector
from scrapy.spiders import XMLFeedSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..items import HEPRecord
from ..loaders import HEPLoader
from ..mappings import CONFERENCE_WORDS, THESIS_WORDS
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/base_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from scrapy import Request
from scrapy.spiders import XMLFeedSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..items import HEPRecord
from ..loaders import HEPLoader
from ..utils import (
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/brown_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from scrapy import Request
from scrapy.spiders import CrawlSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..items import HEPRecord
from ..loaders import HEPLoader
from ..utils import (
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/cds_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from inspire_dojson import marcxml2record
from os.path import join as path_join

from .oaipmh_spider import OAIPMHSpider
from .common import OAIPMHSpider
from ..utils import ParsedItem


Expand Down
13 changes: 13 additions & 0 deletions hepcrawl/spiders/common/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# -*- coding: utf-8 -*-
#
# This file is part of hepcrawl.
# Copyright (C) 2015, 2016, 2017, 2018 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.

from __future__ import absolute_import, division, print_function

from .oaipmh_spider import OAIPMHSpider
from .stateful_spider import StatefulSpider
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from scrapy.http import Request, XmlResponse
from scrapy.selector import Selector
from . import StatefulSpider
from .stateful_spider import StatefulSpider


LOGGER = logging.getLogger(__name__)
Expand Down
18 changes: 18 additions & 0 deletions hepcrawl/spiders/common/stateful_spider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
#
# This file is part of hepcrawl.
# Copyright (C) 2015, 2016, 2017, 2018 CERN.
#
# hepcrawl is a free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.

from __future__ import absolute_import, division, print_function

from scrapy import Spider


class StatefulSpider(Spider):
    """Spider base class that gives each instance a ``state`` dictionary.

    On construction a fresh, empty ``state`` dict is attached to the
    instance, and the received positional and keyword arguments are then
    passed through untouched to the next initializer in the MRO.
    """

    def __init__(self, *args, **kwargs):
        # Assigned ahead of the parent initializer on purpose: the
        # attribute is already present when the rest of the chain runs.
        self.state = dict()
        super(StatefulSpider, self).__init__(*args, **kwargs)
2 changes: 1 addition & 1 deletion hepcrawl/spiders/desy_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from scrapy import Request
from six.moves import urllib

from . import StatefulSpider
from .common import StatefulSpider
from ..utils import (
ftp_list_files,
ftp_connection_info,
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/dnb_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from scrapy import Request
from scrapy.spiders import XMLFeedSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..items import HEPRecord
from ..loaders import HEPLoader
from ..utils import (
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/edp_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from scrapy import Request
from scrapy.spiders import XMLFeedSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..extractors.jats import Jats
from ..items import HEPRecord
from ..loaders import HEPLoader
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/elsevier_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from scrapy import Request
from scrapy.spiders import XMLFeedSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..items import HEPRecord
from ..loaders import HEPLoader
from ..utils import (
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/hindawi_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from scrapy import Request
from scrapy.spiders import XMLFeedSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..items import HEPRecord
from ..loaders import HEPLoader
from ..utils import (
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/infn_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from scrapy.http import Request
from scrapy.spiders import XMLFeedSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..items import HEPRecord
from ..loaders import HEPLoader
from ..utils import (
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/iop_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from scrapy import Request
from scrapy.spiders import XMLFeedSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..extractors.nlm import NLM
from ..items import HEPRecord
from ..loaders import HEPLoader
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/magic_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from scrapy import Request
from scrapy.spiders import XMLFeedSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..items import HEPRecord
from ..loaders import HEPLoader
from ..utils import (
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/mit_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from scrapy.http import Request
from scrapy.spiders import XMLFeedSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..items import HEPRecord
from ..loaders import HEPLoader
from ..utils import (
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/phenix_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from scrapy import Request
from scrapy.spiders import XMLFeedSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..items import HEPRecord
from ..loaders import HEPLoader
from ..utils import ParsedItem
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/phil_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from scrapy import Request
from scrapy.spiders import CrawlSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..items import HEPRecord
from ..loaders import HEPLoader
from ..utils import (
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/pos_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from scrapy import Request, Selector

from . import StatefulSpider
from .common import StatefulSpider
from ..dateutils import create_valid_date
from ..items import HEPRecord
from ..loaders import HEPLoader
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/t2k_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from scrapy import Request
from scrapy.spiders import XMLFeedSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..items import HEPRecord
from ..loaders import HEPLoader
from ..utils import (
Expand Down
2 changes: 1 addition & 1 deletion hepcrawl/spiders/wsp_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from scrapy import Request
from scrapy.spiders import XMLFeedSpider

from . import StatefulSpider
from .common import StatefulSpider
from ..parsers import JatsParser
from ..utils import (
ftp_list_files,
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_oaipmh.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from mock import patch
import pytest

from hepcrawl.spiders.oaipmh_spider import OAIPMHSpider, NoLastRunToLoad
from hepcrawl.spiders.common.oaipmh_spider import OAIPMHSpider, NoLastRunToLoad
from hepcrawl.testlib.fixtures import clean_dir
from scrapy.utils.project import get_project_settings

Expand Down

0 comments on commit 6b7d886

Please sign in to comment.