From 604ebefd3cc76e774e6a2b89c409c6dc14d390ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szymon=20=C5=81opaciuk?= Date: Thu, 14 Dec 2017 10:17:31 +0100 Subject: [PATCH] stricter error catching when loading last_runs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create an exception for when a last runs file doesn't exist. Signed-off-by: Szymon Łopaciuk --- hepcrawl/spiders/oaipmh_spider.py | 29 +- tests/functional/cds/fixtures/cds.xml | 1480 ------------------------- tests/unit/test_oaipmh.py | 9 +- 3 files changed, 22 insertions(+), 1496 deletions(-) delete mode 100644 tests/functional/cds/fixtures/cds.xml diff --git a/hepcrawl/spiders/oaipmh_spider.py b/hepcrawl/spiders/oaipmh_spider.py index 1a057dd2..d785b214 100644 --- a/hepcrawl/spiders/oaipmh_spider.py +++ b/hepcrawl/spiders/oaipmh_spider.py @@ -10,7 +10,7 @@ """Generic spider for OAI-PMH servers.""" import logging -from errno import EEXIST +from errno import EEXIST as FILE_EXISTS, ENOENT as NO_SUCH_FILE_OR_DIR from datetime import datetime from dateutil import parser as dateparser import hashlib @@ -28,6 +28,12 @@ LOGGER = logging.getLogger(__name__) +class NoLastRunToLoad(Exception): + """Error raised when there was a problem with loading the last_runs file""" + def __init__(self, file_path): + self.message = "Failed to load file at {}".format(file_path) + + class OAIPMHSpider(StatefulSpider): """ Implements a spider for the OAI-PMH protocol by using the Python sickle library. 
@@ -132,8 +138,10 @@ def _load_last_run(self): last_run = json.load(f) LOGGER.info('Last run file loaded: {}'.format(repr(last_run))) return last_run - except IOError: - return None + except IOError as exc: + if exc.errno == NO_SUCH_FILE_OR_DIR: + raise NoLastRunToLoad(file_path) + raise def _save_run(self, started_at): """Store last run information @@ -159,18 +167,17 @@ def _save_run(self, started_at): try: makedirs(path.dirname(file_path)) except OSError as exc: - if exc.errno == EEXIST: - pass - else: + if exc.errno != FILE_EXISTS: raise with open(file_path, 'w') as f: json.dump(last_run_info, f, indent=4) @property def _resume_from(self): - last_run = self._load_last_run() - if not last_run: + try: + last_run = self._load_last_run() + resume_at = last_run['until_date'] or last_run['last_run_finished_at'] + date_parsed = dateparser.parse(resume_at) + return date_parsed.strftime('%Y-%m-%d') + except NoLastRunToLoad: return None - resume_at = last_run['until_date'] or last_run['last_run_finished_at'] - date_parsed = dateparser.parse(resume_at) - return date_parsed.strftime('%Y-%m-%d') diff --git a/tests/functional/cds/fixtures/cds.xml b/tests/functional/cds/fixtures/cds.xml deleted file mode 100644 index 9bec8576..00000000 --- a/tests/functional/cds/fixtures/cds.xml +++ /dev/null @@ -1,1480 +0,0 @@ - - - -2017-12-07T15:05:26Zhttp://cds.cern.ch/oai2d -
oai:cds.cern.ch:12007522017-11-16T08:09:30Zcerncds:FULLTEXTforINSPIRE
- 00000coc 2200000uu 4500 - 1200752 - SzGeCERN - 20171116090930.0 - - oai:cds.cern.ch:1200752 - cerncds:FULLTEXT - forINSPIRE - - - Inspire - 1509577 - - - eng - - - Dubus, G - Grenoble Observ. - - - High and very high energy gamma-ray emission from binaries - - - 2009 - - - Open Access - CC-BY-NC-SA-3.0 - http://creativecommons.org/licenses/by-nc-sa/3.0/ - - - SIS POS MQW7-2009 - - - Inspire - - - SzGeCERN - Astrophysics and Astronomy - - - ARTICLE - - - 018 - PoS - MQW7 - 2008 - - - http://cds.cern.ch/record/1200752/files/MQW7_018.pdf - Published version from PoS - - - n - 200933 - - - 13 - - - 20110201 - 1448 - CER01 - 20090817 - - - 1129423 - 018 - izmir20080901 - - - PUBLIC - - - 002842486CER - - - ARTICLE - - - ConferencePaper - - - Hidden - - -
oai:cds.cern.ch:12007532017-11-16T08:09:30Zcerncds:FULLTEXTforINSPIRE
- 00000coc 2200000uu 4500 - 1200753 - SzGeCERN - 20171116090930.0 - - oai:cds.cern.ch:1200753 - cerncds:FULLTEXT - forINSPIRE - - - Inspire - 1509578 - - - eng - - - Dubois, R - SLAC - - - GLAST: Launched and Being Commissioned - Status and Prospects for Microquasars - - - Fermi: Launched and Being Commissioned - Status and Prospects for Microquasars - Other title - - - 2008 - - - Open Access - CC-BY-NC-SA-3.0 - http://creativecommons.org/licenses/by-nc-sa/3.0/ - - - SIS POS MQW7-2009 - - - No authors - - - Inspire - - - SzGeCERN - Astrophysics and Astronomy - - - ARTICLE - - - for the Fermi LAT Collaboration - - - 019 - PoS - MQW7 - 2008 - - - http://cds.cern.ch/record/1200753/files/MQW7_019.pdf - Published version from PoS - - - n - 200933 - - - 13 - - - 20110201 - 1448 - CER01 - 20090817 - - - 1129423 - 019 - izmir20080901 - - - PUBLIC - - - 002842487CER - - - ARTICLE - - - ConferencePaper - - - Hidden - - -
oai:cds.cern.ch:12007542017-11-16T08:09:30Zcerncds:FULLTEXTforINSPIRE
- 00000coc 2200000uu 4500 - 1200754 - SzGeCERN - 20171116090930.0 - - oai:cds.cern.ch:1200754 - cerncds:FULLTEXT - forINSPIRE - - - Inspire - 1509579 - - - eng - - - Romero, G E - Villa Elisa, Inst. Argentino Radioastron. - La Plata U. - - - Hadronic models of high-energy radiation from microquasars: recent developments - - - 2008 - - - Open Access - CC-BY-NC-SA-3.0 - http://creativecommons.org/licenses/by-nc-sa/3.0/ - - - SIS POS MQW7-2009 - - - Inspire - - - SzGeCERN - Astrophysics and Astronomy - - - ARTICLE - - - 020 - PoS - MQW7 - 2008 - - - http://cds.cern.ch/record/1200754/files/MQW7_020.pdf - Published version from PoS - - - n - 200933 - - - 13 - - - 20110201 - 1448 - CER01 - 20090817 - - - 1129423 - 020 - izmir20080901 - - - PUBLIC - - - 002842488CER - - - ARTICLE - - - ConferencePaper - - - Hidden - - -
oai:cds.cern.ch:12032802017-11-16T08:09:52Zcerncds:FULLTEXTforINSPIRE
- 00000coc 2200000uu 4500 - 1203280 - SzGeCERN - 20171116090952.0 - - oai:cds.cern.ch:1203280 - cerncds:FULLTEXT - forINSPIRE - - - Inspire - 1509595 - - - eng - - - Guess, C J - Michigan State U., NSCL - Michigan U. - Michigan State U., JINA - - - Studying matrix elements for the neutrinoless double beta decay of 150Nd via the 150Sm(t,3He)150Pm* and 150Nd(3He,t)150Pm* reactions - - - 2008 - - - Open Access - CC-BY-NC-SA-3.0 - http://creativecommons.org/licenses/by-nc-sa/3.0/ - - - SIS POS NIC X-2009 - - - Inspire - - - SzGeCERN - Astrophysics and Astronomy - - - ARTICLE - - - Austin, S M - Michigan State U., NSCL - Michigan State U., JINA - - - Bazin, D - Michigan State U., NSCL - - - Brown, B A - Michigan State U., NSCL - Michigan U. - Michigan State U., JINA - - - Caesar, C - Michigan State U., NSCL - Mainz U. - - - Deaven, J M - Michigan State U., NSCL - Michigan U. - Michigan State U., JINA - - - Herlitzius, C - Michigan State U., NSCL - Mainz U. - - - Hitt, G W - Michigan State U., NSCL - Michigan U. - Michigan State U., JINA - - - Meharchand, R T - Michigan State U., NSCL - Michigan U. - Michigan State U., JINA - - - Perdikakis, G - Michigan State U., NSCL - Michigan State U., JINA - - - Shimbara, Y - Niigata U., Grad. Sch. Sci. Tech. - - - Tur, C - Michigan State U., NSCL - Michigan State U., JINA - - - Zegers, R G T - Michigan State U., NSCL - Michigan U. - Michigan State U., JINA - - - 104 - PoS - NIC X - 2008 - - - http://cds.cern.ch/record/1203280/files/NIC20X_104.pdf - Published version from PoS - - - n - 200933 - - - 13 - - - 20110201 - 1448 - CER01 - 20090827 - - - 1024674 - 104 - mackinacisland20080727 - - - PUBLIC - - - 002844587CER - - - ARTICLE - - - ConferencePaper - - - Hidden - - -
oai:cds.cern.ch:12032812017-11-16T08:09:55Zcerncds:FULLTEXTforINSPIRE
- 00000coc 2200000uu 4500 - 1203281 - SzGeCERN - 20171116090955.0 - - oai:cds.cern.ch:1203281 - cerncds:FULLTEXT - forINSPIRE - - - Inspire - 1509596 - - - eng - - - Jachowicz, N - Ghent U. - - - Untangling supernova-neutrino oscillations with beta-beam data - - - 2008 - - - Open Access - CC-BY-NC-SA-3.0 - http://creativecommons.org/licenses/by-nc-sa/3.0/ - - - SIS POS NIC X-2009 - - - Inspire - - - SzGeCERN - Astrophysics and Astronomy - - - ARTICLE - - - McLaughlin, G C - North Carolina State U. - - - Volpe, C - Orsay, IPN - - - 107 - PoS - NIC X - 2008 - - - http://cds.cern.ch/record/1203281/files/NIC20X_107.pdf - Published version from PoS - - - n - 200933 - - - 13 - - - 20110201 - 1448 - CER01 - 20090827 - - - 1024674 - 107 - mackinacisland20080727 - - - PUBLIC - - - 002844588CER - - - ARTICLE - - - ConferencePaper - - - Hidden - - -
oai:cds.cern.ch:12033612017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
- 00000coc 2200000uu 4500 - 1203361 - SzGeCERN - 20171116090958.0 - - oai:cds.cern.ch:1203361 - cerncds:FULLTEXT - forINSPIRE - - - Inspire - 1509597 - - - eng - - - Kawagoe, S - Tokyo U. - - - Neutrino oscillations in non-spherical supernova explosions - - - 2008 - - - Open Access - CC-BY-NC-SA-3.0 - http://creativecommons.org/licenses/by-nc-sa/3.0/ - - - SIS POS NIC X-2009 - - - Inspire - - - SzGeCERN - Astrophysics and Astronomy - - - ARTICLE - - - Takiwaki, T - Tokyo U. - - - Kotake, K - Natl. Astron. Observ. of Japan - - - 109 - PoS - NIC X - 2008 - - - http://cds.cern.ch/record/1203361/files/NIC20X_109.pdf - Published version from PoS - - - n - 200933 - - - 13 - - - 20110201 - 1448 - CER01 - 20090827 - - - 1024674 - 109 - mackinacisland20080727 - - - PUBLIC - - - 002844668CER - - - ARTICLE - - - ConferencePaper - - - Hidden - - -
oai:cds.cern.ch:12033622017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
- 00000coc 2200000uu 4500 - 1203362 - SzGeCERN - 20171116090958.0 - - oai:cds.cern.ch:1203362 - cerncds:FULLTEXT - forINSPIRE - - - Inspire - 1509598 - - - eng - - - Nakazato, K - Waseda U. - - - Neutrino Emission from Stellar Collapse including Hadron-Quark Mixed Phase - - - 2008 - - - Open Access - CC-BY-NC-SA-3.0 - http://creativecommons.org/licenses/by-nc-sa/3.0/ - - - SIS POS NIC X-2009 - - - Inspire - - - SzGeCERN - Astrophysics and Astronomy - - - ARTICLE - - - Sumiyoshi, K - Numazu Coll. Tech. - - - Yamada, s - Waseda U. - - - 116 - PoS - NIC X - 2008 - - - http://cds.cern.ch/record/1203362/files/NIC20X_116.pdf - Published version from PoS - - - n - 200933 - - - 13 - - - 20110201 - 1448 - CER01 - 20090827 - - - 1024674 - 116 - mackinacisland20080727 - - - PUBLIC - - - 002844669CER - - - ARTICLE - - - ConferencePaper - - - Hidden - - -
oai:cds.cern.ch:12033632017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
- 00000coc 2200000uu 4500 - 1203363 - SzGeCERN - 20171116090958.0 - - oai:cds.cern.ch:1203363 - cerncds:FULLTEXT - forINSPIRE - - - Inspire - 1509599 - - - eng - - - Sumiyoshi, K - Numazu Coll. Tech. - - - Short neutrino burst from failed supernovae as a probe of dense matter with hyperon mixture - - - 2008 - - - Open Access - CC-BY-NC-SA-3.0 - http://creativecommons.org/licenses/by-nc-sa/3.0/ - - - SIS POS NIC X-2009 - - - Inspire - - - SzGeCERN - Astrophysics and Astronomy - - - ARTICLE - - - Ishizuka, C - Hokkaido U. - - - Ohnishi, A - Kyoto U., Yukawa Inst., Kyoto - - - Yamada, S - Waseda U. - - - Suzuki, H - Tokyo U. of Sci. - - - 122 - PoS - NIC X - 2008 - - - http://cds.cern.ch/record/1203363/files/NICX_122.pdf - Published version from PoS - - - n - 200933 - - - 13 - - - 20110201 - 1448 - CER01 - 20090827 - - - 1024674 - 122 - mackinacisland20080727 - - - PUBLIC - - - 002844670CER - - - ARTICLE - - - ConferencePaper - - - Hidden - - -
oai:cds.cern.ch:12033642017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
- 00000coc 2200000uu 4500 - 1203364 - SzGeCERN - 20171116090958.0 - - oai:cds.cern.ch:1203364 - cerncds:FULLTEXT - forINSPIRE - - - Inspire - 1509600 - - - eng - - - Suzuki, T - Tokyo U. - - - Neutrino Nucleus Reactions and Nucleosynthesis in Stars - - - 2008 - - - Open Access - CC-BY-NC-SA-3.0 - http://creativecommons.org/licenses/by-nc-sa/3.0/ - - - SIS POS NIC X-2009 - - - Inspire - - - SzGeCERN - Astrophysics and Astronomy - - - ARTICLE - - - Yoshida, T - Natl. Astron. Observ. of Japan - - - Chiba, S - JAEA, Ibaraki - - - Honma, M - Aizu U. - - - Higashiyama, K - Chiba Inst. Tech. - - - Umeda, H - Tokyo U. - - - Nomoto, K - Tokyo U. - - - Kajino, T - Tokyo U. - Natl. Astron. Observ. of Japan - - - Otsuka, T - Tokyo U. - - - 123 - PoS - NIC X - 2008 - - - http://cds.cern.ch/record/1203364/files/NICX_123.pdf - Published version from PoS - - - n - 200933 - - - 13 - - - 20110201 - 1448 - CER01 - 20090827 - - - 1024674 - 123 - mackinacisland20080727 - - - PUBLIC - - - 002844671CER - - - ARTICLE - - - ConferencePaper - - - Hidden - - -
oai:cds.cern.ch:12033652017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
- 00000coc 2200000uu 4500 - 1203365 - SzGeCERN - 20171116090958.0 - - oai:cds.cern.ch:1203365 - cerncds:FULLTEXT - forINSPIRE - - - Inspire - 1509601 - - - eng - - - Whitehouse, S - Basel U. - - - Neutrino transport in 3D simulations of core-collapse supernovae - - - A new approach to neutrino transport in 3D simulations of core-collapse supernovae - Other title - - - 2008 - - - Open Access - CC-BY-NC-SA-3.0 - http://creativecommons.org/licenses/by-nc-sa/3.0/ - - - SIS POS NIC X-2009 - - - Inspire - - - SzGeCERN - Astrophysics and Astronomy - - - ARTICLE - - - Liebendörfer, M - Basel U. - - - 243 - PoS - NIC X - 2008 - - - http://cds.cern.ch/record/1203365/files/NICX_243.pdf - Published version from PoS - - - n - 200933 - - - 13 - - - 20110201 - 1448 - CER01 - 20090827 - - - 1024674 - 243 - mackinacisland20080727 - - - PUBLIC - - - 002844672CER - - - ARTICLE - - - ConferencePaper - - - Hidden - - -
oai:cds.cern.ch:12033662017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
- 00000coc 2200000uu 4500 - 1203366 - SzGeCERN - 20171116090958.0 - - oai:cds.cern.ch:1203366 - cerncds:FULLTEXT - forINSPIRE - - - Inspire - 1509602 - - - eng - - - Arcones, A - Damstadt, Tech. Hochsch. - Darmstadt, GSI - - - Neutrino-driven winds and nucleosynthesis - - - 2008 - - - Open Access - CC-BY-NC-SA-3.0 - http://creativecommons.org/licenses/by-nc-sa/3.0/ - - - SIS POS NIC X-2009 - - - Inspire - - - SzGeCERN - Astrophysics and Astronomy - - - ARTICLE - - - Martínez-Pinedo, G - Darmstadt, GSI - - - Schwenk, A - TRIUMF - - - O’Connor, E - TRIUMF - Caltech - - - Langanke, K - Damstadt, Tech. Hochsch. - Darmstadt, GSI - - - Horowitz, C J - Indiana U. - - - Janka, H T - Garching, Max Planck Inst. - - - 128 - PoS - NIC X - 2008 - - - http://cds.cern.ch/record/1203366/files/NIC20X_128.pdf - Published version from PoS - - - n - 200933 - - - 13 - - - 20110201 - 1448 - CER01 - 20090827 - - - 1024674 - 128 - mackinacisland20080727 - - - PUBLIC - - - 002844673CER - - - ARTICLE - - - ConferencePaper - - - Hidden - - -
oai:cds.cern.ch:12033672017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
- 00000coc 2200000uu 4500 - 1203367 - SzGeCERN - 20171116090958.0 - - oai:cds.cern.ch:1203367 - cerncds:FULLTEXT - forINSPIRE - - - Inspire - 1509603 - - - eng - - - Roberts, L - UC, Santa Cruz, Astron. Astrophys. - - - Nucleosynthesis in the Neutrino Driven Wind of Protoneutron Stars - - - 2008 - - - Open Access - CC-BY-NC-SA-3.0 - http://creativecommons.org/licenses/by-nc-sa/3.0/ - - - SIS POS NIC X-2009 - - - Inspire - - - SzGeCERN - Astrophysics and Astronomy - - - ARTICLE - - - Woosley, S - UC, Santa Cruz, Astron. Astrophys. - - - Heger, A - Minnesota U. - - - Hoffman, R - LLNL, Livermore - - - 146 - PoS - NIC X - 2008 - - - http://cds.cern.ch/record/1203367/files/NICX_146.pdf - Published version from PoS - - - n - 200933 - - - 13 - - - 20110201 - 1448 - CER01 - 20090827 - - - 1024674 - 146 - mackinacisland20080727 - - - PUBLIC - - - 002844674CER - - - ARTICLE - - - ConferencePaper - - - Hidden - - -
oai:cds.cern.ch:12033692017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
- 00000coc 2200000uu 4500 - 1203369 - SzGeCERN - 20171116090958.0 - - oai:cds.cern.ch:1203369 - cerncds:FULLTEXT - forINSPIRE - - - Inspire - 1509604 - - - eng - - - Kojima, K - Tokyo U. - Natl. Astron. Observ. of Japan - - - Neutrino effect in cosmology with the primordial magnetic field - - - Neutrino effects in cosmology with A primordial magnetic field - Other title - - - 2008 - - - Open Access - CC-BY-NC-SA-3.0 - http://creativecommons.org/licenses/by-nc-sa/3.0/ - - - SIS POS NIC X-2009 - - - Inspire - - - SzGeCERN - Astrophysics and Astronomy - - - ARTICLE - - - Ichiki, K - Nagoya U. - - - Kajino, T - Tokyo U. - Natl. Astron. Observ. of Japan - - - Mathews, G J - Notre Dame U. - Natl. Astron. Observ. of Japan - - - 226 - PoS - NIC X - 2008 - - - http://cds.cern.ch/record/1203369/files/NICX_226.pdf - Published version from PoS - - - n - 200933 - - - 13 - - - 20110201 - 1448 - CER01 - 20090827 - - - 1024674 - 226 - mackinacisland20080727 - - - PUBLIC - - - 002844676CER - - - ARTICLE - - - ConferencePaper - - - Hidden - - -
oai:cds.cern.ch:12033702017-11-16T08:09:47Zcerncds:FULLTEXTforINSPIRE
- 00000coc 2200000uu 4500 - 1203370 - SzGeCERN - 20171116090947.0 - - oai:cds.cern.ch:1203370 - cerncds:FULLTEXT - forINSPIRE - - - Inspire - 1509605 - - - eng - - - Yamazaki, D G - Natl. Astron. Observ. of Japan - - - A Strong Constraint on the Neutrino Mass from the Formation of Large Scale Structure in the Presence of the Primordial Magnetic Field - - - 2008 - - - Open Access - CC-BY-NC-SA-3.0 - http://creativecommons.org/licenses/by-nc-sa/3.0/ - - - SIS POS NIC X-2009 - - - Inspire - - - SzGeCERN - Astrophysics and Astronomy - - - ARTICLE - - - Ichiki, K - Tokyo U. - - - Kajino, T - Natl. Astron. Observ. of Japan - - - Mathews, G J - Notre Dame U. - - - 239 - PoS - NIC X - 2008 - - - http://cds.cern.ch/record/1203370/files/NICX_239.pdf - Published version from PoS - - - n - 200933 - - - 13 - - - 20110201 - 1448 - CER01 - 20090827 - - - 1024674 - 239 - mackinacisland20080727 - - - PUBLIC - - - 002844677CER - - - ARTICLE - - - ConferencePaper - - - Hidden - - -
-
- diff --git a/tests/unit/test_oaipmh.py b/tests/unit/test_oaipmh.py index 497ff25b..42420f1b 100644 --- a/tests/unit/test_oaipmh.py +++ b/tests/unit/test_oaipmh.py @@ -10,10 +10,9 @@ from datetime import datetime import json from mock import patch -from os import remove, rmdir import pytest -from hepcrawl.spiders.oaipmh_spider import OAIPMHSpider +from hepcrawl.spiders.oaipmh_spider import OAIPMHSpider, NoLastRunToLoad from hepcrawl.testlib.fixtures import clean_dir from scrapy.utils.project import get_project_settings @@ -58,7 +57,7 @@ def test_last_run_file_path(spider): assert expected == result -def test_store_and_load_last_run(spider, cleanup): +def test_load_last_run(spider, cleanup): now = datetime.utcnow() spider._save_run(started_at=now) @@ -84,8 +83,8 @@ def test_store_and_load_last_run(spider, cleanup): def test_load_nonexistent(spider): - last_run = spider._load_last_run() - assert last_run == None + with pytest.raises(NoLastRunToLoad): + spider._load_last_run() def test_resume_from_nonexistent_no_error(spider):