diff --git a/docker-compose.test.yml b/docker-compose.test.yml index 61fdf2a9..e3b90cdf 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -59,8 +59,11 @@ services: functional_cds: <<: *service_base command: py.test -vv tests/functional/cds - links: - - scrapyd + depends_on: + scrapyd: + condition: service_healthy + cds-http-server.local: + condition: service_healthy functional_pos: <<: *service_base @@ -130,6 +133,21 @@ services: - "CMD-SHELL" - "curl https://localhost:443/" + cds-http-server.local: + image: nginx:stable-alpine + volumes: + - ${PWD}/tests/functional/cds/fixtures/http_server/conf/proxy.conf:/etc/nginx/conf.d/default.conf + - ${PWD}/tests/functional/cds/fixtures/http_server/records:/etc/nginx/html/ + ports: + - 80:80 + healthcheck: + timeout: 5s + interval: 5s + retries: 5 + test: + - "CMD-SHELL" + - "curl http://localhost:80/" + rabbitmq: image: rabbitmq healthcheck: diff --git a/hepcrawl/spiders/cds_spider.py b/hepcrawl/spiders/cds_spider.py index e9aabb80..21d1ab15 100644 --- a/hepcrawl/spiders/cds_spider.py +++ b/hepcrawl/spiders/cds_spider.py @@ -42,13 +42,18 @@ class CDSSpider(OAIPMHSpider): name = 'CDS' - def __init__(self, from_date=None, oai_set="forINSPIRE", *args, **kwargs): + def __init__(self, + oai_endpoint='http://cds.cern.ch/oai2d', + from_date=None, + oai_set="forINSPIRE", + *args, **kwargs): super(CDSSpider, self).__init__( - url='http://cds.cern.ch/oai2d', + url=oai_endpoint, metadata_prefix='marcxml', oai_set=oai_set, from_date=from_date, - **kwargs) + **kwargs + ) def parse_record(self, selector): selector.remove_namespaces() diff --git a/hepcrawl/spiders/oaipmh_spider.py b/hepcrawl/spiders/oaipmh_spider.py index b66528a2..30121365 100644 --- a/hepcrawl/spiders/oaipmh_spider.py +++ b/hepcrawl/spiders/oaipmh_spider.py @@ -51,8 +51,14 @@ class OAIPMHSpider(StatefulSpider): name = 'OAI-PMH' granularity = _Granularity.DATE - def __init__(self, url, metadata_prefix='marcxml', oai_set=None, alias=None, - from_date=None, until_date=None, granularity=_Granularity.DATE, + def __init__(self, + url, + metadata_prefix='oai_dc', + oai_set=None, + alias=None, + from_date=None, + until_date=None, + granularity=_Granularity.DATE, record_class=Record, *args, **kwargs): super(OAIPMHSpider, self).__init__(*args, **kwargs) self.url = url diff --git a/tests/functional/cds/fixtures/http_server/conf/proxy.conf b/tests/functional/cds/fixtures/http_server/conf/proxy.conf new file mode 100644 index 00000000..68d70722 --- /dev/null +++ b/tests/functional/cds/fixtures/http_server/conf/proxy.conf @@ -0,0 +1,12 @@ +server { + listen 80; + server_name localhost; + charset_types text/xml; + charset UTF-8; + + location /oai2d { + if ($args ~ from=2017-11-15&verb=ListRecords&set=forINSPIRE&metadataPrefix=marcxml) { + rewrite ^.*$ /cds.xml permanent; + } + } +} diff --git a/tests/functional/cds/fixtures/http_server/records/cds.xml b/tests/functional/cds/fixtures/http_server/records/cds.xml new file mode 100644 index 00000000..9bec8576 --- /dev/null +++ b/tests/functional/cds/fixtures/http_server/records/cds.xml @@ -0,0 +1,1480 @@ + + + +2017-12-07T15:05:26Zhttp://cds.cern.ch/oai2d +
oai:cds.cern.ch:12007522017-11-16T08:09:30Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1200752 + SzGeCERN + 20171116090930.0 + + oai:cds.cern.ch:1200752 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509577 + + + eng + + + Dubus, G + Grenoble Observ. + + + High and very high energy gamma-ray emission from binaries + + + 2009 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS MQW7-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + 018 + PoS + MQW7 + 2008 + + + http://cds.cern.ch/record/1200752/files/MQW7_018.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090817 + + + 1129423 + 018 + izmir20080901 + + + PUBLIC + + + 002842486CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12007532017-11-16T08:09:30Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1200753 + SzGeCERN + 20171116090930.0 + + oai:cds.cern.ch:1200753 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509578 + + + eng + + + Dubois, R + SLAC + + + GLAST: Launched and Being Commissioned - Status and Prospects for Microquasars + + + Fermi: Launched and Being Commissioned - Status and Prospects for Microquasars + Other title + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS MQW7-2009 + + + No authors + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + for the Fermi LAT Collaboration + + + 019 + PoS + MQW7 + 2008 + + + http://cds.cern.ch/record/1200753/files/MQW7_019.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090817 + + + 1129423 + 019 + izmir20080901 + + + PUBLIC + + + 002842487CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12007542017-11-16T08:09:30Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1200754 + SzGeCERN + 20171116090930.0 + + oai:cds.cern.ch:1200754 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509579 + + + eng + + + Romero, G E + Villa Elisa, Inst. Argentino Radioastron. + La Plata U. + + + Hadronic models of high-energy radiation from microquasars: recent developments + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS MQW7-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + 020 + PoS + MQW7 + 2008 + + + http://cds.cern.ch/record/1200754/files/MQW7_020.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090817 + + + 1129423 + 020 + izmir20080901 + + + PUBLIC + + + 002842488CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12032802017-11-16T08:09:52Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203280 + SzGeCERN + 20171116090952.0 + + oai:cds.cern.ch:1203280 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509595 + + + eng + + + Guess, C J + Michigan State U., NSCL + Michigan U. + Michigan State U., JINA + + + Studying matrix elements for the neutrinoless double beta decay of 150Nd via the 150Sm(t,3He)150Pm* and 150Nd(3He,t)150Pm* reactions + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Austin, S M + Michigan State U., NSCL + Michigan State U., JINA + + + Bazin, D + Michigan State U., NSCL + + + Brown, B A + Michigan State U., NSCL + Michigan U. + Michigan State U., JINA + + + Caesar, C + Michigan State U., NSCL + Mainz U. + + + Deaven, J M + Michigan State U., NSCL + Michigan U. + Michigan State U., JINA + + + Herlitzius, C + Michigan State U., NSCL + Mainz U. + + + Hitt, G W + Michigan State U., NSCL + Michigan U. + Michigan State U., JINA + + + Meharchand, R T + Michigan State U., NSCL + Michigan U. + Michigan State U., JINA + + + Perdikakis, G + Michigan State U., NSCL + Michigan State U., JINA + + + Shimbara, Y + Niigata U., Grad. Sch. Sci. Tech. + + + Tur, C + Michigan State U., NSCL + Michigan State U., JINA + + + Zegers, R G T + Michigan State U., NSCL + Michigan U. + Michigan State U., JINA + + + 104 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203280/files/NIC20X_104.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 104 + mackinacisland20080727 + + + PUBLIC + + + 002844587CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12032812017-11-16T08:09:55Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203281 + SzGeCERN + 20171116090955.0 + + oai:cds.cern.ch:1203281 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509596 + + + eng + + + Jachowicz, N + Ghent U. + + + Untangling supernova-neutrino oscillations with beta-beam data + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + McLaughlin, G C + North Carolina State U. + + + Volpe, C + Orsay, IPN + + + 107 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203281/files/NIC20X_107.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 107 + mackinacisland20080727 + + + PUBLIC + + + 002844588CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033612017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203361 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203361 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509597 + + + eng + + + Kawagoe, S + Tokyo U. + + + Neutrino oscillations in non-spherical supernova explosions + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Takiwaki, T + Tokyo U. + + + Kotake, K + Natl. Astron. Observ. of Japan + + + 109 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203361/files/NIC20X_109.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 109 + mackinacisland20080727 + + + PUBLIC + + + 002844668CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033622017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203362 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203362 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509598 + + + eng + + + Nakazato, K + Waseda U. + + + Neutrino Emission from Stellar Collapse including Hadron-Quark Mixed Phase + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Sumiyoshi, K + Numazu Coll. Tech. + + + Yamada, s + Waseda U. + + + 116 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203362/files/NIC20X_116.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 116 + mackinacisland20080727 + + + PUBLIC + + + 002844669CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033632017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203363 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203363 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509599 + + + eng + + + Sumiyoshi, K + Numazu Coll. Tech. + + + Short neutrino burst from failed supernovae as a probe of dense matter with hyperon mixture + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Ishizuka, C + Hokkaido U. + + + Ohnishi, A + Kyoto U., Yukawa Inst., Kyoto + + + Yamada, S + Waseda U. + + + Suzuki, H + Tokyo U. of Sci. + + + 122 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203363/files/NICX_122.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 122 + mackinacisland20080727 + + + PUBLIC + + + 002844670CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033642017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203364 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203364 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509600 + + + eng + + + Suzuki, T + Tokyo U. + + + Neutrino Nucleus Reactions and Nucleosynthesis in Stars + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Yoshida, T + Natl. Astron. Observ. of Japan + + + Chiba, S + JAEA, Ibaraki + + + Honma, M + Aizu U. + + + Higashiyama, K + Chiba Inst. Tech. + + + Umeda, H + Tokyo U. + + + Nomoto, K + Tokyo U. + + + Kajino, T + Tokyo U. + Natl. Astron. Observ. of Japan + + + Otsuka, T + Tokyo U. + + + 123 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203364/files/NICX_123.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 123 + mackinacisland20080727 + + + PUBLIC + + + 002844671CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033652017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203365 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203365 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509601 + + + eng + + + Whitehouse, S + Basel U. + + + Neutrino transport in 3D simulations of core-collapse supernovae + + + A new approach to neutrino transport in 3D simulations of core-collapse supernovae + Other title + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Liebendörfer, M + Basel U. + + + 243 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203365/files/NICX_243.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 243 + mackinacisland20080727 + + + PUBLIC + + + 002844672CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033662017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203366 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203366 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509602 + + + eng + + + Arcones, A + Damstadt, Tech. Hochsch. + Darmstadt, GSI + + + Neutrino-driven winds and nucleosynthesis + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Martínez-Pinedo, G + Darmstadt, GSI + + + Schwenk, A + TRIUMF + + + O’Connor, E + TRIUMF + Caltech + + + Langanke, K + Damstadt, Tech. Hochsch. + Darmstadt, GSI + + + Horowitz, C J + Indiana U. + + + Janka, H T + Garching, Max Planck Inst. + + + 128 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203366/files/NIC20X_128.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 128 + mackinacisland20080727 + + + PUBLIC + + + 002844673CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033672017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203367 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203367 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509603 + + + eng + + + Roberts, L + UC, Santa Cruz, Astron. Astrophys. + + + Nucleosynthesis in the Neutrino Driven Wind of Protoneutron Stars + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Woosley, S + UC, Santa Cruz, Astron. Astrophys. + + + Heger, A + Minnesota U. + + + Hoffman, R + LLNL, Livermore + + + 146 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203367/files/NICX_146.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 146 + mackinacisland20080727 + + + PUBLIC + + + 002844674CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033692017-11-16T08:09:58Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203369 + SzGeCERN + 20171116090958.0 + + oai:cds.cern.ch:1203369 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509604 + + + eng + + + Kojima, K + Tokyo U. + Natl. Astron. Observ. of Japan + + + Neutrino effect in cosmology with the primordial magnetic field + + + Neutrino effects in cosmology with A primordial magnetic field + Other title + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Ichiki, K + Nagoya U. + + + Kajino, T + Tokyo U. + Natl. Astron. Observ. of Japan + + + Mathews, G J + Notre Dame U. + Natl. Astron. Observ. of Japan + + + 226 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203369/files/NICX_226.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 226 + mackinacisland20080727 + + + PUBLIC + + + 002844676CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
oai:cds.cern.ch:12033702017-11-16T08:09:47Zcerncds:FULLTEXTforINSPIRE
+ 00000coc 2200000uu 4500 + 1203370 + SzGeCERN + 20171116090947.0 + + oai:cds.cern.ch:1203370 + cerncds:FULLTEXT + forINSPIRE + + + Inspire + 1509605 + + + eng + + + Yamazaki, D G + Natl. Astron. Observ. of Japan + + + A Strong Constraint on the Neutrino Mass from the Formation of Large Scale Structure in the Presence of the Primordial Magnetic Field + + + 2008 + + + Open Access + CC-BY-NC-SA-3.0 + http://creativecommons.org/licenses/by-nc-sa/3.0/ + + + SIS POS NIC X-2009 + + + Inspire + + + SzGeCERN + Astrophysics and Astronomy + + + ARTICLE + + + Ichiki, K + Tokyo U. + + + Kajino, T + Natl. Astron. Observ. of Japan + + + Mathews, G J + Notre Dame U. + + + 239 + PoS + NIC X + 2008 + + + http://cds.cern.ch/record/1203370/files/NICX_239.pdf + Published version from PoS + + + n + 200933 + + + 13 + + + 20110201 + 1448 + CER01 + 20090827 + + + 1024674 + 239 + mackinacisland20080727 + + + PUBLIC + + + 002844677CER + + + ARTICLE + + + ConferencePaper + + + Hidden + + +
+
+ diff --git a/tests/functional/cds/test_cds.py b/tests/functional/cds/test_cds.py index 38f41c2e..a864d534 100644 --- a/tests/functional/cds/test_cds.py +++ b/tests/functional/cds/test_cds.py @@ -9,84 +9,97 @@ """Functional tests for CDS spider""" -import pytest -import requests_mock +from __future__ import absolute_import, division, print_function -import copy -import json -from scrapy.crawler import CrawlerProcess -from scrapy.utils.project import get_project_settings -from tempfile import NamedTemporaryFile +import os +import pytest +from hepcrawl.testlib.celery_monitor import CeleryMonitor from hepcrawl.testlib.fixtures import ( get_test_suite_path, expected_json_results_from_file, + clean_dir, ) +from hepcrawl.testlib.tasks import app as celery_app +from hepcrawl.testlib.utils import get_crawler_instance -@pytest.fixture -def cds_oai_server(): - with requests_mock.Mocker() as m: - m.get('http://cds.cern.ch/oai2d?from=2017-11-15&verb=ListRecords&set=forINSPIRE&metadataPrefix=marcxml', - text=open(get_test_suite_path('cds', 'fixtures', 'cds.xml', test_suite='functional')).read()) - yield m - - -def override_dynamic_fields_on_records(records): - clean_records = [] - for record in records: - clean_record = override_dynamic_fields_on_record(record) - clean_records.append(clean_record) - - return clean_records +@pytest.fixture(scope='function', autouse=True) +def cleanup(): + clean_dir() + clean_dir(path=os.path.join(os.getcwd(), '.scrapy')) + yield + clean_dir() + clean_dir(path=os.path.join(os.getcwd(), '.scrapy')) -def override_dynamic_fields_on_record(record): - def _override(field_key, original_dict, backup_dict, new_value): - backup_dict[field_key] = original_dict[field_key] - original_dict[field_key] = new_value - - clean_record = copy.deepcopy(record) - overriden_fields = {} - dummy_random_date = u'2017-04-03T10:26:40.365216' - - overriden_fields['acquisition_source'] = {} - _override( - field_key='datetime', - original_dict=clean_record['acquisition_source'], - backup_dict=overriden_fields['acquisition_source'], - new_value=dummy_random_date, +def override_generated_fields(record): + record['acquisition_source']['datetime'] = u'2017-04-03T10:26:40.365216' + record['acquisition_source']['submission_number'] = ( + u'5652c7f6190f11e79e8000224dabeaad' ) - _override( - field_key='submission_number', - original_dict=clean_record['acquisition_source'], - backup_dict=overriden_fields['acquisition_source'], - new_value=u'5652c7f6190f11e79e8000224dabeaad', - ) - - return clean_record - -def test_cds(cds_oai_server): - f = NamedTemporaryFile('r+') - - settings = get_project_settings() - settings.set('FEED_FORMAT', 'json') - settings.set('FEED_URI', f.name) - - process = CrawlerProcess(settings) - process.crawl('CDS', from_date='2017-11-15', oai_set='forINSPIRE') - process.start() - - result = json.load(f) - - expected = expected_json_results_from_file( - 'cds', 'fixtures', 'cds_expected.json' + return record + + +def get_configuration(): + return { + 'CRAWLER_HOST_URL': 'http://scrapyd:6800', + 'CRAWLER_PROJECT': 'hepcrawl', + 'CRAWLER_ARGUMENTS': { + 'from_date': '2017-11-15', + 'oai_set': 'forINSPIRE', + 'oai_endpoint': 'http://cds-http-server.local/oai2d', + } + } + + +@pytest.mark.parametrize( + 'expected_results, config', + [ + ( + expected_json_results_from_file( + 'cds', + 'fixtures', + 'cds_expected.json', + ), + get_configuration(), + ), + ], + ids=[ + 'smoke', + ] +) +def test_cds( + expected_results, + config, +): + crawler = get_crawler_instance(config['CRAWLER_HOST_URL']) + + results = CeleryMonitor.do_crawl( + app=celery_app, + monitor_timeout=5, + monitor_iter_limit=100, + events_limit=2, + crawler_instance=crawler, + project=config['CRAWLER_PROJECT'], + spider='CDS', + settings={}, + **config['CRAWLER_ARGUMENTS'] ) - expected = override_dynamic_fields_on_records(expected) - result = override_dynamic_fields_on_records(result) + gotten_results = [override_generated_fields(result) for result in results] + expected_results = [ + override_generated_fields(expected) for expected in expected_results + ] - assert result == expected + gotten_results = sorted( + gotten_results, + key=lambda x: x['document_type'] + ) + expected_results = sorted( + expected_results, + key=lambda x: x['document_type'] + ) - f.close() + assert gotten_results == expected_results