Skip to content

Commit

Permalink
Merge pull request #195 from szymonlopaciuk/bump_schema_dojson
Browse files (browse the repository at this point in the history)
bump inspire-schemas~=56.0, inspire-dojson~=57.0

Sem-Ver: breaking change
  • Loading branch information
david-caro authored Dec 5, 2017
2 parents 2346fa5 + 8672838 commit 9eb3c02
Show file tree
Hide file tree
Showing 13 changed files with 34 additions and 26 deletions.
5 changes: 4 additions & 1 deletion hepcrawl/parsers/jats.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,10 @@ def get_author_affiliations(self, author_node):
for raw_referred_id in raw_referred_ids:
referred_ids.update(set(raw_referred_id.split(' ')))

affiliations = [self.get_affiliation(rid) for rid in referred_ids]
affiliations = [
self.get_affiliation(rid) for rid in referred_ids
if self.get_affiliation(rid)
]

return affiliations

Expand Down
6 changes: 6 additions & 0 deletions hepcrawl/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@
# Allow duplicate requests
DUPEFILTER_CLASS = "scrapy.dupefilters.BaseDupeFilter"

# URI base prefix for $schema to be used during record generation
SCHEMA_BASE_URI = os.environ.get(
'APP_SCHEMA_BASE_URI',
'http://localhost/schemas/records/'
)

# Configure maximum concurrent requests performed by Scrapy (default: 16)
# CONCURRENT_REQUESTS=32

Expand Down
2 changes: 2 additions & 0 deletions hepcrawl/spiders/cds_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ def parse_node(self, response, node):
marcxml_record = record_xml_output(inspire_bibrec)
record = create_record(marcxml_record)
json_record = hep.do(record)
base_uri = self.settings['SCHEMA_BASE_URI']
json_record['$schema'] = base_uri + 'hep.json'
parsed_item = ParsedItem(
record=json_record,
record_format='hep',
Expand Down
2 changes: 2 additions & 0 deletions hepcrawl/spiders/desy_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ def _create_json_record(xml_record):
)
with app.app_context():
dojson_record = hep.do(object_record)
base_uri = self.settings['SCHEMA_BASE_URI']
dojson_record['$schema'] = base_uri + 'hep.json'

return dojson_record

Expand Down
5 changes: 3 additions & 2 deletions hepcrawl/spiders/pos_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import re
import os
from urlparse import urljoin
from six.moves.urllib.parse import quote

from scrapy import Request, Selector

Expand Down Expand Up @@ -335,8 +336,8 @@ def get_documents(path):
return [
{
'key': os.path.basename(path),
'url': path,
'original_url': path,
'url': quote(path, safe=':/'),
'original_url': quote(path, safe=':/'),
'hidden': True,
'fulltext': True,
},
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

install_requires = [
'autosemver~=0.2',
'inspire-schemas~=53.0',
'inspire-dojson~=53.0',
'inspire-schemas~=56.0',
'inspire-dojson~=57.0',
'inspire-utils~=0.0,>=0.0.13',
'Scrapy>=1.1.0',
'scrapy-crawl-once~=0.1,>=0.1.1',
Expand Down
6 changes: 2 additions & 4 deletions tests/functional/cds/fixtures/cds_smoke_records_expected.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[
{
"$schema": "hep.json",
"$schema": "http://localhost/schemas/records/hep.json",
"_collections": [
"Literature"
],
Expand Down Expand Up @@ -49,7 +49,6 @@
],
"inspire_categories": [
{
"source": "undefined",
"term": "Accelerators"
}
],
Expand Down Expand Up @@ -78,7 +77,7 @@
]
},
{
"$schema": "hep.json",
"$schema": "http://localhost/schemas/records/hep.json",
"_collections": [
"Literature"
],
Expand Down Expand Up @@ -124,7 +123,6 @@
],
"inspire_categories": [
{
"source": "undefined",
"term": "Accelerators"
}
],
Expand Down
6 changes: 2 additions & 4 deletions tests/functional/desy/fixtures/desy_records_ftp_expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@
],
"inspire_categories": [
{
"source": "undefined",
"term": "Experiment-HEP"
}
],
Expand Down Expand Up @@ -194,7 +193,7 @@
"year": 2017
}
],
"$schema": "hep.json",
"$schema": "http://localhost/schemas/records/hep.json",
"document_type": [
"thesis"
],
Expand Down Expand Up @@ -252,7 +251,6 @@
"number_of_pages": 141,
"inspire_categories": [
{
"source": "undefined",
"term": "Theory-HEP"
}
],
Expand Down Expand Up @@ -436,7 +434,7 @@
"year": 2017
}
],
"$schema": "hep.json",
"$schema": "http://localhost/schemas/records/hep.json",
"document_type": [
"thesis"
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@
],
"inspire_categories": [
{
"source": "undefined",
"term": "Experiment-HEP"
}
],
Expand Down Expand Up @@ -194,7 +193,7 @@
"year": 2017
}
],
"$schema": "hep.json",
"$schema": "http://localhost/schemas/records/hep.json",
"document_type": [
"thesis"
],
Expand Down Expand Up @@ -252,7 +251,6 @@
"number_of_pages": 141,
"inspire_categories": [
{
"source": "undefined",
"term": "Theory-HEP"
}
],
Expand Down Expand Up @@ -436,7 +434,7 @@
"year": 2017
}
],
"$schema": "hep.json",
"$schema": "http://localhost/schemas/records/hep.json",
"document_type": [
"thesis"
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@
],
"inspire_categories": [
{
"source": "undefined",
"term": "Experiment-HEP"
}
],
Expand Down Expand Up @@ -194,7 +193,7 @@
"year": 2017
}
],
"$schema": "hep.json",
"$schema": "http://localhost/schemas/records/hep.json",
"document_type": [
"thesis"
],
Expand Down Expand Up @@ -252,7 +251,6 @@
"number_of_pages": 141,
"inspire_categories": [
{
"source": "undefined",
"term": "Theory-HEP"
}
],
Expand Down Expand Up @@ -436,7 +434,7 @@
"year": 2017
}
],
"$schema": "hep.json",
"$schema": "http://localhost/schemas/records/hep.json",
"document_type": [
"thesis"
],
Expand Down
3 changes: 1 addition & 2 deletions tests/unit/responses/desy/desy_record_expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@
],
"inspire_categories": [
{
"source": "undefined",
"term": "Experiment-HEP"
}
],
Expand Down Expand Up @@ -193,7 +192,7 @@
"year": 2017
}
],
"$schema": "hep.json",
"$schema": "http://localhost/schemas/records/hep.json",
"document_type": [
"thesis"
],
Expand Down
5 changes: 4 additions & 1 deletion tests/unit/test_desy.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
expected_json_results_from_file,
fake_response_from_file,
)
from hepcrawl.testlib.utils import deep_sort


def create_spider():
Expand Down Expand Up @@ -110,4 +111,6 @@ def test_pipeline(generated_records, expected_records):
override_generated_fields(generated_record)
for generated_record in generated_records
]
assert clean_generated_records == expected_records
sorted_generated_records = deep_sort(clean_generated_records)
sorted_expected_records = deep_sort(expected_records)
assert sorted_generated_records == sorted_expected_records
4 changes: 2 additions & 2 deletions tests/unit/test_pos.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,8 @@ def test_pipeline_conference_paper(generated_conference_paper):
'key': 'LATTICE 2013_001.pdf',
'fulltext': True,
'hidden': True,
'url': u'https://pos.sissa.it/archive/conferences/187/001/LATTICE 2013_001.pdf',
'original_url': u'https://pos.sissa.it/archive/conferences/187/001/LATTICE 2013_001.pdf',
'url': u'https://pos.sissa.it/archive/conferences/187/001/LATTICE%202013_001.pdf',
'original_url': u'https://pos.sissa.it/archive/conferences/187/001/LATTICE%202013_001.pdf',
'source': 'pos',
}
],
Expand Down

0 comments on commit 9eb3c02

Please sign in to comment.