diff --git a/inspire_utils/config.py b/inspire_utils/config.py
index e323fad..66d162f 100644
--- a/inspire_utils/config.py
+++ b/inspire_utils/config.py
@@ -19,7 +19,6 @@
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.
-
"""INSPIRE configuration loader.
Inspired by the Flask configuration loader:
@@ -47,10 +46,7 @@ def __init__(self, file_path, cause):
cause (string): reason of failure, i.e. what exactly was the
problem while parsing
"""
- message = six.text_type("Malformed config at {}: {}").format(
- file_path,
- cause
- )
+ message = six.text_type("Malformed config at {}: {}").format(file_path, cause)
super(MalformedConfig, self).__init__(message)
diff --git a/inspire_utils/date.py b/inspire_utils/date.py
index 4216579..70ad648 100644
--- a/inspire_utils/date.py
+++ b/inspire_utils/date.py
@@ -19,7 +19,6 @@
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.
-
"""Utils to handle dates in INSPIRE."""
from __future__ import absolute_import, division, print_function
@@ -48,13 +47,18 @@ class PartialDate(object):
Raises:
TypeError: when the date parts are not `int` s or `None`.
ValueError: when the date is not valid.
-
"""
+
def __init__(self, year, month=None, day=None):
- well_typed = all(isinstance(part, int) or part is None for part in (year, month, day))
+ well_typed = all(
+ isinstance(part, int) or part is None for part in (year, month, day)
+ )
if not well_typed:
- raise TypeError(u'arguments to {classname} must be of type int or None'.format(
- classname=type(self).__name__))
+ raise TypeError(
+ u'arguments to {classname} must be of type int or None'.format(
+ classname=type(self).__name__
+ )
+ )
if year is None or year < 1000:
raise ValueError('year must be an int >= 1000')
if day and not month:
@@ -68,10 +72,18 @@ def __init__(self, year, month=None, day=None):
self.day = day
def __repr__(self):
- return u'PartialDate(year={self.year}, month={self.month}, day={self.day})'.format(self=self)
+ return (
+ u'PartialDate(year={self.year}, month={self.month}, day={self.day})'.format(
+ self=self
+ )
+ )
def __eq__(self, other):
- return self.year == other.year and self.month == other.month and self.day == other.day
+ return (
+ self.year == other.year
+ and self.month == other.month
+ and self.day == other.day
+ )
def __lt__(self, other):
self_month = self.month or 99
@@ -97,7 +109,6 @@ def loads(cls, string):
Traceback (most recent call last):
...
ValueError: month must be in 1..12
-
"""
date_parts = string.split('-')
@@ -116,7 +127,6 @@ def dumps(self):
Returns:
str: normalized date, in the form ``YYYY-MM-DD``, ``YYYY-MM`` or
``YYYY`` (depending on the information present in the date)
-
"""
non_empty = itertools.takewhile(bool, (self.year, self.month, self.day))
# XXX: this only handles dates after 1000, which should be sufficient
@@ -147,7 +157,6 @@ def parse(cls, date, **kwargs):
Examples:
>>> PartialDate.parse('30 Jun 1686')
PartialDate(year=1686, month=6, day=30)
-
"""
# In order to detect partial dates, parse twice with different defaults
# and compare the results.
@@ -180,7 +189,6 @@ def from_parts(cls, year, month=None, day=None):
Examples:
>>> PartialDate.from_parts('1686', 'June', '30')
PartialDate(year=1686, month=6, day=30)
-
"""
# XXX: 0 is not a valid year/month/day
non_empty = itertools.takewhile(
@@ -194,13 +202,18 @@ def pprint(self):
Examples:
>>> PartialDate(1686, 6, 30).pprint()
u'Jun 30, 1686'
-
"""
if not self.month:
- return dates.format_date(datetime.date(self.year, 1, 1), 'yyyy', locale='en')
+ return dates.format_date(
+ datetime.date(self.year, 1, 1), 'yyyy', locale='en'
+ )
if not self.day:
- return dates.format_date(datetime.date(self.year, self.month, 1), 'MMM, yyyy', locale='en')
- return dates.format_date(datetime.date(self.year, self.month, self.day), 'MMM d, yyyy', locale='en')
+ return dates.format_date(
+ datetime.date(self.year, self.month, 1), 'MMM, yyyy', locale='en'
+ )
+ return dates.format_date(
+ datetime.date(self.year, self.month, self.day), 'MMM d, yyyy', locale='en'
+ )
def normalize_date(date, **kwargs):
@@ -232,7 +245,6 @@ def normalize_date(date, **kwargs):
>>> normalize_date(None)
>>> normalize_date('30 Jun 1686')
'1686-06-30'
-
"""
if date is None:
return
@@ -265,7 +277,10 @@ def earliest_date(dates):
def fill_missing_date_parts(date):
- """Sets missing day and/or month to 1. Useful to avoid errors when saving to DB."""
+ """Sets missing day and/or month to 1.
+
+ Useful to avoid errors when saving to DB.
+ """
if date is None:
return
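
Note on the `__lt__` hunk above: it keeps the trick of substituting 99 for missing date parts, so that within the same year fully specified dates sort before partial ones. A minimal standalone sketch of that ordering rule (the names here are illustrative, not the library's):

```python
from collections import namedtuple

Partial = namedtuple('Partial', ['year', 'month', 'day'])

def sort_key(date):
    # substitute 99 for missing parts, mirroring PartialDate.__lt__
    return (date.year, date.month or 99, date.day or 99)

dates = [Partial(1686, None, None), Partial(1686, 6, None), Partial(1686, 6, 30)]
print(sorted(dates, key=sort_key))
# [Partial(year=1686, month=6, day=30), Partial(year=1686, month=6, day=None),
#  Partial(year=1686, month=None, day=None)]
```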
diff --git a/inspire_utils/dedupers.py b/inspire_utils/dedupers.py
index d15edbb..f559a85 100644
--- a/inspire_utils/dedupers.py
+++ b/inspire_utils/dedupers.py
@@ -28,8 +28,8 @@
def dedupe_list(list_with_duplicates):
"""Remove duplicates from a list preserving the order.
- We might be tempted to use the list(set(l)) idiom, but it doesn't preserve
- the order, which hinders testability.
+ We might be tempted to use the list(set(l)) idiom, but it doesn't
+ preserve the order, which hinders testability.
"""
result = []
@@ -43,9 +43,11 @@ def dedupe_list(list_with_duplicates):
def dedupe_list_of_dicts(ld):
"""Remove duplicates from a list of dictionaries preserving the order.
- We can't use the generic list helper because a dictionary isn't hashable.
- Adapted from http://stackoverflow.com/a/9427216/374865.
+ We can't use the generic list helper because a dictionary isn't
+ hashable. Adapted from
+ http://stackoverflow.com/a/9427216/374865.
"""
+
def _freeze(o):
"""Recursively freezes a dict into an hashable object.
diff --git a/inspire_utils/helpers.py b/inspire_utils/helpers.py
index 36078c7..260caa2 100644
--- a/inspire_utils/helpers.py
+++ b/inspire_utils/helpers.py
@@ -58,7 +58,6 @@ def force_list(data):
['foo', 'bar']
>>> force_list(['foo', 'bar', 'baz'])
['foo', 'bar', 'baz']
-
"""
if data is None:
return []
@@ -81,7 +80,6 @@ def maybe_float(el):
Examples:
>>> maybe_float('35.0499505')
35.0499505
-
"""
try:
return float(el)
@@ -101,7 +99,6 @@ def maybe_int(el):
Examples:
>>> maybe_int('10')
10
-
"""
try:
return int(el)
@@ -132,7 +129,8 @@ def remove_tags(dirty, allowed_tags=(), allowed_trees=(), strip=None):
removed.
Examples:
-        >>> tag = '<div><b>Only this text remains.</b><em class="hidden">Not this one.</em></div>'
+        >>> tag = ('<div><b>Only this text remains.</b>'
+        ...        '<em class="hidden">Not this one.</em></div>')
        >>> remove_tags(tag, allowed_trees=('b',), strip='@class="hidden"')
        u'<b>Only this text remains.</b>'
        >>> remove_tags(tag, allowed_tags=('b',), strip='@class="hidden"')
@@ -141,7 +139,9 @@ def remove_tags(dirty, allowed_tags=(), allowed_trees=(), strip=None):
u'Only this text remains.'
"""
if isinstance(dirty, six.string_types):
-        element = etree.fromstring(u''.join(('<div>', dirty, '</div>')))
+        element = etree.fromstring(
+            u''.join(('<div>', dirty, '</div>'))
+        )
elif isinstance(dirty, etree._Element):
element = dirty
else: # assuming scrapy Selector
@@ -156,7 +156,9 @@ def remove_tags(dirty, allowed_tags=(), allowed_trees=(), strip=None):
return tail
subtext = u''.join(
- remove_tags(child, allowed_tags=allowed_tags, allowed_trees=allowed_trees, strip=strip)
+ remove_tags(
+ child, allowed_tags=allowed_tags, allowed_trees=allowed_trees, strip=strip
+ )
for child in element
)
text = element.text or u''
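
For context on the wrapper tags restored in the `fromstring` call above: `etree.fromstring` needs a single root element, while callers may pass a bare markup fragment, so `remove_tags` wraps the input in an artificial `<div>`. A small demonstration, assuming lxml is available:

```python
from lxml import etree

fragment = u'<b>kept</b><em class="hidden">dropped</em>'
# an artificial root makes any well-formed fragment parseable
root = etree.fromstring(u''.join((u'<div>', fragment, u'</div>')))
print(root.tag, len(root))  # div 2
```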
diff --git a/inspire_utils/logging.py b/inspire_utils/logging.py
index da252ae..16ef9be 100644
--- a/inspire_utils/logging.py
+++ b/inspire_utils/logging.py
@@ -36,13 +36,15 @@ def __getattr__(self, item):
def error(self, message, *args, **kwargs):
"""Log error with stack trace and locals information.
- By default, enables stack trace information in logging messages, so that stacktrace and locals appear in Sentry.
+ By default, enables stack trace information in logging messages,
+ so that stacktrace and locals appear in Sentry.
"""
kwargs.setdefault('extra', {}).setdefault('stack', True)
return self.logger.error(message, *args, **kwargs)
def getStackTraceLogger(*args, **kwargs):
- """Returns a :class:`StackTrace` logger that wraps a Python logger instance."""
+ """Returns a :class:`StackTrace` logger that wraps a Python logger
+ instance."""
logger = logging.getLogger(*args, **kwargs)
return StackTraceLogger(logger)
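
The class touched here is a thin delegation wrapper around a stdlib logger; a condensed sketch of its behavior (illustrative names, not the module's code): unknown attributes pass through to the wrapped logger, while `error()` injects `extra={'stack': True}` so Sentry captures the stack trace and locals.

```python
import logging

class StackTraceLoggerSketch(object):
    def __init__(self, logger):
        self.logger = logger

    def __getattr__(self, item):
        # everything except error() is delegated untouched
        return getattr(self.logger, item)

    def error(self, message, *args, **kwargs):
        # inject stack info so Sentry records the trace and locals
        kwargs.setdefault('extra', {}).setdefault('stack', True)
        return self.logger.error(message, *args, **kwargs)

log = StackTraceLoggerSketch(logging.getLogger(__name__))
log.error('something failed')
```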
diff --git a/inspire_utils/name.py b/inspire_utils/name.py
index 9fe5624..b883d08 100644
--- a/inspire_utils/name.py
+++ b/inspire_utils/name.py
@@ -38,20 +38,45 @@
_LASTNAME_NON_LASTNAME_SEPARATORS = [u' ', u', ']
_NAMES_MAX_NUMBER_THRESHOLD = 5
-"""Threshold for skipping the combinatorial expansion of names (when generating name variations). """
+"""Threshold for skipping the combinatorial expansion of names (when generating
+name variations)."""
def _prepare_nameparser_constants():
"""Prepare nameparser Constants.
- Remove nameparser's titles and use our own and add as suffixes the roman numerals.
- Configuration is the same for all names (i.e. instances).
+    Remove nameparser's titles, use our own, and add the roman numerals
+    as suffixes. Configuration is the same for all names (i.e.
+    instances).
"""
constants = Constants()
- roman_numeral_suffixes = [u'vi', u'vii', u'viii', u'ix', u'x', u'xi',
- u'xii', u'xiii', u'xiv', u'xv']
- titles = [u'Dr', u'Prof', u'Professor', u'Sir', u'Editor', u'Ed', u'Mr',
- u'Mrs', u'Ms', u'Chair', u'Co-Chair', u'Chairs', u'co-Chairs']
+ roman_numeral_suffixes = [
+ u'vi',
+ u'vii',
+ u'viii',
+ u'ix',
+ u'x',
+ u'xi',
+ u'xii',
+ u'xiii',
+ u'xiv',
+ u'xv',
+ ]
+ titles = [
+ u'Dr',
+ u'Prof',
+ u'Professor',
+ u'Sir',
+ u'Editor',
+ u'Ed',
+ u'Mr',
+ u'Mrs',
+ u'Ms',
+ u'Chair',
+ u'Co-Chair',
+ u'Chairs',
+ u'co-Chairs',
+ ]
constants.titles.remove(*constants.titles).add(*titles)
constants.suffix_not_acronyms.add(*roman_numeral_suffixes)
constants.suffixes_prefixes_titles.remove(*constants.suffixes_prefixes_titles)
@@ -65,15 +90,18 @@ class ParsedName(object):
After construction, the instance exposes the fields exposed by `HumanName` instance, i.e.
`title`, `first`, `middle`, `last`, `suffix`.
"""
+
constants = _prepare_nameparser_constants()
- """The default constants configuration for `HumanName` to use for parsing all names."""
+ """The default constants configuration for `HumanName` to use for parsing
+ all names."""
def __init__(self, name, constants=None, without_titles=False):
"""Create a ParsedName instance.
Args:
name (Union[str, HumanName]): The name to be parsed (must be non empty nor None).
- constants (:class:`nameparser.config.Constants`): Configuration for `HumanName` instantiation.
+ constants (:class:`nameparser.config.Constants`): Configuration
+ for `HumanName` instantiation.
(Can be None, if provided it overwrites the default one generated in
:method:`prepare_nameparser_constants`.)
"""
@@ -93,7 +121,9 @@ def __init__(self, name, constants=None, without_titles=False):
self._parsed_name.capitalize()
- if ',' not in name and (not self.first_list or (self.first_list and '.' not in self.first_list[-1])):
+ if ',' not in name and (
+ not self.first_list or (self.first_list and '.' not in self.first_list[-1])
+ ):
self.maybe_only_last_name = True
else:
self.maybe_only_last_name = False
@@ -116,18 +146,26 @@ def first_initials(self):
@property
def first(self):
- name = u'{} {}'.format(self._parsed_name.first, self._parsed_name.middle).strip()
+ name = u'{} {}'.format(
+ self._parsed_name.first, self._parsed_name.middle
+ ).strip()
return name.strip('.')
@property
def first_initials_list(self):
- names_no_dash_list = itertools.chain.from_iterable(name.split("-") for name in self.first_list)
- names_split_on_dot = itertools.chain.from_iterable(name.split('.') for name in names_no_dash_list)
+ names_no_dash_list = itertools.chain.from_iterable(
+ name.split("-") for name in self.first_list
+ )
+ names_split_on_dot = itertools.chain.from_iterable(
+ name.split('.') for name in names_no_dash_list
+ )
return [(name[0] + u'.') for name in names_split_on_dot if name]
@property
def first_list(self):
- first_and_middle_names = self._parsed_name.first_list + self._parsed_name.middle_list
+ first_and_middle_names = (
+ self._parsed_name.first_list + self._parsed_name.middle_list
+ )
names = [name for name in first_and_middle_names if name and name != '.']
return names
@@ -156,9 +194,11 @@ def loads(cls, name):
ValueError: when name is empty or None.
"""
if not isinstance(name, six.string_types):
- raise TypeError(u'arguments to {classname} must be of type {string_types}'.format(
- classname=cls.__name__, string_types=repr(six.string_types)
- ))
+ raise TypeError(
+ u'arguments to {classname} must be of type {string_types}'.format(
+ classname=cls.__name__, string_types=repr(six.string_types)
+ )
+ )
if not name or name.isspace():
raise ValueError('name must not be empty')
@@ -166,12 +206,12 @@ def loads(cls, name):
def dumps(self):
"""Dump the name to string, after normalizing it."""
+
def _is_initial(author_name):
return len(author_name) == 1 or u'.' in author_name
def _ensure_dotted_initials(author_name):
- if _is_initial(author_name) \
- and u'.' not in author_name:
+ if _is_initial(author_name) and u'.' not in author_name:
seq = (author_name, u'.')
author_name = u''.join(seq)
return author_name
@@ -183,20 +223,30 @@ def _ensure_dotted_suffixes(author_suffix):
return author_suffix
def _is_roman_numeral(suffix):
- """Controls that the user's input only contains valid roman numerals"""
- valid_roman_numerals = [u'M', u'D', u'C', u'L', u'X',
- u'V', u'I', u'(', u')']
- return all(letters in valid_roman_numerals
- for letters in suffix.upper())
-
- first_and_middle_names = iter(_ensure_dotted_initials(name) for name in self.first_list)
+ """Controls that the user's input only contains valid roman
+ numerals."""
+ valid_roman_numerals = [
+ u'M',
+ u'D',
+ u'C',
+ u'L',
+ u'X',
+ u'V',
+ u'I',
+ u'(',
+ u')',
+ ]
+ return all(letters in valid_roman_numerals for letters in suffix.upper())
+
+ first_and_middle_names = iter(
+ _ensure_dotted_initials(name) for name in self.first_list
+ )
try:
prev = next(first_and_middle_names)
names_with_spaces = [prev]
except StopIteration:
- LOGGER.warning(u"Cannot process %s properly",
- self._parsed_name.original)
+ LOGGER.warning(u"Cannot process %s properly", self._parsed_name.original)
names_with_spaces = []
for name in first_and_middle_names:
@@ -213,8 +263,8 @@ def _is_roman_numeral(suffix):
suffix = _ensure_dotted_suffixes(self.suffix)
final_name = u', '.join(
- part for part in (self.last, normalized_names.strip(), suffix)
- if part)
+ part for part in (self.last, normalized_names.strip(), suffix) if part
+ )
# Replace unicode curly apostrophe to normal apostrophe.
final_name = final_name.replace(u'’', '\'')
@@ -235,27 +285,16 @@ def pprint(self, initials_only=False):
u'S. M. Lieber'
>>> ParsedName('Downey, Robert Jr.').pprint(initials_only=True)
u'R. Downey Jr.'
-
"""
last_name = self.last
suffixes = ', ' + self.suffix if self.suffix else ''
- if initials_only and last_name != u'':
- first_names = self.first_initials
- else:
- first_names = self.first
+ first_names = self.first_initials if initials_only and last_name != u'' else self.first
return u'{} {}{}'.format(first_names, last_name, suffixes).strip()
@classmethod
- def from_parts(
- cls,
- first=None,
- last=None,
- middle=None,
- suffix=None,
- title=None
- ):
+ def from_parts(cls, first=None, last=None, middle=None, suffix=None, title=None):
name = HumanName()
name.first = first
name.middle = middle
@@ -266,16 +305,19 @@ def from_parts(
def generate_es_query(self, keyword="authors"):
"""Generates a query handling specifically authors.
+
Notes:
There are three main cases:
1) ``a Smith``
This will just generate a ``match`` query on ``last_name``
2) ``a John Smith``
- This will just generate a ``match`` query on ``last_name`` and a ``prefix`` query on ``first_name``
- and a ``match`` query on the initial ``J``. This will return results from ``Smith, John`` and ``Smith, J``
+ This will just generate a ``match`` query on ``last_name`` and a ``prefix``
+ query on ``first_name`` and a ``match`` query on the initial ``J``.
+ This will return results from ``Smith, John`` and ``Smith, J``
but not from ``Smith, Jane``.
3) ``a J Smith``
- This will just generate a ``match`` query on ``last_name`` and a match query on ``first_name.initials``.
+ This will just generate a ``match`` query on ``last_name`` and a match
+ query on ``first_name.initials``.
Please note, cases such as ``J.D.`` have been properly handled by the tokenizer.
"""
nested_query = {
@@ -381,9 +423,8 @@ def normalize_name(name):
def _generate_non_lastnames_variations(non_lastnames):
"""Generate variations for all non-lastnames.
- E.g. For 'John Richard', this method generates: [
- 'John', 'J', 'Richard', 'R', 'John Richard', 'John R', 'J Richard', 'J R',
- ]
+    E.g. for 'John Richard', this method generates: ['John', 'J',
+    'Richard', 'R', 'John Richard', 'John R', 'J Richard', 'J R']
"""
if not non_lastnames:
return []
@@ -411,12 +452,18 @@ def _generate_lastnames_variations(lastnames):
if not lastnames:
return []
- split_lastnames = [split_lastname for lastname in lastnames for split_lastname in lastname.split('-')]
+ split_lastnames = [
+ split_lastname
+ for lastname in lastnames
+ for split_lastname in lastname.split('-')
+ ]
lastnames_variations = split_lastnames
if len(split_lastnames) > 1:
# Generate lastnames concatenation if there are more than one lastname after split.
- lastnames_variations.append(u' '.join([lastname for lastname in split_lastnames]))
+ lastnames_variations.append(
+ u' '.join([lastname for lastname in split_lastnames])
+ )
return lastnames_variations
@@ -431,19 +478,23 @@ def generate_name_variations(name):
list: All the name variations for the given name.
Notes:
- Uses `unidecode` for doing unicode characters transliteration to ASCII ones. This was chosen so that we can map
- both full names of authors in HEP records and user's input to the same space and thus make exact queries work.
+ Uses `unidecode` for doing unicode characters transliteration to ASCII ones.
+ This was chosen so that we can map both full names of authors in HEP records
+ and user's input to the same space and thus make exact queries work.
"""
+
def _update_name_variations_with_product(set_a, set_b):
- name_variations.update([
- unidecode((names_variation[0] +
- separator +
- names_variation[1]).strip(''.join(_LASTNAME_NON_LASTNAME_SEPARATORS))).lower()
- for names_variation
- in product(set_a, set_b)
- for separator
- in _LASTNAME_NON_LASTNAME_SEPARATORS
- ])
+ name_variations.update(
+ [
+ unidecode(
+ (names_variation[0] + separator + names_variation[1]).strip(
+ ''.join(_LASTNAME_NON_LASTNAME_SEPARATORS)
+ )
+ ).lower()
+ for names_variation in product(set_a, set_b)
+ for separator in _LASTNAME_NON_LASTNAME_SEPARATORS
+ ]
+ )
parsed_name = ParsedName.loads(name)
@@ -453,27 +504,32 @@ def _update_name_variations_with_product(set_a, set_b):
name_variations = set()
- # We need to filter out empty entries, since HumanName for this name `Perelstein,, Maxim` returns a first_list with
- # an empty string element.
+ # We need to filter out empty entries, since HumanName for this name
+ # `Perelstein,, Maxim` returns a first_list with an empty string element.
non_lastnames = [
non_lastname
- for non_lastname
- in parsed_name.first_list + parsed_name.suffix_list
+ for non_lastname in parsed_name.first_list + parsed_name.suffix_list
if non_lastname
]
- # This is needed because due to erroneous data (e.g. having many authors in a single authors field) ends up
+    # This is needed because erroneous data (e.g. having many
+    # authors in a single authors field) ends up
# requiring a lot of memory (due to combinatorial expansion of all non lastnames).
# The policy is to use the input as a name variation, since this data will have to be curated.
- if len(non_lastnames) > _NAMES_MAX_NUMBER_THRESHOLD or len(parsed_name.last_list) > _NAMES_MAX_NUMBER_THRESHOLD:
- LOGGER.warning('Skipping name variations generation - too many names in: "%s"', name)
+ if (
+ len(non_lastnames) > _NAMES_MAX_NUMBER_THRESHOLD
+ or len(parsed_name.last_list) > _NAMES_MAX_NUMBER_THRESHOLD
+ ):
+ LOGGER.warning(
+ 'Skipping name variations generation - too many names in: "%s"', name
+ )
return [name]
- non_lastnames_variations = \
- _generate_non_lastnames_variations(non_lastnames)
+ non_lastnames_variations = _generate_non_lastnames_variations(non_lastnames)
lastnames_variations = _generate_lastnames_variations(parsed_name.last_list)
- # Create variations where lastnames comes first and is separated from non lastnames either by space or comma.
+ # Create variations where lastnames comes first and is separated
+ # from non lastnames either by space or comma.
_update_name_variations_with_product(lastnames_variations, non_lastnames_variations)
# Second part of transformations - having the lastnames in the end.
@@ -490,7 +546,8 @@ def format_name(name, initials_only=False, without_titles=False):
Args:
name (str): The name to format, in pretty much any format.
- initials_only (bool): ``True`` if we want the first names to be displayed with only the initial followed by a dot. ``False`` otherwise.
+ initials_only (bool): ``True`` if we want the first names to be
+ displayed with only the initial followed by a dot. ``False`` otherwise.
Examples:
>>> format_name('Lieber, Stanley Martin')
@@ -500,4 +557,8 @@ def format_name(name, initials_only=False, without_titles=False):
>>> format_name('Downey, Robert Jr.', initials_only=True)
u'R. Downey Jr.'
"""
- return ParsedName(name=name, without_titles=without_titles).loads(name).pprint(initials_only)
+ return (
+ ParsedName(name=name, without_titles=without_titles)
+ .loads(name)
+ .pprint(initials_only)
+ )
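
To make the reflowed `_update_name_variations_with_product` above easier to follow, here is a rough standalone sketch of the cross product it computes; the real code additionally transliterates with `unidecode`, and the helper name below is illustrative:

```python
from itertools import product

SEPARATORS = [u' ', u', ']

def variations_sketch(lastnames, non_lastnames):
    # cross every lastname variation with every non-lastname variation,
    # joined either by a space or by a comma
    return {
        (last + sep + first).strip(u' ,').lower()
        for last, first in product(lastnames, non_lastnames)
        for sep in SEPARATORS
    }

print(sorted(variations_sketch([u'Smith'], [u'John', u'J'])))
# ['smith j', 'smith john', 'smith, j', 'smith, john']
```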
diff --git a/inspire_utils/parsers/arxiv.py b/inspire_utils/parsers/arxiv.py
index 6c24884..68d630d 100644
--- a/inspire_utils/parsers/arxiv.py
+++ b/inspire_utils/parsers/arxiv.py
@@ -47,15 +47,11 @@
)
RE_CONFERENCE = re.compile(
- r'\b(%s)\b' % '|'.join(
- [re.escape(word) for word in CONFERENCE_WORDS]
- ),
+ r'\b(%s)\b' % '|'.join([re.escape(word) for word in CONFERENCE_WORDS]),
re.I | re.U,
)
RE_THESIS = re.compile(
- r'\b(%s)\b' % '|'.join(
- [re.escape(word) for word in THESIS_WORDS]
- ),
+ r'\b(%s)\b' % '|'.join([re.escape(word) for word in THESIS_WORDS]),
re.I | re.U,
)
RE_PAGES = re.compile(r'(?i)(\d+)\s*pages?\b')
@@ -71,18 +67,20 @@ def _handle_sqrt(node, l2tobj):
def get_arxiv_latex_context_db():
default_db = get_default_latex_context_db()
- arxiv_db = default_db.filter_context(keep_categories=["latex-base", "advanced-symbols"])
+ arxiv_db = default_db.filter_context(
+ keep_categories=["latex-base", "advanced-symbols"]
+ )
arxiv_db.add_context_category(
- "overrides",
- prepend=True,
- macros=[
- MacroTextSpec("sqrt", _handle_sqrt)
- ]
+ "overrides", prepend=True, macros=[MacroTextSpec("sqrt", _handle_sqrt)]
)
# adapted from https://github.com/phfaist/pylatexenc/issues/32
- arxiv_db.set_unknown_macro_spec(MacroTextSpec("", lambda node: node.latex_verbatim()))
- arxiv_db.set_unknown_environment_spec(EnvironmentTextSpec("", lambda node: node.latex_verbatim()))
+ arxiv_db.set_unknown_macro_spec(
+ MacroTextSpec("", lambda node: node.latex_verbatim())
+ )
+ arxiv_db.set_unknown_environment_spec(
+ EnvironmentTextSpec("", lambda node: node.latex_verbatim())
+ )
return arxiv_db
@@ -98,6 +96,7 @@ class ArxivParser(object):
source (Optional[str]): if provided, sets the ``source`` everywhere in
the record. Otherwise, the source is extracted from the arXiv metadata.
"""
+
_l2t = LatexNodes2Text(
latex_context=get_arxiv_latex_context_db(),
math_mode="verbatim",
@@ -139,8 +138,9 @@ def parse(self):
self.builder.add_arxiv_eprint(self.arxiv_eprint, self.arxiv_categories)
self.builder.add_private_note(self.private_note)
self.builder.add_document_type(self.document_type)
- normalized_categories = [classify_field(arxiv_cat)
- for arxiv_cat in self.arxiv_categories]
+ normalized_categories = [
+ classify_field(arxiv_cat) for arxiv_cat in self.arxiv_categories
+ ]
self.builder.add_inspire_categories(dedupe_list(normalized_categories), 'arxiv')
return self.builder.record
@@ -161,8 +161,13 @@ def _get_authors_and_collaborations(self, node):
# take 'for the' out of the general phrases and dont use it in
# affiliations
collab_phrases = [
- 'consortium', ' collab ', 'collaboration', ' team', 'group',
- ' on behalf of ', ' representing ',
+ 'consortium',
+ ' collab ',
+ 'collaboration',
+ ' team',
+ 'group',
+ ' on behalf of ',
+ ' representing ',
]
inst_phrases = ['institute', 'university', 'department', 'center']
@@ -172,16 +177,23 @@ def _get_authors_and_collaborations(self, node):
some_affiliation_contains_collaboration = False
authors_and_affiliations = (
- self._get_author_names_and_affiliations(author) for author in author_selectors
+ self._get_author_names_and_affiliations(author)
+ for author in author_selectors
)
next_author_and_affiliations = (
- self._get_author_names_and_affiliations(author) for author in author_selectors
+ self._get_author_names_and_affiliations(author)
+ for author in author_selectors
)
next(next_author_and_affiliations)
- for (forenames, keyname, affiliations), (next_forenames, next_keyname, _) in six.moves.zip_longest(
- authors_and_affiliations, next_author_and_affiliations,
- fillvalue=('end of author-list', '', None)
+ for (forenames, keyname, affiliations), (
+ next_forenames,
+ next_keyname,
+ _,
+ ) in six.moves.zip_longest(
+ authors_and_affiliations,
+ next_author_and_affiliations,
+ fillvalue=('end of author-list', '', None),
):
name_string = " %s %s " % (forenames, keyname)
@@ -193,9 +205,7 @@ def _get_authors_and_collaborations(self, node):
for aff in affiliations:
affiliation_contains_collaboration = any(
phrase in aff.lower() for phrase in collab_phrases
- ) and not any(
- phrase in aff.lower() for phrase in inst_phrases
- )
+ ) and not any(phrase in aff.lower() for phrase in inst_phrases)
if affiliation_contains_collaboration:
affiliations_with_collaborations.append(aff)
some_affiliation_contains_collaboration = True
@@ -214,12 +224,14 @@ def _get_authors_and_collaborations(self, node):
coll, author_name = coll_cleanforthe(name_string)
if author_name:
surname, given_names = split_fullname(author_name)
- authors.append({
- 'full_name': surname + ', ' + given_names,
- 'surname': surname,
- 'given_names': given_names,
- 'affiliations': [],
- })
+ authors.append(
+ {
+ 'full_name': surname + ', ' + given_names,
+ 'surname': surname,
+ 'given_names': given_names,
+ 'affiliations': [],
+ }
+ )
if coll and coll not in collaborations:
collaborations.append(coll)
elif name_string.strip() == ':':
@@ -228,30 +240,35 @@ def _get_authors_and_collaborations(self, node):
if not some_affiliation_contains_collaboration:
# everything up to now seems to be collaboration info
for author_info in authors:
- name_string = " %s %s " % \
- (author_info['given_names'], author_info['surname'])
+ name_string = " %s %s " % (
+ author_info['given_names'],
+ author_info['surname'],
+ )
coll, author_name = coll_cleanforthe(name_string)
if coll and coll not in collaborations:
collaborations.append(coll)
authors = []
else:
- authors.append({
- 'full_name': keyname + ', ' + forenames,
- 'surname': keyname,
- 'given_names': forenames,
- 'affiliations': affiliations_without_collaborations
- })
+ authors.append(
+ {
+ 'full_name': keyname + ', ' + forenames,
+ 'surname': keyname,
+ 'given_names': forenames,
+ 'affiliations': affiliations_without_collaborations,
+ }
+ )
if warning_tags:
- warning = 'WARNING: Colon in authors before %s: Check author list for collaboration names!' % ', '.join(warning_tags)
+ warning = (
+ 'WARNING: Colon in authors before %s: Check author list for collaboration names!'
+ % ', '.join(warning_tags)
+ )
else:
warning = ''
return authors, collaborations, warning
@staticmethod
def _get_author_names_and_affiliations(author_node):
- forenames = u' '.join(
- author_node.xpath('.//forenames//text()').extract()
- )
+ forenames = u' '.join(author_node.xpath('.//forenames//text()').extract())
keyname = u' '.join(author_node.xpath('.//keyname//text()').extract())
affiliations = author_node.xpath('.//affiliation//text()').extract()
@@ -272,8 +289,12 @@ def abstract(self):
@property
def authors(self):
authors, _, _ = self.authors_and_collaborations
- parsed_authors = [self.builder.make_author(
- full_name=auth["full_name"], raw_affiliations=auth["affiliations"]) for auth in authors]
+ parsed_authors = [
+ self.builder.make_author(
+ full_name=auth["full_name"], raw_affiliations=auth["affiliations"]
+ )
+ for auth in authors
+ ]
return parsed_authors
@@ -286,7 +307,9 @@ def collaborations(self):
@property
def dois(self):
doi_values = self.root.xpath('.//doi/text()').extract()
- doi_values_splitted = chain.from_iterable([re.split(RE_DOIS, doi) for doi in doi_values])
+ doi_values_splitted = chain.from_iterable(
+ [re.split(RE_DOIS, doi) for doi in doi_values]
+ )
dois = [
{'doi': value, 'material': 'publication'} for value in doi_values_splitted
]
@@ -328,7 +351,9 @@ def pubinfo_freetext(self):
@property
def title(self):
- long_text_fixed = self.fix_long_text(self.root.xpath('.//title/text()').extract_first())
+ long_text_fixed = self.fix_long_text(
+ self.root.xpath('.//title/text()').extract_first()
+ )
return self.latex_to_unicode(long_text_fixed)
@staticmethod
@@ -386,7 +411,9 @@ def arxiv_eprint(self):
def arxiv_categories(self):
categories = self.root.xpath('.//categories/text()').extract_first(default='[]')
categories = categories.split()
- categories_without_old = [normalize_arxiv_category(arxiv_cat) for arxiv_cat in categories]
+ categories_without_old = [
+ normalize_arxiv_category(arxiv_cat) for arxiv_cat in categories
+ ]
return dedupe_list(categories_without_old)
@@ -409,7 +436,9 @@ def source(self):
@property
def authors_and_collaborations(self):
if not hasattr(self, '_authors_and_collaborations'):
- self._authors_and_collaborations = self._get_authors_and_collaborations(self.root)
+ self._authors_and_collaborations = self._get_authors_and_collaborations(
+ self.root
+ )
return self._authors_and_collaborations
@classmethod
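
A quick illustration of the `LatexNodes2Text` configuration the parser builds above, assuming pylatexenc is installed; `math_mode="verbatim"` is what keeps inline math such as `$H_0$` intact in titles. This uses the default context rather than the parser's customized one:

```python
from pylatexenc.latex2text import LatexNodes2Text

# math kept verbatim, as in ArxivParser._l2t above
converter = LatexNodes2Text(math_mode="verbatim")
print(converter.latex_to_text(u"Measuring $H_0$ with \\emph{strong} lensing"))
# the math stays verbatim as "$H_0$"; \emph{...} is reduced to its text
```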
diff --git a/inspire_utils/parsers/author_xml.py b/inspire_utils/parsers/author_xml.py
index 9acc26f..4565f52 100644
--- a/inspire_utils/parsers/author_xml.py
+++ b/inspire_utils/parsers/author_xml.py
@@ -55,11 +55,11 @@ def parse(self):
content.remove_namespaces()
undefined_or_none_value_regex = re.compile("undefined|none", re.IGNORECASE)
undefined_or_empty_inspireid_value_regex = re.compile(
- "undefined|inspire-\s*$", re.IGNORECASE # noqa
+ "undefined|inspire-\s*$", re.IGNORECASE # noqa
)
undefined_value_regex = re.compile("undefined", re.IGNORECASE)
ror_path_value_regex = re.compile("https://ror.org/*")
- remove_new_line_regex = re.compile("\s*\n\s*") # noqa
+ remove_new_line_regex = re.compile("\s*\n\s*") # noqa
# Goes through all the authors in the file
for author in content.xpath("//Person"):
@@ -71,10 +71,12 @@ def parse(self):
# Gets all the author ids
for source, id in zip(
author.xpath(
- './authorIDs/authorID[@source!="" and text()!=""]/@source | ./authorids/authorid[@source!="" and text()!=""]/@source'
+ './authorIDs/authorID[@source!="" and text()!=""]/@source'
+ '| ./authorids/authorid[@source!="" and text()!=""]/@source'
).getall(),
author.xpath(
- './authorIDs/authorID[@source!="" and text()!=""]/text() | ./authorids/authorid[@source!="" and text()!=""]/text()'
+ './authorIDs/authorID[@source!="" and text()!=""]/text()'
+ '| ./authorids/authorid[@source!="" and text()!=""]/text()'
).getall(),
):
source = re.sub(remove_new_line_regex, "", source)
@@ -94,7 +96,8 @@ def parse(self):
"./authorAffiliations/authorAffiliation/@organizationid"
).getall():
orgName = content.xpath(
- 'string(//organizations/Organization[@id="{}"]/orgName[@source="spiresICN" or @source="INSPIRE" and text()!="" ]/text())'.format(
+                    'string(//organizations/Organization[@id="{}"]/orgName[@source="spiresICN" '
+                    'or @source="INSPIRE" and text()!="" ]/text())'.format(
affiliation
)
).get()
@@ -105,15 +108,18 @@ def parse(self):
):
affiliations.append(cleaned_org_name)
- # Gets all the affiliations_identifiers for affiliated organizations using the organization ids from author
+ # Gets all the affiliations_identifiers for affiliated organizations
+ # using the organization ids from author
for value, source in zip(
content.xpath(
- '//organizations/Organization[@id="{}"]/orgName[@source="ROR" or @source="GRID" and text()!=""]/text()'.format(
+                    '//organizations/Organization[@id="{}"]/orgName[@source="ROR" '
+                    'or @source="GRID" and text()!=""]/text()'.format(
affiliation
)
).getall(),
content.xpath(
- '//organizations/Organization[@id="{}"]/orgName[@source="ROR" or @source="GRID" and text()!=""]/@source'.format(
+                    '//organizations/Organization[@id="{}"]/orgName[@source="ROR" '
+                    'or @source="GRID" and text()!=""]/@source'.format(
affiliation
)
).getall(),
diff --git a/inspire_utils/parsers/crossref.py b/inspire_utils/parsers/crossref.py
index d732ebe..1c66071 100644
--- a/inspire_utils/parsers/crossref.py
+++ b/inspire_utils/parsers/crossref.py
@@ -66,6 +66,7 @@ class CrossrefParser(object):
source (Optional[str]): if provided, sets the ``source`` everywhere in
the record. Otherwise, the source is extracted from the Crossref metadata.
"""
+
def __init__(self, crossref_record, source=None):
self.record = crossref_record.get("message")
if not source:
@@ -115,9 +116,7 @@ def subtitle(self):
@property
def dois(self):
value = self.record.get("DOI")
- dois = [
- {'doi': value, 'material': self.material}
- ]
+ dois = [{'doi': value, 'material': self.material}]
return dois
@@ -133,7 +132,9 @@ def material(self):
material = 'erratum'
elif title.startswith("Addendum") or subtitle.startswith("Addendum"):
material = 'addendum'
- elif title.startswith("Publisher's Note") or subtitle.startswith("Publisher's Note"):
+ elif title.startswith("Publisher's Note") or subtitle.startswith(
+ "Publisher's Note"
+ ):
material = 'editorial note'
else:
material = 'publication'
@@ -238,7 +239,9 @@ def get_author(self, author_key):
affiliations = self.get_author_affiliations(author_key)
orcid = self.get_author_orcid(author_key)
- return self.builder.make_author(author_name, raw_affiliations=affiliations, ids=orcid)
+ return self.builder.make_author(
+ author_name, raw_affiliations=affiliations, ids=orcid
+ )
@property
def authors(self):
diff --git a/inspire_utils/parsers/elsevier.py b/inspire_utils/parsers/elsevier.py
index b6bd862..203fe0f 100644
--- a/inspire_utils/parsers/elsevier.py
+++ b/inspire_utils/parsers/elsevier.py
@@ -107,7 +107,8 @@ class ElsevierParser(object):
subclassed to customize its behavior.
Args:
- elsevier_record (Union[str, scrapy.selector.Selector]): the record in Elsevier format to parse.
+ elsevier_record (Union[str, scrapy.selector.Selector]): the record in Elsevier
+ format to parse.
source (Optional[str]): if provided, sets the ``source`` everywhere in
the record. Otherwise, the source is extracted from the Elsevier metadata.
"""
@@ -141,9 +142,7 @@ def parse(self):
if self.imprints_date:
self.builder.add_imprint_date(self.imprints_date)
elif self.publication_date:
- self.builder.add_imprint_date(
- self.publication_date.dumps()
- )
+ self.builder.add_imprint_date(self.publication_date.dumps())
for reference in self.references:
self.builder.add_reference(reference)
@@ -176,14 +175,17 @@ def references(self):
@property
def abstract(self):
- abstract_nodes = self.root.xpath(".//head/abstract[not(@graphical)]/abstract-sec/simple-para")
+ abstract_nodes = self.root.xpath(
+ ".//head/abstract[not(@graphical)]/abstract-sec/simple-para"
+ )
if not abstract_nodes:
return
- abstract_paragraphs = [remove_tags(
- abstract_node, **self.remove_tags_config_abstract
- ).strip("/ \n") for abstract_node in abstract_nodes]
+ abstract_paragraphs = [
+ remove_tags(abstract_node, **self.remove_tags_config_abstract).strip("/ \n")
+ for abstract_node in abstract_nodes
+ ]
abstract = ' '.join(abstract_paragraphs)
return abstract
@@ -270,9 +272,7 @@ def dois(self):
@property
def document_type(self):
doctype = None
- if self.root.xpath(
- "./*[contains(name(),'article') or self::book-review]"
- ):
+ if self.root.xpath("./*[contains(name(),'article') or self::book-review]"):
doctype = "article"
elif self.root.xpath("./*[self::book or self::simple-book]"):
doctype = "book"
@@ -428,9 +428,9 @@ def publication_info(self):
@property
def publisher(self):
- publisher = self.root.xpath("string(./RDF/Description/publisher[1])").extract_first(
- "Elsevier B.V."
- )
+ publisher = self.root.xpath(
+ "string(./RDF/Description/publisher[1])"
+ ).extract_first("Elsevier B.V.")
return publisher
@@ -443,7 +443,11 @@ def subtitle(self):
@property
def title(self):
title = self.root.xpath("./*/head/title[1]").extract_first()
- return remove_tags(title, **self.remove_tags_config_title).strip("\n") if title else None
+ return (
+ remove_tags(title, **self.remove_tags_config_title).strip("\n")
+ if title
+ else None
+ )
@property
def year(self):
@@ -544,7 +548,9 @@ def get_reference_authors(ref_node):
authors = ref_node.xpath("./contribution/authors/author")
authors_names = []
for author in authors:
- given_names = author.xpath("string(./given-name[1])").extract_first(default="")
+ given_names = author.xpath("string(./given-name[1])").extract_first(
+ default=""
+ )
last_names = author.xpath("string(./surname[1])").extract_first(default="")
authors_names.append(" ".join([given_names, last_names]).strip())
return authors_names
@@ -562,7 +568,9 @@ def get_reference_editors(ref_node):
editors = ref_node.xpath(".//editors/authors/author")
editors_names = []
for editor in editors:
- given_names = editor.xpath("string(./given-name[1])").extract_first(default="")
+ given_names = editor.xpath("string(./given-name[1])").extract_first(
+ default=""
+ )
last_names = editor.xpath("string(./surname[1])").extract_first(default="")
editors_names.append(" ".join([given_names, last_names]).strip())
return editors_names
@@ -580,13 +588,13 @@ def get_reference_pages(ref_node):
def get_reference_iter(self, ref_node):
"""Extract one reference.
- Args:
- ref_node(scrapy.selector.Selector): a selector on a single
-                reference, i.e. ``<bib-reference>``.
+ Args:
+ ref_node(scrapy.selector.Selector): a selector on a single
+            reference, i.e. ``<bib-reference>``.
- Yields:
- dict: the parsed reference, as generated by
- :class:`inspire_schemas.api.ReferenceBuilder`
+ Yields:
+ dict: the parsed reference, as generated by
+ :class:`inspire_schemas.api.ReferenceBuilder`
"""
# handle also unstructured refs
for citation_node in ref_node.xpath("./reference|./other-ref"):
@@ -599,7 +607,10 @@ def get_reference_iter(self, ref_node):
)
fields = [
- (("string(.//series/title/maintitle[1])"), builder.set_journal_title,),
+ (
+ ("string(.//series/title/maintitle[1])"),
+ builder.set_journal_title,
+ ),
(
"string(.//title[parent::edited-book|parent::book]/maintitle[1])",
builder.add_parent_title,
@@ -646,7 +657,7 @@ def get_reference_iter(self, ref_node):
"|self::label"
"|self::publisher"
"|self::doi"
- "|self::pages"
+ "|self::pages",
)
.strip("\"';,. \t\n\r")
.replace("()", "")
diff --git a/inspire_utils/parsers/jats.py b/inspire_utils/parsers/jats.py
index fbe18b6..c723f0e 100644
--- a/inspire_utils/parsers/jats.py
+++ b/inspire_utils/parsers/jats.py
@@ -35,9 +35,7 @@
from inspire_utils.helpers import maybe_int, remove_tags
from inspire_utils.utils import get_node
-JOURNAL_TITLES_MAPPING = {
- "Physics": "APS Physics"
-}
+JOURNAL_TITLES_MAPPING = {"Physics": "APS Physics"}
class JatsParser(object):
@@ -51,6 +49,7 @@ class JatsParser(object):
source (Optional[str]): if provided, sets the ``source`` everywhere in
the record. Otherwise, the source is extracted from the JATS metadata.
"""
+
def __init__(self, jats_record, source=None):
self.root = self.get_root_node(jats_record)
if not source:
@@ -104,7 +103,7 @@ def references(self):
remove_tags_config_abstract = {
'allowed_tags': ['sup', 'sub'],
'allowed_trees': ['math'],
- 'strip': 'self::pub-id|self::issn'
+ 'strip': 'self::pub-id|self::issn',
}
remove_tags_config_title = {
@@ -118,7 +117,9 @@ def abstract(self):
if not abstract_nodes:
return
- abstract = remove_tags(abstract_nodes[0], **self.remove_tags_config_abstract).strip()
+ abstract = remove_tags(
+ abstract_nodes[0], **self.remove_tags_config_abstract
+ ).strip()
return abstract
@property
@@ -129,7 +130,9 @@ def article_type(self):
@property
def artid(self):
- artid = self.root.xpath('./front/article-meta//elocation-id//text()').extract_first()
+ artid = self.root.xpath(
+ './front/article-meta//elocation-id//text()'
+ ).extract_first()
return artid
@@ -166,28 +169,34 @@ def copyright(self):
@property
def copyright_holder(self):
- copyright_holder = self.root.xpath('./front//copyright-holder/text()').extract_first()
+ copyright_holder = self.root.xpath(
+ './front//copyright-holder/text()'
+ ).extract_first()
return copyright_holder
@property
def copyright_statement(self):
- copyright_statement = self.root.xpath('./front//copyright-statement/text()').extract_first()
+ copyright_statement = self.root.xpath(
+ './front//copyright-statement/text()'
+ ).extract_first()
return copyright_statement
@property
def copyright_year(self):
- copyright_year = self.root.xpath('./front//copyright-year/text()').extract_first()
+ copyright_year = self.root.xpath(
+ './front//copyright-year/text()'
+ ).extract_first()
return maybe_int(copyright_year)
@property
def dois(self):
- doi_values = self.root.xpath('./front/article-meta//article-id[@pub-id-type="doi"]/text()').extract()
- dois = [
- {'doi': value, 'material': self.material} for value in doi_values
- ]
+ doi_values = self.root.xpath(
+ './front/article-meta//article-id[@pub-id-type="doi"]/text()'
+ ).extract()
+ dois = [{'doi': value, 'material': self.material} for value in doi_values]
if self.material != 'publication':
doi_values = self.root.xpath(
@@ -222,20 +231,26 @@ def journal_title(self):
@property
def journal_issue(self):
- journal_issue = self.root.xpath('./front/article-meta/issue/text()').extract_first()
+ journal_issue = self.root.xpath(
+ './front/article-meta/issue/text()'
+ ).extract_first()
return journal_issue
@property
def journal_volume(self):
- journal_volume = self.root.xpath('./front/article-meta/volume/text()').extract_first()
+ journal_volume = self.root.xpath(
+ './front/article-meta/volume/text()'
+ ).extract_first()
return journal_volume
@property
def keywords(self):
keyword_groups = self.root.xpath('./front//kwd-group')
- keywords = itertools.chain.from_iterable(self.get_keywords(group) for group in keyword_groups)
+ keywords = itertools.chain.from_iterable(
+ self.get_keywords(group) for group in keyword_groups
+ )
return keywords
@@ -251,7 +266,11 @@ def license(self):
@property
def license_statement(self):
- license_statement = self.root.xpath('string(./front/article-meta//license)').extract_first().strip()
+ license_statement = (
+ self.root.xpath('string(./front/article-meta//license)')
+ .extract_first()
+ .strip()
+ )
return license_statement
@@ -279,13 +298,17 @@ def material(self):
@property
def number_of_pages(self):
- number_of_pages = maybe_int(self.root.xpath('./front/article-meta//page-count/@count').extract_first())
+ number_of_pages = maybe_int(
+ self.root.xpath('./front/article-meta//page-count/@count').extract_first()
+ )
return number_of_pages
@property
def page_start(self):
- page_start = self.root.xpath('./front/article-meta/fpage/text()').extract_first()
+ page_start = self.root.xpath(
+ './front/article-meta/fpage/text()'
+ ).extract_first()
return page_start
@@ -305,9 +328,7 @@ def publication_date(self):
)
if date_nodes:
- publication_date = min(
- self.get_date(date_node) for date_node in date_nodes
- )
+ publication_date = min(self.get_date(date_node) for date_node in date_nodes)
return publication_date
@@ -382,13 +403,11 @@ def year(self):
'./front//pub-date[@pub-type="ppub"] |'
'./front//pub-date[starts-with(@date-type,"pub") and $not_online] |'
'./front//date[starts-with(@date-type,"pub") and $not_online]',
- not_online=not_online
+ not_online=not_online,
)
if date_nodes:
- year = min(
- self.get_date(date_node) for date_node in date_nodes
- ).year
+ year = min(self.get_date(date_node) for date_node in date_nodes).year
return year
@@ -401,7 +420,8 @@ def get_author_affiliations(self, author_node):
referred_ids.update(set(raw_referred_id.split(' ')))
affiliations = [
- self.get_affiliation(rid) for rid in referred_ids
+ self.get_affiliation(rid)
+ for rid in referred_ids
if self.get_affiliation(rid)
]
@@ -439,11 +459,7 @@ def _get_iso_date(iso_date_string):
@staticmethod
def _get_date_from_parts(year, month, day):
- possible_dates = [
- [year, month, day],
- [year, month],
- [year]
- ]
+ possible_dates = [[year, month, day], [year, month], [year]]
# we try different date combinations
# cause even if date part is not None
# it can raise validation error
@@ -485,10 +501,15 @@ def get_date(self, date_node):
def get_keywords(group_node):
"""Extract keywords from a keyword group."""
schema = None
- if 'pacs' in group_node.xpath('@kwd-group-type').extract_first(default='').lower():
+ if (
+ 'pacs'
+ in group_node.xpath('@kwd-group-type').extract_first(default='').lower()
+ ):
schema = 'PACS'
- keywords = (kwd.xpath('string(.)').extract_first() for kwd in group_node.xpath('.//kwd'))
+ keywords = (
+ kwd.xpath('string(.)').extract_first() for kwd in group_node.xpath('.//kwd')
+ )
keyword_dicts = ({'keyword': keyword, 'schema': schema} for keyword in keywords)
return keyword_dicts
@@ -507,10 +528,7 @@ def get_root_node(jats_record):
        scrapy.selector.Selector: a selector on the root ``<article>`` node.
node.
"""
- if isinstance(jats_record, six.string_types):
- root = get_node(jats_record)
- else:
- root = jats_record
+ root = get_node(jats_record) if isinstance(jats_record, six.string_types) else jats_record
root.remove_namespaces()
return root
@@ -531,15 +549,14 @@ def get_author(self, author_node):
orcid = self.get_orcid(author_node)
author_ids = [("ORCID", orcid)] if orcid else []
return self.builder.make_author(
- author_name,
- raw_affiliations=affiliations,
- emails=emails,
- ids=author_ids
+ author_name, raw_affiliations=affiliations, emails=emails, ids=author_ids
)
@staticmethod
def get_orcid(author_node):
- orcid = author_node.xpath('./contrib-id[@contrib-id-type="orcid"]/text()').extract_first()
+ orcid = author_node.xpath(
+ './contrib-id[@contrib-id-type="orcid"]/text()'
+ ).extract_first()
if orcid:
return normalize_orcid(orcid)
@@ -555,8 +572,7 @@ def get_reference_authors(ref_node, role):
List[str]: list of names
"""
return ref_node.xpath(
- './person-group[@person-group-type=$role]/string-name/text()',
- role=role
+ './person-group[@person-group-type=$role]/string-name/text()', role=role
).extract()
def get_reference(self, ref_node):
@@ -576,7 +592,7 @@ def get_reference(self, ref_node):
builder.add_raw_reference(
ref_node.extract().strip(),
source=self.builder.source,
- ref_format='JATS'
+ ref_format='JATS',
)
fields = [
@@ -600,10 +616,13 @@ def get_reference(self, ref_node):
(
'pub-id[@pub-id-type="other"]'
'[contains(preceding-sibling::text(),"Report No")]/text()',
- builder.add_report_number
+ builder.add_report_number,
),
('./article-title/text()', builder.add_title),
- ('../label/text()', lambda x, builder=builder: builder.set_label(x.strip('[].')))
+ (
+ '../label/text()',
+ lambda x, builder=builder: builder.set_label(x.strip('[].')),
+ ),
]
for xpath, field_handler in fields:
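
A sketch of the fallback strategy in `_get_date_from_parts` above: try the most specific combination first and degrade gracefully, because a non-None part can still be out of range (e.g. month 13). Assuming `inspire_utils` is importable; the function name below is illustrative:

```python
from inspire_utils.date import PartialDate

def date_from_parts_sketch(year, month, day):
    # try [y, m, d], then [y, m], then [y] until one validates
    for parts in ([year, month, day], [year, month], [year]):
        try:
            return PartialDate(*[int(p) for p in parts])
        except (TypeError, ValueError):
            continue

print(date_from_parts_sketch('2019', '13', '1').dumps())  # '2019'
```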
diff --git a/inspire_utils/query.py b/inspire_utils/query.py
index 75c7996..a1398c6 100644
--- a/inspire_utils/query.py
+++ b/inspire_utils/query.py
@@ -26,15 +26,18 @@ def wrap_queries_in_bool_clauses_if_more_than_one(
queries, use_must_clause, preserve_bool_semantics_if_one_clause=False
):
"""Helper for wrapping a list of queries into a bool.{must, should} clause.
+
Args:
queries (list): List of queries to be wrapped in a bool.{must, should} clause.
- use_must_clause (bool): Flag that signifies whether to use 'must' or 'should' clause.
- preserve_bool_semantics_if_one_clause (bool): Flag that signifies whether to generate a bool query even if
- there's only one clause. This happens to generate boolean query semantics. Usually not the case, but
- useful for boolean queries support.
+ use_must_clause (bool): Flag that signifies whether to use
+ 'must' or 'should' clause.
+        preserve_bool_semantics_if_one_clause (bool): Flag that signifies whether to
+            generate a bool query even if there's only one clause. This is done to
+            preserve boolean query semantics; usually not needed, but useful for
+            boolean queries support.
Returns:
- (dict): If len(queries) > 1, the bool clause, otherwise if len(queries) == 1, will return the query itself,
- while finally, if len(queries) == 0, then an empty dictionary is returned.
+        (dict): If len(queries) > 1, the bool clause; if len(queries) == 1, the
+            query itself; and if len(queries) == 0, an empty dictionary.
"""
if not queries:
return {}
@@ -48,9 +51,7 @@ def wrap_queries_in_bool_clauses_if_more_than_one(
def ordered(obj):
- """
- Helper to order the dictionary
- """
+ """Helper to order the dictionary."""
# See https://stackoverflow.com/a/25851972
if isinstance(obj, dict):
return sorted((k, ordered(v)) for k, v in obj.items())
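
A minimal sketch of the wrapping rule documented above: zero queries give `{}`, a single query is returned as-is (unless bool semantics are forced), and multiple queries are wrapped in `bool.must` or `bool.should`. Illustrative only:

```python
def wrap_sketch(queries, use_must_clause, preserve_bool_semantics_if_one_clause=False):
    if not queries:
        return {}
    if len(queries) == 1 and not preserve_bool_semantics_if_one_clause:
        return queries[0]
    clause = 'must' if use_must_clause else 'should'
    return {'bool': {clause: queries}}

print(wrap_sketch([{'match': {'title': 'qcd'}}], use_must_clause=True))
# {'match': {'title': 'qcd'}} -- a single query is returned unwrapped
```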
diff --git a/inspire_utils/record.py b/inspire_utils/record.py
index 4e25369..f7fe520 100644
--- a/inspire_utils/record.py
+++ b/inspire_utils/record.py
@@ -504,12 +504,11 @@ def get_values_for_schema(elements, schema):
def replace_undesirable_characters(line):
- """
- Replace certain bad characters in a text line.
- @param line: (string) the text line in which bad characters are to
- be replaced.
- @return: (string) the text line after the bad characters have been
- replaced.
+    """Replace certain bad characters in a text line.
+
+    @param line: (string) the text line in which bad characters are to
+        be replaced.
+    @return: (string) the text line after the bad characters have been replaced.
"""
# These are separate because we want a particular order
for bad_string, replacement in UNDESIRABLE_STRING_REPLACEMENTS:
diff --git a/inspire_utils/urls.py b/inspire_utils/urls.py
index ea339d6..efe50bd 100644
--- a/inspire_utils/urls.py
+++ b/inspire_utils/urls.py
@@ -19,7 +19,6 @@
# In applying this license, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.
-
"""URL-related utils."""
from __future__ import absolute_import, division, print_function
@@ -45,7 +44,7 @@ def ensure_scheme(url, default_scheme='http'):
netloc=parsed.path,
path='',
query=parsed.query,
- fragment=parsed.fragment
+ fragment=parsed.fragment,
)
return urlunsplit(parsed)
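
For context on the `ensure_scheme` lines above: without a scheme, `urlsplit` parses the host into `path`, which is why the function rebuilds the tuple with `netloc=parsed.path`. A quick demonstration using the same stdlib call:

```python
from six.moves.urllib.parse import urlsplit

print(urlsplit('example.com'))
# SplitResult(scheme='', netloc='', path='example.com', query='', fragment='')
print(urlsplit('http://example.com').netloc)  # 'example.com'
```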
diff --git a/inspire_utils/utils.py b/inspire_utils/utils.py
index 6c6b67d..3a99c7c 100644
--- a/inspire_utils/utils.py
+++ b/inspire_utils/utils.py
@@ -12,7 +12,9 @@
r'\b(?:for|on behalf of|representing)\b',
re.IGNORECASE,
)
-INST_PHRASES = ['for the development', ]
+INST_PHRASES = [
+ 'for the development',
+]
def get_node(text, namespaces=None):
@@ -25,7 +27,7 @@ def get_node(text, namespaces=None):
def coll_cleanforthe(coll):
- """ Cleanup collaboration, try to find author """
+ """Cleanup collaboration, try to find author."""
author = None
if any(phrase for phrase in INST_PHRASES if phrase in coll.lower()):
@@ -52,10 +54,10 @@ def coll_cleanforthe(coll):
def split_fullname(author, switch_name_order=False):
"""Split an author name to surname and given names.
- It accepts author strings with and without comma separation.
- As default surname is first in case of comma separation, otherwise last.
- Multi-part surnames are incorrectly detected in strings without comma
- separation.
+    It accepts author strings with and without comma separation. By
+    default, the surname comes first in case of comma separation,
+    otherwise last. Multi-part surnames are incorrectly detected in
+    strings without comma separation.
"""
if not author:
return "", ""
@@ -112,7 +114,7 @@ def split_fullname(author, switch_name_order=False):
'talk',
'talks',
'workshop',
- 'workshops'
+ 'workshops',
]
THESIS_WORDS = [
@@ -136,5 +138,5 @@ def split_fullname(author, switch_name_order=False):
'staatsexamensarbeit',
'tesi',
'thesis',
- 'travail'
+ 'travail',
]
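
A condensed sketch of the `split_fullname` contract described in its docstring above (illustrative only; the real function also handles `switch_name_order`):

```python
def split_fullname_sketch(author):
    # with a comma: "surname, given names"; without: last token is the surname
    if ',' in author:
        surname, given_names = author.split(',', 1)
    else:
        parts = author.split()
        surname, given_names = parts[-1], ' '.join(parts[:-1])
    return surname.strip(), given_names.strip()

print(split_fullname_sketch('Smith, John'))  # ('Smith', 'John')
print(split_fullname_sketch('John Smith'))   # ('Smith', 'John')
```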
diff --git a/ruff.toml b/ruff.toml
index 843a909..8c073e3 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -1,4 +1,5 @@
target-version = "py311"
+line-length = 100
[lint.flake8-tidy-imports]
ban-relative-imports = "all"
@@ -19,7 +20,7 @@ select = [
# flake8-pytest-style
"PT",
]
-ignore = ["B904", "E501"]
+ignore = ["B904"]
[lint.pycodestyle]
diff --git a/tests/test_config.py b/tests/test_config.py
index 525d6e2..ae02fa9 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -42,10 +42,12 @@ def test_config(tmpdir):
mock_config = tmpdir.join("inspirehep.cfg")
mock_config.write("SERVER_NAME = '0.0.0.0'; OTHER_VARIABLE = 42")
- config = Config(defaults={
- 'SERVER_NAME': '127.0.0.1',
- 'SOME_OTHER_DEFAULT': 1234,
- })
+ config = Config(
+ defaults={
+ 'SERVER_NAME': '127.0.0.1',
+ 'SOME_OTHER_DEFAULT': 1234,
+ }
+ )
config.load_pyfile(mock_config.strpath)
assert config['SERVER_NAME'] == '0.0.0.0'
@@ -84,9 +86,12 @@ def test_config_invalid_file(tmpdir):
with pytest.raises(MalformedConfig):
config.load_pyfile(mock_config.strpath)
+
@pytest.mark.usefixtures(name="_restore_cwd")
def test_load_config(tmpdir):
- mock_inspirehep_var_cfg = tmpdir.mkdir('var').mkdir('inspirehep-instance').join("inspirehep.cfg")
+ mock_inspirehep_var_cfg = (
+ tmpdir.mkdir('var').mkdir('inspirehep-instance').join("inspirehep.cfg")
+ )
mock_inspirehep_var_cfg.write("SERVER_NAME = '0.0.0.0'")
mock_inspirehep_cfg = tmpdir.join("inspirehep.cfg")
diff --git a/tests/test_date.py b/tests/test_date.py
index 66b1eff..e66f92e 100644
--- a/tests/test_date.py
+++ b/tests/test_date.py
@@ -68,7 +68,9 @@ def test_partial_date_raises_on_day_with_no_month():
def test_partial_date_raises_on_wrong_types():
- with pytest.raises(TypeError, match='arguments to PartialDate must be of type int or None'):
+ with pytest.raises(
+ TypeError, match='arguments to PartialDate must be of type int or None'
+ ):
PartialDate('1686', '6', '30')
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
index c0f0672..e317d1d 100644
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -83,7 +83,8 @@ def test_maybe_int_returns_none_otherwise():
def test_remove_tags_allowed_trees_strip():
allowed_trees = ('b',)
strip = '@class="hidden"'
-    snippet = '<div><b>Only this text remains.</b><em class="hidden">Not this one.</em></div>'
+    snippet = ('<div><b>Only this text remains.</b>'
+               '<em class="hidden">Not this one.</em></div>')
result = remove_tags(snippet, allowed_trees=allowed_trees, strip=strip)
    expected = u'<b>Only this text remains.</b>'
@@ -94,7 +95,8 @@ def test_remove_tags_allowed_trees_strip():
def test_remove_tags_allowed_tags_strip():
allowed_tags = ('b',)
strip = '@class="hidden"'
-    snippet = '<div><b>Only this text remains.</b><em class="hidden">Not this one.</em></div>'
+    snippet = ('<div><b>Only this text remains.</b>'
+               '<em class="hidden">Not this one.</em></div>')
result = remove_tags(snippet, allowed_tags=allowed_tags, strip=strip)
    expected = u'<b>Only this text remains.</b>'
@@ -105,7 +107,8 @@ def test_remove_tags_allowed_tags_strip():
def test_remove_tags_allowed_tags_strip_preserves_text():
allowed_tags = ('i',)
strip = '@class="hidden"'
-    snippet = '<div><b>Only this text remains.</b><em class="hidden">Not this one.</em></div>'
+    snippet = ('<div><b>Only this text remains.</b>'
+               '<em class="hidden">Not this one.</em></div>')
result = remove_tags(snippet, allowed_tags=allowed_tags, strip=strip)
expected = u'Only this text remains.'
diff --git a/tests/test_name.py b/tests/test_name.py
index af0a3da..9e108de 100644
--- a/tests/test_name.py
+++ b/tests/test_name.py
@@ -120,8 +120,11 @@ def test_normalize_name_handles_suffixes(input_author_name, expected):
@pytest.mark.parametrize(
("input_author_name", "expected"),
- [("Sir John Smith", "Smith, John"),
- ("Bao, Hon", "Bao, Hon"), ("ed witten", "Witten, Ed")],
+ [
+ ("Sir John Smith", "Smith, John"),
+ ("Bao, Hon", "Bao, Hon"),
+ ("ed witten", "Witten, Ed"),
+ ],
)
def test_normalize_name_handles_titles(input_author_name, expected):
assert normalize_name(input_author_name) == expected
@@ -416,8 +419,7 @@ def test_format_author_name():
def test_format_author_name_with_initials():
expected = "S. M. Lieber"
- assert expected == format_name(
- "Lieber, Stanley Martin", initials_only=True)
+ assert expected == format_name("Lieber, Stanley Martin", initials_only=True)
def test_format_author_name_with_initials_with_all_caps_name():
@@ -439,15 +441,20 @@ def test_parsed_name_initials():
@pytest.mark.parametrize(
("input_author_name", "expected"),
- [("Lieber, Ed", "E. Lieber"),
- ('Lieber, Ed Viktor', "E. V. Lieber"),
- ('Lieber, Ed Jr.', "E. Lieber, Jr."),
- ('Lieber, Ed Victor Jr.', "E. V. Lieber, Jr."),
- ],
+ [
+ ("Lieber, Ed", "E. Lieber"),
+ ('Lieber, Ed Viktor', "E. V. Lieber"),
+ ('Lieber, Ed Jr.', "E. Lieber, Jr."),
+ ('Lieber, Ed Victor Jr.', "E. V. Lieber, Jr."),
+ ],
)
-def test_format_author_name_with_initials_when_first_name_is_similar_to_title(input_author_name, expected):
+def test_format_author_name_with_initials_when_first_name_is_similar_to_title(
+ input_author_name, expected
+):
- assert expected == format_name(input_author_name, initials_only=True, without_titles=True)
+ assert expected == format_name(
+ input_author_name, initials_only=True, without_titles=True
+ )
def test_parsed_wrong_names_and_not_fail():
@@ -485,9 +492,9 @@ def test_first_name_with_dash_is_printed_with_dash_and_initialized_correctly():
def test_first_name_initials_without_whitespace_is_initialized_correctly():
- assert format_name(
- "Miguel A-M.G. Garcia", initials_only=True
- ) == u"M. A. M. G. Garcia"
+ assert (
+ format_name("Miguel A-M.G. Garcia", initials_only=True) == u"M. A. M. G. Garcia"
+ )
def test_last_name_recognized_correctly_regression_test():
@@ -1348,48 +1355,52 @@ def test_generate_es_query_title_name():
name = "ed witten"
expected_query = {
'nested': {
- 'path': 'authors', 'query': {
+ 'path': 'authors',
+ 'query': {
'bool': {
'must': [
{
'match': {
u'authors.last_name': {
'operator': 'AND',
- 'query': u'Witten'
+ 'query': u'Witten',
}
}
- }, {
+ },
+ {
'bool': {
'should': [
{
'match_phrase_prefix': {
u'authors.first_name': {
'query': u'Ed',
- 'analyzer': 'names_analyzer'
+ 'analyzer': 'names_analyzer',
}
}
- }, {
+ },
+ {
'match': {
u'authors.first_name': {
'operator': 'AND',
'query': u'Ed',
- 'analyzer': 'names_initials_analyzer'
+ 'analyzer': 'names_initials_analyzer',
}
}
- }, {
+ },
+ {
'match': {
u'authors.full_name': {
'operator': 'AND',
- 'query': 'Ed Witten'
+ 'query': 'Ed Witten',
}
}
- }
+ },
]
}
- }
+ },
]
}
- }
+ },
}
}
parsed_name = ParsedName(name)
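The expected values in the hunks above double as usage examples (import path assumed from the test module name):

    from inspire_utils.name import format_name, normalize_name

    normalize_name("Sir John Smith")                           # 'Smith, John'
    format_name("Lieber, Stanley Martin", initials_only=True)  # 'S. M. Lieber'
    format_name("Lieber, Ed Jr.", initials_only=True,
                without_titles=True)                           # 'E. Lieber, Jr.'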
diff --git a/tests/test_parsers_arxiv.py b/tests/test_parsers_arxiv.py
index 5e17a5e..6e1938e 100644
--- a/tests/test_parsers_arxiv.py
+++ b/tests/test_parsers_arxiv.py
@@ -51,15 +51,23 @@ def test_latex_to_unicode_preserves_math():
def test_latex_to_unicode_preserves_braces_containing_more_than_one_char():
- expected = u"On the origin of the Type~{\\sc ii} spicules - dynamic 3D MHD simulations"
- result = ArxivParser.latex_to_unicode(u"On the origin of the Type~{\\sc ii} spicules - dynamic 3D MHD simulations")
+ expected = (
+ u"On the origin of the Type~{\\sc ii} spicules - dynamic 3D MHD simulations"
+ )
+ result = ArxivParser.latex_to_unicode(
+ u"On the origin of the Type~{\\sc ii} spicules - dynamic 3D MHD simulations"
+ )
assert result == expected
def test_latex_to_unicode_preserves_comments():
- expected = u"A 4% measurement of $H_0$ using the cumulative distribution of strong-lensing time delays in doubly-imaged quasars"
- result = ArxivParser.latex_to_unicode(u"A 4% measurement of $H_0$ using the cumulative distribution of strong-lensing time delays in doubly-imaged quasars")
+ expected = (u"A 4% measurement of $H_0$ using the cumulative"
+ u"distribution of strong-lensing time delays in doubly-imaged quasars")
+ result = ArxivParser.latex_to_unicode(
+ (u"A 4% measurement of $H_0$ using the cumulative"
+ u"distribution of strong-lensing time delays in doubly-imaged quasars")
+ )
assert result == expected
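Note the explicit trailing space kept when the long literals above are split: adjacent Python string literals concatenate with nothing in between, so omitting it silently changes the string under test:

    (u"A 4% measurement of $H_0$ using the cumulative"
     u"distribution of strong-lensing time delays")   # wrong: '...cumulativedistribution...'
    (u"A 4% measurement of $H_0$ using the cumulative "
     u"distribution of strong-lensing time delays")   # right: fragments join with a space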
diff --git a/tests/test_parsers_crossref.py b/tests/test_parsers_crossref.py
index 2c12cc5..8fa8303 100644
--- a/tests/test_parsers_crossref.py
+++ b/tests/test_parsers_crossref.py
@@ -54,13 +54,16 @@ def get_parser_by_file(filename):
return CrossrefParser(aps_crossref)
-@pytest.fixture(scope='module', params=[
- ('2018.3804742.json', '2018.3804742_expected.yml'),
- ('tasc.2017.2776938.json', 'tasc.2017.2776938_expected.yml'),
- ('9781316535783.011.json', '9781316535783.011_expected.yml'),
- ('PhysRevB.33.3547.2.json', 'PhysRevB.33.3547.2_expected.yml'),
- ('s1463-4988(99)00060-3.json', 's1463-4988(99)00060-3_expected.yml'),
-])
+@pytest.fixture(
+ scope='module',
+ params=[
+ ('2018.3804742.json', '2018.3804742_expected.yml'),
+ ('tasc.2017.2776938.json', 'tasc.2017.2776938_expected.yml'),
+ ('9781316535783.011.json', '9781316535783.011_expected.yml'),
+ ('PhysRevB.33.3547.2.json', 'PhysRevB.33.3547.2_expected.yml'),
+ ('s1463-4988(99)00060-3.json', 's1463-4988(99)00060-3_expected.yml'),
+ ],
+)
def records(request):
return {
'crossref': get_parser_by_file(request.param[0]),
@@ -99,24 +102,20 @@ def test_data_completeness(records):
assert field in all_fields
-@pytest.mark.parametrize(
- 'field_name',
- REQUIRED_FIELDS
-)
+@pytest.mark.parametrize('field_name', REQUIRED_FIELDS)
def test_required_fields(field_name, records):
- '''Check every field in this list since all of them are required in a Crossref record'''
+ """Check every field in this list since all of them are required in a
+ Crossref record."""
result = getattr(records['crossref'], field_name)
expected = records['expected'][field_name]
assert result == expected
-@pytest.mark.parametrize(
- 'field_name',
- UNREQUIRED_FIELDS
-)
+@pytest.mark.parametrize('field_name', UNREQUIRED_FIELDS)
def test_unrequired_fields(field_name, records):
- '''Check if the field was parsed correctly only if the field exists in this record'''
+ """Check if the field was parsed correctly only if the field exists in this
+ record."""
if field_name in records['expected']:
result = getattr(records['crossref'], field_name)
expected = records['expected'][field_name]
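The records fixture used here (and in the Elsevier and JATS suites below) reduces to this shape: each param pairs an input file with an expected-output file, and the module scope means each pair is parsed once per module rather than once per test. get_expected_by_file is a hypothetical stand-in for however the YAML side is actually loaded:

    import pytest

    @pytest.fixture(scope='module', params=[
        ('record.json', 'record_expected.yml'),
    ])
    def records(request):
        return {
            'crossref': get_parser_by_file(request.param[0]),
            'expected': get_expected_by_file(request.param[1]),  # hypothetical
        }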
diff --git a/tests/test_parsers_elsevier.py b/tests/test_parsers_elsevier.py
index 234e67e..fb79d0a 100644
--- a/tests/test_parsers_elsevier.py
+++ b/tests/test_parsers_elsevier.py
@@ -55,17 +55,20 @@ def get_parser_by_file(filename):
return ElsevierParser(aps_elsevier)
-@pytest.fixture(scope='module', params=[
- ('j.nima.2019.162787.xml', 'j.nima.2019.162787_expected.yml'),
- ('j.nuclphysa.2020.121991.xml', 'j.nuclphysa.2020.121991_expected.yml'),
- ('j.nima.2019.162728.xml', 'j.nima.2019.162728_expected.yml'),
- ('j.nimb.2019.04.063.xml', 'j.nimb.2019.04.063_expected.yml'),
- ('j.cpc.2020.107740.xml', 'j.cpc.2020.107740_expected.yml'),
- ('j.scib.2020.01.008.xml', 'j.scib.2020.01.008_expected.yml'),
- ('aphy.2001.6176.xml', 'aphy.2001.6176_expected.yml'),
- ('j.aim.2021.107831.xml', 'j.aim.2021.107831_expected.yml'),
- ('j.nuclphysa.2020.121992.xml', 'j.nuclphysa.2020.121992_expected.yml'),
-])
+@pytest.fixture(
+ scope='module',
+ params=[
+ ('j.nima.2019.162787.xml', 'j.nima.2019.162787_expected.yml'),
+ ('j.nuclphysa.2020.121991.xml', 'j.nuclphysa.2020.121991_expected.yml'),
+ ('j.nima.2019.162728.xml', 'j.nima.2019.162728_expected.yml'),
+ ('j.nimb.2019.04.063.xml', 'j.nimb.2019.04.063_expected.yml'),
+ ('j.cpc.2020.107740.xml', 'j.cpc.2020.107740_expected.yml'),
+ ('j.scib.2020.01.008.xml', 'j.scib.2020.01.008_expected.yml'),
+ ('aphy.2001.6176.xml', 'aphy.2001.6176_expected.yml'),
+ ('j.aim.2021.107831.xml', 'j.aim.2021.107831_expected.yml'),
+ ('j.nuclphysa.2020.121992.xml', 'j.nuclphysa.2020.121992_expected.yml'),
+ ],
+)
def records(request):
return {
'elsevier': get_parser_by_file(request.param[0]),
@@ -97,11 +100,7 @@ def records(request):
'journal_issue',
'is_conference_paper',
]
-FIELDS_TO_CHECK_SEPARATELY = [
- 'publication_date',
- 'documents',
- 'collaborations'
-]
+FIELDS_TO_CHECK_SEPARATELY = ['publication_date', 'documents', 'collaborations']
def test_data_completeness(records):
@@ -110,10 +109,7 @@ def test_data_completeness(records):
assert field in tested_fields
-@pytest.mark.parametrize(
- 'field_name',
- FIELDS_TO_CHECK
-)
+@pytest.mark.parametrize('field_name', FIELDS_TO_CHECK)
def test_field(field_name, records):
result = getattr(records['elsevier'], field_name)
expected = records['expected'][field_name]
@@ -148,8 +144,7 @@ def test_parse(records):
def test_attach_fulltext_document(records):
parser = records['elsevier']
parser.attach_fulltext_document(
- records['file_name'],
- 'http://example.org/{}'.format(records['file_name'])
+ records['file_name'], 'http://example.org/{}'.format(records['file_name'])
)
result = parser.parse()
assert result['documents'] == records['expected']['documents']
diff --git a/tests/test_parsers_jats.py b/tests/test_parsers_jats.py
index d222dab..a7c420b 100644
--- a/tests/test_parsers_jats.py
+++ b/tests/test_parsers_jats.py
@@ -55,13 +55,16 @@ def get_parser_by_file(filename):
return JatsParser(aps_jats)
-@pytest.fixture(scope='module', params=[
- ('PhysRevD.102.014505.xml', 'PhysRevD.102.014505_expected.yml'),
- ('PhysRevX.7.021022.xml', 'PhysRevX.7.021022_expected.yml'),
- ('PhysRevX.4.021018.xml', 'PhysRevX.4.021018_expected.yml'),
- ('PhysRevD.96.095036.xml', 'PhysRevD.96.095036_expected.yml'),
- ('PhysRevX.7.021021.xml', 'PhysRevX.7.021021_expected.yml'),
-])
+@pytest.fixture(
+ scope='module',
+ params=[
+ ('PhysRevD.102.014505.xml', 'PhysRevD.102.014505_expected.yml'),
+ ('PhysRevX.7.021022.xml', 'PhysRevX.7.021022_expected.yml'),
+ ('PhysRevX.4.021018.xml', 'PhysRevX.4.021018_expected.yml'),
+ ('PhysRevD.96.095036.xml', 'PhysRevD.96.095036_expected.yml'),
+ ('PhysRevX.7.021021.xml', 'PhysRevX.7.021021_expected.yml'),
+ ],
+)
def records(request):
return {
'jats': get_parser_by_file(request.param[0]),
@@ -105,10 +108,7 @@ def test_data_completeness(records):
assert field in tested_fields
-@pytest.mark.parametrize(
- 'field_name',
- FIELDS_TO_CHECK
-)
+@pytest.mark.parametrize('field_name', FIELDS_TO_CHECK)
def test_field(field_name, records):
result = getattr(records['jats'], field_name)
expected = records['expected'][field_name]
@@ -145,8 +145,7 @@ def test_parse(records):
def test_attach_fulltext_document(records):
parser = records['jats']
parser.attach_fulltext_document(
- records['file_name'],
- 'http://example.org/{}'.format(records['file_name'])
+ records['file_name'], 'http://example.org/{}'.format(records['file_name'])
)
result = parser.parse()
diff --git a/tests/test_record.py b/tests/test_record.py
index 31e65c3..1cdbb68 100644
--- a/tests/test_record.py
+++ b/tests/test_record.py
@@ -74,9 +74,7 @@ def test_get_value_allows_slices_in_paths():
def test_get_value_returns_none_if_inner_key_does_not_exist_on_string():
- record = {
- 'foo': 'bar'
- }
+ record = {'foo': 'bar'}
result = get_value(record, 'foo.value')
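For reference, the behaviour pinned down by this test (import path assumed from the test module name):

    from inspire_utils.record import get_value

    record = {'foo': 'bar'}
    get_value(record, 'foo')        # 'bar'
    get_value(record, 'foo.value')  # None: 'bar' is a string, so there is no
                                    # inner 'value' key to descend into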
diff --git a/tests/test_urls.py b/tests/test_urls.py
index aac1c37..eccc167 100644
--- a/tests/test_urls.py
+++ b/tests/test_urls.py
@@ -54,7 +54,7 @@
'explicit netloc',
'with scheme, no netloc',
'without scheme, no netloc',
- ]
+ ],
)
def test_ensure_scheme(url, scheme, expected):
if scheme:
@@ -86,7 +86,7 @@ def test_ensure_scheme(url, scheme, expected):
'integer recid, scheme already present',
'string recid, no scheme',
'unicode url',
- ]
+ ],
)
def test_record_url_by_pattern(pattern, recid, expected):
assert record_url_by_pattern(pattern, recid) == expected
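Finally, the helper exercised by the last hunk, sketched under the assumption that the pattern carries a {recid} placeholder (consistent with the 'integer recid' and 'string recid' cases named above):

    from inspire_utils.urls import record_url_by_pattern

    record_url_by_pattern('http://example.org/record/{recid}', 123)
    # assumed result: 'http://example.org/record/123'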