From 05cfb6bd0ce1e6090607849124e5e81daddb134b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szymon=20=C5=81opaciuk?= Date: Mon, 8 Jan 2018 10:52:45 +0100 Subject: [PATCH 1/2] =?UTF-8?q?tests:=20remove=20xmlns=20from=20test=5Fref?= =?UTF-8?q?erences=20in=20edp=20Due=20to=20scrapy/parsel#99=20test=20becam?= =?UTF-8?q?e=20invalid,=20this=20removes=20the=20namespace=20declarations?= =?UTF-8?q?=20from=20raw=5Freferences.=20Signed-off-by:=20Szymon=20=C5=81o?= =?UTF-8?q?paciuk=20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/test_edp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_edp.py b/tests/unit/test_edp.py index 3ff75717..c3b78e47 100644 --- a/tests/unit/test_edp.py +++ b/tests/unit/test_edp.py @@ -587,7 +587,7 @@ def test_references(record_references_only): 'journal_title': u'J. Nucl. Radiochem. Sci.', 'journal_volume': u'10', 'number': u'5a', - 'raw_reference': u'R.V. Krishnan, G. Panneerselvam, P. Manikandan M.P. Antony, K. Nagarajan, J. Nucl. Radiochem. Sci., 10.1, 19\u201326 (2009).', + 'raw_reference': u'R.V. Krishnan, G. Panneerselvam, P. Manikandan M.P. Antony, K. Nagarajan, J. Nucl. Radiochem. Sci., 10.1, 19\u201326 (2009).', 'year': u'2009' } From 55f2f7ef3cfb85a6a56c03dc69849b3085538d8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szymon=20=C5=81opaciuk?= Date: Mon, 8 Jan 2018 11:09:45 +0100 Subject: [PATCH 2/2] =?UTF-8?q?parsers:=20remove=20cleanup=5Fnamespaces=20?= =?UTF-8?q?from=20JATS=20Unnecessary,=20as=20new=20version=20of=20scrapy?= =?UTF-8?q?=20handles=20it.=20Signed-off-by:=20Szymon=20=C5=81opaciuk=20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- hepcrawl/parsers/jats.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/hepcrawl/parsers/jats.py b/hepcrawl/parsers/jats.py index 6a1aa524..e8a29c0d 100644 --- a/hepcrawl/parsers/jats.py +++ b/hepcrawl/parsers/jats.py @@ -20,8 +20,6 @@ from inspire_utils.date import PartialDate from inspire_utils.helpers import maybe_int, remove_tags -from lxml import etree # needed until a new release of parsel is made - from ..utils import get_first, get_node @@ -459,7 +457,6 @@ def get_root_node(jats_record): else: root = jats_record root.remove_namespaces() - etree.cleanup_namespaces(root.root) return root