From e568d911e461246fef364fa94e79f3dfad0958ac Mon Sep 17 00:00:00 2001 From: Alex Sandro Date: Wed, 10 May 2017 10:23:43 -0300 Subject: [PATCH 1/3] Correcting decoding failure (considering lower case and upper case letters). --- htmlentities/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/htmlentities/__init__.py b/htmlentities/__init__.py index 714a203..73f761f 100644 --- a/htmlentities/__init__.py +++ b/htmlentities/__init__.py @@ -18,7 +18,7 @@ def encode(source): def decode(source): - for entitie in re.findall('&(?:[a-z][a-z0-9]+);', source): + for entitie in re.findall('&(?:[a-z][a-z0-9]+);', source, re.I): entitie = entitie.replace('&', '') entitie = entitie.replace(';', '') source = source.replace('&%s;' % entitie, unichr(name2codepoint[entitie])) From e31d572ea3e293c4c3fdf3592c5dc96b61b77185 Mon Sep 17 00:00:00 2001 From: Alex Sandro Date: Wed, 10 May 2017 10:24:49 -0300 Subject: [PATCH 2/3] Test that proves the correction in decoding. --- tests/test_decoding.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_decoding.py b/tests/test_decoding.py index 45bfd28..7a1da4a 100644 --- a/tests/test_decoding.py +++ b/tests/test_decoding.py @@ -15,3 +15,13 @@ def test_should_decode_basic_entities(self): def test_should_decode_utf8_accents(self): self.assertEqual(u'é', htmlentities.decode('&eacute;')) self.assertEqual(u'ê', htmlentities.decode('&ecirc;')) + + def test_decode_complex_unicode_text(self): + """This test fails because the regular expression does not recognize U&""" + text = u"Übergroße Äpfel mit Würmern" + + # result string: &Uuml;bergro&szlig;e &Auml;pfel mit W&uuml;rmern + + result = htmlentities.decode(htmlentities.encode(text)) + + self.assertEquals(result, text) From af02a70787e62a7e2662e3a663bd20d2d163e487 Mon Sep 17 00:00:00 2001 From: Alex Sandro Date: Wed, 10 May 2017 10:44:38 -0300 Subject: [PATCH 3/3] Doc string... 
--- tests/test_decoding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_decoding.py b/tests/test_decoding.py index 7a1da4a..eade01e 100644 --- a/tests/test_decoding.py +++ b/tests/test_decoding.py @@ -17,7 +17,7 @@ def test_should_decode_utf8_accents(self): self.assertEqual(u'ê', htmlentities.decode('&ecirc;')) def test_decode_complex_unicode_text(self): - """This test fails because the regular expression does not recognize U&""" + """Validates the encoding and decoding of sample text""" text = u"Übergroße Äpfel mit Würmern" # result string: &Uuml;bergro&szlig;e &Auml;pfel mit W&uuml;rmern