diff --git a/htmlentities/__init__.py b/htmlentities/__init__.py
index 714a203..73f761f 100644
--- a/htmlentities/__init__.py
+++ b/htmlentities/__init__.py
@@ -18,7 +18,7 @@ def encode(source):
 
 
 def decode(source):
-    for entitie in re.findall('&(?:[a-z][a-z0-9]+);', source):
+    for entitie in re.findall('&(?:[a-z][a-z0-9]+);', source, re.I):
         entitie = entitie.replace('&', '')
         entitie = entitie.replace(';', '')
         source = source.replace('&%s;' % entitie, unichr(name2codepoint[entitie]))
diff --git a/tests/test_decoding.py b/tests/test_decoding.py
index 45bfd28..eade01e 100644
--- a/tests/test_decoding.py
+++ b/tests/test_decoding.py
@@ -15,3 +15,13 @@ def test_should_decode_basic_entities(self):
     def test_should_decode_utf8_accents(self):
         self.assertEqual(u'é', htmlentities.decode('é'))
         self.assertEqual(u'ê', htmlentities.decode('ê'))
+
+    def test_decode_complex_unicode_text(self):
+        """Validates the encoding and decoding of sample text"""
+        text = u"Übergroße Äpfel mit Würmern"
+
+        # result string: Übergroße Äpfel mit Würmern
+
+        result = htmlentities.decode(htmlentities.encode(text))
+
+        self.assertEqual(result, text)