Skip to content

Commit

Permalink
fix: HL-1478 xml error (#3368)
Browse files Browse the repository at this point in the history
* fix: malformed h1 tag

* fix: invisible characters crashing xml generation
  • Loading branch information
rikuke authored Oct 4, 2024
1 parent 9bb81d9 commit 2b08d0c
Showing 3 changed files with 38 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -102,7 +102,7 @@ def _generate_decision_text_string(
else:
decision_section = DeniedDecisionProposalFactory()
decision_string = f"""<section id="paatos"><h1>Päätös</h1>{decision_section.template_decision_text}</section>\
<section id="paatoksenperustelut"><h1>Päätöksen perustelut/h1>{decision_section.template_justification_text}</section>""" # noqa
<section id="paatoksenperustelut"><h1>Päätöksen perustelut</h1>{decision_section.template_justification_text}</section>""" # noqa
decision_type = decision_section.decision_type
return replace_decision_template_placeholders(
decision_string, decision_type, application
13 changes: 13 additions & 0 deletions backend/benefit/applications/services/ahjo_xml_builder.py
Original file line number Diff line number Diff line change
@@ -79,10 +79,23 @@ def __init__(
super().__init__(application)
self.ahjo_decision_text = ahjo_decision_text

@staticmethod
def sanitize_text_input(text: str) -> str:
replacements = {"&nbsp;": " ", "\u200b": "", "\ufeff": "", "\u00a0": " "}

for target, replacement in replacements.items():
text = text.replace(target, replacement)

return text

def remove_non_breaking_spaces(self, text: str) -> str:
    """Return *text* with every non-breaking space (U+00A0) replaced by a regular space."""
    # Cutting the string at each U+00A0 and re-joining the pieces with a
    # plain space substitutes every occurrence, including adjacent ones.
    pieces = text.split("\u00A0")
    return " ".join(pieces)

def generate_xml(self) -> AhjoXMLString:
xml_string = (
f"{XML_VERSION}<body>{self.ahjo_decision_text.decision_text}</body>"
)
xml_string = self.sanitize_text_input(xml_string)
self.validate_against_schema(
xml_string, self.load_xsd_as_string(XML_SCHEMA_PATH)
)
24 changes: 24 additions & 0 deletions backend/benefit/applications/tests/test_ahjo_xml_builder.py
Original file line number Diff line number Diff line change
@@ -314,3 +314,27 @@ def test_get_context_for_secret_xml_with_multiple_periods(
assert context["calculation_periods"][1].total_amount == int(sub_total_row_2.amount)
assert context["total_amount_row"] == total_eur_row
assert context["total_amount_row"].amount == int(total_eur_row.amount)


@pytest.mark.parametrize(
    "input_text, expected_output",
    [
        # The "&nbsp;" entity is replaced by a regular space.
        ("Hello&nbsp;World", "Hello World"),
        # Zero-width spaces (U+200B) are removed.
        ("Zero\u200bWidth\u200bSpace", "ZeroWidthSpace"),
        # A byte order mark (U+FEFF) is removed.
        ("\ufeffBOM at start", "BOM at start"),
        # A non-breaking space (U+00A0) is replaced by a regular space.
        ("Non-breaking\u00a0space", "Non-breaking space"),
        # Mixed invisible characters: "&nbsp;" and U+00A0 each become one
        # space, so the result starts with TWO spaces; every U+200B vanishes.
        ("&nbsp;\u200b\u00a0\u200bTest\u200b\u200b", "  Test"),
        # Text without any of the targeted characters is returned unchanged.
        ("No special characters", "No special characters"),
    ],
)
def test_sanitize_text_input(input_text, expected_output):
    """Check AhjoPublicXMLBuilder.sanitize_text_input against each sample input."""
    sanitized = AhjoPublicXMLBuilder.sanitize_text_input(input_text)
    assert sanitized == expected_output

0 comments on commit 2b08d0c

Please sign in to comment.