From 1d23a7aa69e11175183e760728823223f466dc7a Mon Sep 17 00:00:00 2001
From: rikuke <33894149+rikuke@users.noreply.github.com>
Date: Thu, 16 Nov 2023 08:51:00 +0200
Subject: [PATCH] feat: optionally add a byte order mark to csv export (#2455)

---
 .../applications/api/v1/application_batch_views.py     |  2 +-
 .../benefit/applications/services/csv_export_base.py   |  8 +++++++-
 .../applications/tests/test_talpa_integration.py       | 10 ++++++----
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/backend/benefit/applications/api/v1/application_batch_views.py b/backend/benefit/applications/api/v1/application_batch_views.py
index 6256c98190..90d0c15530 100755
--- a/backend/benefit/applications/api/v1/application_batch_views.py
+++ b/backend/benefit/applications/api/v1/application_batch_views.py
@@ -190,7 +190,7 @@ def talpa_export_batch(self, request, *args, **kwargs) -> StreamingHttpResponse:
         )
 
         response = StreamingHttpResponse(
-            csv_service.get_csv_string_lines_generator(True),
+            csv_service.get_csv_string_lines_generator(True, True),
             content_type="text/csv",
         )
         response["Content-Disposition"] = "attachment; filename={filename}.csv".format(
diff --git a/backend/benefit/applications/services/csv_export_base.py b/backend/benefit/applications/services/csv_export_base.py
index 6422e3f735..6db485c6c3 100644
--- a/backend/benefit/applications/services/csv_export_base.py
+++ b/backend/benefit/applications/services/csv_export_base.py
@@ -117,19 +117,25 @@ def get_csv_cell_list_lines_generator(
             yield line
 
     def get_csv_string_lines_generator(
-        self, remove_quotes: bool = False
+        self, remove_quotes: bool = False, add_bom: bool = False
     ) -> Generator[str, None, None]:
         """
         Generate CSV's string lines using self.get_csv_cell_list_lines_generator().
 
         :return: Generator which generates list of strings that each end with '\r\n'.
         Passing remove_quotes=True will disable quoting of values as it is required by the Talpa integration.
+        Passing add_bom=True will yield a BOM (Byte Order Mark, U+FEFF) as the first item, before any CSV line.
         """
         quoting = csv.QUOTE_NONE if remove_quotes else csv.QUOTE_NONNUMERIC
 
         io = StringIO()
         csv_writer = csv.writer(io, delimiter=self.CSV_DELIMITER, quoting=quoting)
         line_length_set = set()
+
+        # Add BOM as the first item in the generator
+        if add_bom:
+            yield "\ufeff"
+
         for line in self.get_csv_cell_list_lines_generator():
             line_length_set.add(len(line))
             assert len(line_length_set) == 1, "Each CSV line must have same colum count"
diff --git a/backend/benefit/applications/tests/test_talpa_integration.py b/backend/benefit/applications/tests/test_talpa_integration.py
index 628e33bca8..1e1a596baa 100644
--- a/backend/benefit/applications/tests/test_talpa_integration.py
+++ b/backend/benefit/applications/tests/test_talpa_integration.py
@@ -40,10 +40,12 @@ def test_talpa_csv_output(pruned_applications_csv_service_with_one_application):
     csv_lines = split_lines_at_semicolon(
         pruned_applications_csv_service_with_one_application.get_csv_string()
     )
+    # BOM at the beginning of the file
     assert csv_lines[0][0] == '"Hakemusnumero"'
-    for idx, col in enumerate(
-        pruned_applications_csv_service_with_one_application.CSV_COLUMNS
-    ):
+    csv_columns = iter(pruned_applications_csv_service_with_one_application.CSV_COLUMNS)
+    next(csv_columns, None)  # Skip the first column; its heading is asserted above
+
+    for idx, col in enumerate(csv_columns, start=1):
         assert csv_lines[0][idx] == f'"{col.heading}"'
 
     assert (
@@ -112,7 +114,7 @@ def test_write_talpa_csv_file(
     application.save()
     output_file = tmp_path / "output.csv"
     pruned_applications_csv_service_with_one_application.write_csv_file(output_file)
-    with open(output_file, encoding="utf-8") as f:
+    with open(output_file, encoding="utf-8-sig") as f:
         contents = f.read()
         assert contents.startswith('"Hakemusnumero";"Työnantajan tyyppi"')
         assert "äöÄÖtest" in contents