From 1d23a7aa69e11175183e760728823223f466dc7a Mon Sep 17 00:00:00 2001 From: rikuke <33894149+rikuke@users.noreply.github.com> Date: Thu, 16 Nov 2023 08:51:00 +0200 Subject: [PATCH] feat: optionally add a byte order mark to csv export (#2455) --- .../applications/api/v1/application_batch_views.py | 2 +- .../benefit/applications/services/csv_export_base.py | 8 +++++++- .../applications/tests/test_talpa_integration.py | 10 ++++++---- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/backend/benefit/applications/api/v1/application_batch_views.py b/backend/benefit/applications/api/v1/application_batch_views.py index 6256c98190..90d0c15530 100755 --- a/backend/benefit/applications/api/v1/application_batch_views.py +++ b/backend/benefit/applications/api/v1/application_batch_views.py @@ -190,7 +190,7 @@ def talpa_export_batch(self, request, *args, **kwargs) -> StreamingHttpResponse: ) response = StreamingHttpResponse( - csv_service.get_csv_string_lines_generator(True), + csv_service.get_csv_string_lines_generator(True, True), content_type="text/csv", ) response["Content-Disposition"] = "attachment; filename={filename}.csv".format( diff --git a/backend/benefit/applications/services/csv_export_base.py b/backend/benefit/applications/services/csv_export_base.py index 6422e3f735..6db485c6c3 100644 --- a/backend/benefit/applications/services/csv_export_base.py +++ b/backend/benefit/applications/services/csv_export_base.py @@ -117,19 +117,25 @@ def get_csv_cell_list_lines_generator( yield line def get_csv_string_lines_generator( - self, remove_quotes: bool = False + self, remove_quotes: bool = False, add_bom: bool = False ) -> Generator[str, None, None]: """ Generate CSV's string lines using self.get_csv_cell_list_lines_generator(). :return: Generator which generates list of strings that each end with '\r\n'. Passing remove_quotes=True will disable quoting of values as it is required by the Talpa integration. + Passing add_bom=True will add a BOM (Byte Order Mark) at the beginning of the file. """ quoting = csv.QUOTE_NONE if remove_quotes else csv.QUOTE_NONNUMERIC io = StringIO() csv_writer = csv.writer(io, delimiter=self.CSV_DELIMITER, quoting=quoting) line_length_set = set() + + # Add BOM as the first item in the generator + if add_bom: + yield "\ufeff" + for line in self.get_csv_cell_list_lines_generator(): line_length_set.add(len(line)) assert len(line_length_set) == 1, "Each CSV line must have same colum count" diff --git a/backend/benefit/applications/tests/test_talpa_integration.py b/backend/benefit/applications/tests/test_talpa_integration.py index 628e33bca8..1e1a596baa 100644 --- a/backend/benefit/applications/tests/test_talpa_integration.py +++ b/backend/benefit/applications/tests/test_talpa_integration.py @@ -40,10 +40,12 @@ def test_talpa_csv_output(pruned_applications_csv_service_with_one_application): csv_lines = split_lines_at_semicolon( pruned_applications_csv_service_with_one_application.get_csv_string() ) + # BOM at the beginning of the file assert csv_lines[0][0] == '"Hakemusnumero"' - for idx, col in enumerate( - pruned_applications_csv_service_with_one_application.CSV_COLUMNS - ): + csv_columns = iter(pruned_applications_csv_service_with_one_application.CSV_COLUMNS) + next(csv_columns, None) # Skip the first element + + for idx, col in enumerate(csv_columns, start=1): assert csv_lines[0][idx] == f'"{col.heading}"' assert ( @@ -112,7 +114,7 @@ def test_write_talpa_csv_file( application.save() output_file = tmp_path / "output.csv" pruned_applications_csv_service_with_one_application.write_csv_file(output_file) - with open(output_file, encoding="utf-8") as f: + with open(output_file, encoding="utf-8-sig") as f: contents = f.read() assert contents.startswith('"Hakemusnumero";"Työnantajan tyyppi"') assert "äöÄÖtest" in contents