From c6d44f0ae9c9e1a030a752c7b4ad65cc8869f0b4 Mon Sep 17 00:00:00 2001 From: Aditya Sridhar Date: Wed, 28 Aug 2024 17:47:23 -0400 Subject: [PATCH 1/4] chore: refactor common code across lambda functions and fixes incorrect typing --- .../src/functions/generate_treasury_report.py | 40 +++++++++++++++---- .../subrecipient_treasury_report_gen.py | 21 ++++++---- python/src/lib/s3_helper.py | 14 +++++-- python/src/lib/treasury_generation_common.py | 12 ++++++ python/src/lib/workbook_utils.py | 11 ++++- 5 files changed, 77 insertions(+), 21 deletions(-) diff --git a/python/src/functions/generate_treasury_report.py b/python/src/functions/generate_treasury_report.py index 7132eddc..37187f44 100644 --- a/python/src/functions/generate_treasury_report.py +++ b/python/src/functions/generate_treasury_report.py @@ -12,12 +12,13 @@ from openpyxl.worksheet.worksheet import Worksheet from pydantic import BaseModel -from src.lib.constants import OUTPUT_TEMPLATE_FILENAME_BY_PROJECT from src.lib.logging import get_logger, reset_contextvars from src.lib.s3_helper import download_s3_object, upload_generated_file_to_s3 from src.lib.treasury_generation_common import ( OrganizationObj, + OutputFileType, UserObj, + get_generated_output_file_key, get_output_template, ) from src.lib.workbook_utils import convert_xlsx_to_csv @@ -177,6 +178,10 @@ def process_event(payload: ProjectLambdaPayload, logger): with tempfile.NamedTemporaryFile() as file: # Download projects from S3 created_at = file_info.createdAt + + # TODO: Add a verification to ensure that the objectKey here is formatted correctly. + # Path must be: uploads/{organization_id}/{agency_id}/{reporting_period_id}/{upload_id}/{filename}.xlsm + download_s3_object( s3_client, os.environ["REPORTING_DATA_BUCKET_NAME"], @@ -200,13 +205,16 @@ def process_event(payload: ProjectLambdaPayload, logger): ### 5) Save data # Output XLSX file - file_destination_prefix = f"treasuryreports/{organization.id}/{organization.preferences.current_reporting_period_id}/{OUTPUT_TEMPLATE_FILENAME_BY_PROJECT[project_use_code]}" with tempfile.NamedTemporaryFile("w") as new_output_file: output_workbook.save(new_output_file.name) upload_generated_file_to_s3( client=s3_client, bucket=os.environ["REPORTING_DATA_BUCKET_NAME"], - key=f"{file_destination_prefix}.xlsx", + key=get_generated_output_file_key( + file_type=OutputFileType.XLSX, + project=project_use_code, + organization=organization, + ), file=new_output_file, ) # Output CSV file for treasury @@ -215,16 +223,24 @@ def process_event(payload: ProjectLambdaPayload, logger): upload_generated_file_to_s3( client=s3_client, bucket=os.environ["REPORTING_DATA_BUCKET_NAME"], - key=f"{file_destination_prefix}.csv", + key=get_generated_output_file_key( + file_type=OutputFileType.CSV, + project=project_use_code, + organization=organization, + ), file=csv_file, ) # Store project_id_agency_id to row number in a json file with tempfile.NamedTemporaryFile("w") as json_file: - json_file = json.dump(project_agency_id_to_row_map) + json.dump(project_agency_id_to_row_map, fp=json_file) upload_generated_file_to_s3( client=s3_client, bucket=os.environ["REPORTING_DATA_BUCKET_NAME"], - key=f"{file_destination_prefix}.json", + key=get_generated_output_file_key( + file_type=OutputFileType.JSON, + project=project_use_code, + organization=organization, + ), file=json_file, ) @@ -244,7 +260,11 @@ def download_output_file( download_s3_object( client=s3_client, bucket=os.environ["REPORTING_DATA_BUCKET_NAME"], - key=f"treasuryreports/{organization.id}/{organization.preferences.current_reporting_period_id}/{OUTPUT_TEMPLATE_FILENAME_BY_PROJECT[project_use_code]}.xlsx", + key=get_generated_output_file_key( + file_type=OutputFileType.XLSX, + project=project_use_code, + organization=organization, + ), destination=output_file, ) highest_row_num = max(project_agency_id_to_row_map.values()) @@ -272,7 +292,11 @@ def get_existing_output_metadata( download_s3_object( s3_client, os.environ["REPORTING_DATA_BUCKET_NAME"], - f"treasuryreports/{organization.id}/{organization.preferences.current_reporting_period_id}/{OUTPUT_TEMPLATE_FILENAME_BY_PROJECT[project_use_code]}.json", + get_generated_output_file_key( + file_type=OutputFileType.JSON, + project=project_use_code, + organization=organization, + ), existing_file, ) except Exception: diff --git a/python/src/functions/subrecipient_treasury_report_gen.py b/python/src/functions/subrecipient_treasury_report_gen.py index e033a56f..bc579413 100644 --- a/python/src/functions/subrecipient_treasury_report_gen.py +++ b/python/src/functions/subrecipient_treasury_report_gen.py @@ -11,12 +11,13 @@ from openpyxl import Workbook, load_workbook from pydantic import BaseModel -from src.lib.constants import OUTPUT_TEMPLATE_FILENAME_BY_PROJECT from src.lib.logging import get_logger, reset_contextvars from src.lib.s3_helper import download_s3_object, upload_generated_file_to_s3 from src.lib.treasury_generation_common import ( OrganizationObj, + OutputFileType, UserObj, + get_generated_output_file_key, get_output_template, ) from src.lib.workbook_utils import convert_xlsx_to_csv, find_last_populated_row @@ -126,7 +127,7 @@ def process_event( logger=logger, ) - upload_workbook(workbook, s3_client, organization_id, reporting_period_id) + upload_workbook(workbook, s3_client, payload.organization) output_file.close() recent_subrecipients_file.close() @@ -206,20 +207,22 @@ def get_most_recent_upload(subrecipient): def upload_workbook( workbook: Workbook, s3client: S3Client, - organization_id: int, - reporting_period_id: int, + organization: OrganizationObj, ): """ Handles upload of workbook to S3, both in xlsx and csv formats """ - filekey_without_extension = f"treasuryreports/{organization_id}/{reporting_period_id}/{OUTPUT_TEMPLATE_FILENAME_BY_PROJECT['Subrecipient']}" with tempfile.NamedTemporaryFile("w") as new_xlsx_file: workbook.save(new_xlsx_file.name) upload_generated_file_to_s3( s3client, os.environ["REPORTING_DATA_BUCKET_NAME"], - f"{filekey_without_extension}.xlsx", + get_generated_output_file_key( + file_type=OutputFileType.XLSX, + project="Subrecipient", + organization=organization, + ), new_xlsx_file, ) @@ -234,6 +237,10 @@ def upload_workbook( upload_generated_file_to_s3( s3client, os.environ["REPORTING_DATA_BUCKET_NAME"], - f"{filekey_without_extension}.csv", + get_generated_output_file_key( + file_type=OutputFileType.CSV, + project="Subrecipient", + organization=organization, + ), new_csv_file, ) diff --git a/python/src/lib/s3_helper.py b/python/src/lib/s3_helper.py index 71574c0b..2fbc7b0c 100644 --- a/python/src/lib/s3_helper.py +++ b/python/src/lib/s3_helper.py @@ -1,8 +1,11 @@ -from src.lib.logging import get_logger -from mypy_boto3_s3.client import S3Client -from typing import IO +import tempfile +from typing import IO, Union from urllib.parse import unquote_plus +from mypy_boto3_s3.client import S3Client + +from src.lib.logging import get_logger + def download_s3_object(client: S3Client, bucket: str, key: str, destination: IO[bytes]): """Downloads an S3 object to a local file. @@ -24,7 +27,10 @@ def download_s3_object(client: S3Client, bucket: str, key: str, destination: IO[ def upload_generated_file_to_s3( - client: S3Client, bucket: str, key: str, file: IO[bytes] + client: S3Client, + bucket: str, + key: str, + file: Union[IO[bytes], tempfile._TemporaryFileWrapper[str]], ): """Persists file to S3. diff --git a/python/src/lib/treasury_generation_common.py b/python/src/lib/treasury_generation_common.py index 36bf3153..23e6871a 100644 --- a/python/src/lib/treasury_generation_common.py +++ b/python/src/lib/treasury_generation_common.py @@ -1,4 +1,5 @@ import os +from enum import Enum from typing import IO from mypy_boto3_s3.client import S3Client @@ -22,6 +23,17 @@ class UserObj(BaseModel): id: int email: str +class OutputFileType(Enum): + XLSX = "xlsx" + CSV = "csv" + JSON = "json" + + +def get_generated_output_file_key( + file_type: OutputFileType, project: str, organization: OrganizationObj +) -> str: + return f"treasuryreports/{organization.id}/{organization.preferences.current_reporting_period_id}/{OUTPUT_TEMPLATE_FILENAME_BY_PROJECT[project]}.{file_type}" + def get_output_template( s3_client: S3Client, diff --git a/python/src/lib/workbook_utils.py b/python/src/lib/workbook_utils.py index 5eb9d3e3..6aa87ab9 100644 --- a/python/src/lib/workbook_utils.py +++ b/python/src/lib/workbook_utils.py @@ -1,5 +1,8 @@ -from typing import Optional, IO import csv +from tempfile import _TemporaryFileWrapper +from typing import IO, Optional, Union + +from openpyxl import Workbook from openpyxl.worksheet.worksheet import Worksheet """ @@ -15,7 +18,11 @@ def escape_for_csv(text: Optional[str]): return text -def convert_xlsx_to_csv(csv_file: IO[bytes], file: IO[bytes], num_rows: int): +def convert_xlsx_to_csv( + csv_file: Union[IO[bytes], _TemporaryFileWrapper], + file: Union[IO[bytes], Workbook], + num_rows: int, +): """ Convert xlsx file to csv. """ From df9bae45f57eda2599e2525dcb85a82feb1e16e2 Mon Sep 17 00:00:00 2001 From: Aditya Sridhar Date: Thu, 29 Aug 2024 07:32:43 -0400 Subject: [PATCH 2/4] fix: error relating to unexpected subscription in type --- python/src/lib/s3_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/src/lib/s3_helper.py b/python/src/lib/s3_helper.py index 2fbc7b0c..f336ec38 100644 --- a/python/src/lib/s3_helper.py +++ b/python/src/lib/s3_helper.py @@ -30,7 +30,7 @@ def upload_generated_file_to_s3( client: S3Client, bucket: str, key: str, - file: Union[IO[bytes], tempfile._TemporaryFileWrapper[str]], + file: Union[IO[bytes], tempfile._TemporaryFileWrapper], ): """Persists file to S3. From 95eaaebcaf551790e480b5f7346b0b1cd06fbc18 Mon Sep 17 00:00:00 2001 From: Aditya Sridhar Date: Thu, 29 Aug 2024 07:48:31 -0400 Subject: [PATCH 3/4] fix: tests --- .../lib/test_subrecipient_treasury_report_gen.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/python/tests/src/lib/test_subrecipient_treasury_report_gen.py b/python/tests/src/lib/test_subrecipient_treasury_report_gen.py index 703dec61..805ce406 100644 --- a/python/tests/src/lib/test_subrecipient_treasury_report_gen.py +++ b/python/tests/src/lib/test_subrecipient_treasury_report_gen.py @@ -15,6 +15,7 @@ upload_workbook, write_subrecipients_to_workbook, ) +from src.lib.treasury_generation_common import OrganizationObj class TestHandleNoEventOrNoContext: @@ -281,13 +282,19 @@ def test_upload_workbook( mock_s3_client = MagicMock() workbook = Workbook() workbook.create_sheet(WORKSHEET_NAME) - organization_id = 99 - reporting_period_id = 88 - upload_template_location_minus_filetype = f"treasuryreports/{organization_id}/{reporting_period_id}/CPFSubrecipientTemplate" + organization_raw = { + "id": 99, + "preferences": {"current_reporting_period_id": 88}, + } + organization = OrganizationObj.model_validate(organization_raw) + + upload_template_location_minus_filetype = ( + "treasuryreports/99/88/CPFSubrecipientTemplate" + ) upload_template_xlsx_key = f"{upload_template_location_minus_filetype}.xlsx" upload_template_csv_key = f"{upload_template_location_minus_filetype}.csv" - upload_workbook(workbook, mock_s3_client, organization_id, reporting_period_id) + upload_workbook(workbook, mock_s3_client, organization) mock_convert_xlsx_to_csv.assert_called_once() From 892121dd20c964fec764813bee6a20ff69d767b2 Mon Sep 17 00:00:00 2001 From: Aditya Sridhar Date: Thu, 29 Aug 2024 07:58:35 -0400 Subject: [PATCH 4/4] fix: ensure enum value is used for building string --- python/src/lib/treasury_generation_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/src/lib/treasury_generation_common.py b/python/src/lib/treasury_generation_common.py index 23e6871a..bcea0343 100644 --- a/python/src/lib/treasury_generation_common.py +++ b/python/src/lib/treasury_generation_common.py @@ -32,7 +32,7 @@ class OutputFileType(Enum): def get_generated_output_file_key( file_type: OutputFileType, project: str, organization: OrganizationObj ) -> str: - return f"treasuryreports/{organization.id}/{organization.preferences.current_reporting_period_id}/{OUTPUT_TEMPLATE_FILENAME_BY_PROJECT[project]}.{file_type}" + return f"treasuryreports/{organization.id}/{organization.preferences.current_reporting_period_id}/{OUTPUT_TEMPLATE_FILENAME_BY_PROJECT[project]}.{file_type.value}" def get_output_template(