Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: refactor common code across lambda functions and fixes incorrect typing #403

Merged
merged 5 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 32 additions & 8 deletions python/src/functions/generate_treasury_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@
from openpyxl.worksheet.worksheet import Worksheet
from pydantic import BaseModel

from src.lib.constants import OUTPUT_TEMPLATE_FILENAME_BY_PROJECT
from src.lib.logging import get_logger, reset_contextvars
from src.lib.s3_helper import download_s3_object, upload_generated_file_to_s3
from src.lib.treasury_generation_common import (
OrganizationObj,
OutputFileType,
UserObj,
get_generated_output_file_key,
get_output_template,
)
from src.lib.workbook_utils import convert_xlsx_to_csv
Expand Down Expand Up @@ -177,6 +178,10 @@ def process_event(payload: ProjectLambdaPayload, logger):
with tempfile.NamedTemporaryFile() as file:
# Download projects from S3
created_at = file_info.createdAt

# TODO: Add a verification to ensure that the objectKey here is formatted correctly.
# Path must be: uploads/{organization_id}/{agency_id}/{reporting_period_id}/{upload_id}/{filename}.xlsm
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could potentially borrow logic here from matchRegex in processValidationJson, which has the same key structure

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💯 - will address in a separate PR


download_s3_object(
s3_client,
os.environ["REPORTING_DATA_BUCKET_NAME"],
Expand All @@ -200,13 +205,16 @@ def process_event(payload: ProjectLambdaPayload, logger):

### 5) Save data
# Output XLSX file
file_destination_prefix = f"treasuryreports/{organization.id}/{organization.preferences.current_reporting_period_id}/{OUTPUT_TEMPLATE_FILENAME_BY_PROJECT[project_use_code]}"
with tempfile.NamedTemporaryFile("w") as new_output_file:
output_workbook.save(new_output_file.name)
upload_generated_file_to_s3(
client=s3_client,
bucket=os.environ["REPORTING_DATA_BUCKET_NAME"],
key=f"{file_destination_prefix}.xlsx",
key=get_generated_output_file_key(
file_type=OutputFileType.XLSX,
project=project_use_code,
organization=organization,
),
file=new_output_file,
)
# Output CSV file for treasury
Expand All @@ -215,16 +223,24 @@ def process_event(payload: ProjectLambdaPayload, logger):
upload_generated_file_to_s3(
client=s3_client,
bucket=os.environ["REPORTING_DATA_BUCKET_NAME"],
key=f"{file_destination_prefix}.csv",
key=get_generated_output_file_key(
file_type=OutputFileType.CSV,
project=project_use_code,
organization=organization,
),
file=csv_file,
)
# Store project_id_agency_id to row number in a json file
with tempfile.NamedTemporaryFile("w") as json_file:
json_file = json.dump(project_agency_id_to_row_map)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vshia would be great to get your input here. I updated this based on typing errors I noticed. However I wasn't sure if this breaks any of the existing functionality.

json.dump(project_agency_id_to_row_map, fp=json_file)
upload_generated_file_to_s3(
client=s3_client,
bucket=os.environ["REPORTING_DATA_BUCKET_NAME"],
key=f"{file_destination_prefix}.json",
key=get_generated_output_file_key(
file_type=OutputFileType.JSON,
project=project_use_code,
organization=organization,
),
file=json_file,
)

Expand All @@ -244,7 +260,11 @@ def download_output_file(
download_s3_object(
client=s3_client,
bucket=os.environ["REPORTING_DATA_BUCKET_NAME"],
key=f"treasuryreports/{organization.id}/{organization.preferences.current_reporting_period_id}/{OUTPUT_TEMPLATE_FILENAME_BY_PROJECT[project_use_code]}.xlsx",
key=get_generated_output_file_key(
file_type=OutputFileType.XLSX,
project=project_use_code,
organization=organization,
),
destination=output_file,
)
highest_row_num = max(project_agency_id_to_row_map.values())
Expand Down Expand Up @@ -272,7 +292,11 @@ def get_existing_output_metadata(
download_s3_object(
s3_client,
os.environ["REPORTING_DATA_BUCKET_NAME"],
f"treasuryreports/{organization.id}/{organization.preferences.current_reporting_period_id}/{OUTPUT_TEMPLATE_FILENAME_BY_PROJECT[project_use_code]}.json",
get_generated_output_file_key(
file_type=OutputFileType.JSON,
project=project_use_code,
organization=organization,
),
existing_file,
)
except Exception:
Expand Down
21 changes: 14 additions & 7 deletions python/src/functions/subrecipient_treasury_report_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@
from openpyxl import Workbook, load_workbook
from pydantic import BaseModel

from src.lib.constants import OUTPUT_TEMPLATE_FILENAME_BY_PROJECT
from src.lib.logging import get_logger, reset_contextvars
from src.lib.s3_helper import download_s3_object, upload_generated_file_to_s3
from src.lib.treasury_generation_common import (
OrganizationObj,
OutputFileType,
UserObj,
get_generated_output_file_key,
get_output_template,
)
from src.lib.workbook_utils import convert_xlsx_to_csv, find_last_populated_row
Expand Down Expand Up @@ -126,7 +127,7 @@ def process_event(
logger=logger,
)

upload_workbook(workbook, s3_client, organization_id, reporting_period_id)
upload_workbook(workbook, s3_client, payload.organization)

output_file.close()
recent_subrecipients_file.close()
Expand Down Expand Up @@ -206,20 +207,22 @@ def get_most_recent_upload(subrecipient):
def upload_workbook(
workbook: Workbook,
s3client: S3Client,
organization_id: int,
reporting_period_id: int,
organization: OrganizationObj,
):
"""
Handles upload of workbook to S3, both in xlsx and csv formats
"""
filekey_without_extension = f"treasuryreports/{organization_id}/{reporting_period_id}/{OUTPUT_TEMPLATE_FILENAME_BY_PROJECT['Subrecipient']}"

with tempfile.NamedTemporaryFile("w") as new_xlsx_file:
workbook.save(new_xlsx_file.name)
upload_generated_file_to_s3(
s3client,
os.environ["REPORTING_DATA_BUCKET_NAME"],
f"{filekey_without_extension}.xlsx",
get_generated_output_file_key(
file_type=OutputFileType.XLSX,
project="Subrecipient",
organization=organization,
),
new_xlsx_file,
)

Expand All @@ -234,6 +237,10 @@ def upload_workbook(
upload_generated_file_to_s3(
s3client,
os.environ["REPORTING_DATA_BUCKET_NAME"],
f"{filekey_without_extension}.csv",
get_generated_output_file_key(
file_type=OutputFileType.CSV,
project="Subrecipient",
organization=organization,
),
new_csv_file,
)
14 changes: 10 additions & 4 deletions python/src/lib/s3_helper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from src.lib.logging import get_logger
from mypy_boto3_s3.client import S3Client
from typing import IO
import tempfile
from typing import IO, Union
from urllib.parse import unquote_plus

from mypy_boto3_s3.client import S3Client

from src.lib.logging import get_logger


def download_s3_object(client: S3Client, bucket: str, key: str, destination: IO[bytes]):
"""Downloads an S3 object to a local file.
Expand All @@ -24,7 +27,10 @@ def download_s3_object(client: S3Client, bucket: str, key: str, destination: IO[


def upload_generated_file_to_s3(
client: S3Client, bucket: str, key: str, file: IO[bytes]
client: S3Client,
bucket: str,
key: str,
file: Union[IO[bytes], tempfile._TemporaryFileWrapper[str]],
):
"""Persists file to S3.

Expand Down
12 changes: 12 additions & 0 deletions python/src/lib/treasury_generation_common.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from enum import Enum
from typing import IO

from mypy_boto3_s3.client import S3Client
Expand All @@ -22,6 +23,17 @@ class UserObj(BaseModel):
id: int
email: str

class OutputFileType(Enum):
XLSX = "xlsx"
CSV = "csv"
JSON = "json"


def get_generated_output_file_key(
file_type: OutputFileType, project: str, organization: OrganizationObj
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any appetite for making project values an Enum too? I know we're grabbing from the dictionary below so perhaps not strictly necessary but could make the flow here more legible?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💯 - I'll create a separate PR with this updated

) -> str:
return f"treasuryreports/{organization.id}/{organization.preferences.current_reporting_period_id}/{OUTPUT_TEMPLATE_FILENAME_BY_PROJECT[project]}.{file_type}"


def get_output_template(
s3_client: S3Client,
Expand Down
11 changes: 9 additions & 2 deletions python/src/lib/workbook_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from typing import Optional, IO
import csv
from tempfile import _TemporaryFileWrapper
from typing import IO, Optional, Union

from openpyxl import Workbook
from openpyxl.worksheet.worksheet import Worksheet

"""
Expand All @@ -15,7 +18,11 @@ def escape_for_csv(text: Optional[str]):
return text


def convert_xlsx_to_csv(csv_file: IO[bytes], file: IO[bytes], num_rows: int):
def convert_xlsx_to_csv(
csv_file: Union[IO[bytes], _TemporaryFileWrapper],
file: Union[IO[bytes], Workbook],
num_rows: int,
):
"""
Convert xlsx file to csv.
"""
Expand Down
Loading