From 7eea8ace19adb07558776a80d371fed76f606155 Mon Sep 17 00:00:00 2001 From: "Hassan D. M. Sambo" Date: Tue, 23 Jan 2024 10:57:18 -0500 Subject: [PATCH 1/3] Ensure the process runs all pages (#3259) --- .../process_failed_migration.py | 2 +- .../throwaway_scripts/reprocess_migration_cli_commands.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/backend/census_historical_migration/process_failed_migration.py b/backend/census_historical_migration/process_failed_migration.py index a2f4422183..9e893baa1b 100644 --- a/backend/census_historical_migration/process_failed_migration.py +++ b/backend/census_historical_migration/process_failed_migration.py @@ -36,7 +36,7 @@ def reprocess_failed_reports(audit_year, page_size, pages, error_tag): ) for page_number in pages: - if page_number < paginator.num_pages: + if page_number <= paginator.num_pages: page = paginator.page(page_number) if page.object_list.count() > 0: dbkey_list = [status.dbkey for status in page.object_list] diff --git a/backend/census_historical_migration/throwaway_scripts/reprocess_migration_cli_commands.py b/backend/census_historical_migration/throwaway_scripts/reprocess_migration_cli_commands.py index c1191e46de..bc01f858c1 100644 --- a/backend/census_historical_migration/throwaway_scripts/reprocess_migration_cli_commands.py +++ b/backend/census_historical_migration/throwaway_scripts/reprocess_migration_cli_commands.py @@ -1,4 +1,5 @@ import argparse +import time from util import ( trigger_migration_workflow, @@ -7,6 +8,8 @@ # This script is a one-off to reprocess mdata migration for a failed # migration attempt associated with a specific error tag. +# Command is `python generate_cli_commands.py year total_records pages_per_instance instances error_tag` +# `python generate_cli_commands.py 2022 42000 5 80 invalid_email_error` parser = argparse.ArgumentParser( description="Trigger data migration Github Actions through gh API calls" @@ -24,7 +27,10 @@ args, workflow_name="failed-data-migration-reprocessor.yml" ) print(args) - for cmd in cmds: + for ndx, cmd in enumerate(cmds): + print(f"# Instance {ndx + 1}") + cmd.append("-f") cmd.append(f"error_tag={args.error_tag}") print(" ".join(cmd)) subprocess.run(cmd) # nosec + time.sleep(15) From 97e955057c30d18e5ed576b91c7fd52ff5e9dad3 Mon Sep 17 00:00:00 2001 From: "Hassan D. M. Sambo" Date: Tue, 23 Jan 2024 16:44:49 -0500 Subject: [PATCH 2/3] Code fix to avoid NoneType object has no attribute upper error (#3265) --- .../sac_general_lib/general_information.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/census_historical_migration/sac_general_lib/general_information.py b/backend/census_historical_migration/sac_general_lib/general_information.py index a96a392e1a..96a5d50a66 100644 --- a/backend/census_historical_migration/sac_general_lib/general_information.py +++ b/backend/census_historical_migration/sac_general_lib/general_information.py @@ -184,7 +184,9 @@ def _census_audit_type(s): def xform_country(general_information, audit_header): """Transforms the country from Census format to FAC format.""" # Transformation to be documented. - auditor_country = general_information.get("auditor_country").upper() + auditor_country = string_to_string( + general_information.get("auditor_country") + ).upper() if auditor_country in ["US", "USA"]: general_information["auditor_country"] = "USA" elif auditor_country == "": From 23d1fca7e89d7a54b5472aac62385c9cd8d9ef26 Mon Sep 17 00:00:00 2001 From: "Hassan D. M. Sambo" Date: Tue, 23 Jan 2024 18:03:34 -0500 Subject: [PATCH 3/3] 3266 invalid literal for int with base 10 (#3267) * fix for invalid literal for int * Added test case * Code improvement * Linting --- .../test_excel_creation.py | 10 ++++++++++ .../reprocess_migration_cli_commands.py | 4 ++-- .../throwaway_scripts/start_process_cli_commands.py | 13 ++++++++----- .../throwaway_scripts/util.py | 1 - .../workbooklib/excel_creation_utils.py | 2 +- 5 files changed, 21 insertions(+), 9 deletions(-) diff --git a/backend/census_historical_migration/test_excel_creation.py b/backend/census_historical_migration/test_excel_creation.py index 8249660733..8ceae113e9 100644 --- a/backend/census_historical_migration/test_excel_creation.py +++ b/backend/census_historical_migration/test_excel_creation.py @@ -222,3 +222,13 @@ def test_sorting(self): self.assertEqual( [record.seq_number for record in sorted_records], ["1", "2", "10"] ) + + def test_sorting_with_empty_field(self): + """Test sorting with empty sort field.""" + records = [ + self.MockRecord(other="1", seq_number=""), + self.MockRecord(other="10", seq_number=""), + self.MockRecord(other="2", seq_number=""), + ] + sorted_records = sort_by_field(records, "seq_number") + self.assertEqual([record.other for record in sorted_records], ["1", "10", "2"]) diff --git a/backend/census_historical_migration/throwaway_scripts/reprocess_migration_cli_commands.py b/backend/census_historical_migration/throwaway_scripts/reprocess_migration_cli_commands.py index bc01f858c1..f6e1ae1aac 100644 --- a/backend/census_historical_migration/throwaway_scripts/reprocess_migration_cli_commands.py +++ b/backend/census_historical_migration/throwaway_scripts/reprocess_migration_cli_commands.py @@ -8,8 +8,8 @@ # This script is a one-off to reprocess mdata migration for a failed # migration attempt associated with a specific error tag. -# Command is `python generate_cli_commands.py year total_records pages_per_instance instances error_tag` -# `python generate_cli_commands.py 2022 42000 5 80 invalid_email_error` +# Command is `python reprocess_migration_cli_commands.py year total_records pages_per_instance instances error_tag` +# `python reprocess_migration_cli_commands.py 2022 42000 5 80 invalid_email_error` parser = argparse.ArgumentParser( description="Trigger data migration Github Actions through gh API calls" diff --git a/backend/census_historical_migration/throwaway_scripts/start_process_cli_commands.py b/backend/census_historical_migration/throwaway_scripts/start_process_cli_commands.py index c1adfb8a74..7b8052a26e 100644 --- a/backend/census_historical_migration/throwaway_scripts/start_process_cli_commands.py +++ b/backend/census_historical_migration/throwaway_scripts/start_process_cli_commands.py @@ -1,7 +1,8 @@ import argparse import subprocess # nosec +import time -from util import trigger_migration_workflow # nosec +from util import trigger_migration_workflow # This throwaway script spits out code that can be # copy-pasted into a bash script, or directly into the command line. @@ -21,30 +22,32 @@ if __name__ == "__main__": cmds = trigger_migration_workflow(args) - for cmd in cmds: + for ndx, cmd in enumerate(cmds): + print(f"# Instance {ndx + 1}") cmd = " ".join(cmd) print(cmd) subprocess.run(cmd) # nosec + time.sleep(15) # Examples # With round numbers, it comes out nice and tidy. -# python generate_cli_commands.py 2022 42000 5 80 +# python start_process_cli_commands.py 2022 42000 5 80 # Each instance must run 525 records. # With 5 pages per instance, the page size is 105. # There are 400 pages in total. # This means we will attempt 42000 records. # Off-by-one, and we make sure we don't drop that extra. -# python generate_cli_commands.py 2022 42001 5 80 +# python start_process_cli_commands.py 2022 42001 5 80 # Each instance must run 526 records. # With 5 pages per instance, the page size is 106. # There are 397 pages in total. # This means we will attempt 42082 records. # More pages, and we get closer to the exact number. -# python generate_cli_commands.py 2022 42001 10 80 +# python start_process_cli_commands.py 2022 42001 10 80 # Each instance must run 526 records. # With 10 pages per instance, the page size is 53. # There are 793 pages in total. diff --git a/backend/census_historical_migration/throwaway_scripts/util.py b/backend/census_historical_migration/throwaway_scripts/util.py index a011e6190c..7dce59ffed 100644 --- a/backend/census_historical_migration/throwaway_scripts/util.py +++ b/backend/census_historical_migration/throwaway_scripts/util.py @@ -31,7 +31,6 @@ def trigger_migration_workflow( cmds = [] for ndx, page_set in enumerate(page_chunks): # gh workflow run historic-data-migrator-with-pagination.yml -f environment=preview -f year=2022 -f page_size=1 -f pages=1 - print(f"# Instance {ndx + 1}") cmds.append( [ "gh", diff --git a/backend/census_historical_migration/workbooklib/excel_creation_utils.py b/backend/census_historical_migration/workbooklib/excel_creation_utils.py index 18e647d86c..4be7615e5f 100644 --- a/backend/census_historical_migration/workbooklib/excel_creation_utils.py +++ b/backend/census_historical_migration/workbooklib/excel_creation_utils.py @@ -203,4 +203,4 @@ def sort_by_field(records, sort_field): """ Sorts records by a specified field. The values of the field are converted to integers before sorting. """ - return sorted(records, key=lambda record: int(getattr(record, sort_field))) + return sorted(records, key=lambda record: int(getattr(record, sort_field) or 0))