Merge pull request #3268 from GSA-TTS/main
jadudm authored Jan 24, 2024
2 parents ea96c70 + 23d1fca commit 7296d59
Showing 7 changed files with 30 additions and 10 deletions.
@@ -36,7 +36,7 @@ def reprocess_failed_reports(audit_year, page_size, pages, error_tag):
     )

     for page_number in pages:
-        if page_number < paginator.num_pages:
+        if page_number <= paginator.num_pages:
             page = paginator.page(page_number)
             if page.object_list.count() > 0:
                 dbkey_list = [status.dbkey for status in page.object_list]
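Note on the change above: Django's Paginator numbers pages from 1 through num_pages, so the strict `<` comparison skipped the final page. A minimal stand-alone sketch (hypothetical data, not the project's code) showing the boundary:

from django.core.paginator import Paginator

records = list(range(10))          # 10 fake records
paginator = Paginator(records, 4)  # page size 4 -> pages 1, 2, 3

assert paginator.num_pages == 3
assert list(paginator.page(3)) == [8, 9]  # page 3 is the last valid page
# With the old `page_number < paginator.num_pages` check, page 3 (the final,
# partial page) was never processed; `<=` includes it.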
@@ -184,7 +184,9 @@ def _census_audit_type(s):
 def xform_country(general_information, audit_header):
     """Transforms the country from Census format to FAC format."""
     # Transformation to be documented.
-    auditor_country = general_information.get("auditor_country").upper()
+    auditor_country = string_to_string(
+        general_information.get("auditor_country")
+    ).upper()
     if auditor_country in ["US", "USA"]:
         general_information["auditor_country"] = "USA"
     elif auditor_country == "":
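Note on the change above: `dict.get` returns None when "auditor_country" is absent, and calling `.upper()` on None raised AttributeError. A stand-alone sketch of the failure mode; `string_to_string` here is a stand-in with assumed behavior (None becomes ""), not the project's implementation:

def string_to_string(value):
    # Assumed behavior of the project's helper: normalize None to "".
    return str(value).strip() if value is not None else ""

general_information = {}  # "auditor_country" key missing entirely

# Old form: .get() returns None and None.upper() raises AttributeError.
# auditor_country = general_information.get("auditor_country").upper()

# New form: the helper yields "", so .upper() is safe and the existing
# `elif auditor_country == "":` branch handles the empty value.
auditor_country = string_to_string(general_information.get("auditor_country")).upper()
assert auditor_country == ""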
backend/census_historical_migration/test_excel_creation.py (10 additions, 0 deletions)
@@ -222,3 +222,13 @@ def test_sorting(self):
         self.assertEqual(
             [record.seq_number for record in sorted_records], ["1", "2", "10"]
         )
+
+    def test_sorting_with_empty_field(self):
+        """Test sorting with empty sort field."""
+        records = [
+            self.MockRecord(other="1", seq_number=""),
+            self.MockRecord(other="10", seq_number=""),
+            self.MockRecord(other="2", seq_number=""),
+        ]
+        sorted_records = sort_by_field(records, "seq_number")
+        self.assertEqual([record.other for record in sorted_records], ["1", "10", "2"])
@@ -1,4 +1,5 @@
 import argparse
+import time

 from util import (
     trigger_migration_workflow,
@@ -7,6 +8,8 @@

 # This script is a one-off to reprocess mdata migration for a failed
 # migration attempt associated with a specific error tag.
+# Command is `python reprocess_migration_cli_commands.py year total_records pages_per_instance instances error_tag`
+# `python reprocess_migration_cli_commands.py 2022 42000 5 80 invalid_email_error`

 parser = argparse.ArgumentParser(
     description="Trigger data migration Github Actions through gh API calls"
@@ -24,7 +27,10 @@
     args, workflow_name="failed-data-migration-reprocessor.yml"
 )
 print(args)
-for cmd in cmds:
+for ndx, cmd in enumerate(cmds):
+    print(f"# Instance {ndx + 1}")
+    cmd.append("-f")
+    cmd.append(f"error_tag={args.error_tag}")
     print(" ".join(cmd))
     subprocess.run(cmd) # nosec
     time.sleep(15)
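For illustration, this is roughly the shape of one generated command once the loop appends the error tag. The input names mirror the comment in util.py; the actual values come from trigger_migration_workflow and the CLI arguments:

# Illustration only; values are assumed, not taken from the helper's output.
error_tag = "invalid_email_error"  # stand-in for args.error_tag

cmd = [
    "gh", "workflow", "run", "failed-data-migration-reprocessor.yml",
    "-f", "environment=preview",
    "-f", "year=2022",
    "-f", "page_size=1",
    "-f", "pages=1",
]
cmd.append("-f")
cmd.append(f"error_tag={error_tag}")

print(" ".join(cmd))
# gh workflow run failed-data-migration-reprocessor.yml -f environment=preview -f year=2022 -f page_size=1 -f pages=1 -f error_tag=invalid_email_error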
@@ -1,7 +1,8 @@
 import argparse
 import subprocess # nosec
+import time

-from util import trigger_migration_workflow # nosec
+from util import trigger_migration_workflow

 # This throwaway script spits out code that can be
 # copy-pasted into a bash script, or directly into the command line.
@@ -21,30 +22,32 @@

 if __name__ == "__main__":
     cmds = trigger_migration_workflow(args)
-    for cmd in cmds:
+    for ndx, cmd in enumerate(cmds):
+        print(f"# Instance {ndx + 1}")
         cmd = " ".join(cmd)
         print(cmd)
         subprocess.run(cmd) # nosec
+        time.sleep(15)


 # Examples

 # With round numbers, it comes out nice and tidy.
-# python generate_cli_commands.py 2022 42000 5 80
+# python start_process_cli_commands.py 2022 42000 5 80
 # Each instance must run 525 records.
 # With 5 pages per instance, the page size is 105.
 # There are 400 pages in total.
 # This means we will attempt 42000 records.

 # Off-by-one, and we make sure we don't drop that extra.
-# python generate_cli_commands.py 2022 42001 5 80
+# python start_process_cli_commands.py 2022 42001 5 80
 # Each instance must run 526 records.
 # With 5 pages per instance, the page size is 106.
 # There are 397 pages in total.
 # This means we will attempt 42082 records.

 # More pages, and we get closer to the exact number.
-# python generate_cli_commands.py 2022 42001 10 80
+# python start_process_cli_commands.py 2022 42001 10 80
 # Each instance must run 526 records.
 # With 10 pages per instance, the page size is 53.
 # There are 793 pages in total.
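The arithmetic in the example comments can be reconstructed as follows. This is an assumed reading of how the workload is sized, inferred from the numbers above rather than taken from util.trigger_migration_workflow's source:

import math

def plan(total_records, pages_per_instance, instances):
    per_instance = math.ceil(total_records / instances)       # records per instance
    page_size = math.ceil(per_instance / pages_per_instance)  # records per page
    total_pages = math.ceil(total_records / page_size)        # pages needed to cover everything
    attempted = total_pages * page_size                       # records actually attempted
    return per_instance, page_size, total_pages, attempted

print(plan(42000, 5, 80))   # (525, 105, 400, 42000)
print(plan(42001, 5, 80))   # (526, 106, 397, 42082)
print(plan(42001, 10, 80))  # (526, 53, 793, 42029)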
@@ -31,7 +31,6 @@ def trigger_migration_workflow(
     cmds = []
     for ndx, page_set in enumerate(page_chunks):
         # gh workflow run historic-data-migrator-with-pagination.yml -f environment=preview -f year=2022 -f page_size=1 -f pages=1
-        print(f"# Instance {ndx + 1}")
         cmds.append(
             [
                 "gh",
@@ -203,4 +203,4 @@ def sort_by_field(records, sort_field):
"""
Sorts records by a specified field. The values of the field are converted to integers before sorting.
"""
return sorted(records, key=lambda record: int(getattr(record, sort_field)))
return sorted(records, key=lambda record: int(getattr(record, sort_field) or 0))
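A stand-alone illustration (using a namedtuple in place of the test suite's MockRecord) of why the `or 0` fallback matters: `int("")` raises ValueError, while `"" or 0` falls back to 0, and Python's stable sort then preserves the original order of records whose keys are identical, which is exactly what test_sorting_with_empty_field asserts:

from collections import namedtuple

Record = namedtuple("Record", ["other", "seq_number"])

def sort_by_field(records, sort_field):
    # Same expression as the patched line above: empty values fall back to 0.
    return sorted(records, key=lambda record: int(getattr(record, sort_field) or 0))

records = [Record("1", ""), Record("10", ""), Record("2", "")]
# All keys are 0, so the stable sort keeps the input order.
assert [r.other for r in sort_by_field(records, "seq_number")] == ["1", "10", "2"]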
