Skip to content

Commit

Permalink
Merge pull request #3244 from GSA-TTS/main
Browse files Browse the repository at this point in the history
  • Loading branch information
jadudm authored Jan 20, 2024
2 parents 40828c7 + 09f2af2 commit dbe42c9
Show file tree
Hide file tree
Showing 20 changed files with 435 additions and 157 deletions.
2 changes: 2 additions & 0 deletions .github/ISSUE_TEMPLATE/offboarding.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ assignees: ''
- [ ] Make a PR to [remove the departing team member from the list of developers and managers](https://github.com/GSA-TTS/FAC/tree/main/terraform/meta/config.tf) with access to our spaces.
- [ ] [Remove the departing team member as a member of the FAC group in New Relic.](https://one.newrelic.com/admin-portal/organizations/users-list) (@GSA-TTS/fac-admins can do this)
- [ ] If they're leaving TTS altogether, also delete their account.
- [ ] Remove the user from any test accounts (e.g. the Google Group that is used for Cypress test accounts) if they are in that group.
- [ ] Remove the departing team member from the API Google Group (GG) if they are a member.


**For product leads/owners, also...**
Expand Down
50 changes: 50 additions & 0 deletions .github/workflows/failed-data-migration-reprocessor.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
---
# Manually-triggered workflow: re-runs the historic data migration for
# reports that previously failed with a specific error tag.
name: Failed data migration reprocessor
on:
  workflow_dispatch:
    inputs:
      environment:
        required: true
        type: choice
        description: The environment the workflow should run on.
        options:
          - dev
          - staging
          - preview
      year:
        required: true
        type: string
        description: Provide audit year.
      page_size:
        required: true
        type: string
        description: Number of audit reports by page.
      pages:
        required: true
        type: string
        description: Comma-separated list of pages.
      error_tag:
        required: true
        type: string
        description: Error tag associated with failed migrations.

jobs:
  historic-data-migrator:
    name: Generate and disseminate historic data in ${{ inputs.environment }} database
    runs-on: ubuntu-latest
    environment: ${{ inputs.environment }}
    env:
      space: ${{ inputs.environment }}

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Runs the reprocess_failed_migration Django management command as a
      # one-off Cloud Foundry task in the selected space.
      - name: Run Django command to generate and disseminate historic data in ${{ inputs.environment }}
        uses: cloud-gov/cg-cli-tools@main
        with:
          cf_username: ${{ secrets.CF_USERNAME }}
          cf_password: ${{ secrets.CF_PASSWORD }}
          cf_org: gsa-tts-oros-fac
          cf_space: ${{ env.space }}
          command: cf run-task gsa-fac -k 1G -m 1G --name failed_data_migration_reprocessor --command "python manage.py reprocess_failed_migration --year ${{ inputs.year }} --page_size ${{ inputs.page_size }} --pages ${{ inputs.pages }} --error_tag ${{ inputs.error_tag }}"
Binary file modified backend/audit/fixtures/basic.pdf
Binary file not shown.
10 changes: 6 additions & 4 deletions backend/audit/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,8 @@ def validate_single_audit_report_file_extension(file):

def validate_pdf_file_integrity(file):
"""Files must be readable PDFs"""
MIN_CHARARACTERS_IN_PDF = 6000

try:
reader = PdfReader(file)

Expand All @@ -538,13 +540,13 @@ def validate_pdf_file_integrity(file):
for page in reader.pages:
page_text = page.extract_text()
text_length += len(page_text)
# If we find any characters, we're content.
if text_length > 0:
# If we find enough characters, we're content.
if text_length >= MIN_CHARARACTERS_IN_PDF:
break

if text_length == 0:
if text_length < MIN_CHARARACTERS_IN_PDF:
raise ValidationError(
"We were unable to process the file you uploaded because it contains no readable text."
"We were unable to process the file you uploaded because it contains no readable text or too little text."
)

except ValidationError:
Expand Down
26 changes: 20 additions & 6 deletions backend/audit/views/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@
validate_secondary_auditors_json,
)

from dissemination.file_downloads import get_download_url, get_filename

logging.basicConfig(
format="%(asctime)s %(levelname)-8s %(module)s:%(lineno)d %(message)s"
)
Expand Down Expand Up @@ -204,11 +206,28 @@ def _save_audit_data(self, sac, form_section, audit_data):
setattr(sac, handler_info["field_name"], audit_data)
sac.save()

# this is marked as csrf_exempt to enable by-hand testing via tools like Postman. Should be removed when the frontend form is implemented!
@method_decorator(csrf_exempt)
def dispatch(self, *args, **kwargs):
return super(ExcelFileHandlerView, self).dispatch(*args, **kwargs)

def get(self, request, *args, **kwargs):
"""
Given a report ID and form section, redirect the caller to a download URL for the associated Excel file (if one exists)
"""
try:
report_id = kwargs["report_id"]
form_section = kwargs["form_section"]

sac = SingleAuditChecklist.objects.get(report_id=report_id)

filename = get_filename(sac, form_section)
download_url = get_download_url(filename)

return redirect(download_url)
except SingleAuditChecklist.DoesNotExist as err:
logger.warning("no SingleAuditChecklist found with report ID %s", report_id)
raise PermissionDenied() from err

def post(self, request, *_args, **kwargs):
"""
Handle Excel file upload:
Expand Down Expand Up @@ -270,11 +289,6 @@ def post(self, request, *_args, **kwargs):
class SingleAuditReportFileHandlerView(
SingleAuditChecklistAccessRequiredMixin, generic.View
):
# this is marked as csrf_exempt to enable by-hand testing via tools like Postman. Should be removed when the frontend form is implemented!
@method_decorator(csrf_exempt)
def dispatch(self, *args, **kwargs):
return super(SingleAuditReportFileHandlerView, self).dispatch(*args, **kwargs)

def post(self, request, *args, **kwargs):
try:
report_id = kwargs["report_id"]
Expand Down
62 changes: 25 additions & 37 deletions backend/census_historical_migration/end_to_end_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,44 +113,32 @@ def run_end_to_end(user, audit_header):
InspectionRecord.reset()
try:
sac = setup_sac(user, audit_header)
builder_loader = workbook_builder_loader(user, sac, audit_header)

if sac.general_information["audit_type"] == "alternative-compliance-engagement":
logger.info(
f"Skipping ACE audit: {audit_header.DBKEY} {audit_header.AUDITYEAR}"
)
raise DataMigrationError(
"Skipping ACE audit",
"skip_ace_audit",
)
else:
builder_loader = workbook_builder_loader(user, sac, audit_header)

for section, fun in sections_to_handlers.items():
builder_loader(fun, section)

record_dummy_pdf_object(sac, user)

errors = sac.validate_cross()

if errors.get("errors"):
raise CrossValidationError(
f"{errors.get('errors')}", "cross_validation"
)

step_through_certifications(sac, audit_header)

disseminate(sac)

MigrationResult.append_success(f"{sac.report_id} created")
record_migration_status(
audit_header.AUDITYEAR,
audit_header.DBKEY,
)
record_migration_transformations(
audit_header.AUDITYEAR,
audit_header.DBKEY,
sac.report_id,
)
for section, fun in sections_to_handlers.items():
builder_loader(fun, section)

record_dummy_pdf_object(sac, user)

errors = sac.validate_cross()

if errors.get("errors"):
raise CrossValidationError(f"{errors.get('errors')}", "cross_validation")

step_through_certifications(sac, audit_header)

disseminate(sac)

MigrationResult.append_success(f"{sac.report_id} created")
record_migration_status(
audit_header.AUDITYEAR,
audit_header.DBKEY,
)
record_migration_transformations(
audit_header.AUDITYEAR,
audit_header.DBKEY,
sac.report_id,
)
except Exception as exc:
handle_exception(exc, audit_header)

Expand Down
31 changes: 18 additions & 13 deletions backend/census_historical_migration/historic_data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,26 @@ def load_historic_data_for_year(audit_year, page_size, pages):
logger.info(
f"Processing page {page_number} with {page.object_list.count()} submissions."
)
total_count, error_count = perform_migration(
user, page.object_list, total_count, error_count
)
log_results(error_count, total_count)

for submission in page.object_list:
# Migrate a single submission
run_end_to_end(user, submission)

MigrationResult.append_summary(submission.AUDITYEAR, submission.DBKEY)

total_count += 1

if MigrationResult.has_errors():
error_count += 1
if total_count % 5 == 0:
logger.info(f"Processed = {total_count}, Errors = {error_count}")

log_results(error_count, total_count)
def perform_migration(user, submissions, round_count, total_error_count):
    """Migrate each submission end to end, extending the running counts.

    Starts from the caller-supplied running totals (round_count /
    total_error_count) and returns the updated (total, errors) pair.
    """
    processed = round_count
    failures = total_error_count

    for audit_header in submissions:
        # Migrate one submission end to end.
        run_end_to_end(user, audit_header)
        processed += 1
        if MigrationResult.has_errors():
            failures += 1
        # Emit a progress line every fifth submission.
        if processed % 5 == 0:
            logger.info(f"Processed = {processed}, Errors = {failures}")
        MigrationResult.append_summary(audit_header.AUDITYEAR, audit_header.DBKEY)

    return processed, failures


def log_results(error_count, total_count):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from census_historical_migration.process_failed_migration import (
reprocess_failed_reports,
)
from census_historical_migration.sac_general_lib.utils import (
normalize_year_string_or_exit,
)

from django.core.management.base import BaseCommand

import logging
import sys


logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)


class Command(BaseCommand):
    # NOTE: the workflow invokes this as `manage.py reprocess_failed_migration`
    # (see failed-data-migration-reprocessor.yml); the help text previously
    # named the wrong command (`run_migration`).
    help = """
        Reprocess failed migration reports for a given year and error tag using pagination
        Usage:
        manage.py reprocess_failed_migration
            --year <audit year>
            --page_size <page size>
            --pages <comma separated pages>
            --error_tag <error tag>
    """

    def add_arguments(self, parser):
        parser.add_argument("--year", help="Audit Year")
        parser.add_argument("--page_size", help="Number of records by page", type=int)
        parser.add_argument("--pages", help="comma separated pages", type=str)
        parser.add_argument("--error_tag", help="error tag", type=str)

    def handle(self, *args, **options):
        """Validate the CLI options and kick off the reprocessing run.

        Exits with a non-zero status when --pages is missing or contains a
        non-integer value.
        """
        year = normalize_year_string_or_exit(options.get("year"))

        pages_str = options["pages"]
        try:
            # Accept a comma-separated list such as "1,2,3". AttributeError
            # covers a missing --pages flag (pages_str is None), which the
            # original code let escape as an unhandled exception.
            pages = [int(page) for page in pages_str.split(",")]
        except (ValueError, AttributeError):
            logger.error(f"Found a non-integer in pages '{pages_str}'")
            sys.exit(-1)

        reprocess_failed_reports(
            year, options["page_size"], pages, options["error_tag"]
        )
56 changes: 56 additions & 0 deletions backend/census_historical_migration/process_failed_migration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import logging

from .historic_data_loader import (
create_or_get_user,
log_results,
perform_migration,
)
from .models import ELECAUDITHEADER as AuditHeader, ReportMigrationStatus

from django.contrib.auth import get_user_model
from django.core.paginator import Paginator

logger = logging.getLogger(__name__)

User = get_user_model()


def reprocess_failed_reports(audit_year, page_size, pages, error_tag):
    """Reprocess failed migrations for the given audit year and error tag.

    Looks up ReportMigrationStatus rows marked FAILURE with the given error
    tag, pages through them with the requested page size, and re-runs the
    migration for each requested page number. Pages outside the valid range
    are skipped with a log message.
    """
    total_count = error_count = 0
    user = create_or_get_user()
    failed_migrations = (
        ReportMigrationStatus.objects.filter(
            audit_year=audit_year,
            migration_status="FAILURE",
            migrationerrordetail__tag=error_tag,
        )
        .order_by("id")
        .distinct()
    )

    paginator = Paginator(failed_migrations, page_size)

    logger.info(
        f"{failed_migrations.count()} reports have failed migration with error tag {error_tag}"
    )

    for page_number in pages:
        # Django Paginator pages are 1-indexed and run through num_pages
        # inclusive; the previous `<` comparison wrongly skipped the final
        # page. Also guard against page 0 / negative page numbers, which
        # Paginator.page() would reject.
        if 1 <= page_number <= paginator.num_pages:
            page = paginator.page(page_number)
            if page.object_list.count() > 0:
                dbkey_list = [status.dbkey for status in page.object_list]

                submissions = AuditHeader.objects.filter(
                    DBKEY__in=dbkey_list, AUDITYEAR=audit_year
                )
                logger.info(
                    f"Processing page {page_number} with {submissions.count() if submissions else 0} submissions."
                )
                total_count, error_count = perform_migration(
                    user, submissions, total_count, error_count
                )
        else:
            logger.info(f"Skipping page {page_number} as it is out of range")

    log_results(error_count, total_count)
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,13 @@ def xform_audit_type(general_information):
# Transformation recorded.
if general_information.get("audit_type"):
value_in_db = general_information["audit_type"]
general_information["audit_type"] = _census_audit_type(value_in_db.upper())
audit_type = _census_audit_type(value_in_db.upper())
if audit_type == AUDIT_TYPE_DICT["A"]:
raise DataMigrationError(
"Skipping ACE audit",
"skip_ace_audit",
)
general_information["audit_type"] = audit_type
track_transformations(
"AUDITTYPE",
value_in_db,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,16 +199,23 @@ def test_missing_period(self):
class TestXformAuditType(SimpleTestCase):
def test_valid_audit_type(self):
for key, value in AUDIT_TYPE_DICT.items():
with self.subTest(key=key):
general_information = {"audit_type": key}
result = xform_audit_type(general_information)
self.assertEqual(result["audit_type"], value)
if value != "alternative-compliance-engagement":
with self.subTest(key=key):
general_information = {"audit_type": key}
result = xform_audit_type(general_information)
self.assertEqual(result["audit_type"], value)

def test_invalid_audit_type(self):
general_information = {"audit_type": "invalid_key"}
with self.assertRaises(DataMigrationError):
xform_audit_type(general_information)

def test_ace_audit_type(self):
# audit type "alternative-compliance-engagement" is not supported at this time.
general_information = {"audit_type": AUDIT_TYPE_DICT["A"]}
with self.assertRaises(DataMigrationError):
xform_audit_type(general_information)

def test_missing_audit_type(self):
general_information = {}
with self.assertRaises(DataMigrationError):
Expand Down
Loading

0 comments on commit dbe42c9

Please sign in to comment.