diff --git a/.gitignore b/.gitignore index 3755775edc..3bd252bacc 100644 --- a/.gitignore +++ b/.gitignore @@ -150,6 +150,8 @@ cython_debug/ .idea/ backend/.idea/ +backend/census_historical_migration/data/ + # Data loading artifacts backend/data_distro/data_to_load/run_logs/Results_* backend/data_distro/data_to_load/run_logs/Lines_* diff --git a/backend/audit/validators.py b/backend/audit/validators.py index 0ee855d918..d0a5c823d3 100644 --- a/backend/audit/validators.py +++ b/backend/audit/validators.py @@ -247,6 +247,7 @@ def validate_use_of_gsa_migration_keyword(general_information, is_data_migration if not is_data_migration and settings.GSA_MIGRATION in [ general_information.get("auditee_email", ""), general_information.get("auditor_email", ""), + general_information.get("auditee_uei", ""), general_information.get("ein", ""), general_information.get("auditor_ein", ""), ]: diff --git a/backend/census_historical_migration/sac_general_lib/general_information.py b/backend/census_historical_migration/sac_general_lib/general_information.py index 1377dc5c94..3d23187c14 100644 --- a/backend/census_historical_migration/sac_general_lib/general_information.py +++ b/backend/census_historical_migration/sac_general_lib/general_information.py @@ -18,7 +18,8 @@ from ..sac_general_lib.utils import ( create_json_from_db_object, ) - +from jsonschema import validate +from jsonschema.exceptions import ValidationError from ..change_record import InspectionRecord, CensusRecord, GsaFacRecord @@ -142,6 +143,32 @@ def xform_entity_type(phrase): ] +def is_uei_valid(uei): + try: + with open(f"{settings.OUTPUT_BASE_DIR}/UeiSchema.json") as schema: + schema_json = json.load(schema) + uei_schema = schema_json.get("properties")["uei"] + validate(instance=uei, schema=uei_schema) + return True + except FileNotFoundError: + raise DataMigrationError( + f"UeiSchema.json file not found in {settings.OUTPUT_BASE_DIR}", + "missing_uei_schema_json", + ) + except json.decoder.JSONDecodeError: + raise 
DataMigrationError( + "UeiSchema.json file contains invalid JSON.", "invalid_uei_schema_json" + ) + + except ValidationError: + return False + + except Exception as e: + raise DataMigrationError( + f"Error validating Auditee UEI: {e}", "cannot_valid_auditee_uei" + ) + + def xform_update_multiple_eins_flag(audit_header): """Updates the multiple_eins_covered flag. This updates does not propagate to the database, it only updates the object. @@ -339,6 +366,22 @@ def xform_replace_empty_auditee_email(general_information): return general_information +def xform_replace_empty_or_invalid_auditee_uei_with_gsa_migration(audit_header): + """Replaces empty or invalid auditee UEI with GSA Migration keyword""" + # Transformation recorded. + if not (audit_header.UEI and is_uei_valid(audit_header.UEI)): + track_transformations( + "UEI", + audit_header.UEI, + "auditee_uei", + settings.GSA_MIGRATION, + "xform_replace_empty_or_invalid_auditee_uei_with_gsa_migration", + ) + audit_header.UEI = settings.GSA_MIGRATION + + return audit_header + + def track_transformations( census_column, census_value, gsa_field, gsa_value, transformation_functions ): @@ -403,6 +446,7 @@ def general_information(audit_header): xform_update_multiple_eins_flag(audit_header) xform_update_multiple_ueis_flag(audit_header) xform_update_entity_type(audit_header) + xform_replace_empty_or_invalid_auditee_uei_with_gsa_migration(audit_header) general_information = create_json_from_db_object(audit_header, mappings) transformations = [ xform_auditee_fiscal_period_start, diff --git a/backend/census_historical_migration/test_federal_awards_xforms.py b/backend/census_historical_migration/test_federal_awards_xforms.py index f478dd50a4..ae22d83727 100644 --- a/backend/census_historical_migration/test_federal_awards_xforms.py +++ b/backend/census_historical_migration/test_federal_awards_xforms.py @@ -19,6 +19,7 @@ xform_populate_default_passthrough_amount, xform_populate_default_passthrough_names_ids, 
xform_replace_invalid_extension, + xform_program_name, xform_is_passthrough_award, is_valid_extension, ) @@ -585,3 +586,25 @@ def test_match_numbers_mixed_empty_and_non_empty(self): self.assertEqual(transformed_names, names) self.assertEqual(transformed_ids, expected_ids) + + +class TestXformMissingProgramName(SimpleTestCase): + class AuditMock: + def __init__(self, program_name): + self.FEDERALPROGRAMNAME = program_name + + def test_with_normal_program_name(self): + """Test for normal program name""" + audits = [self.AuditMock("Some fake name")] + + xform_program_name(audits) + + self.assertEqual(audits[0].FEDERALPROGRAMNAME, "Some fake name") + + def test_with_missing_program_name(self): + """Test for missing program name""" + audits = [self.AuditMock("")] + + xform_program_name(audits) + + self.assertEqual(audits[0].FEDERALPROGRAMNAME, settings.GSA_MIGRATION) diff --git a/backend/census_historical_migration/test_general_information_xforms.py b/backend/census_historical_migration/test_general_information_xforms.py index 5fac782274..9403f356f4 100644 --- a/backend/census_historical_migration/test_general_information_xforms.py +++ b/backend/census_historical_migration/test_general_information_xforms.py @@ -1,11 +1,13 @@ from datetime import datetime, timedelta -from unittest.mock import patch +import json +from unittest.mock import mock_open, patch from django.conf import settings from django.test import SimpleTestCase from .sac_general_lib.general_information import ( AUDIT_TYPE_DICT, PERIOD_DICT, + is_uei_valid, xform_audit_period_covered, xform_audit_type, xform_auditee_fiscal_period_end, @@ -14,6 +16,7 @@ xform_entity_type, xform_replace_empty_auditor_email, xform_replace_empty_auditee_email, + xform_replace_empty_or_invalid_auditee_uei_with_gsa_migration, xform_replace_empty_or_invalid_auditor_ein_with_gsa_migration, xform_replace_empty_or_invalid_auditee_ein_with_gsa_migration, ) @@ -266,6 +269,104 @@ def test_missing_auditee_email(self): 
self.assertEqual(xform_replace_empty_auditee_email(input_data), expected_output) +class TestXformReplaceEmptyOrInvalidUEIs(SimpleTestCase): + + class MockAuditHeader: + + def __init__(self, UEI): + self.UEI = UEI + + def setUp(self): + self.audit_header = self.MockAuditHeader("") + self.valid_uei = "ZQGGHJH74DW7" + self.invalid_uei = "123" + self.uei_schema = { + "oneOf": [ + { + "allOf": [ + {"maxLength": 12, "minLength": 12}, + {"pattern": "^[A-HJ-NP-Z1-9][A-HJ-NP-Z0-9]+$"}, + { + "pattern": "^(?![A-HJ-NP-Z1-9]+[A-HJ-NP-Z0-9]*?[0-9]{9})[A-HJ-NP-Z0-9]*$" + }, + {"pattern": "^(?![0-9]{9})"}, + ], + "type": "string", + }, + {"const": "GSA_MIGRATION", "type": "string"}, + ] + } + + def test_auditee_uei_valid(self): + """Test that valid auditee UEI is not replaced.""" + self.audit_header.UEI = "ZQGGHJH74DW7" + result = xform_replace_empty_or_invalid_auditee_uei_with_gsa_migration( + self.audit_header + ) + print(result) + self.assertEqual(result.UEI, "ZQGGHJH74DW7") + + def test_auditee_uei_invalid_replaced(self): + """Test that invalid auditee UEI is replaced.""" + self.audit_header.UEI = "invalid_uei" + with patch( + "census_historical_migration.sac_general_lib.general_information.track_transformations" + ) as mock_track: + result = xform_replace_empty_or_invalid_auditee_uei_with_gsa_migration( + self.audit_header + ) + mock_track.assert_called_once_with( + "UEI", + "invalid_uei", + "auditee_uei", + settings.GSA_MIGRATION, + "xform_replace_empty_or_invalid_auditee_uei_with_gsa_migration", + ) + self.assertEqual(result.UEI, settings.GSA_MIGRATION) + + def test_auditee_uei_empty_replaced(self): + """Test that empty auditee UEI is replaced.""" + self.audit_header.UEI = "auditee_uei" + with patch( + "census_historical_migration.sac_general_lib.general_information.track_transformations" + ) as mock_track: + result = xform_replace_empty_or_invalid_auditee_uei_with_gsa_migration( + self.audit_header + ) + mock_track.assert_called_once() + self.assertEqual(result.UEI, 
settings.GSA_MIGRATION) + + @patch("builtins.open", side_effect=FileNotFoundError) + @patch( + "census_historical_migration.sac_general_lib.general_information.settings.OUTPUT_BASE_DIR", + "some/dir", + ) + def test_missing_schema_file(self, mock_open): + with self.assertRaises(DataMigrationError) as context: + is_uei_valid(self.valid_uei) + self.assertIn( + "UeiSchema.json file not found in some/dir", str(context.exception) + ) + + @patch("builtins.open", new_callable=mock_open, read_data="invalid json") + @patch( + "json.load", + side_effect=json.decoder.JSONDecodeError( + "Expecting value", "line 1 column 1 (char 0)", 0 + ), + ) + @patch( + "census_historical_migration.sac_general_lib.general_information.settings.OUTPUT_BASE_DIR", + "some/dir", + ) + def test_invalid_json_schema_file(self, mock_json_load, mock_open): + with self.assertRaises(DataMigrationError) as context: + is_uei_valid(self.valid_uei) + self.assertIn( + "UeiSchema.json file contains invalid JSON", str(context.exception) + ) + + class TestXformReplaceEmptyOrInvalidEins(SimpleTestCase): def test_auditor_ein_valid(self): """Test that valid auditor EIN is not replaced.""" diff --git a/backend/census_historical_migration/workbooklib/federal_awards.py b/backend/census_historical_migration/workbooklib/federal_awards.py index 8caf608c6d..5bb7dd8694 100644 --- a/backend/census_historical_migration/workbooklib/federal_awards.py +++ b/backend/census_historical_migration/workbooklib/federal_awards.py @@ -286,6 +286,30 @@ def xform_constructs_cluster_names( return (cluster_names, other_cluster_names, state_cluster_names) +def xform_program_name(audits): + """Default missing program_name to GSA_MIGRATION""" + change_records = [] + is_empty_program_name_found = False + + for audit in audits: + program_name = string_to_string(audit.FEDERALPROGRAMNAME) + if not program_name: + track_transformations( + "FEDERALPROGRAMNAME", + audit.FEDERALPROGRAMNAME, + "federal_program_name", + settings.GSA_MIGRATION, + 
["xform_program_name"], + change_records, + ) + + is_empty_program_name_found = True + audit.FEDERALPROGRAMNAME = settings.GSA_MIGRATION + + if change_records and is_empty_program_name_found: + InspectionRecord.append_federal_awards_changes(change_records) + + def is_valid_prefix(prefix): """ Checks if the provided prefix is a valid CFDA prefix. @@ -535,6 +559,7 @@ def generate_federal_awards(audit_header, outfile): xform_missing_program_total(audits) xform_missing_findings_count(audits) xform_missing_amount_expended(audits) + xform_program_name(audits) xform_is_passthrough_award(audits) map_simple_columns(wb, mappings, audits)