From 581ca834ccde64266b3ec7516fcba460f3c0fcfb Mon Sep 17 00:00:00 2001 From: "Hassan D. M. Sambo" Date: Fri, 26 Apr 2024 14:30:05 -0400 Subject: [PATCH] 3701 address errors caused by bad or missing EINs (#3726) * #3701 Schema change to allow GSA_MIGRATION as EIN in general information section * #3701 Updated validator to ensure that gsa_migration is only allowed as ein for historical records * #3701 Code change to replace empty or invalid eins with gsa_migration keyword * #3701 Added test cases * #3701 Fix for failing test --- backend/audit/test_schemas.py | 2 +- backend/audit/validators.py | 2 + .../sac_general_lib/general_information.py | 45 ++++++++++++ .../test_general_information_xforms.py | 69 +++++++++++++++++++ backend/config/settings.py | 1 + .../sections/GeneralInformation.schema.json | 24 +++++-- .../GeneralInformationRequired.schema.json | 24 +++++-- backend/schemas/source/base/Base.libsonnet | 1 + .../GeneralInformation.schema.jsonnet | 18 ++++- 9 files changed, 175 insertions(+), 11 deletions(-) diff --git a/backend/audit/test_schemas.py b/backend/audit/test_schemas.py index 8cd1bdfeb8..f3592b8d46 100644 --- a/backend/audit/test_schemas.py +++ b/backend/audit/test_schemas.py @@ -129,7 +129,7 @@ def test_invalid_ein(self): with self.assertRaisesRegex( exceptions.ValidationError, - "does not match", + "is not valid under any of the given schemas", msg=f"ValidationError not raised with EIN = {bad_ein}", ): validate(instance, schema) diff --git a/backend/audit/validators.py b/backend/audit/validators.py index cf971374f3..0ee855d918 100644 --- a/backend/audit/validators.py +++ b/backend/audit/validators.py @@ -247,6 +247,8 @@ def validate_use_of_gsa_migration_keyword(general_information, is_data_migration if not is_data_migration and settings.GSA_MIGRATION in [ general_information.get("auditee_email", ""), general_information.get("auditor_email", ""), + general_information.get("ein", ""), + general_information.get("auditor_ein", ""), ]: raise ValidationError( _(f"{settings.GSA_MIGRATION} not permitted outside of migrations"), diff --git a/backend/census_historical_migration/sac_general_lib/general_information.py b/backend/census_historical_migration/sac_general_lib/general_information.py index dbf38b72f5..318865bc9b 100644 --- a/backend/census_historical_migration/sac_general_lib/general_information.py +++ b/backend/census_historical_migration/sac_general_lib/general_information.py @@ -336,6 +336,49 @@ def track_transformations( ) +def xform_replace_empty_or_invalid_auditor_ein_with_gsa_migration(general_information): + """Replaces empty or invalid auditor EIN with GSA Migration keyword""" + # Transformation recorded. + if not ( + general_information.get("auditor_ein") + and re.match( + settings.EMPLOYER_IDENTIFICATION_NUMBER, + general_information.get("auditor_ein"), + ) + ): + track_transformations( + "AUDITOR_EIN", + general_information.get("auditor_ein"), + "auditor_ein", + settings.GSA_MIGRATION, + "xform_replace_empty_or_invalid_auditor_ein_with_gsa_migration", + ) + general_information["auditor_ein"] = settings.GSA_MIGRATION + + return general_information + + +def xform_replace_empty_or_invalid_auditee_ein_with_gsa_migration(general_information): + """Replaces empty or invalid auditee EIN with GSA Migration keyword""" + # Transformation recorded. + if not ( + general_information.get("ein") + and re.match( + settings.EMPLOYER_IDENTIFICATION_NUMBER, general_information.get("ein") + ) + ): + track_transformations( + "EIN", + general_information.get("ein"), + "auditee_ein", + settings.GSA_MIGRATION, + "xform_replace_empty_or_invalid_auditee_ein_with_gsa_migration", + ) + general_information["ein"] = settings.GSA_MIGRATION + + return general_information + + def general_information(audit_header): """Generates general information JSON.""" xform_update_multiple_eins_flag(audit_header) @@ -349,6 +392,8 @@ def general_information(audit_header): xform_audit_type, xform_replace_empty_auditor_email, xform_replace_empty_auditee_email, + xform_replace_empty_or_invalid_auditor_ein_with_gsa_migration, + xform_replace_empty_or_invalid_auditee_ein_with_gsa_migration, ] for transform in transformations: diff --git a/backend/census_historical_migration/test_general_information_xforms.py b/backend/census_historical_migration/test_general_information_xforms.py index 4ffb49a4d9..5fac782274 100644 --- a/backend/census_historical_migration/test_general_information_xforms.py +++ b/backend/census_historical_migration/test_general_information_xforms.py @@ -1,4 +1,5 @@ from datetime import datetime, timedelta +from unittest.mock import patch from django.conf import settings from django.test import SimpleTestCase @@ -13,6 +14,8 @@ xform_entity_type, xform_replace_empty_auditor_email, xform_replace_empty_auditee_email, + xform_replace_empty_or_invalid_auditor_ein_with_gsa_migration, + xform_replace_empty_or_invalid_auditee_ein_with_gsa_migration, ) from .exception_utils import ( DataMigrationError, @@ -261,3 +264,69 @@ def test_missing_auditee_email(self): input_data = {} expected_output = {"auditee_email": settings.GSA_MIGRATION} self.assertEqual(xform_replace_empty_auditee_email(input_data), expected_output) + + +class TestXformReplaceEmptyOrInvalidEins(SimpleTestCase): + def test_auditor_ein_valid(self): + """Test that valid auditor EIN is not replaced.""" + info = {"auditor_ein": "123456789"} + result = xform_replace_empty_or_invalid_auditor_ein_with_gsa_migration(info) + self.assertEqual(result["auditor_ein"], "123456789") + + def test_auditor_ein_invalid_replaced(self): + """Test that invalid auditor EIN is replaced.""" + info = {"auditor_ein": "invalid_ein"} + with patch( + "census_historical_migration.sac_general_lib.general_information.track_transformations" + ) as mock_track: + result = xform_replace_empty_or_invalid_auditor_ein_with_gsa_migration(info) + mock_track.assert_called_once_with( + "AUDITOR_EIN", + "invalid_ein", + "auditor_ein", + settings.GSA_MIGRATION, + "xform_replace_empty_or_invalid_auditor_ein_with_gsa_migration", + ) + self.assertEqual(result["auditor_ein"], settings.GSA_MIGRATION) + + def test_auditor_ein_empty_replaced(self): + """Test that empty auditor EIN is replaced.""" + info = {"auditor_ein": ""} + with patch( + "census_historical_migration.sac_general_lib.general_information.track_transformations" + ) as mock_track: + result = xform_replace_empty_or_invalid_auditor_ein_with_gsa_migration(info) + mock_track.assert_called_once() + self.assertEqual(result["auditor_ein"], settings.GSA_MIGRATION) + + def test_auditee_ein_valid(self): + """Test that valid auditee EIN is not replaced.""" + info = {"ein": "123456789"} + result = xform_replace_empty_or_invalid_auditee_ein_with_gsa_migration(info) + self.assertEqual(result["ein"], "123456789") + + def test_auditee_ein_invalid_replaced(self): + """Test that invalid auditee EIN is replaced.""" + info = {"ein": "invalid_ein"} + with patch( + "census_historical_migration.sac_general_lib.general_information.track_transformations" + ) as mock_track: + result = xform_replace_empty_or_invalid_auditee_ein_with_gsa_migration(info) + mock_track.assert_called_once_with( + "EIN", + "invalid_ein", + "auditee_ein", + settings.GSA_MIGRATION, + "xform_replace_empty_or_invalid_auditee_ein_with_gsa_migration", + ) + self.assertEqual(result["ein"], settings.GSA_MIGRATION) + + def test_auditee_ein_empty_replaced(self): + """Test that empty auditee EIN is replaced.""" + info = {"ein": ""} + with patch( + "census_historical_migration.sac_general_lib.general_information.track_transformations" + ) as mock_track: + result = xform_replace_empty_or_invalid_auditee_ein_with_gsa_migration(info) + mock_track.assert_called_once() + self.assertEqual(result["ein"], settings.GSA_MIGRATION) diff --git a/backend/config/settings.py b/backend/config/settings.py index 59890ae10e..e3def504fa 100644 --- a/backend/config/settings.py +++ b/backend/config/settings.py @@ -546,6 +546,7 @@ REGEX_RD_EXTENSION = r"^RD[0-9]?$" REGEX_THREE_DIGIT_EXTENSION = r"^[0-9]{3}[A-Za-z]{0,1}$" REGEX_U_EXTENSION = r"^U[0-9]{2}$" +EMPLOYER_IDENTIFICATION_NUMBER = r"^[0-9]{9}$" GSA_MIGRATION = "GSA_MIGRATION" # There is a copy of `GSA_MIGRATION` in Base.libsonnet. If you change it here, change it there too. GSA_MIGRATION_INT = -999999999 # A copy of theses constants exists in schema/source/base/Base.libsonnet diff --git a/backend/schemas/output/sections/GeneralInformation.schema.json b/backend/schemas/output/sections/GeneralInformation.schema.json index d7406b4616..efd51e6c6b 100644 --- a/backend/schemas/output/sections/GeneralInformation.schema.json +++ b/backend/schemas/output/sections/GeneralInformation.schema.json @@ -193,8 +193,16 @@ "type": "string" }, "auditor_ein": { - "pattern": "^[0-9]{9}$", - "type": "string" + "oneOf": [ + { + "pattern": "^[0-9]{9}$", + "type": "string" + }, + { + "const": "GSA_MIGRATION", + "type": "string" + } + ] }, "auditor_ein_not_an_ssn_attestation": { "type": "boolean" @@ -294,8 +302,16 @@ "type": "string" }, "ein": { - "pattern": "^[0-9]{9}$", - "type": "string" + "oneOf": [ + { + "pattern": "^[0-9]{9}$", + "type": "string" + }, + { + "const": "GSA_MIGRATION", + "type": "string" + } + ] }, "ein_not_an_ssn_attestation": { "type": "boolean" diff --git a/backend/schemas/output/sections/GeneralInformationRequired.schema.json b/backend/schemas/output/sections/GeneralInformationRequired.schema.json index 9f1021cab8..e855855c76 100644 --- a/backend/schemas/output/sections/GeneralInformationRequired.schema.json +++ b/backend/schemas/output/sections/GeneralInformationRequired.schema.json @@ -303,8 +303,16 @@ "type": "string" }, "auditor_ein": { - "pattern": "^[0-9]{9}$", - "type": "string" + "oneOf": [ + { + "pattern": "^[0-9]{9}$", + "type": "string" + }, + { + "const": "GSA_MIGRATION", + "type": "string" + } + ] }, "auditor_ein_not_an_ssn_attestation": { "type": "boolean" @@ -404,8 +412,16 @@ "type": "string" }, "ein": { - "pattern": "^[0-9]{9}$", - "type": "string" + "oneOf": [ + { + "pattern": "^[0-9]{9}$", + "type": "string" + }, + { + "const": "GSA_MIGRATION", + "type": "string" + } + ] }, "ein_not_an_ssn_attestation": { "type": "boolean" diff --git a/backend/schemas/source/base/Base.libsonnet b/backend/schemas/source/base/Base.libsonnet index 98e8f44763..cfe79ccbb9 100644 --- a/backend/schemas/source/base/Base.libsonnet +++ b/backend/schemas/source/base/Base.libsonnet @@ -316,6 +316,7 @@ local Compound = { maxLength: 500, }, EmployerIdentificationNumber: Types.string { + # A python version of these regexes also exists in settings.py pattern: '^[0-9]{9}$', }, UniqueEntityIdentifier: { diff --git a/backend/schemas/source/sections/GeneralInformation.schema.jsonnet b/backend/schemas/source/sections/GeneralInformation.schema.jsonnet index 5e3cd9bacf..41c9f96514 100644 --- a/backend/schemas/source/sections/GeneralInformation.schema.jsonnet +++ b/backend/schemas/source/sections/GeneralInformation.schema.jsonnet @@ -24,7 +24,14 @@ Typechecks fields, but allows for empty data as well. Contains conditional check // Auditee information auditee_uei: Base.Compound.UniqueEntityIdentifier, - ein: Base.Compound.EmployerIdentificationNumber, + ein: { + oneOf: [ + Base.Compound.EmployerIdentificationNumber, + Types.string { + const: Base.Const.GSA_MIGRATION, + }, + ], + }, ein_not_an_ssn_attestation: Types.boolean, auditee_name: Types.string { maxLength: 100, @@ -58,7 +65,14 @@ Typechecks fields, but allows for empty data as well. Contains conditional check }, // Auditor information - auditor_ein: Base.Compound.EmployerIdentificationNumber, + auditor_ein: { + oneOf: [ + Base.Compound.EmployerIdentificationNumber, + Types.string { + const: Base.Const.GSA_MIGRATION, + }, + ], + }, auditor_ein_not_an_ssn_attestation: Types.boolean, auditor_firm_name: Types.string { maxLength: 100,