diff --git a/backend/dissemination/forms.py b/backend/dissemination/forms.py index 2271cbe227..760e0cca0c 100644 --- a/backend/dissemination/forms.py +++ b/backend/dissemination/forms.py @@ -2,9 +2,43 @@ class SearchForm(forms.Form): - AY_choices = ( + # Multiple choice field mappings + findings_field_mapping = { + "field_name": [ + "all_findings", + "is_modified_opinion", + "is_other_findings", + "is_material_weakness", + "is_significant_deficiency", + "is_other_matters", + "is_questioned_costs", + "is_repeat_finding", + ], + "friendly_name": [ + "Any findings", + "Modified opinion", + "Other findings", + "Material weakness", + "Significant deficiency", + "Other matters", + "Questioned costs", + "Repeat finding", + ], + } + + # Multiple choice field Tuples. "choices" variable in field declaration. + AY_choices = (("all_years", "All years"),) + tuple( (x, str(x)) for x in reversed(range(2016, 2024)) - ) # ((2016, "2016"), (2017, "2017"), ..., (2023, "2023")) + ) + findings_choices = list(zip(*findings_field_mapping.values())) + direct_funding_choices = ( + ("direct_funding", "Direct funding"), + ("passthrough_funding", "Passthrough funding"), + ) + major_program_choices = ( + (True, "Y"), + (False, "N"), + ) # Query params entity_name = forms.CharField(required=False) @@ -14,7 +48,16 @@ class SearchForm(forms.Form): end_date = forms.DateField(required=False) cog_or_oversight = forms.CharField(required=False) agency_name = forms.CharField(required=False) - audit_year = forms.MultipleChoiceField(choices=AY_choices, required=False) + audit_year = forms.MultipleChoiceField( + choices=AY_choices, initial=[2023], required=False + ) + findings = forms.MultipleChoiceField(choices=findings_choices, required=False) + direct_funding = forms.MultipleChoiceField( + choices=direct_funding_choices, required=False + ) + major_program = forms.MultipleChoiceField( + choices=major_program_choices, required=False + ) auditee_state = forms.CharField(required=False) # Display params @@ -22,3 +65,72 @@ class SearchForm(forms.Form): page = forms.CharField(required=False) order_by = forms.CharField(required=False) order_direction = forms.CharField(required=False) + + # Variables for cleaning functions + text_input_delimiters = [ + ",", + ":", + ";", + "-", + " ", + ] + + def clean_aln(self): + """ + Clean up the ALN field. Replace common separators with a newline. + Split on the newlines. Strip all the resulting elements. + """ + text_input = self.cleaned_data["aln"] + for delimiter in self.text_input_delimiters: + text_input = text_input.replace(delimiter, "\n") + text_input = [x.strip() for x in text_input.splitlines()] + return text_input + + def clean_entity_name(self): + """ + Clean the name field. We can't trust that separators aren't a part of a name somewhere, + so just split on newlines. + """ + text_input = self.cleaned_data["entity_name"] + return text_input.splitlines() + + def clean_uei_or_ein(self): + """ + Clean up the UEI/EIN field. Replace common separators with a newline. + Split on the newlines. Strip all the resulting elements. + """ + text_input = self.cleaned_data["uei_or_ein"] + for delimiter in self.text_input_delimiters: + text_input = text_input.replace(delimiter, "\n") + text_input = [x.strip() for x in text_input.splitlines()] + return text_input + + def clean_audit_year(self): + """ + If "All years" is selected, don't include any years. + """ + audit_year = self.cleaned_data["audit_year"] + if "all_years" in audit_year: + return [] + return audit_year + + def clean_major_program(self): + """ + If 'Any' is selected, don't include the more specific fields. + """ + major_program = self.cleaned_data["major_program"] + if "any" in major_program: + return ["any"] + return major_program + + def clean_page(self): + """ + Default page number to one. + """ + return int(self.cleaned_data["page"] or 1) + + def clean_limit(self): + """ + Default page limit to 30. + """ + return int(self.cleaned_data["limit"] or 30) diff --git a/backend/dissemination/management/commands/populate_aln_table.py b/backend/dissemination/management/commands/populate_aln_table.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/dissemination/search.py b/backend/dissemination/search.py index 28ee9e7d60..1533e82185 100644 --- a/backend/dissemination/search.py +++ b/backend/dissemination/search.py @@ -1,8 +1,11 @@ import logging import time -from .search_constants import ORDER_BY, DIRECTION, DAS_LIMIT -from .search_general import report_timing, search_general -from .search_alns import search_alns +from .searchlib.search_constants import ORDER_BY, DIRECTION, DAS_LIMIT +from .searchlib.search_general import report_timing, search_general +from .searchlib.search_alns import search_alns +from .searchlib.search_findings import search_findings +from .searchlib.search_direct_funding import search_direct_funding +from .searchlib.search_major_program import search_major_program logger = logging.getLogger(__name__) @@ -30,6 +33,10 @@ def search(params): results = search_general(params) results = _sort_results(results, params) results = search_alns(results, params) + results = search_findings(results, params) + results = search_direct_funding(results, params) + results = search_major_program(results, params) + results = results.distinct("report_id", params.get("order_by", "fac_accepted_date")) t1 = time.time() report_timing("search", params, t0, t1) diff --git a/backend/dissemination/searchlib/__init__.py b/backend/dissemination/searchlib/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/dissemination/search_alns.py b/backend/dissemination/searchlib/search_alns.py similarity index 100% rename from backend/dissemination/search_alns.py rename to backend/dissemination/searchlib/search_alns.py diff --git a/backend/dissemination/search_constants.py b/backend/dissemination/searchlib/search_constants.py similarity index 100% rename from backend/dissemination/search_constants.py rename to backend/dissemination/searchlib/search_constants.py diff --git a/backend/dissemination/searchlib/search_direct_funding.py b/backend/dissemination/searchlib/search_direct_funding.py new file mode 100644 index 0000000000..f3fbb69744 --- /dev/null +++ b/backend/dissemination/searchlib/search_direct_funding.py @@ -0,0 +1,28 @@ +from django.db.models import Q +import time +from .search_general import report_timing + +import logging + +logger = logging.getLogger(__name__) + + +def search_direct_funding(general_results, params): + t0 = time.time() + q = Q() + direct_funding_fields = params.get("direct_funding", []) + + for field in direct_funding_fields: + match field: + case "direct_funding": + q |= Q(federalaward__is_direct="Y") + case "passthrough_funding": + q |= Q(federalaward__is_direct="N") + case _: + pass + + filtered_general_results = general_results.filter(q) + + t1 = time.time() + report_timing("search_direct_funding", params, t0, t1) + return filtered_general_results diff --git a/backend/dissemination/searchlib/search_findings.py b/backend/dissemination/searchlib/search_findings.py new file mode 100644 index 0000000000..6d0aab9fbd --- /dev/null +++ b/backend/dissemination/searchlib/search_findings.py @@ -0,0 +1,47 @@ +from django.db.models import Q +import time +from .search_general import report_timing + +import logging + +logger = logging.getLogger(__name__) + + +def search_findings(general_results, params): + t0 = time.time() + q = Q() + findings_fields = params.get("findings", []) + + for field in findings_fields: + match field: + case "all_findings": + # This can be achieved via federalaward__findings_count__gt=0, + # But, it's faster to chain ORs in the Finding table than it is to walk the FederalAward table. + q |= Q(finding__is_modified_opinion="Y") + q |= Q(finding__is_other_findings="Y") + q |= Q(finding__is_material_weakness="Y") + q |= Q(finding__is_significant_deficiency="Y") + q |= Q(finding__is_other_matters="Y") + q |= Q(finding__is_questioned_costs="Y") + q |= Q(finding__is_repeat_finding="Y") + case "is_modified_opinion": + q |= Q(finding__is_modified_opinion="Y") + case "is_other_findings": + q |= Q(finding__is_other_findings="Y") + case "is_material_weakness": + q |= Q(finding__is_material_weakness="Y") + case "is_significant_deficiency": + q |= Q(finding__is_significant_deficiency="Y") + case "is_other_matters": + q |= Q(finding__is_other_matters="Y") + case "is_questioned_costs": + q |= Q(finding__is_questioned_costs="Y") + case "is_repeat_finding": + q |= Q(finding__is_repeat_finding="Y") + case _: + pass + filtered_general_results = general_results.filter(q) + + t1 = time.time() + report_timing("search_findings", params, t0, t1) + return filtered_general_results diff --git a/backend/dissemination/search_general.py b/backend/dissemination/searchlib/search_general.py similarity index 99% rename from backend/dissemination/search_general.py rename to backend/dissemination/searchlib/search_general.py index d4aaa576cc..690f811150 100644 --- a/backend/dissemination/search_general.py +++ b/backend/dissemination/searchlib/search_general.py @@ -47,7 +47,7 @@ def search_general(params=None): r_end_date = General.objects.filter(q_end_date) ############## - # Intersection + # Cog/Over q_cogover = _get_cog_or_oversight_match_query( params.get("agency_name", None), params.get("cog_or_oversight", None) ) diff --git a/backend/dissemination/searchlib/search_major_program.py b/backend/dissemination/searchlib/search_major_program.py new file mode 100644 index 0000000000..e1caf4cf7b --- /dev/null +++ b/backend/dissemination/searchlib/search_major_program.py @@ -0,0 +1,27 @@ +from django.db.models import Q +import time +from .search_general import report_timing + +import logging + +logger = logging.getLogger(__name__) + + +def search_major_program(general_results, params): + """ + Searches on FederalAward columns 'is_major'. Comes in as True/False, to search on Y/N. + """ + t0 = time.time() + q = Q() + major_program_fields = params.get("major_program", []) + + if True in major_program_fields: + q |= Q(federalaward__is_major="Y") + elif False in major_program_fields: + q |= Q(federalaward__is_major="N") + + filtered_general_results = general_results.filter(q).distinct() + + t1 = time.time() + report_timing("search_major_program", params, t0, t1) + return filtered_general_results diff --git a/backend/dissemination/templates/search.html b/backend/dissemination/templates/search.html index 84159808c8..63c31561d2 100644 --- a/backend/dissemination/templates/search.html +++ b/backend/dissemination/templates/search.html @@ -33,7 +33,7 @@