Skip to content

Commit

Permalink
Search - Extend Filter Fields (#3440)
Browse files Browse the repository at this point in the history
* Getting search started.

* Direct funding search. Rewrite form cleaning. Collapse filters by default.

* Linting!

* Search on major program.

* Linting is for winners

* Major program to Y/N field.

* Linted!

* Minor tests, new field defaults

* Linting!

* If there aren't eight linting commits, did anything happen?

* Adds "Any findings" to the list.

The performance of the big OR does not seem to be a problem, but it
would be nice to do differently.

Not today's blocker, though.

* Cleanup, linting, comment removal.

* Fiend -> Friend. Not the same! Plus a comment.

* Whitespace is not okay, I guess

* map -> zip

* ALL -> all_findings

* apply distinct only to relevant cols

---------

Co-authored-by: Matt Jadud <[email protected]>
Co-authored-by: Tim Ballard <[email protected]>
  • Loading branch information
3 people authored Feb 28, 2024
1 parent 91aeb9b commit bb35a92
Show file tree
Hide file tree
Showing 14 changed files with 464 additions and 131 deletions.
118 changes: 115 additions & 3 deletions backend/dissemination/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,43 @@


class SearchForm(forms.Form):
AY_choices = (
# Multiple choice field mappings.
# Two parallel lists paired by position: "field_name" holds the values the
# form submits (the same names search_findings matches on) and
# "friendly_name" holds the corresponding labels. findings_choices is built by
# zipping these lists together, so they must stay the same length and in the
# same order; adding an entry to one without the other silently drops or
# mislabels a choice.
findings_field_mapping = {
"field_name": [
"all_findings",
"is_modified_opinion",
"is_other_findings",
"is_material_weakness",
"is_significant_deficiency",
"is_other_matters",
"is_questioned_costs",
"is_repeat_finding",
],
"friendly_name": [
"Any findings",
"Modified opinion",
"Other findings",
"Material weakness",
"Significant deficiency",
"Other matters",
"Questioned costs",
"Repeat finding",
],
}

# Multiple choice field Tuples. "choices" variable in field declaration.
AY_choices = (("all_years", "All years"),) + tuple(
(x, str(x)) for x in reversed(range(2016, 2024))
) # ((2016, "2016"), (2017, "2017"), ..., (2023, "2023"))
)
findings_choices = list(zip(*findings_field_mapping.values()))
direct_funding_choices = (
("direct_funding", "Direct funding"),
("passthrough_funding", "Passthrough funding"),
)
major_program_choices = (
(True, "Y"),
(False, "N"),
)

# Query params
entity_name = forms.CharField(required=False)
Expand All @@ -14,11 +48,89 @@ class SearchForm(forms.Form):
end_date = forms.DateField(required=False)
cog_or_oversight = forms.CharField(required=False)
agency_name = forms.CharField(required=False)
audit_year = forms.MultipleChoiceField(choices=AY_choices, required=False)
audit_year = forms.MultipleChoiceField(
choices=AY_choices, initial=[2023], required=False
)
findings = forms.MultipleChoiceField(choices=findings_choices, required=False)
direct_funding = forms.MultipleChoiceField(
choices=direct_funding_choices, required=False
)
major_program = forms.MultipleChoiceField(
choices=major_program_choices, required=False
)
auditee_state = forms.CharField(required=False)

# Display params
limit = forms.CharField(required=False)
page = forms.CharField(required=False)
order_by = forms.CharField(required=False)
order_direction = forms.CharField(required=False)

# Variables for cleaning functions
# Characters that clean_aln and clean_uei_or_ein treat as separators between
# entries: each one is replaced with a newline before the text is split.
text_input_delimiters = [
",",
":",
";",
"-",
" ",
]

def clean_aln(self):
    """
    Clean up the ALN field and return it as a list of search terms.

    Every character in ``text_input_delimiters`` is replaced with a newline,
    the text is split on newlines, and each piece is stripped of surrounding
    whitespace. Empty pieces are discarded, so input such as "10.123, 93.600"
    yields ["10.123", "93.600"] rather than ["10.123", "", "93.600"].

    Returns an empty list when the field is blank.
    """
    text_input = self.cleaned_data["aln"]
    for delimiter in self.text_input_delimiters:
        text_input = text_input.replace(delimiter, "\n")

    # Adjacent separators (", " or "; ") and trailing separators produce
    # empty segments after the split; they are not search terms, so drop them.
    stripped_terms = (segment.strip() for segment in text_input.splitlines())
    return [term for term in stripped_terms if term]

def clean_entity_name(self):
    """
    Clean the name field and return it as a list of names, one per line.

    Separators such as commas or hyphens may legitimately be part of a name,
    so the text is split on newlines only. Each line is stripped of
    surrounding whitespace, and blank lines are discarded so they do not
    become empty search terms.

    Returns an empty list when the field is blank.
    """
    text_input = self.cleaned_data["entity_name"]
    stripped_names = (line.strip() for line in text_input.splitlines())
    return [name for name in stripped_names if name]

def clean_uei_or_ein(self):
    """
    Clean up the UEI/EIN field and return it as a list of search terms.

    Every character in ``text_input_delimiters`` is replaced with a newline,
    the text is split on newlines, and each piece is stripped of surrounding
    whitespace. Empty pieces are discarded, so input such as "ABC123, DEF456"
    yields ["ABC123", "DEF456"] with no empty string in between.

    Returns an empty list when the field is blank.
    """
    # NOTE(review): "-" is one of the delimiters, so an EIN typed with a
    # hyphen is split into two separate terms. Confirm that is intended.
    text_input = self.cleaned_data["uei_or_ein"]
    for delimiter in self.text_input_delimiters:
        text_input = text_input.replace(delimiter, "\n")

    # Adjacent or trailing separators leave empty segments behind; an empty
    # string is not a meaningful identifier to search for, so drop them.
    stripped_terms = (segment.strip() for segment in text_input.splitlines())
    return [term for term in stripped_terms if term]

def clean_audit_year(self):
    """
    Return the selected audit years, or an empty list when "All years"
    (the "all_years" value) is among the selections.
    """
    selected_years = self.cleaned_data["audit_year"]
    return [] if "all_years" in selected_years else selected_years

def clean_major_program(self):
    """
    Return the major program selections, collapsed to ["any"] when the
    "any" value is one of them.
    """
    # NOTE(review): the major_program_choices declared on this form list only
    # True/False values, with no "any" option. Confirm whether the "any"
    # branch can still be reached.
    selections = self.cleaned_data["major_program"]
    if "any" not in selections:
        return selections
    return ["any"]

def clean_page(self):
    """
    Return the requested page number as an int, defaulting to one.

    A blank value, or a value that cannot be parsed as an integer, yields
    the default.
    """
    default_page = 1
    requested_page = self.cleaned_data["page"]
    try:
        return int(requested_page or default_page)
    except ValueError:
        # "page" is a free-text field, so it can hold anything (e.g. "abc").
        # A ValueError raised here is not a ValidationError and would escape
        # form validation as an unhandled error, so use the default instead.
        return default_page

def clean_limit(self):
    """
    Return the requested page size as an int, defaulting to 30.

    A blank value, or a value that cannot be parsed as an integer, yields
    the default.
    """
    default_limit = 30
    requested_limit = self.cleaned_data["limit"]
    try:
        return int(requested_limit or default_limit)
    except ValueError:
        # "limit" is a free-text field, so it can hold anything (e.g. "abc").
        # A ValueError raised here is not a ValidationError and would escape
        # form validation as an unhandled error, so use the default instead.
        return default_limit
Empty file.
13 changes: 10 additions & 3 deletions backend/dissemination/search.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import logging
import time
from .search_constants import ORDER_BY, DIRECTION, DAS_LIMIT
from .search_general import report_timing, search_general
from .search_alns import search_alns
from .searchlib.search_constants import ORDER_BY, DIRECTION, DAS_LIMIT
from .searchlib.search_general import report_timing, search_general
from .searchlib.search_alns import search_alns
from .searchlib.search_findings import search_findings
from .searchlib.search_direct_funding import search_direct_funding
from .searchlib.search_major_program import search_major_program

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -30,6 +33,10 @@ def search(params):
results = search_general(params)
results = _sort_results(results, params)
results = search_alns(results, params)
results = search_findings(results, params)
results = search_direct_funding(results, params)
results = search_major_program(results, params)
results = results.distinct("report_id", params.get("order_by", "fac_accepted_date"))

t1 = time.time()
report_timing("search", params, t0, t1)
Expand Down
Empty file.
File renamed without changes.
File renamed without changes.
28 changes: 28 additions & 0 deletions backend/dissemination/searchlib/search_direct_funding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from django.db.models import Q
import time
from .search_general import report_timing

import logging

logger = logging.getLogger(__name__)


def search_direct_funding(general_results, params):
    """
    Narrow ``general_results`` by the FederalAward 'is_direct' column.

    ``params["direct_funding"]`` is a list of selections. "direct_funding"
    matches awards with is_direct "Y" and "passthrough_funding" matches awards
    with is_direct "N"; the selected conditions are OR-ed together. Any other
    value is ignored, and with no recognised selection the results are
    returned unfiltered (an empty Q). The elapsed time is handed to
    report_timing.
    """
    started = time.time()
    combined = Q()

    for selection in params.get("direct_funding", []):
        if selection == "direct_funding":
            combined |= Q(federalaward__is_direct="Y")
        elif selection == "passthrough_funding":
            combined |= Q(federalaward__is_direct="N")
        # Anything else is not a known option and adds no condition.

    matching_results = general_results.filter(combined)

    finished = time.time()
    report_timing("search_direct_funding", params, started, finished)
    return matching_results
47 changes: 47 additions & 0 deletions backend/dissemination/searchlib/search_findings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from django.db.models import Q
import time
from .search_general import report_timing

import logging

logger = logging.getLogger(__name__)


def search_findings(general_results, params):
    """
    Narrow ``general_results`` to reports with the selected kinds of finding.

    ``params["findings"]`` is a list of selections. Each recognised selection
    adds a ``finding__<flag>="Y"`` condition, and all conditions are OR-ed
    together. "all_findings" adds the condition for every flag. Unrecognised
    values are ignored, and with no recognised selection the results are
    returned unfiltered (an empty Q). The elapsed time is handed to
    report_timing.
    """
    started = time.time()

    # Flag columns on Finding, in the order they are OR-ed for "all_findings".
    finding_flags = (
        "is_modified_opinion",
        "is_other_findings",
        "is_material_weakness",
        "is_significant_deficiency",
        "is_other_matters",
        "is_questioned_costs",
        "is_repeat_finding",
    )

    combined = Q()
    for selection in params.get("findings", []):
        if selection == "all_findings":
            # federalaward__findings_count__gt=0 would express the same thing,
            # but chaining ORs on the Finding table is faster than walking
            # the FederalAward table.
            flags_to_add = finding_flags
        elif selection in finding_flags:
            flags_to_add = (selection,)
        else:
            flags_to_add = ()

        for flag in flags_to_add:
            combined |= Q(**{f"finding__{flag}": "Y"})

    matching_results = general_results.filter(combined)

    finished = time.time()
    report_timing("search_findings", params, started, finished)
    return matching_results
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def search_general(params=None):
r_end_date = General.objects.filter(q_end_date)

##############
# Intersection
# Cog/Over
q_cogover = _get_cog_or_oversight_match_query(
params.get("agency_name", None), params.get("cog_or_oversight", None)
)
Expand Down
27 changes: 27 additions & 0 deletions backend/dissemination/searchlib/search_major_program.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from django.db.models import Q
import time
from .search_general import report_timing

import logging

logger = logging.getLogger(__name__)


def search_major_program(general_results, params):
    """
    Searches on FederalAward column 'is_major', which is stored as Y/N.

    ``params["major_program"]`` is a list of selections. A true selection
    filters on is_major "Y" and a false selection on is_major "N". Selections
    may be booleans (True/False) or their string forms ("True"/"False"), which
    is how a MultipleChoiceField delivers them in cleaned form data.

    If both a true and a false selection are present, only the "Y" filter is
    applied. With no recognised selection the results are returned
    unfiltered (an empty Q), but still made distinct. The elapsed time is
    handed to report_timing.
    """
    t0 = time.time()
    q = Q()
    major_program_fields = params.get("major_program", [])

    # Form-cleaned values arrive as strings, so a bare `True in ...` check
    # would never match them; accept both representations.
    wants_major = True in major_program_fields or "True" in major_program_fields
    wants_non_major = (
        False in major_program_fields or "False" in major_program_fields
    )

    if wants_major:
        q |= Q(federalaward__is_major="Y")
    elif wants_non_major:
        q |= Q(federalaward__is_major="N")

    filtered_general_results = general_results.filter(q).distinct()

    t1 = time.time()
    report_timing("search_major_program", params, t0, t1)
    return filtered_general_results
Loading

0 comments on commit bb35a92

Please sign in to comment.