Skip to content

Commit

Permalink
Merge pull request #44 from cfpb/features/33_add_validation_id
Browse files Browse the repository at this point in the history
Task 33, adding validation id
  • Loading branch information
nargis-sultani authored Sep 12, 2023
2 parents ade2867 + 45ced7d commit 2eaf1ae
Show file tree
Hide file tree
Showing 4 changed files with 228 additions and 18 deletions.
27 changes: 27 additions & 0 deletions src/tests/test_checks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import pytest

from validator.checks import SBLCheck


class TestSBLCheck:
    """Constructor validation tests for `SBLCheck`.

    `SBLCheck` must refuse to be constructed unless both a `name` and an
    `id` are supplied, and must construct cleanly when both are given.
    """

    def test_no_id_check(self):
        """Omitting `id` raises a ValueError carrying the expected message."""
        # Expect ValueError specifically rather than the broad Exception so
        # an unrelated failure surfaces as itself instead of being captured.
        with pytest.raises(ValueError) as exc:
            SBLCheck(lambda: True, warning=True, name="Just a Warning")

        assert "Each check must be assigned a `name` and an `id`." in str(exc.value)
        # Guard against a ValueError subclass slipping through.
        assert exc.type is ValueError

    def test_no_name_check(self):
        """Omitting `name` raises a ValueError carrying the expected message."""
        with pytest.raises(ValueError) as exc:
            SBLCheck(lambda: True, id="00000", warning=True)

        assert "Each check must be assigned a `name` and an `id`." in str(exc.value)
        assert exc.type is ValueError

    def test_name_and_id_check(self):
        """Supplying both `name` and `id` constructs the check without error."""
        # No try/except flag needed: any exception raised by the constructor
        # propagates and fails the test with the real traceback.
        check = SBLCheck(
            lambda: True, id="00000", warning=True, name="Just a Warning"
        )
        assert isinstance(check, SBLCheck)
27 changes: 13 additions & 14 deletions src/validator/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
name="Just a Warning"
)
error_check_implied = SBLCheck(lambda: Truename="Error Check")
error_check_implied = SBLCheck(lambda: True, name="Error Check")
error_check_explicit = SBLCheck(
lambda: True,
Expand All @@ -32,31 +32,37 @@


class SBLCheck(Check):
"""A custom Pandera.Check subclasss that requires a `name` be
"""A custom Pandera.Check subclasss that requires a `name` and an `id` be
specified. Additionally, an attribute named `warning` is added to
the class to enable distinction between warnings and errors. The
default value of warning is `False` which corresponds to an error.
Don't use this class directly. Make use of the SBLErrorCheck and
SBLWarningCheck subclasses below."""

def __init__(self, check_fn: Callable, warning=False, *args, **kwargs):
"""Custom init method that verifies the presence of `name` in
def __init__(
self, check_fn: Callable, id: str = None, warning=False, *args, **kwargs
):
"""Custom init method that verifies the presence of `name` and `id` in
kwargs and creates a custom class attribute called `warning`. All
other initialization is handled by the parent Check class.
Args:
check_fn (Callable): A function which evaluates the validity
of the column(s) being tested.
id (str, required): Each check must have an id.
warning (bool, optional): Boolean specifying whether to
treat the check as a warning rather than an error.
Raises:
ValueError: Raised if `name` not supplied in kwargs.
ValueError: Raised if `name` is not supplied in kwargs or if `id` is not
supplied or is None.
"""

if "name" not in kwargs:
raise ValueError("Each check must be assigned a `name`.")
self.id = id

if "name" not in kwargs or id is None:
raise ValueError("Each check must be assigned a `name` and an `id`.")

# if warning==False treat check as an error check
self.warning = warning
Expand All @@ -67,10 +73,3 @@ def __init__(self, check_fn: Callable, warning=False, *args, **kwargs):
def get_backend(cls, check_obj: Any) -> Type[BaseCheckBackend]:
"""Assume Pandas DataFrame and return PandasCheckBackend"""
return PandasCheckBackend


if __name__ == "__main__":
warning_check = SBLCheck(lambda: True, warning=True, name="Just a Warning")

error_check_implied = SBLCheck(lambda: True, name="Error Check")
error_check_explicit = SBLCheck(lambda: True, warning=False, name="Also an Error")
8 changes: 6 additions & 2 deletions src/validator/create_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,24 @@ def print_schema_errors(errors: SchemaErrors, phase: str):
for error in errors.schema_errors:
# Name of the column in the dataframe being checked
schema_error = error["error"]
column_name = schema_error.schema.name
check_id = "n/a"

# built in checks such as unique=True are different than custom
# checks unfortunately so the name needs to be accessed differently
try:
check_name = schema_error.check.name
check_id = schema_error.check.id
# This will either be a boolean series or a single bool
check_output = schema_error.check_output
except AttributeError:
check_name = schema_error.check
# this is just a string that we'd need to parse manually
check_output = schema_error.args[0]

print(f"{phase} Validation `{check_name}` failed for column `{column_name}`")
# Both adjacent literals must be f-strings: implicit concatenation does not
# carry the `f` prefix over, so without it `{column_name}` would be printed
# literally instead of being replaced by the column name.
print(
    f"{phase} Validation `{check_name}` with id: `{check_id}` "
    f"failed for column `{column_name}`"
)
print(check_output)
print("")

Expand Down
Loading

0 comments on commit 2eaf1ae

Please sign in to comment.