From 057f9ad5c41a16847f072a7eff1a8be0a7de54d5 Mon Sep 17 00:00:00 2001 From: Rajat Venkatesh Date: Thu, 25 Nov 2021 21:47:22 +0530 Subject: [PATCH] fix: Fix athena connection and scanning logic. Update dbcat to 0.10.6 which has part of the fixes. Fix cli and API to make access and secret keys optional Fix #153 Fix #155 --- piicatcher/__init__.py | 2 +- piicatcher/api.py | 4 ++-- piicatcher/cli.py | 4 ++-- poetry.lock | 6 +++--- pyproject.toml | 2 +- tests/test_api.py | 14 ++++++++++++++ tests/test_cli.py | 13 +++++++++++++ 7 files changed, 36 insertions(+), 9 deletions(-) diff --git a/piicatcher/__init__.py b/piicatcher/__init__.py index c651401..057fd34 100644 --- a/piicatcher/__init__.py +++ b/piicatcher/__init__.py @@ -1,2 +1,2 @@ # flake8: noqa -__version__ = "0.17.3" +__version__ = "0.17.4" diff --git a/piicatcher/api.py b/piicatcher/api.py index 521f7f5..c08664e 100644 --- a/piicatcher/api.py +++ b/piicatcher/api.py @@ -375,10 +375,10 @@ def scan_snowflake( def scan_athena( catalog_params: Dict[str, Any], name: str, - aws_access_key_id: str, - aws_secret_access_key: str, region_name: str, s3_staging_dir: str, + aws_access_key_id: Optional[str] = None, + aws_secret_access_key: Optional[str] = None, scan_type: ScanTypeEnum = ScanTypeEnum.shallow, incremental: bool = True, output_format: OutputFormat = OutputFormat.tabular, diff --git a/piicatcher/cli.py b/piicatcher/cli.py index baca67f..af5afdc 100644 --- a/piicatcher/cli.py +++ b/piicatcher/cli.py @@ -330,8 +330,8 @@ def snowflake( @app.command() def athena( name: str = typer.Option(..., help="A memorable name for the database"), - aws_access_key_id: str = typer.Option(..., help="AWS Access Key"), - aws_secret_access_key: str = typer.Option(..., help="AWS Secret Key"), + aws_access_key_id: str = typer.Option(None, help="AWS Access Key"), + aws_secret_access_key: str = typer.Option(None, help="AWS Secret Key"), region_name: str = typer.Option(..., help="AWS Region Name"), s3_staging_dir: str = typer.Option(..., help="S3 Staging Dir"), scan_type: ScanTypeEnum = typer.Option( diff --git a/poetry.lock b/poetry.lock index 93d4d77..5483aba 100644 --- a/poetry.lock +++ b/poetry.lock @@ -590,7 +590,7 @@ python-versions = ">=3.6, <3.7" [[package]] name = "dbcat" -version = "0.10.4" +version = "0.10.6" description = "Tokern Data Catalog" category = "main" optional = false @@ -3259,8 +3259,8 @@ dataclasses = [ {file = "dataclasses-0.8.tar.gz", hash = "sha256:8479067f342acf957dc82ec415d355ab5edb7e7646b90dc6e2fd1d96ad084c97"}, ] dbcat = [ - {file = "dbcat-0.10.4-py3-none-any.whl", hash = "sha256:ab739c076c88b09034b6e38e3ba5b51269d6354ee5c05944d5e4fb52683463ee"}, - {file = "dbcat-0.10.4.tar.gz", hash = "sha256:8cd16375133c1fb415a2ee7bcfc99085271c5869dd0d8be5b80bbe7697d354e7"}, + {file = "dbcat-0.10.6-py3-none-any.whl", hash = "sha256:4fd613652e76e2deb9d988ee313c8aec6820867e075ff8d3400b0b00d92b51c8"}, + {file = "dbcat-0.10.6.tar.gz", hash = "sha256:10fda30fc5c7340f89bfc09899d5eba80b074dcced4683d948bcfdbf09918488"}, ] decopatch = [ {file = "decopatch-1.4.8-py2.py3-none-any.whl", hash = "sha256:29a74d5d753423b188d5b537532da4f4b88e33ddccb95a8a20a5eff5b13265d4"}, diff --git a/pyproject.toml b/pyproject.toml index 38c384e..38d08f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "piicatcher" -version = "0.17.3" +version = "0.17.4" description = "Find PII data in databases" authors = ["Tokern "] license = "Apache 2.0" diff --git a/tests/test_api.py b/tests/test_api.py index 7ca1abf..2e71691 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -167,3 +167,17 @@ def test_scan_athena(mocker, temp_sqlite_path, app_dir_path): ) piicatcher.api.scan_database.assert_called_once() Catalog.add_source.assert_called_once() + + +def test_scan_athena_iam(mocker, temp_sqlite_path, app_dir_path): + mocker.patch("piicatcher.api.scan_database") + mocker.patch.object(Catalog, "add_source") + + scan_athena( + catalog_params={"path": temp_sqlite_path, "app_dir": app_dir_path}, + name="test_scan_athena", + region_name="r", + s3_staging_dir="s3", + ) + piicatcher.api.scan_database.assert_called_once() + Catalog.add_source.assert_called_once() diff --git a/tests/test_cli.py b/tests/test_cli.py index 390fccf..ba8615f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -102,6 +102,19 @@ def case_athena_cli(): ] +def case_athena_cli_iam(): + return [ + "scan", + "athena", + "--name", + "athena_cli", + "--region-name", + "us-east-1", + "--s3-staging-dir", + "s3://dummy", + ] + + @parametrize_with_cases("args", cases=".") def test_cli(mocker, temp_sqlite_path, args): mocker.patch("piicatcher.api.scan_database")