Skip to content

Commit

Permalink
Detect tables that are not present in the mapping file
Browse files Browse the repository at this point in the history
Introduces #1221
  • Loading branch information
aminmovahed-db committed Jul 28, 2024
1 parent 696e177 commit e132357
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 0 deletions.
7 changes: 7 additions & 0 deletions src/databricks/labs/ucx/hive_metastore/table_migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ def __init__(
self._seen_tables: dict[str, str] = {}
self._principal_grants = principal_grants

def not_migrated_refresh(self) -> list[Table]:
    """Collect the crawled tables that have not been migrated yet.

    Each table in the crawler snapshot is checked with ``is_migrated`` using
    its database and table name; tables for which that check is falsy are kept.

    Returns:
        The not-yet-migrated tables, in snapshot order.
    """
    # NOTE: depending on how this data ends up being published, the result may
    # need converting to another form before it can be shown in the dashboard.
    return [
        candidate
        for candidate in self._tc.snapshot()
        if not self.is_migrated(candidate.database, candidate.name)
    ]

def index(self):
    """Return whatever the migration status refresher's ``index()`` yields."""
    refresher = self._migration_status_refresher
    return refresher.index()

Expand Down
4 changes: 4 additions & 0 deletions src/databricks/labs/ucx/hive_metastore/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ def refresh_migration_status(self, ctx: RuntimeContext):
"""Refresh the migration status to present it in the dashboard."""
ctx.tables_migrator.index_full_refresh()

@job_task(
    job_cluster="table_migration",
    depends_on=[
        migrate_external_tables_sync,
        migrate_dbfs_root_delta_tables,
        migrate_dbfs_root_non_delta_tables,
        migrate_views,
    ],
)
def refresh_not_migrated_status(self, ctx: RuntimeContext):
    """Refresh the not migrated tables status to present it in the dashboard."""
    # NOTE(review): not_migrated_refresh() returns a list, and that list is
    # discarded here; this task does not store or publish it. Confirm whether
    # that is intended.
    ctx.tables_migrator.not_migrated_refresh()

class MigrateHiveSerdeTablesInPlace(Workflow):
def __init__(self):
Expand Down
5 changes: 5 additions & 0 deletions tests/unit/hive_metastore/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,8 @@ def test_refresh_migration_status_is_refreshed(run_workflow, workflow):
assert "DELETE FROM hive_metastore.ucx.migration_status" in ctx.sql_backend.queries
assert "SHOW DATABASES" in ctx.sql_backend.queries
# No "SHOW TABLE FROM" query as tables are not mocked

# TODO: add a unit test for the new refresh_not_migrated_status workflow task
# def test_refresh_not_migrated_status_is_refreshed(run_workflow):
# ctx = run_workflow(TableMigration.refresh_not_migrated_status)
# # ctx.workspace_client.catalogs.list.assert_called()

0 comments on commit e132357

Please sign in to comment.