Skip to content

Commit

Permalink
Add dashboard for tracking migration progress (#3016)
Browse files Browse the repository at this point in the history
## Changes
Add a dashboard for tracking migration progress; screenshots are attached
below as comments.

Open changes:
- [x] Update install to replace the UCX catalog in queries; call the value
to be replaced `multiworkspaces`
- [x] Add counter for tables and views migrated
- [x] Use object serializer from #2743
- [x] Filter for latest scan only
- [x] Add overview of the (unique) failure messages
- [x] Reword "readiness"

### Linked issues

Resolves #2596
References databrickslabs/lsql#306
Requires: #3083 

### Functionality

- [x] added a new dashboard: migration-progress

### Tests

- [x] manually tested
- [x] added integration tests
- [x] verified on staging environment (screenshot attached)
  • Loading branch information
JCZuurmond authored Oct 29, 2024
1 parent 35a0309 commit 1abc9fe
Show file tree
Hide file tree
Showing 21 changed files with 529 additions and 5 deletions.
13 changes: 10 additions & 3 deletions src/databricks/labs/ucx/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,9 +634,16 @@ def _handle_existing_dashboard(self, dashboard_id: str, display_name: str, paren
def _create_dashboard(self, folder: Path, *, parent_path: str) -> None:
"""Create a lakeview dashboard from the SQL queries in the folder"""
logger.info(f"Creating dashboard in {folder}...")
metadata = DashboardMetadata.from_path(folder).replace_database(
database=f"hive_metastore.{self._config.inventory_database}",
database_to_replace="inventory",
metadata = (
DashboardMetadata.from_path(folder)
.replace_database( # Assessment and migration dashboards
database=f"hive_metastore.{self._config.inventory_database}",
database_to_replace="inventory",
)
.replace_database( # Migration progress dashboard
catalog=self._config.ucx_catalog,
catalog_to_replace="ucx_catalog",
)
)
metadata.display_name = f"{self._name('UCX ')} {folder.parent.stem.title()} ({folder.stem.title()})"
reference = f"{folder.parent.stem}_{folder.stem}".lower()
Expand Down
6 changes: 4 additions & 2 deletions src/databricks/labs/ucx/progress/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,15 @@ class ProgressTrackingInstallation:
_SCHEMA = "multiworkspace"

def __init__(self, sql_backend: SqlBackend, ucx_catalog: str) -> None:
# `mod` is a required parameter, though, it's not used in this context without views.
self._schema_deployer = SchemaDeployer(sql_backend, self._SCHEMA, mod=None, catalog=ucx_catalog)
from databricks.labs import ucx # pylint: disable=import-outside-toplevel

self._schema_deployer = SchemaDeployer(sql_backend, self._SCHEMA, mod=ucx, catalog=ucx_catalog)

def run(self) -> None:
    """Deploy the progress-tracking schema with its tables and the objects snapshot view."""
    # The schema is deployed first, then the tables, then the view.
    self._schema_deployer.deploy_schema()
    self._schema_deployer.deploy_table("workflow_runs", WorkflowRun)
    self._schema_deployer.deploy_table("historical", Historical)
    # The view definition is loaded from a SQL file path relative to the deployer's module.
    # NOTE(review): presumably resolved against the `mod` passed to SchemaDeployer — confirm.
    self._schema_deployer.deploy_view("objects_snapshot", "queries/views/objects_snapshot.sql")
    logger.info("Installation completed successfully!")


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
height: 4
---

# Migration Progress

> If widgets show `Unable to render visualization.`, verify that
> the [UCX catalog exists](https://github.com/databrickslabs/ucx?tab=readme-ov-file#create-ucx-catalog-command).

This dashboard displays the migration progress, with data visualized from the `migration-progress-experimental`
workflow. This workflow is designed to run regularly — either daily or weekly — to provide an up-to-date overview of the
migration progress.

In addition to offering real-time insights into migration progress, the dashboard also facilitates planning and task
division. For instance, you can choose to migrate one workspace or schema at a time. By assigning a migration owner to
each workspace and/or schema, the dashboard shows how the resources allocated to that owner are progressing.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/* --title 'Overall progress (%)' --width 2 */
SELECT
    ROUND(
        100 * TRY_DIVIDE(
            COUNT_IF(SIZE(failures) = 0),  -- records without any recorded failure
            COUNT(*)                       -- all records of the tracked object types
        ),
        2
    ) AS percentage
FROM ucx_catalog.multiworkspace.objects_snapshot
WHERE
    object_type IN ('ClusterInfo', 'Grant', 'JobInfo', 'PipelineInfo', 'PolicyInfo', 'Table', 'Udf')
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/* --title 'UDF migration progress (%)' */
SELECT
    -- Percentage of UDF records without failures; TRY_DIVIDE returns NULL
    -- instead of raising when there are no records to divide by.
    ROUND(100 * TRY_DIVIDE(COUNT_IF(SIZE(failures) = 0), COUNT(*)), 2) AS percentage
FROM ucx_catalog.multiworkspace.objects_snapshot
-- String literals are single-quoted: double quotes denote identifiers when
-- ANSI double-quoted identifiers are enabled.
WHERE object_type = 'Udf'
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/* --title 'Grant migration progress (%)' */
SELECT
    -- Percentage of grant records without failures; TRY_DIVIDE returns NULL
    -- instead of raising when there are no records to divide by.
    ROUND(100 * TRY_DIVIDE(COUNT_IF(SIZE(failures) = 0), COUNT(*)), 2) AS percentage
FROM ucx_catalog.multiworkspace.objects_snapshot
-- String literals are single-quoted: double quotes denote identifiers when
-- ANSI double-quoted identifiers are enabled.
WHERE object_type = 'Grant'
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/* --title 'Job migration progress (%)' */
SELECT
    -- Percentage of job records without failures; TRY_DIVIDE returns NULL
    -- instead of raising when there are no records to divide by.
    ROUND(100 * TRY_DIVIDE(COUNT_IF(SIZE(failures) = 0), COUNT(*)), 2) AS percentage
FROM ucx_catalog.multiworkspace.objects_snapshot
-- String literals are single-quoted: double quotes denote identifiers when
-- ANSI double-quoted identifiers are enabled.
WHERE object_type = 'JobInfo'
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/* --title 'Cluster migration progress (%)' */
SELECT
    -- Percentage of cluster records without failures; TRY_DIVIDE returns NULL
    -- instead of raising when there are no records to divide by.
    ROUND(100 * TRY_DIVIDE(COUNT_IF(SIZE(failures) = 0), COUNT(*)), 2) AS percentage
FROM ucx_catalog.multiworkspace.objects_snapshot
-- String literals are single-quoted: double quotes denote identifiers when
-- ANSI double-quoted identifiers are enabled.
WHERE object_type = 'ClusterInfo'
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/* --title 'Table migration progress (%)' --width 2 */
SELECT
    -- Percentage of table records without failures; TRY_DIVIDE returns NULL
    -- instead of raising when there are no records to divide by.
    ROUND(100 * TRY_DIVIDE(COUNT_IF(SIZE(failures) = 0), COUNT(*)), 2) AS percentage
FROM ucx_catalog.multiworkspace.objects_snapshot
-- String literals are single-quoted: double quotes denote identifiers when
-- ANSI double-quoted identifiers are enabled.
WHERE object_type = 'Table'
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/* --title 'Pipeline migration progress (%)' */
SELECT
    -- Percentage of pipeline records without failures; TRY_DIVIDE returns NULL
    -- instead of raising when there are no records to divide by.
    ROUND(100 * TRY_DIVIDE(COUNT_IF(SIZE(failures) = 0), COUNT(*)), 2) AS percentage
FROM ucx_catalog.multiworkspace.objects_snapshot
-- String literals are single-quoted: double quotes denote identifiers when
-- ANSI double-quoted identifiers are enabled.
WHERE object_type = 'PipelineInfo'
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/* --title 'Policy migration progress (%)' */
SELECT
    -- Percentage of policy records without failures; TRY_DIVIDE returns NULL
    -- instead of raising when there are no records to divide by.
    ROUND(100 * TRY_DIVIDE(COUNT_IF(SIZE(failures) = 0), COUNT(*)), 2) AS percentage
FROM ucx_catalog.multiworkspace.objects_snapshot
-- String literals are single-quoted: double quotes denote identifiers when
-- ANSI double-quoted identifiers are enabled.
WHERE object_type = 'PolicyInfo'
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/* --title 'Distinct failures per object type' --width 6 */
WITH exploded_failures AS (
    -- One row per (object, failure message) for the tracked object types.
    SELECT
        object_type,
        EXPLODE(failures) AS failure
    FROM ucx_catalog.multiworkspace.objects_snapshot
    WHERE
        object_type IN ('ClusterInfo', 'Grant', 'JobInfo', 'PipelineInfo', 'PolicyInfo', 'Table', 'Udf')
)

-- Count how often each distinct failure message occurs per object type.
SELECT
    object_type,
    COUNT(*) AS count,
    failure
FROM exploded_failures
GROUP BY
    object_type,
    failure
ORDER BY
    object_type,
    failure
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Tables and Views

This section presents the migration progress of tables and views, detailing which data objects are migrated and which
are pending migration.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
title: Filter for owner(s)
column: owner
type: MULTI_SELECT
width: 6
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/* --title 'Pending migration' --description 'Total number of tables and views' --height 6 */
SELECT
    COUNT(*) AS count
FROM ucx_catalog.multiworkspace.objects_snapshot
WHERE
    -- Only table records that carry the 'Pending migration' failure message.
    ARRAY_CONTAINS(failures, 'Pending migration')
    AND object_type = 'Table'
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
--title 'Pending migration'
--description 'Tables and views per owner'
--width 5
--overrides '{"spec": {
"version": 3,
"widgetType": "bar",
"encodings": {
"x":{"fieldName": "owner", "scale": {"type": "categorical"}, "displayName": "owner"},
"y":{"fieldName": "count", "scale": {"type": "quantitative"}, "displayName": "count"}
}
}}'
*/
SELECT
    owner,
    -- Number of table records per owner that still carry 'Pending migration'.
    COUNT(1) AS count
FROM ucx_catalog.multiworkspace.objects_snapshot
WHERE
    object_type = 'Table'
    AND ARRAY_CONTAINS(failures, 'Pending migration')
GROUP BY owner
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/* --title 'Migrated' --description 'Total number of tables and views' --height 6 */
SELECT
    COUNT(*) AS count
FROM ucx_catalog.multiworkspace.objects_snapshot
WHERE
    -- A table record with an empty failures array counts as migrated.
    SIZE(failures) = 0
    AND object_type = 'Table'
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/* --title 'Overview' --description 'Tables and views migration' --width 5 */
SELECT
    owner,
    -- GROUP BY owner yields exactly one row per owner, so COUNT(*) already is the
    -- per-owner total; no window function over the grouped rows is needed.
    -- The percentage is rounded up to two decimals.
    DOUBLE(CEIL(100 * COUNT_IF(SIZE(failures) = 0) / COUNT(*), 2)) AS percentage,
    COUNT(*) AS total,
    COUNT_IF(SIZE(failures) = 0) AS total_migrated,
    COUNT_IF(SIZE(failures) > 0) AS total_not_migrated
FROM ucx_catalog.multiworkspace.objects_snapshot
WHERE object_type = 'Table'
GROUP BY owner
25 changes: 25 additions & 0 deletions src/databricks/labs/ucx/queries/views/objects_snapshot.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
WITH latest_runs AS (
    -- Per workspace, pick the most recent run of the migration progress workflow.
    -- Structs compare field by field, so MAX orders on finished_at first and uses
    -- the remaining fields as tie-breakers.
    SELECT
        workspace_id,
        MAX(STRUCT(finished_at, workflow_run_attempt, started_at, workflow_run_id)) AS latest
    FROM $inventory.workflow_runs -- $inventory is a hardcoded name for replacing target schema in a view definition
    WHERE workflow_name = 'migration-progress-experimental'
    GROUP BY workspace_id
)

-- Keep only the historical records written by that latest run of each workspace.
SELECT
    h.workspace_id,
    h.job_run_id,
    h.object_type,
    h.object_id,
    h.data,
    h.failures,
    h.owner,
    h.ucx_version
FROM $inventory.historical AS h -- $inventory is a hardcoded name for replacing target schema in a view definition
INNER JOIN latest_runs AS r
    ON
        h.workspace_id = r.workspace_id
        AND h.job_run_id = r.latest.workflow_run_id
Empty file.
Loading

0 comments on commit 1abc9fe

Please sign in to comment.