Skip to content

Commit

Permalink
Copy changes from #3112
Browse files Browse the repository at this point in the history
  • Loading branch information
JCZuurmond committed Dec 17, 2024
1 parent 54c4442 commit 24284b4
Show file tree
Hide file tree
Showing 20 changed files with 154 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
SELECT
ROUND(100 * try_divide(COUNT_IF(SIZE(failures) = 0), COUNT(*)), 2) AS percentage
FROM ucx_catalog.multiworkspace.objects_snapshot
WHERE object_type IN ('ClusterInfo', 'Grant', 'JobInfo', 'PipelineInfo', 'PolicyInfo', 'Table', 'Udf')
WHERE object_type IN ('ClusterInfo', 'DirectFsAccess', 'Grant', 'JobInfo', 'PipelineInfo', 'PolicyInfo', 'QueryProblem', 'Table', 'Udf', 'UsedTable')
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* --title 'Table migration progress (%)' --width 2 */
/* --title 'Table migration progress (%)' */
SELECT
ROUND(100 * TRY_DIVIDE(COUNT_IF(SIZE(failures) = 0), COUNT(*)), 2) AS percentage
FROM ucx_catalog.multiworkspace.objects_snapshot
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/* --title '"Table references in code" progress (%)' --description 'Tables referring UC over Hive metastore' */
-- Percentage (rounded to two decimals) of 'UsedTable' snapshot rows whose failures collection is empty.
-- TRY_DIVIDE returns NULL instead of raising when there are no 'UsedTable' rows to divide by.
SELECT
    ROUND(100 * TRY_DIVIDE(COUNT_IF(SIZE(failures) = 0), COUNT(*)), 2) AS percentage
FROM ucx_catalog.multiworkspace.objects_snapshot
-- String literal is single-quoted: double quotes denote identifiers in ANSI SQL, and the
-- other queries filtering on object_type use single quotes as well.
WHERE object_type = 'UsedTable'
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/* --title 'Direct filesystem access progress (#)' --description 'Unsupported in Unity Catalog' */
-- Number of 'DirectFsAccess' snapshot rows that carry at least one failure.
SELECT COUNT(*) AS counter
FROM ucx_catalog.multiworkspace.objects_snapshot
-- String literal is single-quoted: double quotes denote identifiers in ANSI SQL.
WHERE object_type = 'DirectFsAccess'
-- Redundant filter as a direct filesystem access is a failure by definition (see description above),
-- however, filter is defined for explicitness and as this knowledge is not "known" to this query.
AND SIZE(failures) > 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/* --title 'Query problem progress (#)' */
-- Number of 'QueryProblem' snapshot rows that carry at least one failure.
SELECT COUNT(*) AS counter
FROM ucx_catalog.multiworkspace.objects_snapshot
-- String literal is single-quoted: double quotes denote identifiers in ANSI SQL.
WHERE object_type = 'QueryProblem'
-- Redundant filter as a query problem is a failure by definition, however, filter is defined for explicitness
AND SIZE(failures) > 0
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
with failures AS (
SELECT object_type, explode(failures) AS failure
FROM ucx_catalog.multiworkspace.objects_snapshot
WHERE object_type IN ('ClusterInfo', 'Grant', 'JobInfo', 'PipelineInfo', 'PolicyInfo', 'Table', 'Udf')
WHERE object_type IN ('ClusterInfo', 'DirectFsAccess', 'Grant', 'JobInfo', 'PipelineInfo', 'PolicyInfo', 'QueryProblem', 'Table', 'Udf', 'UsedTable')
)

SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* --title 'Overview' --description 'Tables and views migration' --width 5 */
WITH migration_statuses AS (
SELECT *
SELECT owner, failures
FROM ucx_catalog.multiworkspace.objects_snapshot
WHERE object_type = 'Table'
)
Expand Down
8 changes: 8 additions & 0 deletions src/databricks/labs/ucx/queries/progress/main/03_00_code.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Code

This section shows Unity Catalog compatibility issues found while linting code. There are two kinds of code changes to
perform:
- Data asset references, like references to Hive metastore tables and views or direct filesystem access (dfsa). These
references should be updated to refer to their Unity Catalog counterparts.
- Linting compatibility issues, like using RDDs or directly accessing the Spark context. These issues should be resolved
by following the instructions stated with the issue.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/* --title 'Pending migration' --description 'Total number of table, view and dfsa references' --height 6 */
-- Single-value counter: direct filesystem access and used-table rows that still have failures.
WITH pending_references AS (
    SELECT object_type
    FROM ucx_catalog.multiworkspace.objects_snapshot
    WHERE
        SIZE(failures) > 0
        AND object_type IN ('DirectFsAccess', 'UsedTable')
)

SELECT COUNT(1) AS count
FROM pending_references
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
--title 'Pending migration'
--description 'Tables, views and dfsa per owner'
--width 5
--overrides '{"spec": {
"version": 3,
"widgetType": "bar",
"encodings": {
"x": {"fieldName": "owner", "scale": {"type": "categorical"}, "displayName": "owner"},
"y": {"fieldName": "count", "scale": {"type": "quantitative"}, "displayName": "count"}
}
}}'
*/
-- Bar chart input: one row per owner with the number of direct filesystem access and
-- used-table rows that still have at least one failure.
SELECT
    snapshot.owner AS owner,
    COUNT(*) AS count
FROM ucx_catalog.multiworkspace.objects_snapshot AS snapshot
WHERE
    snapshot.object_type IN ('DirectFsAccess', 'UsedTable')
    AND SIZE(snapshot.failures) > 0
GROUP BY snapshot.owner
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/* --title 'Migrated' --description 'Total number of table, view and dfsa references' --height 6 */
-- Single-value counter: direct filesystem access and used-table rows without any failure.
SELECT COUNT(*) AS count
FROM ucx_catalog.multiworkspace.objects_snapshot
-- Standard `=` comparison, matching the sibling queries, instead of the non-standard `==` alias.
WHERE object_type IN ('DirectFsAccess', 'UsedTable') AND SIZE(failures) = 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/* --title 'Overview' --description 'Table, view and dfsa migration' --width 5 */
-- Per owner and reference kind: share of rows without failures (rounded up to two decimals),
-- plus the total, migrated (no failures) and not-migrated (at least one failure) row counts.
WITH migration_statuses AS (
    SELECT owner, object_type, failures
    FROM ucx_catalog.multiworkspace.objects_snapshot
    WHERE object_type IN ('DirectFsAccess', 'UsedTable')
)

SELECT
    statuses.owner AS owner,
    -- Human-readable label; the output column keeps the name object_type.
    CASE
        WHEN statuses.object_type = 'DirectFsAccess' THEN 'Direct filesystem access'
        WHEN statuses.object_type = 'UsedTable' THEN 'Table or view reference'
        ELSE statuses.object_type
    END AS object_type,
    -- Every output row is exactly one (owner, object_type) group, so the group's COUNT(*) is the
    -- denominator; a window SUM over the same keys would return the identical value.
    DOUBLE(CEIL(100 * COUNT_IF(SIZE(statuses.failures) = 0) / COUNT(*), 2)) AS percentage,
    COUNT(*) AS total,
    COUNT_IF(SIZE(statuses.failures) = 0) AS total_migrated,
    COUNT_IF(SIZE(statuses.failures) > 0) AS total_not_migrated
FROM migration_statuses AS statuses
-- Qualified names make it explicit that grouping uses the raw columns, not the relabelled alias.
GROUP BY statuses.owner, statuses.object_type
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
--title 'Data asset references'
--width 6
--overrides '{"spec":{
"encodings":{
"columns": [
{"fieldName": "workspace_id", "booleanValues": ["false", "true"], "type": "string", "displayAs": "string", "title": "workspace_id"},
{"fieldName": "object_type", "booleanValues": ["false", "true"], "type": "string", "displayAs": "string", "title": "object_type"},
{"fieldName": "object_id", "booleanValues": ["false", "true"], "linkUrlTemplate": "{{ link }}", "linkTextTemplate": "{{ @ }}", "linkTitleTemplate": "{{ @ }}", "linkOpenInNewTab": true, "type": "string", "displayAs": "link", "title": "object_id"},
{"fieldName": "failure", "booleanValues": ["false", "true"], "type": "integer", "displayAs": "number", "title": "failure"},
{"fieldName": "is_read", "booleanValues": ["false", "true"], "type": "integer", "displayAs": "number", "title": "is_read"},
{"fieldName": "is_write", "booleanValues": ["false", "true"], "type": "integer", "displayAs": "number", "title": "is_write"}
]},
"invisibleColumns": [
{"name": "link", "booleanValues": ["false", "true"], "linkUrlTemplate": "{{ @ }}", "linkTextTemplate": "{{ @ }}", "linkTitleTemplate": "{{ @ }}", "linkOpenInNewTab": true, "type": "string", "displayAs": "link", "title": "link"}
]
}}'
*/
-- One row per failure of every direct filesystem access or used-table row, with a link to its source.
-- NOTE(review): the widget spec above declares failure, is_read and is_write as integer/number while this
-- query emits EXPLODE(failures) and BOOLEAN casts, and it does not list the selected owner column —
-- confirm this is intended.
SELECT
    workspace_id,
    owner,
    CASE
        WHEN object_type = 'DirectFsAccess' THEN 'Direct filesystem access'
        WHEN object_type = 'UsedTable' THEN 'Table or view reference'
        ELSE object_type
    END AS object_type,
    CASE
        WHEN object_type = 'DirectFsAccess' THEN data.path
        WHEN object_type = 'UsedTable' THEN CONCAT_WS('.', object_id)
        ELSE CONCAT_WS('.', object_id)
    END AS object_id,
    EXPLODE(failures) AS failure,
    CAST(data.is_read AS BOOLEAN) AS is_read,
    CAST(data.is_write AS BOOLEAN) AS is_write,
    -- Below are invisible column(s) used in links url templates
    CASE
        -- SQL queries do NOT point to the workspace, i.e. start with '/'
        WHEN object_type = 'DirectFsAccess' AND SUBSTRING(data.source_id, 0, 1) != '/' THEN CONCAT('/sql/editor/', data.source_id)
        ELSE CONCAT('/#workspace', data.source_id)
    END AS link
FROM ucx_catalog.multiworkspace.objects_snapshot
-- The WHERE clause must precede ORDER BY; the reverse order is a syntax error.
WHERE object_type IN ('DirectFsAccess', 'UsedTable')
ORDER BY workspace_id, owner, object_type, object_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
--title 'Code compatability issues'
--width 6
--overrides '{"spec":{
"encodings":{
"columns": [
{"fieldName": "workspace_id", "booleanValues": ["false", "true"], "type": "string", "displayAs": "string", "title": "workspace_id"},
{"fieldName": "code", "booleanValues": ["false", "true"], "type": "string", "displayAs": "string", "title": "code"},
{"fieldName": "message", "booleanValues": ["false", "true"], "type": "string", "displayAs": "string", "title": "message"},
{"fieldName": "dashboard_name", "booleanValues": ["false", "true"], "linkUrlTemplate": "/sql/dashboards/{{ dashboard_id }}", "linkTextTemplate": "{{ @ }}", "linkTitleTemplate": "{{ @ }}", "linkOpenInNewTab": true, "type": "string", "displayAs": "link", "title": "dashboard"},
{"fieldName": "query_name", "booleanValues": ["false", "true"], "linkUrlTemplate": "/sql/editor/{{ query_id }}", "linkTextTemplate": "{{ @ }}", "linkTitleTemplate": "{{ @ }}", "linkOpenInNewTab": true, "type": "string", "displayAs": "link", "title": "query"}
]},
"invisibleColumns": [
{"name": "dashboard_id", "booleanValues": ["false", "true"], "linkUrlTemplate": "{{ @ }}", "linkTextTemplate": "{{ @ }}", "linkTitleTemplate": "{{ @ }}", "linkOpenInNewTab": true, "type": "string", "displayAs": "link", "title": "dashboard_id"},
{"name": "query_id", "booleanValues": ["false", "true"], "linkUrlTemplate": "{{ @ }}", "linkTextTemplate": "{{ @ }}", "linkTitleTemplate": "{{ @ }}", "linkOpenInNewTab": true, "type": "string", "displayAs": "link", "title": "query_id"}
]
}}'
*/
-- Lists every 'QueryProblem' snapshot row with its code, message and the dashboard/query it belongs to.
WITH query_problems AS (
    SELECT
        workspace_id,
        data
    FROM ucx_catalog.multiworkspace.objects_snapshot
    WHERE object_type = 'QueryProblem'
)

SELECT
    workspace_id,
    data.code AS code,
    data.message AS message,
    data.dashboard_name AS dashboard_name,
    data.query_name AS query_name,
    -- Hidden columns: only consumed by the link url templates in the widget spec
    data.dashboard_id AS dashboard_id,
    data.query_id AS query_id
FROM query_problems

0 comments on commit 24284b4

Please sign in to comment.