Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a dashboard with a counter from a single query #107

Merged
merged 34 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
85c5d33
Add counter
JCZuurmond May 27, 2024
c790842
Rename Dashboards to Dashboard
JCZuurmond May 27, 2024
fc8ec76
Remove unused import
JCZuurmond May 27, 2024
69ec277
Make dashboard singular
JCZuurmond May 27, 2024
509d5e0
Format
JCZuurmond May 27, 2024
34901ae
Add integration test and endpoint to deploy dashboard from code
JCZuurmond May 27, 2024
df54092
Handle WidgetSpec v0 table
JCZuurmond May 27, 2024
cc48c67
Add TODO
JCZuurmond May 27, 2024
3207abc
Rewrite test to count queries
JCZuurmond May 27, 2024
800e1ff
Create datasets from queries
JCZuurmond May 27, 2024
dc734bf
Handle counter v0
JCZuurmond May 27, 2024
782a860
Add test for creating counter specs
JCZuurmond May 27, 2024
0dfa9b0
Add a counter widget for each query
JCZuurmond May 27, 2024
c38b8e4
Deploy lakeview dashboard
JCZuurmond May 27, 2024
66f8bfe
Remove unused import
JCZuurmond May 27, 2024
193c299
Split dashboard create and deploy
JCZuurmond May 27, 2024
4038081
Add count field
JCZuurmond May 27, 2024
e042d81
Refactor to clean test dashboards
JCZuurmond May 27, 2024
cda46f0
Test the lakeview dicts
JCZuurmond May 27, 2024
8939903
Create random id
JCZuurmond May 27, 2024
44eb2d4
Refactor dashboard to plural
JCZuurmond May 27, 2024
861c884
Refactor create to create_dashboard
JCZuurmond May 27, 2024
117a1d0
Refactor deploy to deploy_dashboard
JCZuurmond May 27, 2024
883ab45
Remove name collision
JCZuurmond May 27, 2024
d834d5a
Change counter height to three
JCZuurmond May 27, 2024
dc42bdb
Add working dashboard
JCZuurmond May 28, 2024
c566c65
Add disaggregated field
JCZuurmond May 28, 2024
d89b0db
Add counter field encoding
JCZuurmond May 28, 2024
2ffd6de
Name query
JCZuurmond May 28, 2024
253122d
Format
JCZuurmond May 28, 2024
f825244
Add comment
JCZuurmond May 28, 2024
f878eaa
Remove create random id
JCZuurmond May 28, 2024
c2ace7b
Replace fields in testing
JCZuurmond May 28, 2024
36b1330
Format
JCZuurmond May 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/databricks/labs/lsql/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def as_dict(self) -> dict[str, Any]:

# PySpark's compatibility
def asDict(self, recursive: bool = False) -> dict[str, Any]:
    """PySpark-compatible spelling of ``as_dict``.

    ``recursive`` is accepted so the signature matches PySpark's; it is not
    used, and the result is whatever ``self.as_dict()`` returns.
    """
    _ = recursive  # deliberately ignored; kept only for signature parity
    as_mapping = self.as_dict()
    return as_mapping

def __eq__(self, other):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,27 +1,47 @@
import json
JCZuurmond marked this conversation as resolved.
Show resolved Hide resolved
import random
import string
from pathlib import Path
from typing import Protocol, ClassVar, runtime_checkable
from typing import ClassVar, Protocol, runtime_checkable

import sqlglot
import yaml
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.dashboards import Dashboard as SDKDashboard
from databricks.sdk.service.workspace import ExportFormat

from databricks.labs.lsql.lakeview import Dashboard, Page, Query, NamedQuery, ControlFieldEncoding
from databricks.labs.lsql.lakeview import (
ControlFieldEncoding,
CounterEncodingMap,
CounterSpec,
)
from databricks.labs.lsql.lakeview import Dashboard as LakeviewDashboard
JCZuurmond marked this conversation as resolved.
Show resolved Hide resolved
from databricks.labs.lsql.lakeview import (
Dataset,
Field,
Layout,
NamedQuery,
Page,
Position,
Query,
Widget,
)


@runtime_checkable
class _DataclassInstance(Protocol):
    """Structural type for objects exposing ``__dataclass_fields__``.

    Being ``runtime_checkable``, it can be used with ``isinstance`` to test
    for the presence of that attribute.
    """

    __dataclass_fields__: ClassVar[dict]

class Dashboards:

class Dashboard: # TODO: Rename, maybe DashboardClient?
def __init__(self, ws: WorkspaceClient):
self._ws = ws

def get_dashboard(self, dashboard_path: str):
with self._ws.workspace.download(dashboard_path, format=ExportFormat.SOURCE) as f:
raw = f.read().decode('utf-8')
raw = f.read().decode("utf-8")
as_dict = json.loads(raw)
return Dashboard.from_dict(as_dict)
return LakeviewDashboard.from_dict(as_dict)

def save_to_folder(self, dashboard_path: str, local_path: Path):
local_path.mkdir(parents=True, exist_ok=True)
Expand All @@ -34,23 +54,67 @@ def save_to_folder(self, dashboard_path: str, local_path: Path):
sql_query = dataset.query
self._format_sql_file(sql_query, query_path)
lvdash_yml = local_path / "lvdash.yml"
with lvdash_yml.open('w') as f:
with lvdash_yml.open("w") as f:
first_page = dashboard.pages[0]
self._replace_names(first_page, better_names)
page = first_page.as_dict()
yaml.safe_dump(page, f)
assert True

@staticmethod
def _create_random_id() -> str:
charset = string.ascii_lowercase + string.digits
return "".join(random.choices(charset, k=8))

def create(self, dashboard_folder: Path) -> LakeviewDashboard:
    """Create a dashboard from code, i.e. configuration and queries.

    Every ``*.sql`` file directly inside ``dashboard_folder`` becomes one
    dataset (displayed under the file's stem) and one counter widget that
    selects the ``count`` field of that dataset. All widgets are collected on
    a single page named after the folder.

    Args:
        dashboard_folder: Folder holding the ``*.sql`` query files.

    Returns:
        The assembled Lakeview dashboard object. This method does not deploy it.
    """
    datasets, layouts = [], []
    # Path.glob yields entries in arbitrary, filesystem-dependent order. Sort the
    # paths so the same folder always produces the same dataset/widget order.
    for query_path in sorted(dashboard_folder.glob("*.sql")):
        raw_query = query_path.read_text()
        dataset = Dataset(name=self._create_random_id(), display_name=query_path.stem, query=raw_query)
        datasets.append(dataset)

        fields = [Field(name="count", expression="`count`")]
        query = Query(dataset_name=dataset.name, fields=fields)
        named_query = NamedQuery(name=self._create_random_id(), query=query)
        counter_spec = CounterSpec(CounterEncodingMap())
        widget = Widget(name=self._create_random_id(), queries=[named_query], spec=counter_spec)
        # NOTE(review): every widget gets the same position, so multiple queries
        # presumably overlap on the page - confirm the intended layout.
        position = Position(x=0, y=0, width=1, height=1)
        layouts.append(Layout(widget=widget, position=position))

    page = Page(name=dashboard_folder.name, display_name=dashboard_folder.name, layout=layouts)
    return LakeviewDashboard(datasets=datasets, pages=[page])

def deploy(
    self,
    lakeview_dashboard: LakeviewDashboard,
    *,
    display_name: str | None = None,
    dashboard_id: str | None = None,
) -> SDKDashboard:
    """Deploy a lakeview dashboard.

    Exactly one of ``display_name`` and ``dashboard_id`` must be given:
    ``display_name`` goes through ``lakeview.create`` and ``dashboard_id``
    through ``lakeview.update``. Either way the dashboard is sent as the JSON
    dump of ``lakeview_dashboard.as_dict()``.

    Raises:
        ValueError: If both or neither of the two keyword arguments are given.
    """
    # Both missing or both present: the two "is None" answers are equal.
    if (display_name is None) == (dashboard_id is None):
        raise ValueError("Give either display_name or dashboard_id.")

    serialized = json.dumps(lakeview_dashboard.as_dict())
    if display_name is None:
        assert dashboard_id is not None  # narrows the type for checkers
        return self._ws.lakeview.update(dashboard_id, serialized_dashboard=serialized)
    return self._ws.lakeview.create(display_name, serialized_dashboard=serialized)

def _format_sql_file(self, sql_query, query_path):
with query_path.open('w') as f:
with query_path.open("w") as f:
try:
for statement in sqlglot.parse(sql_query):
# see https://sqlglot.com/sqlglot/generator.html#Generator
pretty = statement.sql(
dialect='databricks',
dialect="databricks",
normalize=True, # normalize identifiers to lowercase
pretty=True, # format the produced SQL string
normalize_functions='upper', # normalize function names to uppercase
normalize_functions="upper", # normalize function names to uppercase
max_text_width=80, # wrap text at 80 characters
)
f.write(f"{pretty};\n")
Expand All @@ -70,11 +134,11 @@ def _replace_names(self, node: _DataclassInstance, better_names: dict[str, str])
node.dataset_name = better_names.get(node.dataset_name, node.dataset_name)
elif isinstance(node, NamedQuery) and node.query:
# 'dashboards/01eeb077e38c17e6ba3511036985960c/datasets/01eeb081882017f6a116991d124d3068_...'
if node.name.startswith('dashboards/'):
if node.name.startswith("dashboards/"):
parts = [node.query.dataset_name]
for field in node.query.fields:
parts.append(field.name)
new_name = '_'.join(parts)
new_name = "_".join(parts)
better_names[node.name] = new_name
node.name = better_names.get(node.name, node.name)
elif isinstance(node, ControlFieldEncoding):
Expand Down
5 changes: 5 additions & 0 deletions src/databricks/labs/lsql/lakeview/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import abc
import json
from dataclasses import dataclass
from enum import Enum
from typing import Any
Expand Down Expand Up @@ -88,6 +89,10 @@ def as_dict(self) -> Json:

@classmethod
def from_dict(cls, d: Json) -> WidgetSpec:
if d["version"] == 0 and d["viz_spec"]["viz_type"].lower() == "table":
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nfx : This looks to be missing from the generated code. Or am I missing something?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what happens there?..

Copy link
Member Author

@JCZuurmond JCZuurmond May 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Converting ucx's main assessment dashboard to lakeview and then exporting it results in widgets:

        {
          "widget": {
            "name": "d337ba8c",
            "queries": [
              {
                "name": "479b6dfe8614407993e86d5c4ba9a585",
                "query": {
                  "datasetName": "ec0f18c0",
                  "disaggregated": true
                }
              }
            ],
            "spec": {
              "version": 0,
              "viz_spec": {
                "display_name": "",
                "viz_type": "COUNTER",
                "serialized_options": "{\"counterLabel\": \"Metastore Crawl Failures\", \"counterColName\": \"count_failures\", \"rowNumber\": 1, \"targetRowNumber\": 1, \"stringDecimal\": 0, \"stringDecChar\": \".\", \"stringThouSep\": \",\", \"tooltipFormat\": \"0,0.000\", \"countRow\": false}",
                "query_name": "479b6dfe8614407993e86d5c4ba9a585"
              }
            }
          },
          "position": {
            "x": 1,
            "y": 3,
            "width": 1,
            "height": 3
          }
        },

and

        {
          "widget": {
            "name": "45c2e3cf",
            "queries": [
              {
                "name": "3b1dbed848e245fd88443bcea2029389",
                "query": {
                  "datasetName": "8e4a73e9",
                  "disaggregated": true
                }
              }
            ],
            "spec": {
              "version": 0,
              "viz_spec": {
                "display_name": "Mount Points",
                "viz_type": "TABLE",
                "serialized_options": "{\"itemsPerPage\": 25, \"condensed\": true, \"withRowNumber\": false, \"version\": 2, \"columns\": [{\"name\": \"name\", \"title\": \"name\", \"type\": \"string\", \"imageUrlTemplate\": \"{{ @ }}\", \"imageTitleTemplate\": \"{{ @ }}\", \"linkUrlTemplate\": \"{{ @ }}\", \"linkTextTemplate\": \"{{ @ }}\", \"linkTitleTemplate\": \"{{ @ }}\", \"linkOpenInNewTab\": true, \"displayAs\": \"string\", \"visible\": true, \"order\": 100000, \"allowSearch\": false, \"alignContent\": \"left\", \"allowHTML\": false, \"highlightLinks\": false, \"useMonospaceFont\": false, \"preserveWhitespace\": false}, {\"name\": \"source\", \"title\": \"source\", \"type\": \"string\", \"imageUrlTemplate\": \"{{ @ }}\", \"imageTitleTemplate\": \"{{ @ }}\", \"linkUrlTemplate\": \"{{ @ }}\", \"linkTextTemplate\": \"{{ @ }}\", \"linkTitleTemplate\": \"{{ @ }}\", \"linkOpenInNewTab\": true, \"displayAs\": \"string\", \"visible\": true, \"order\": 100000, \"allowSearch\": false, \"alignContent\": \"left\", \"allowHTML\": false, \"highlightLinks\": false, \"useMonospaceFont\": false, \"preserveWhitespace\": false}]}",
                "query_name": "3b1dbed848e245fd88443bcea2029389"
              }
            }
          },
          "position": {
            "x": 3,
            "y": 53,
            "width": 3,
            "height": 8
          }
        },

Note the `"version": 0` in both specs; handling for that version is missing from the generated code.

return TableV1Spec.from_dict(json.loads(d["viz_spec"]["serialized_options"]))
if d["version"] == 0 and d["viz_spec"]["viz_type"].lower() == "counter":
return CounterSpec.from_dict(json.loads(d["viz_spec"]["serialized_options"]))
if d["version"] == 1 and d["widgetType"] == "details":
return DetailsV1Spec.from_dict(d)
if d["version"] == 1 and d["widgetType"] == "table":
Expand Down
1 change: 1 addition & 0 deletions tests/integration/queries/counter.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT 6217 AS count
53 changes: 49 additions & 4 deletions tests/integration/test_dashboards.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,57 @@
from pathlib import Path

from databricks.sdk import WorkspaceClient
import pytest

from databricks.labs.lsql.dashboards import Dashboards
from databricks.labs.lsql.dashboard import Dashboard
from databricks.labs.lsql.lakeview.model import CounterSpec


@pytest.fixture
def dashboard_id(ws, make_random):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am missing a "list" endpoint on the lakeview API. Could we request one?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

they are files...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you suggesting listing all files in the Workspace to mimic a list endpoint?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

only .lvdash.json

"""Clean the lakeview dashboard"""

dashboard_display_name = f"created_by_lsql_{make_random()}"
dashboard = ws.lakeview.create(dashboard_display_name)

yield dashboard.dashboard_id

ws.lakeview.trash(dashboard.dashboard_id)


def test_load_dashboard(ws):
dashboards = Dashboards(ws)
dashboard = Dashboard(ws)
src = "/Workspace/Users/[email protected]/Trivial Dashboard.lvdash.json"
dst = Path(__file__).parent / "sample"
dashboards.save_to_folder(src, dst)
dashboard.save_to_folder(src, dst)


def test_dashboard_deploys_one_dataset_per_query(ws):
def test_dashboard_deploys_one_dataset_per_query(ws, make_random):
def test_dashboard_creates_one_dataset_per_query(ws, make_random):
def test_dashboard_creates_one_dataset_per_query(ws):
queries = Path(__file__).parent / "queries"
dashboard = Dashboard(ws).create(queries)
assert len(dashboard.datasets) == len([query for query in queries.glob("*.sql")])


def test_dashboard_creates_one_counter_widget_per_query(ws):
    """The created dashboard holds one counter widget for every ``*.sql`` query file."""
    queries = Path(__file__).parent / "queries"
    dashboard = Dashboard(ws).create(queries)

    counter_widgets = [
        layout.widget
        for page in dashboard.pages
        for layout in page.layout
        if isinstance(layout.widget.spec, CounterSpec)
    ]
    query_file_count = len(list(queries.glob("*.sql")))

    assert len(counter_widgets) == query_file_count


def test_dashboard_deploys_dashboard(ws, dashboard_id):
queries = Path(__file__).parent / "queries"
dashboard_client = Dashboard(ws)
lakeview_dashboard = dashboard_client.create(queries)

dashboard = dashboard_client.deploy(lakeview_dashboard, dashboard_id=dashboard_id)

JCZuurmond marked this conversation as resolved.
Show resolved Hide resolved
assert dashboard_client.get_dashboard(dashboard.path).as_dict() == lakeview_dashboard.as_dict()
Loading