Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes to work with Databricks SDK v0.38.0 #350

Merged
merged 7 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ classifiers = [
]
dependencies = [
"databricks-labs-blueprint[yaml]>=0.4.2",
"databricks-sdk~=0.37",
"databricks-sdk~=0.38",
JCZuurmond marked this conversation as resolved.
Show resolved Hide resolved
"sqlglot>=22.3.1"
]

Expand Down
4 changes: 2 additions & 2 deletions src/databricks/labs/lsql/dashboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -1133,9 +1133,9 @@ def create_dashboard(
warehouse_id=warehouse_id,
)
if dashboard_id is not None:
sdk_dashboard = self._ws.lakeview.update(dashboard_id, dashboard=dashboard_to_create.as_dict()) # type: ignore
sdk_dashboard = self._ws.lakeview.update(dashboard_id, dashboard=dashboard_to_create)
else:
sdk_dashboard = self._ws.lakeview.create(dashboard=dashboard_to_create.as_dict()) # type: ignore
sdk_dashboard = self._ws.lakeview.create(dashboard=dashboard_to_create)
if publish:
assert sdk_dashboard.dashboard_id is not None
self._ws.lakeview.publish(sdk_dashboard.dashboard_id, warehouse_id=warehouse_id)
Expand Down
89 changes: 48 additions & 41 deletions tests/integration/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,49 +17,56 @@ def test_sql_execution_chunked(ws, disposition):
assert total == 1999999000000


def test_sql_execution(ws, env_or_skip):
results = []
NYC_TAXI_LIMITED_TRIPS = """
WITH zipcodes AS (
SELECT DISTINCT pickup_zip, dropoff_zip
FROM samples.nyctaxi.trips
WHERE pickup_zip = 10282 AND dropoff_zip <= 10005
)

SELECT
trips.pickup_zip,
trips.dropoff_zip,
trips.tpep_pickup_datetime,
trips.tpep_dropoff_datetime
FROM
zipcodes
JOIN
samples.nyctaxi.trips AS trips
ON zipcodes.pickup_zip = trips.pickup_zip AND zipcodes.dropoff_zip = trips.dropoff_zip
ORDER BY trips.dropoff_zip, trips.tpep_pickup_datetime, trips.tpep_dropoff_datetime
"""


def test_sql_execution(ws, env_or_skip) -> None:
results = set()
see = StatementExecutionExt(ws, warehouse_id=env_or_skip("TEST_DEFAULT_WAREHOUSE_ID"))
for pickup_zip, dropoff_zip in see.fetch_all(
"SELECT pickup_zip, dropoff_zip FROM nyctaxi.trips LIMIT 10", catalog="samples"
):
results.append((pickup_zip, dropoff_zip))
assert results == [
(10282, 10171),
(10110, 10110),
(10103, 10023),
(10022, 10017),
(10110, 10282),
(10009, 10065),
(10153, 10199),
(10112, 10069),
(10023, 10153),
(10012, 10003),
]


def test_sql_execution_partial(ws, env_or_skip):
results = []
for pickup_zip, dropoff_zip, *_ in see.fetch_all(NYC_TAXI_LIMITED_TRIPS, catalog="samples"):
results.add((pickup_zip, dropoff_zip))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To check the result-set precisely, results needs to remain a list.

Because the query orders the rows, order-sensitive equality checking is fine in this case.

It's not needed here, but in general for order-insensitive comparison the pattern is:

results = […]
expected = {…}
assert len(results) == len(expected)
assert set(results) == expected

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should clarify: right now this is performing neither an order-sensitive nor an order-insensitive check of the results, but rather something in-between.

In this case I think the comparison is supposed to be order-sensitive (because the query is), so please use lists for both results and the check against the expected set of results.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The example you provided does not work: `assert len(results) == len(expected)` will evaluate to False, because `results` contains duplicates (so the list is longer than the de-duplicated expected set).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the integration test is just there to verify that we get some output back, not to pin the exact row order.

assert results == {
(10282, 7114),
(10282, 10001),
(10282, 10002),
(10282, 10003),
(10282, 10005),
}


def test_sql_execution_partial(ws, env_or_skip) -> None:
results = set()
see = StatementExecutionExt(ws, warehouse_id=env_or_skip("TEST_DEFAULT_WAREHOUSE_ID"), catalog="samples")
for row in see("SELECT * FROM nyctaxi.trips LIMIT 10"):
pickup_time, dropoff_time = row[0], row[1]
pickup_zip = row.pickup_zip
dropoff_zip = row["dropoff_zip"]
for row in see(NYC_TAXI_LIMITED_TRIPS):
pickup_zip, dropoff_zip, pickup_time, dropoff_time = row[0], row[1], row[2], row[3]
all_fields = row.asDict()
logger.info(f"{pickup_zip}@{pickup_time} -> {dropoff_zip}@{dropoff_time}: {all_fields}")
results.append((pickup_zip, dropoff_zip))
assert results == [
(10282, 10171),
(10110, 10110),
(10103, 10023),
(10022, 10017),
(10110, 10282),
(10009, 10065),
(10153, 10199),
(10112, 10069),
(10023, 10153),
(10012, 10003),
]
results.add((pickup_zip, dropoff_zip))
assert results == {
(10282, 7114),
(10282, 10001),
(10282, 10002),
(10282, 10003),
(10282, 10005),
}


def test_fetch_one(ws):
Expand All @@ -73,9 +80,9 @@ def test_fetch_one_fails_if_limit_is_bigger(ws):
see.fetch_one("SELECT * FROM samples.nyctaxi.trips LIMIT 100")


def test_fetch_one_works(ws):
def test_fetch_one_works(ws) -> None:
see = StatementExecutionExt(ws)
row = see.fetch_one("SELECT * FROM samples.nyctaxi.trips LIMIT 1")
row = see.fetch_one("SELECT * FROM samples.nyctaxi.trips WHERE pickup_zip == 10282 LIMIT 1")
assert row.pickup_zip == 10282


Expand Down
4 changes: 2 additions & 2 deletions tests/integration/test_dashboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def create(*, display_name: str = "") -> SDKDashboard:
display_name = f"created_by_lsql_{make_random()}"
else:
display_name = f"{display_name} ({make_random()})"
dashboard = ws.lakeview.create(dashboard=SDKDashboard(display_name=display_name).as_dict())
dashboard = ws.lakeview.create(dashboard=SDKDashboard(display_name=display_name))
if is_in_debug():
dashboard_url = f"{ws.config.host}/sql/dashboardsv3/{dashboard.dashboard_id}"
webbrowser.open(dashboard_url)
Expand Down Expand Up @@ -117,7 +117,7 @@ def test_dashboards_creates_exported_dashboard_definition(ws, make_dashboard) ->
dashboard_content = (Path(__file__).parent / "dashboards" / "dashboard.lvdash.json").read_text()

dashboard_to_create = dataclasses.replace(sdk_dashboard, serialized_dashboard=dashboard_content)
ws.lakeview.update(sdk_dashboard.dashboard_id, dashboard=dashboard_to_create.as_dict())
ws.lakeview.update(sdk_dashboard.dashboard_id, dashboard=dashboard_to_create)
lakeview_dashboard = Dashboard.from_dict(json.loads(dashboard_content))
new_dashboard = dashboards.get_dashboard(sdk_dashboard.path)

Expand Down
Loading