diff --git a/data/people_countries_delta_dask/_delta_log/.00000000000000000000.json.crc b/data/people_countries_delta_dask/_delta_log/.00000000000000000000.json.crc new file mode 100644 index 0000000..2a72af8 Binary files /dev/null and b/data/people_countries_delta_dask/_delta_log/.00000000000000000000.json.crc differ diff --git a/data/people_countries_delta_dask/_delta_log/00000000000000000000.json b/data/people_countries_delta_dask/_delta_log/00000000000000000000.json new file mode 100644 index 0000000..000d2e4 --- /dev/null +++ b/data/people_countries_delta_dask/_delta_log/00000000000000000000.json @@ -0,0 +1,6 @@ +{"commitInfo":{"timestamp":1706278148531,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[\"country\"]"},"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numFiles":"3","numOutputRows":"5","numOutputBytes":"3045"},"engineInfo":"Apache-Spark/3.4.0 Delta-Lake/2.4.0","txnId":"1cbc9537-63eb-4799-8647-2d947ae8fa41"}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"1f110132-a652-4be9-815e-348f294515cf","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"first_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"last_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"country\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"continent\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["country"],"configuration":{},"createdTime":1706278146762}} +{"add":{"path":"country=Argentina/part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet","partitionValues":{"country":"Argentina"},"size":1018,"modificationTime":1706278148083,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"first_name\":\"Ernesto\",\"last_name\":\"Guevara\",\"continent\":\"NaN\"},\"maxValues\":{\"first_name\":\"Ernesto\",\"last_name\":\"Guevara\",\"continent\":\"NaN\"},\"nullCount\":{\"first_name\":0,\"last_name\":0,\"continent\":0}}"}} +{"add":{"path":"country=China/part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet","partitionValues":{"country":"China"},"size":1002,"modificationTime":1706278148138,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"first_name\":\"Bruce\",\"last_name\":\"Lee\",\"continent\":\"Asia\"},\"maxValues\":{\"first_name\":\"Jack\",\"last_name\":\"Ma\",\"continent\":\"Asia\"},\"nullCount\":{\"first_name\":0,\"last_name\":0,\"continent\":0}}"}} +{"add":{"path":"country=Germany/part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet","partitionValues":{"country":"Germany"},"size":1025,"modificationTime":1706278148185,"dataChange":true,"stats":"{\"numRecords\":2,\"minValues\":{\"first_name\":\"Soraya\",\"last_name\":\"Jala\",\"continent\":\"NaN\"},\"maxValues\":{\"first_name\":\"Wolfgang\",\"last_name\":\"Manche\",\"continent\":\"NaN\"},\"nullCount\":{\"first_name\":0,\"last_name\":0,\"continent\":0}}"}} diff --git a/data/people_countries_delta_dask/country=Argentina/.part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet.crc b/data/people_countries_delta_dask/country=Argentina/.part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet.crc new file mode 100644 index 0000000..31c4629 Binary files /dev/null and b/data/people_countries_delta_dask/country=Argentina/.part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet.crc differ diff --git a/data/people_countries_delta_dask/country=Argentina/part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet b/data/people_countries_delta_dask/country=Argentina/part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet new file mode 100644 index 0000000..214a0c9 Binary files /dev/null and b/data/people_countries_delta_dask/country=Argentina/part-00000-8d0390a3-f797-4265-b9c2-da1c941680a3.c000.snappy.parquet differ diff --git a/data/people_countries_delta_dask/country=China/.part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet.crc b/data/people_countries_delta_dask/country=China/.part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet.crc new file mode 100644 index 0000000..f5463cc Binary files /dev/null and b/data/people_countries_delta_dask/country=China/.part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet.crc differ diff --git a/data/people_countries_delta_dask/country=China/part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet b/data/people_countries_delta_dask/country=China/part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet new file mode 100644 index 0000000..61ea5f4 Binary files /dev/null and b/data/people_countries_delta_dask/country=China/part-00000-88fba1af-b28d-4303-9c85-9a97be631d40.c000.snappy.parquet differ diff --git a/data/people_countries_delta_dask/country=Germany/.part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet.crc b/data/people_countries_delta_dask/country=Germany/.part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet.crc new file mode 100644 index 0000000..7c8b129 Binary files /dev/null and b/data/people_countries_delta_dask/country=Germany/.part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet.crc differ diff --git a/data/people_countries_delta_dask/country=Germany/part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet b/data/people_countries_delta_dask/country=Germany/part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet new file mode 100644 index 0000000..8ab9c0a Binary files /dev/null and b/data/people_countries_delta_dask/country=Germany/part-00000-030076e1-5ec9-47c2-830a-1569f823b6ee.c000.snappy.parquet differ diff --git a/notebooks/python-deltalake/dask-deltalake.ipynb b/notebooks/python-deltalake/dask-deltalake.ipynb new file mode 100644 index 0000000..423a79d --- /dev/null +++ b/notebooks/python-deltalake/dask-deltalake.ipynb @@ -0,0 +1,808 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2324f66b-f367-4b07-9892-0a3d8b9153d2", + "metadata": {}, + "source": [ + "# Using Delta Lake with Dask\n", + "\n", + "Delta Lake is a great storage format for Dask analyses. This page will explain why and how to use Delta Lake with Dask.\n", + "\n", + "You will learn how to read Delta Lakes into Dask DataFrames, how to query Delta tables with Dask, and the unique advantages Delta Lake offers the Dask community.\n", + "\n", + "Here are some of the benefits that Delta Lake provides Dask users:\n", + "- better performance with file skipping\n", + "- enhanced file skipping via Z Ordering\n", + "- ACID transactions for reliable writes\n", + "- easy time-travel functionality" + ] + }, + { + "cell_type": "markdown", + "id": "546a47b5-5614-4078-80be-61e2b365cedc", + "metadata": {}, + "source": [ + "> ❗️ `dask-deltatable` doesn't currently work with deltalake=0.14, use deltalake=13.0 or lower. See https://github.com/dask-contrib/dask-deltatable/issues/65" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "001e4111-23d7-4db2-9eda-68cb57ba46d2", + "metadata": {}, + "outputs": [], + "source": [ + "import dask_deltatable as ddt\n", + "import dask.dataframe as dd\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "id": "c61cc62d-ed7f-4d1c-b613-e7555708c0ac", + "metadata": {}, + "source": [ + "## Read Delta Lake into a Dask DataFrame" + ] + }, + { + "cell_type": "markdown", + "id": "64b6710b-aa00-4f97-a2d5-b6dc9810a9b8", + "metadata": {}, + "source": [ + "Let's start with some data stored in a Delta Lake on disk. Read it into a Dask DataFrame using `dask-deltatable.read_deltalake`:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6b4a1fca-0b21-4a34-998c-7fd07d86f1ff", + "metadata": {}, + "outputs": [], + "source": [ + "# read delta table into Dask DataFrame\n", + "delta_path = \"../../data/people_countries_delta_dask\"\n", + "ddf = ddt.read_deltalake(delta_path)" + ] + }, + { + "cell_type": "markdown", + "id": "f4b07844-cba9-4095-8429-38b62f69c9e6", + "metadata": {}, + "source": [ + "Dask is a library for efficient distributed computing and works with [lazy evaluation](https://docs.dask.org/en/stable/user-interfaces.html#laziness-and-computing). Function calls to `dask.dataframe` build a task graph in the background. To trigger computation, call `.compute()`:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "cf5d9296-5914-497b-be00-fe7371ed6d57", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
first_namelast_namecountrycontinent
0ErnestoGuevaraArgentinaNaN
0BruceLeeChinaAsia
1JackMaChinaAsia
0WolfgangMancheGermanyNaN
1SorayaJalaGermanyNaN
\n", + "
" + ], + "text/plain": [ + " first_name last_name country continent\n", + "0 Ernesto Guevara Argentina NaN\n", + "0 Bruce Lee China Asia\n", + "1 Jack Ma China Asia\n", + "0 Wolfgang Manche Germany NaN\n", + "1 Soraya Jala Germany NaN" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.compute()" + ] + }, + { + "cell_type": "markdown", + "id": "868bc406-14d1-4905-8dfc-1af2279c82ed", + "metadata": {}, + "source": [ + "You can read in specific versions of Delta tables by specifying a `version` number or a timestamp:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "44b79297-0d22-4411-a84c-9b385c204624", + "metadata": {}, + "outputs": [], + "source": [ + "# # with specific version\n", + "# ddf = ddt.read_deltalake(delta_path, version=3)\n", + "\n", + "# # with specific datetime\n", + "# ddt.read_deltalake(delta_path, datetime=\"2018-12-19T16:39:57-08:00\")" + ] + }, + { + "cell_type": "markdown", + "id": "b8651b12-aeaf-4664-b26f-a3daaabff0b0", + "metadata": {}, + "source": [ + "`dask-deltatable` also supports reading from remote sources like S3 with:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "cbf00cc0-298c-4655-ae9a-b29d12e83d2a", + "metadata": {}, + "outputs": [], + "source": [ + "# ddt.read_deltalake(\"s3://bucket_name/delta_path\", version=3)" + ] + }, + { + "cell_type": "markdown", + "id": "c2d50dc6-0d2f-4fe6-9a8a-2cd8e3afb5eb", + "metadata": {}, + "source": [ + "> To read data from remote sources you'll need to make sure the credentials are properly configured in environment variables or config files. Refer to your cloud provider documentation to configure these." + ] + }, + { + "cell_type": "markdown", + "id": "fc085856-4a88-4087-a1da-bff626094236", + "metadata": {}, + "source": [ + "## What can I do with a Dask Deltatable?" + ] + }, + { + "cell_type": "markdown", + "id": "d550eec3-7d19-4591-b752-74d98eccb997", + "metadata": {}, + "source": [ + "Reading a Delta Lake in with `dask-deltatable` returns a regular Dask DataFrame. You can perform [all the regular Dask operations](https://docs.dask.org/en/stable/dataframe.html) on this DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b3b749a1-6e5e-41e9-b362-9faafcf9d616", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dask.dataframe.core.DataFrame" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(ddf)" + ] + }, + { + "cell_type": "markdown", + "id": "d8bb7107-fdce-4d22-a759-f5802155a46d", + "metadata": {}, + "source": [ + "Let's take a look at the first few rows:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "eed2de99-738c-4e9b-b2e5-fcd5ab3b89f7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
first_namelast_namecountrycontinent
0ErnestoGuevaraArgentina<NA>
1WolfgangMancheGermany<NA>
2SorayaJalaGermany<NA>
\n", + "
" + ], + "text/plain": [ + " first_name last_name country continent\n", + "0 Ernesto Guevara Argentina \n", + "1 Wolfgang Manche Germany \n", + "2 Soraya Jala Germany " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.head(n=3)" + ] + }, + { + "cell_type": "markdown", + "id": "717d4956-e095-465b-b506-ba4e376a7503", + "metadata": {}, + "source": [ + "`dask.dataframe.head()` shows you the first rows of the first partition in the dataframe. In this case, the first partition only has 1 row.\n", + "\n", + "This is because the Delta Lake has been partitioned by country:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7e09a4a2-b2e0-44ea-b0c3-58c7fbbadeec", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m_delta_log\u001b[m\u001b[m \u001b[34mcountry=Argentina\u001b[m\u001b[m \u001b[34mcountry=China\u001b[m\u001b[m \u001b[34mcountry=Germany\u001b[m\u001b[m\n" + ] + } + ], + "source": [ + "!ls ../../data/people_countries_delta_dask" + ] + }, + { + "cell_type": "markdown", + "id": "bdec4f14-921f-420f-bc19-671c4f210f6b", + "metadata": {}, + "source": [ + "`dask-deltatable` neatly reads in the partitioned Delta Lake into corresponding Dask DataFrame partitions:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f1ba689d-e198-4710-9152-0aafe761880e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# see number of partitions\n", + "ddf.npartitions" + ] + }, + { + "cell_type": "markdown", + "id": "f1ffc4f2-45ea-4884-b861-d8293110e97c", + "metadata": {}, + "source": [ + "You can inspect a single partition using `dask.dataframe.get_partition()`:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "cf5724fa-2203-4917-965d-e566cd82b16e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
first_namelast_namecountrycontinent
0WolfgangMancheGermany<NA>
1SorayaJalaGermany<NA>
\n", + "
" + ], + "text/plain": [ + " first_name last_name country continent\n", + "0 Wolfgang Manche Germany \n", + "1 Soraya Jala Germany " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.get_partition(n=1).compute()" + ] + }, + { + "cell_type": "markdown", + "id": "081a17cb-5ff9-4797-9329-38178f3342f9", + "metadata": {}, + "source": [ + "## Perform Dask Operations" + ] + }, + { + "cell_type": "markdown", + "id": "38d76a77-e09b-41c9-b516-4ecda564738a", + "metadata": {}, + "source": [ + "Let's perform some basic computations over the Delta Lake data that's now stored in our Dask DataFrame. \n", + "\n", + "Suppose you want to group the dataset by the `country` column:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "298a51f4-06b9-46f0-a654-6adbabf7eee8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
first_namelast_namecontinent
country
Argentina110
Germany220
China222
\n", + "
" + ], + "text/plain": [ + " first_name last_name continent\n", + "country \n", + "Argentina 1 1 0\n", + "Germany 2 2 0\n", + "China 2 2 2" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.groupby(['country']).count().compute()" + ] + }, + { + "cell_type": "markdown", + "id": "11ed91c2-0fcf-4114-b057-902bc546c0d1", + "metadata": {}, + "source": [ + "Dask executes this `groupby` operation in parallel across all available cores. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b40fc539-c9d2-4050-b699-2ed00b76f4d0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "95219b2f-434b-491c-a02d-230f711ffcfe", + "metadata": {}, + "source": [ + "## Map Functions over Partitions" + ] + }, + { + "cell_type": "markdown", + "id": "230855dd-e3c2-461b-a969-7a0e8abed69d", + "metadata": {}, + "source": [ + "You can also use Dask's `map_partitions` method to map a custom Python function over all the partitions. \n", + "\n", + "Let's write a function that will replace the missing `continent` values with the right continent names." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "8e3fdaf8-d969-448a-af30-91886b166bac", + "metadata": {}, + "outputs": [], + "source": [ + "# define custom python function\n", + "\n", + "# get na_string\n", + "df = ddf.get_partition(0).compute()\n", + "na_string = df.iloc[0].continent\n", + "na_string\n", + "\n", + "# define function\n", + "def replace_proper(partition, na_string):\n", + " if [partition.country == \"Argentina\"]:\n", + " partition.loc[partition.country==\"Argentina\"] = partition.loc[partition.country==\"Argentina\"].replace(na_string, \"South America\")\n", + " if [partition.country == \"Germany\"]:\n", + " partition.loc[partition.country==\"Germany\"] = partition.loc[partition.country==\"Germany\"].replace(na_string, \"Europe\")\n", + " else:\n", + " pass\n", + " return partition " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "a1f7d880-0152-47bb-a67c-3d880c1d3e8b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
first_namelast_namecountrycontinent
0ErnestoGuevaraArgentinaSouth America
0WolfgangMancheGermanyEurope
1SorayaJalaGermanyEurope
0BruceLeeChinaAsia
1JackMaChinaAsia
\n", + "
" + ], + "text/plain": [ + " first_name last_name country continent\n", + "0 Ernesto Guevara Argentina South America\n", + "0 Wolfgang Manche Germany Europe\n", + "1 Soraya Jala Germany Europe\n", + "0 Bruce Lee China Asia\n", + "1 Jack Ma China Asia" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# define metadata and map function over partitions\n", + "meta = dict(ddf.dtypes)\n", + "ddf3 = ddf.map_partitions(replace_proper, na_string, meta=meta)\n", + "ddf3.compute()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a639399-56d5-40cf-9e14-762f52fd78c8", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "3a475f84-cb5f-44ba-bd42-511ce879318c", + "metadata": {}, + "source": [ + "## Write to Delta Lake\n", + "After doing your data processing in Dask, you can write the data back out to Delta Lake using `to_deltalake`:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "084ba149-a179-4945-8c37-68732cf2c137", + "metadata": {}, + "outputs": [], + "source": [ + "# ddt.to_deltalake(ddf, \"tmp/test_write\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81d675e5-3363-45d5-ab86-4cd831bdcada", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "aec71505-2de9-408d-8a2f-ce48989eacd2", + "metadata": {}, + "source": [ + "## Contribute to `dask-deltalake`" + ] + }, + { + "cell_type": "markdown", + "id": "b0370ae5-08a9-4873-9479-e6cfc169995a", + "metadata": {}, + "source": [ + "To contribute, go to the [`dask-deltalake` Github repository](https://github.com/rrpelgrim/dask-deltatable)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3fb5cbc-b5ab-42a5-bf6c-937e194568e7", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad4eb23b-3dfb-469a-a7e8-6c23aa8bfb90", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:dask-delta-0140]", + "language": "python", + "name": "conda-env-dask-delta-0140-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}