diff --git a/.gitignore b/.gitignore index 02b36fd..bfb36e7 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ dist/ # macOS detritus .DS_Store +# config files +.env diff --git a/examples/pyspark/delta-lake/Dockerfile b/examples/pyspark/delta-lake/Dockerfile new file mode 100644 index 0000000..1abb551 --- /dev/null +++ b/examples/pyspark/delta-lake/Dockerfile @@ -0,0 +1,70 @@ +# This is a multi-stage Dockerfile that can be used to build many different types of +# bundled dependencies for PySpark projects. +# The `base` stage installs generic tools necessary for packaging. +# +# There are `export-` and `build-` stages for the different types of projects. +# - python-packages - Generic support for Python projects with pyproject.toml +# - poetry - Support for Poetry projects +# +# This Dockerfile is generated automatically as part of the emr-cli tool. +# Feel free to modify it for your needs, but leave the `build-` and `export-` +# stages related to your project. +# +# To build manually, you can use the following command, assuming +# the Docker BuildKit backend is enabled. https://docs.docker.com/build/buildkit/ +# +# Example for building a poetry project and saving the output to dist/ folder +# docker build --target export-poetry --output dist . + + +## ---------------------------------------------------------------------------- +## Base stage for python development +## ---------------------------------------------------------------------------- +FROM --platform=linux/amd64 amazonlinux:2 AS base + +RUN yum install -y python3 tar gzip + +ENV VIRTUAL_ENV=/opt/venv +RUN python3 -m venv $VIRTUAL_ENV +ENV PATH="$VIRTUAL_ENV/bin:$PATH" + +# EMR 6.x uses Python 3.7 - limit Poetry version to 1.5.1 +ENV POETRY_VERSION=1.5.1 +RUN python3 -m pip install --upgrade pip +RUN curl -sSL https://install.python-poetry.org | python3 - + +ENV PATH="$PATH:/root/.local/bin" + +WORKDIR /app + +COPY . . 
+ +# Test stage - installs test dependencies defined in pyproject.toml +FROM base as test +RUN python3 -m pip install .[test] + +## ---------------------------------------------------------------------------- +## Build and export stages for standard Python projects +## ---------------------------------------------------------------------------- +# Build stage - installs required dependencies and creates a venv package +FROM base as build-python +RUN python3 -m pip install venv-pack==0.2.0 && \ + python3 -m pip install . +RUN mkdir /output && venv-pack -o /output/pyspark_deps.tar.gz + +# Export stage - used to copy packaged venv to local filesystem +FROM scratch AS export-python +COPY --from=build-python /output/pyspark_deps.tar.gz / + +## ---------------------------------------------------------------------------- +## Build and export stages for Poetry Python projects +## ---------------------------------------------------------------------------- +# Build stage for poetry +FROM base as build-poetry +RUN poetry self add poetry-plugin-bundle && \ + poetry bundle venv dist/bundle --without dev && \ + tar -czvf dist/pyspark_deps.tar.gz -C dist/bundle . && \ + rm -rf dist/bundle + +FROM scratch as export-poetry +COPY --from=build-poetry /app/dist/pyspark_deps.tar.gz / diff --git a/examples/pyspark/delta-lake/README.md b/examples/pyspark/delta-lake/README.md new file mode 100644 index 0000000..44cf428 --- /dev/null +++ b/examples/pyspark/delta-lake/README.md @@ -0,0 +1,78 @@ +# EMR Serverless Delta Lake with Poetry example + +This example shows how to use the [`emr-cli`](https://github.com/awslabs/amazon-emr-cli) to deploy a Poetry-based project with Delta Lake to EMR Serverless. + +As of EMR 6.9.0, Delta Lake jars are provided on the EMR Serverless image. This means you can use the `spark.jars` Spark configuration item to specify the path to the local Delta Lake jars. 
If you use a different version than what's provided with EMR Serverless, you can still use the `--packages` option to specify your version. + +## Getting Started + +> [!NOTE] +> This assumes you already have an EMR Serverless 6.9.0 application or have completed the pre-requisites in this repo's [README](/README.md). + +To create an EMR Serverless application compatible with this code, use the following command: + +```bash +aws emr-serverless create-application \ + --release-label emr-6.9.0 \ + --type SPARK +``` + +- Define some environment variables to be used later + +```shell +export APPLICATION_ID= +export S3_BUCKET= +export JOB_ROLE_ARN=arn:aws:iam:::role/emr-serverless-job-role +``` + +You can either `git clone` this project or use the `emr init` command to create a Poetry project and add the `delta-spark` dependency yourself. + +- Option 1: `git clone` + +``` +git clone https://github.com/aws-samples/emr-serverless-samples.git +cd emr-serverless-samples/examples/pyspark/delta-lake +poetry install +``` + +- Option 2: `emr init` + +``` +emr init --project-type poetry delta-lake +cd delta-lake +poetry add delta-spark==2.1.0 +``` + +Copy `main.py` from this directory to your new folder. + +## Deploying + +```bash +emr run \ + --application-id ${APPLICATION_ID} \ + --job-role ${JOB_ROLE_ARN} \ + --s3-code-uri s3://${S3_BUCKET}/tmp/emr-cli-delta-lake/ \ + --s3-logs-uri s3://${S3_BUCKET}/logs/ \ + --entry-point main.py \ + --job-args ${S3_BUCKET} \ + --spark-submit-opts "--conf spark.jars=/usr/share/aws/delta/lib/delta-core.jar,/usr/share/aws/delta/lib/delta-storage.jar" \ + --build --wait --show-stdout +``` + +> [!NOTE] +> Because of how `delta-spark` is packaged, this will include `pyspark` as a dependency. The `--build` flag packages and deploys a virtualenv with `delta-spark` and related dependencies. 
+ +You should see the following output: + +``` +[emr-cli]: Job submitted to EMR Serverless (Job Run ID: 00fgj5hq9e4le80m) +[emr-cli]: Waiting for job to complete... +[emr-cli]: Job state is now: SCHEDULED +[emr-cli]: Job state is now: RUNNING +[emr-cli]: Job state is now: SUCCESS +[emr-cli]: stdout for 00fgj5hq9e4le80m +-------------------------------------- +Itsa Delta! + +[emr-cli]: Job completed successfully! +``` \ No newline at end of file diff --git a/examples/pyspark/delta-lake/main.py b/examples/pyspark/delta-lake/main.py new file mode 100644 index 0000000..8e0108d --- /dev/null +++ b/examples/pyspark/delta-lake/main.py @@ -0,0 +1,26 @@ +import sys +import uuid + +from delta import DeltaTable, configure_spark_with_delta_pip +from pyspark.sql import SparkSession + +builder = ( + SparkSession.builder.appName("DeltaExample") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config( + "spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog", + ) +) + +spark = configure_spark_with_delta_pip(builder).getOrCreate() + +bucket_name = sys.argv[1] + +url = f"s3://{bucket_name}/tmp/delta-lake/output/1.0.1/{uuid.uuid4()}/" + +# creates a Delta table and outputs to target S3 bucket +spark.range(0, 5).write.format("delta").save(url) + +if DeltaTable.isDeltaTable(spark, url): + print("Itsa Delta!") diff --git a/examples/pyspark/delta-lake/poetry.lock b/examples/pyspark/delta-lake/poetry.lock new file mode 100644 index 0000000..275a9bc --- /dev/null +++ b/examples/pyspark/delta-lake/poetry.lock @@ -0,0 +1,325 @@ +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. 
+ +[[package]] +name = "boto3" +version = "1.26.100" +description = "The AWS SDK for Python" +optional = false +python-versions = ">= 3.7" +files = [ + {file = "boto3-1.26.100-py3-none-any.whl", hash = "sha256:b5be5bcffe17d70a72622f8ecbb428df7b11ef8d1facdfa984e94c6fc9fa301b"}, + {file = "boto3-1.26.100.tar.gz", hash = "sha256:567f03ac638c3a6f4af00d88d081df7d6b8de4d127a26543c4ec1e7509e1a626"}, +] + +[package.dependencies] +botocore = ">=1.29.100,<1.30.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.6.0,<0.7.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.29.100" +description = "Low-level, data-driven core of boto 3." +optional = false +python-versions = ">= 3.7" +files = [ + {file = "botocore-1.29.100-py3-none-any.whl", hash = "sha256:d5c4c5bbbbf0ec62a4235ccac1b9bbb579558f7bb3231d7fb6054e1f64d3a623"}, + {file = "botocore-1.29.100.tar.gz", hash = "sha256:ff6585df3dcef2057be5e54b45d254608d3769d726ea4ccd4e17f77825e5b13d"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = ">=1.25.4,<1.27" + +[package.extras] +crt = ["awscrt (==0.16.9)"] + +[[package]] +name = "click" +version = "7.1.2" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"}, + {file = "click-7.1.2.tar.gz", hash = "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a"}, +] + +[[package]] +name = "delta-spark" +version = "2.1.0" +description = "Python APIs for using Delta Lake with Apache Spark" +optional = false +python-versions = ">=3.6" +files = [ + {file = "delta-spark-2.1.0.tar.gz", hash = "sha256:7f6d55d2344a39ad3ab3a0c08fb947a24a1a80d8bfa70318e66717d49ba8e8ce"}, + {file = "delta_spark-2.1.0-py3-none-any.whl", hash = 
"sha256:f3586cfddc871ffc7f12923ff4a4d601fc5efbf7b550e60388b22ede7f492fa1"}, +] + +[package.dependencies] +importlib-metadata = ">=1.0.0" +pyspark = ">=3.3.0,<3.4.0" + +[[package]] +name = "emr-cli" +version = "0.0.16" +description = "A command-line interface for packaging, deploying, and running your EMR Serverless Spark jobs." +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "emr_cli-0.0.16-py3-none-any.whl", hash = "sha256:138b4e3dcd23c385db01299c082a2513120c18f2a8f71b1bb5153fb075af94c2"}, + {file = "emr_cli-0.0.16.tar.gz", hash = "sha256:08fe66fb0fe7858fd2d361a690a6a08342dbd5b2dc817dcc09ddb5bf43ee30c8"}, +] + +[package.dependencies] +boto3 = ">=1.26.6,<2.0.0" +click = ">=7.1.2,<8.0.0" +importlib-metadata = {version = "6.7.0", markers = "python_version == \"3.7\""} +pyyaml = "5.3.1" +rich = ">=13.4.2,<14.0.0" + +[[package]] +name = "importlib-metadata" +version = "6.7.0" +description = "Read metadata from Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "importlib_metadata-6.7.0-py3-none-any.whl", hash = "sha256:cb52082e659e97afc5dac71e79de97d8681de3aa07ff18578330904a9d18e5b5"}, + {file = "importlib_metadata-6.7.0.tar.gz", hash = "sha256:1aaf550d4f73e5d6783e7acb77aec43d49da8017410afae93822cc9cca98c4d4"}, +] + +[package.dependencies] +typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] + +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +optional = false +python-versions = ">=3.7" +files = [ + {file = 
"jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] + +[[package]] +name = "markdown-it-py" +version = "2.2.0" +description = "Python port of markdown-it. Markdown parsing, done right!" +optional = false +python-versions = ">=3.7" +files = [ + {file = "markdown-it-py-2.2.0.tar.gz", hash = "sha256:7c9a5e412688bc771c67432cbfebcdd686c93ce6484913dccf06cb5a0bea35a1"}, + {file = "markdown_it_py-2.2.0-py3-none-any.whl", hash = "sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" +typing_extensions = {version = ">=3.7.4", markers = "python_version < \"3.8\""} + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["attrs", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + +[[package]] +name = "py4j" +version = "0.10.9.5" +description = "Enables Python programs to dynamically access arbitrary Java objects" +optional = false +python-versions = "*" +files = [ + {file = "py4j-0.10.9.5-py2.py3-none-any.whl", hash = 
"sha256:52d171a6a2b031d8a5d1de6efe451cf4f5baff1a2819aabc3741c8406539ba04"}, + {file = "py4j-0.10.9.5.tar.gz", hash = "sha256:276a4a3c5a2154df1860ef3303a927460e02e97b047dc0a47c1c3fb8cce34db6"}, +] + +[[package]] +name = "pygments" +version = "2.17.2" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.7" +files = [ + {file = "pygments-2.17.2-py3-none-any.whl", hash = "sha256:b27c2826c47d0f3219f29554824c30c5e8945175d888647acd804ddd04af846c"}, + {file = "pygments-2.17.2.tar.gz", hash = "sha256:da46cec9fd2de5be3a8a784f434e4c4ab670b4ff54d605c4c2717e9d49c4c367"}, +] + +[package.extras] +plugins = ["importlib-metadata"] +windows-terminal = ["colorama (>=0.4.6)"] + +[[package]] +name = "pyspark" +version = "3.3.0" +description = "Apache Spark Python API" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pyspark-3.3.0.tar.gz", hash = "sha256:7ebe8e9505647b4d124d5a82fca60dfd3891021cf8ad6c5ec88777eeece92cf7"}, +] + +[package.dependencies] +py4j = "0.10.9.5" + +[package.extras] +ml = ["numpy (>=1.15)"] +mllib = ["numpy (>=1.15)"] +pandas-on-spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] +sql = ["pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pyyaml" +version = "5.3.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = "*" +files = [ + {file = "PyYAML-5.3.1-cp27-cp27m-win32.whl", hash = 
"sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f"}, + {file = "PyYAML-5.3.1-cp27-cp27m-win_amd64.whl", hash = "sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76"}, + {file = "PyYAML-5.3.1-cp35-cp35m-win32.whl", hash = "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2"}, + {file = "PyYAML-5.3.1-cp35-cp35m-win_amd64.whl", hash = "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c"}, + {file = "PyYAML-5.3.1-cp36-cp36m-win32.whl", hash = "sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2"}, + {file = "PyYAML-5.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648"}, + {file = "PyYAML-5.3.1-cp37-cp37m-win32.whl", hash = "sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a"}, + {file = "PyYAML-5.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf"}, + {file = "PyYAML-5.3.1-cp38-cp38-win32.whl", hash = "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97"}, + {file = "PyYAML-5.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee"}, + {file = "PyYAML-5.3.1-cp39-cp39-win32.whl", hash = "sha256:ad9c67312c84def58f3c04504727ca879cb0013b2517c85a9a253f0cb6380c0a"}, + {file = "PyYAML-5.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:6034f55dab5fea9e53f436aa68fa3ace2634918e8b5994d82f3621c04ff5ed2e"}, + {file = "PyYAML-5.3.1.tar.gz", hash = "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d"}, +] + +[[package]] +name = "rich" +version = "13.7.0" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "rich-13.7.0-py3-none-any.whl", hash = "sha256:6da14c108c4866ee9520bbffa71f6fe3962e193b7da68720583850cd4548e235"}, + {file 
= "rich-13.7.0.tar.gz", hash = "sha256:5cb5123b5cf9ee70584244246816e9114227e0b98ad9176eede6ad54bf5403fa"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" +typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""} + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + +[[package]] +name = "s3transfer" +version = "0.6.0" +description = "An Amazon S3 Transfer Manager" +optional = false +python-versions = ">= 3.7" +files = [ + {file = "s3transfer-0.6.0-py3-none-any.whl", hash = "sha256:06176b74f3a15f61f1b4f25a1fc29a4429040b7647133a463da8fa5bd28d5ecd"}, + {file = "s3transfer-0.6.0.tar.gz", hash = "sha256:2ed07d3866f523cc561bf4a00fc5535827981b117dd7876f036b0c1aca42c947"}, +] + +[package.dependencies] +botocore = ">=1.12.36,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "typing-extensions" +version = "4.5.0" +description = "Backported and Experimental Type Hints for Python 3.7+" +optional = false +python-versions = ">=3.7" +files = [ + {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, + {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, +] + +[[package]] +name = "urllib3" +version = "1.26.15" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ + {file = "urllib3-1.26.15-py2.py3-none-any.whl", hash = "sha256:aa751d169e23c7479ce47a0cb0da579e3ede798f994f5816a74e4f4500dcea42"}, + {file = "urllib3-1.26.15.tar.gz", hash = "sha256:8a388717b9476f934a21484e8c8e61875ab60644d29b9b39e11e4b9dc1c6b305"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "zipp" +version = "3.15.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.7" +files = [ + {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, + {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + +[metadata] +lock-version = "2.0" +python-versions = "^3.7" +content-hash = "8a38311a067b6e47739a3414e9d490e244b7b573970d9c4eb8dd6b3055ac3962" diff --git a/examples/pyspark/delta-lake/pyproject.toml b/examples/pyspark/delta-lake/pyproject.toml new file mode 100644 index 0000000..441a60c --- /dev/null +++ b/examples/pyspark/delta-lake/pyproject.toml @@ -0,0 +1,20 @@ +[tool.poetry] +name = "delta-lake" +version = "0.1.0" +description = "" +authors = ["Your Name "] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.7" +boto3 = 
"^1.26.100" +delta-spark = "2.1.0" +emr-cli = ">=0.0.16,<0.1.0" + +[tool.poetry.group.dev.dependencies] +pyspark = "3.3.0" +emr-cli = ">=0.0.16,<0.1.0" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api"