diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 449c68e6..2880be5d 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -1,4 +1,3 @@
-
 version: 2
 updates:
   - package-ecosystem: pip
diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 930ccfa6..ab380e26 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -2,15 +2,15 @@ name: Tests
 
 on:
   push:
-    branches: [ "main" ]
+    branches: ["main"]
     paths-ignore:
-      - 'docs/**'
+      - "docs/**"
   pull_request:
-    branches: [ "main" ]
+    branches: ["main"]
     paths-ignore:
-      - 'docs/**'
+      - "docs/**"
   schedule:
-    - cron: '0 4 * * *'  # run once a day at 4 AM
+    - cron: "0 4 * * *" # run once a day at 4 AM
 
 env:
   PYTEST_ADDOPTS: "--color=yes"
@@ -21,7 +21,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10", "3.11","3.12"]
+        python-version: ["3.10", "3.11", "3.12"]
     steps:
       - uses: actions/checkout@v4
         with:
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 40ff0022..032e0fe0 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -1,4 +1,3 @@
-
 name: Release Python Package
 
 on:
@@ -13,7 +12,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: '3.x'
+          python-version: "3.x"
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
diff --git a/.github/workflows/test-integration.yaml b/.github/workflows/test-integration.yaml
index 791687f7..cc3d8757 100644
--- a/.github/workflows/test-integration.yaml
+++ b/.github/workflows/test-integration.yaml
@@ -2,10 +2,10 @@ name: Integration tests
 
 on:
   push:
-    branches: [ "main" ]
+    branches: ["main"]
   pull_request:
-    branches: [ "main" ]
-    types: [ opened, reopened, synchronize, labeled ]
+    branches: ["main"]
+    types: [opened, reopened, synchronize, labeled]
 
 env:
   PYTEST_ADDOPTS: "--color=yes"
@@ -29,12 +29,11 @@ jobs:
         # https://github.com/pangeo-forge/pangeo-forge-recipes/issues/540#issuecomment-1685096271
         # Once https://github.com/pangeo-forge/pangeo-forge-runner/pull/90 goes in, we can add back
         # integration testing for 3.10 and 3.11 (for runner versions that follow that PR).
- python-version: ["3.9"] # , "3.10", "3.11"] - runner-version: [ - "pangeo-forge-runner==0.9.1", - "pangeo-forge-runner==0.9.2", - "pangeo-forge-runner==0.9.3", - ] + python-version: ["3.9"] # , "3.10", "3.11"] + runner-version: + - "pangeo-forge-runner==0.9.1" + - "pangeo-forge-runner==0.9.2" + - "pangeo-forge-runner==0.9.3" steps: - uses: actions/checkout@v4 - name: 🔁 Setup Python @@ -45,7 +44,6 @@ jobs: cache: pip cache-dependency-path: pyproject.toml - - name: Install pangeo-forge recipes and runner shell: bash -l {0} run: | @@ -57,7 +55,7 @@ jobs: run: | python -m pip install ecmwflibs eccodes cfgrib - - name: 'Setup minio' + - name: "Setup minio" run: | wget --quiet https://dl.min.io/server/minio/release/linux-amd64/minio chmod +x minio diff --git a/.gitignore b/.gitignore index dd1a2d32..e2e8ef6e 100644 --- a/.gitignore +++ b/.gitignore @@ -136,3 +136,6 @@ dask-worker-space # vscode .vscode/ + +# linter caches +.prettier_cache diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8b4a5ad6..1a9d2271 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,47 +1,52 @@ +ci: + autofix_prs: true + autoupdate_schedule: "monthly" + repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-docstring-first -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 24.10.0 hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-docstring-first - - id: check-json - - id: check-yaml - - id: pretty-format-json - args: ["--autofix", "--indent=2", "--no-sort-keys"] - exclude: "docs/" + - id: black -- repo: https://github.com/psf/black - rev: 24.4.2 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.8.0 hooks: - - id: black - args: ["--line-length", "100"] + - id: ruff + args: ["--fix", "--show-fixes"] -- repo: https://github.com/PyCQA/flake8 - rev: 7.1.0 + - repo: https://github.com/pre-commit/mirrors-mypy + rev: "v1.10.0" hooks: - - id: flake8 - exclude: pangeo_forge_recipes/recipes + - id: mypy + exclude: tests,pangeo_forge_recipes/recipes -- repo: https://github.com/asottile/seed-isort-config - rev: v2.2.0 + - repo: https://github.com/rstcheck/rstcheck + rev: v6.2.4 hooks: - - id: seed-isort-config + - id: rstcheck -- repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.10.0' + - repo: https://github.com/rbubley/mirrors-prettier + rev: v3.3.3 hooks: - - id: mypy - exclude: tests,pangeo_forge_recipes/recipes + - id: prettier + args: ["--cache-location=.prettier_cache/cache"] -- repo: https://github.com/pycqa/isort - rev: 5.13.2 + - repo: https://github.com/ComPWA/taplo-pre-commit + rev: v0.9.3 hooks: - - id: isort - args: ["--profile", "black"] + - id: taplo-format + args: ["--option", "array_auto_collapse=false"] + - id: taplo-lint + args: ["--no-schema"] -- repo: https://github.com/rstcheck/rstcheck - rev: v6.2.0 + - repo: https://github.com/abravalheri/validate-pyproject + rev: v0.23 hooks: - - id: rstcheck + - id: validate-pyproject diff --git a/README.md b/README.md index a1bdec9f..a52cda6b 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![NSF Award 2026932](https://img.shields.io/badge/NSF-2026932-blue)](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2026932&HistoricalAwards=false) -__pangeo-forge__ is an open-source tool 
designed to aid the extraction, transformation, and loading of datasets. The goal of pangeo-forge is to make it easy to extract datasets from traditional data repositories and deposit them into cloud object storage in analysis-ready, cloud-optimized format. +**pangeo-forge** is an open-source tool designed to aid the extraction, transformation, and loading of datasets. The goal of pangeo-forge is to make it easy to extract datasets from traditional data repositories and deposit them into cloud object storage in analysis-ready, cloud-optimized format. pangeo-forge is inspired by [conda-forge](https://conda-forge.org/), a community-led collection of recipes for building [Conda](https://docs.conda.io/en/latest/) packages. We hope that pangeo-forge can play the same role for datasets. diff --git a/docs/advanced/globus.md b/docs/advanced/globus.md index 28b5874f..45c85a41 100644 --- a/docs/advanced/globus.md +++ b/docs/advanced/globus.md @@ -12,6 +12,7 @@ following workaround. To ingest files from Globus with Pangeo Forge, you must create a _fully public Guest Collection and access the files via HTTPS_. The specific steps are as follows: + - Verify that the Globus endpoint you want to transfer from uses **Globus Connect Server V5**. This method _will not work_ with earlier Globus versions (e.g. V4). diff --git a/docs/api_reference.md b/docs/api_reference.md index a6797cc2..e77e05f1 100644 --- a/docs/api_reference.md +++ b/docs/api_reference.md @@ -2,7 +2,6 @@ ## File Patterns - ```{eval-rst} .. autoclass:: pangeo_forge_recipes.patterns.FilePattern :members: @@ -16,7 +15,6 @@ :members: ``` - ```{eval-rst} .. autoclass:: pangeo_forge_recipes.patterns.MergeDim :members: @@ -49,7 +47,6 @@ :members: ``` - ## Processing Functions The [Beam PTransform Style Guide](https://beam.apache.org/contribute/ptransform-style-guide/) recommends: @@ -81,7 +78,6 @@ The [Beam PTransform Style Guide](https://beam.apache.org/contribute/ptransform- ## Combiners - ```{eval-rst} .. automodule:: pangeo_forge_recipes.combiners :members: diff --git a/docs/composition/examples/gpcp-from-gcs.md b/docs/composition/examples/gpcp-from-gcs.md index a6a3df71..7ad5ae42 100644 --- a/docs/composition/examples/gpcp-from-gcs.md +++ b/docs/composition/examples/gpcp-from-gcs.md @@ -5,4 +5,5 @@ This recipe is representative of the [](../styles.md#open-with-xarray-write-to-z ``` ```{literalinclude} ../../../examples/feedstock/gpcp_from_gcs.py + ``` diff --git a/docs/composition/examples/gpcp-rechunk.md b/docs/composition/examples/gpcp-rechunk.md index 87cbc548..75f3b10e 100644 --- a/docs/composition/examples/gpcp-rechunk.md +++ b/docs/composition/examples/gpcp-rechunk.md @@ -1,5 +1,5 @@ # GPCP Rechunk - ```{literalinclude} ../../../examples/feedstock/gpcp_rechunk.py + ``` diff --git a/docs/composition/examples/noaa-oisst.md b/docs/composition/examples/noaa-oisst.md index 798bfcb0..a25e36cb 100644 --- a/docs/composition/examples/noaa-oisst.md +++ b/docs/composition/examples/noaa-oisst.md @@ -5,4 +5,5 @@ This recipe is representative of the [](../styles.md#open-with-xarray-write-to-z ``` ```{literalinclude} ../../../examples/feedstock/noaa_oisst.py + ``` diff --git a/docs/composition/file_patterns.md b/docs/composition/file_patterns.md index 0b0da4f7..24e74e9b 100644 --- a/docs/composition/file_patterns.md +++ b/docs/composition/file_patterns.md @@ -18,8 +18,8 @@ inputs (or "ingredients") upon which the recipe will act. 
 File patterns describe
 - Where individual source files are located; and
 - How they should be organized logically as part of an aggregate dataset.
-(In this respect, file patterns are conceptually similar to
-[NcML](https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_aggregation.html) documents.)
+  (In this respect, file patterns are conceptually similar to
+  [NcML](https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_aggregation.html) documents.)
 
 ```{note}
 API Reference is available here: {class}`pangeo_forge_recipes.patterns.FilePattern`
@@ -65,6 +65,7 @@ http://data-provider.org/data/humidity/humidity_10.txt
 ```
 
 This is a relatively common way to organize data files:
+
 - There are two different "variables" (temperature and humidity), stored in separate
   files.
 - There is a sequence of 10 files for each variable. We will assume that this
   represents the "time" axis of the data.
@@ -124,6 +125,7 @@ These are the same as the names used in our [](#combine-dimensions).
 
 We now need to define the "combine dimensions" of the file pattern.
 Combine dimensions are one of two types:
+
 - {class}`pangeo_forge_recipes.patterns.ConcatDim`: The files should be combined by
   concatenating _the same variables_ sequentially along an axis. This is conceptually
   similar to Xarray's [concat](http://xarray.pydata.org/en/stable/combining.html#concatenate)
@@ -134,14 +136,14 @@ Combine dimensions are one of two types:
   operation.
 
 File patterns permit us to combine multiple combine dims into a single pattern.
 
-For the present example, we have one ``MergeDim``:
+For the present example, we have one `MergeDim`:
 
 ```{code-cell} ipython3
 from pangeo_forge_recipes.patterns import MergeDim
 
 variable_merge_dim = MergeDim("variable", ["temperature", "humidity"])
 ```
 
-...and one ``ConcatDim``:
+...and one `ConcatDim`:
 
 ```{code-cell} ipython3
 from pangeo_forge_recipes.patterns import ConcatDim
@@ -177,7 +179,7 @@ pattern
 To see the full code in one place, please refer back to [](#sneak-peek-the-full-code).
 
-### Create a `FilePattern` from a list of files 
+### Create a `FilePattern` from a list of files
 
 Alternatively, we can also use the convenience function
 {meth}`pangeo_forge_recipes.patterns.pattern_from_file_sequence`
 to create a file pattern from a list of files.
@@ -193,14 +195,13 @@ file_list = [
 pattern = pattern_from_file_sequence(file_list, concat_dim="time")
 ```
 
-
 ## Inspect a `FilePattern`
 
 We can inspect file patterns manually to understand how they work. This is
 not necessary to create a recipe; however digging into a `FilePattern`'s internals
 may be helpful in debugging a complex recipe. Internally, the file pattern maps the
 keys of the [](#combine-dimensions) to logical indices. We can see all of these keys by iterating over
-the patterns using the ``items()`` method:
+the patterns using the `items()` method:
 
 ```{code-cell} ipython3
 for index, fname in pattern.items():
@@ -227,7 +228,7 @@ As covered in {doc}`index`, a recipe is composed of a sequence of Apache Beam tr
 The data Apache Beam transforms operate on are [`PCollections`](https://beam.apache.org/documentation/programming-guide/#pcollections).
 Therefore, to bring the contents of a `FilePattern` into a recipe, we pass the index:url
-pairs generated by the file pattern's ``items()`` method into Beam's `Create` constructor
+pairs generated by the file pattern's `items()` method into Beam's `Create` constructor
 as follows:
 
 ```{code-cell} ipython3
diff --git a/docs/composition/styles.md b/docs/composition/styles.md
index 1be55ce1..3fccd12a 100644
--- a/docs/composition/styles.md
+++ b/docs/composition/styles.md
@@ -21,20 +21,17 @@ Below we give a very basic overview of how this recipe is used.
 
 First you must define a {doc}`file pattern `.
 Once you have a {class}`FilePattern ` object, the recipe pipeline will contain at a minimum the following transforms applied to the file pattern collection:
 
-* {class}`pangeo_forge_recipes.transforms.OpenURLWithFSSpec`: retrieves each pattern file using the specified URLs.
-* {class}`pangeo_forge_recipes.transforms.OpenWithXarray`: load each pattern file into an [`xarray.Dataset`](https://docs.xarray.dev/en/stable/generated/xarray.Dataset.html).
-* {class}`pangeo_forge_recipes.transforms.StoreToZarr`: generate a Zarr store by combining the datasets.
-* {class}`pangeo_forge_recipes.transforms.ConsolidateDimensionCoordinates`: consolidate the Dimension Coordinates for dataset read performance.
-* {class}`pangeo_forge_recipes.transforms.ConsolidateMetadata`: calls Zarr's convinience function to consolidate metadata.
-
-### Open existing Zarr Store
-* {class}`pangeo_forge_recipes.transforms.OpenWithXarray` supports opening existing Zarr stores. This might be useful for rechunking a Zarr store into an alternative chunking scheme.
-An example of this recipe can be found in - {doc}`examples/gpcp-rechunk`
-
-
-
+- {class}`pangeo_forge_recipes.transforms.OpenURLWithFSSpec`: retrieves each pattern file using the specified URLs.
+- {class}`pangeo_forge_recipes.transforms.OpenWithXarray`: load each pattern file into an [`xarray.Dataset`](https://docs.xarray.dev/en/stable/generated/xarray.Dataset.html).
+- {class}`pangeo_forge_recipes.transforms.StoreToZarr`: generate a Zarr store by combining the datasets.
+- {class}`pangeo_forge_recipes.transforms.ConsolidateDimensionCoordinates`: consolidate the Dimension Coordinates for dataset read performance.
+- {class}`pangeo_forge_recipes.transforms.ConsolidateMetadata`: calls Zarr's convenience function to consolidate metadata.
+
+### Open existing Zarr Store
+
+- {class}`pangeo_forge_recipes.transforms.OpenWithXarray` supports opening existing Zarr stores. This might be useful for rechunking a Zarr store into an alternative chunking scheme.
+  An example of this recipe can be found in {doc}`examples/gpcp-rechunk`
 
 ```{tip}
 If using the {class}`pangeo_forge_recipes.transforms.ConsolidateDimensionCoordinates` transform, make sure to chain on the {class}`pangeo_forge_recipes.transforms.ConsolidateMetadata` transform to your recipe.
@@ -56,7 +53,6 @@ selecting this option, it is therefore up to you, the user, to ensure that the i
 append.
 ```
 
-
 ## Open with Kerchunk, write to virtual Zarr
 
 The standard Zarr recipe creates a copy of the original dataset in the Zarr format, this
diff --git a/docs/composition/transforms.md b/docs/composition/transforms.md
index 12085570..41c8573a 100644
--- a/docs/composition/transforms.md
+++ b/docs/composition/transforms.md
@@ -12,11 +12,11 @@ kernelspec:
 Once you have a {doc}`file pattern ` for your source data, it's time to
 define a set of transforms to apply to the data, which may include:
 
-  - Standard transforms from Apache Beam's
-    [Python transform catalog](https://beam.apache.org/documentation/transforms/python/overview/)
-  - `pangeo-forge-recipes` core transforms, such as [](#openers) and [](#writers)
-  - Third-party extensions from the Pangeo Forge {doc}`../ecosystem`
-  - Your own transforms, such as custom [](#preprocessors)
+- Standard transforms from Apache Beam's
+  [Python transform catalog](https://beam.apache.org/documentation/transforms/python/overview/)
+- `pangeo-forge-recipes` core transforms, such as [](#openers) and [](#writers)
+- Third-party extensions from the Pangeo Forge {doc}`../ecosystem`
+- Your own transforms, such as custom [](#preprocessors)
 
 ```{hint}
 Please refer to the [](./index.md#generic-sequence) and [](./styles.md) for discussion of
@@ -40,7 +40,7 @@ Once you've created a {doc}`file pattern ` for your source data,
 you'll need to open it somehow. Pangeo Forge currently provides the following openers:
 
 - {class}`pangeo_forge_recipes.transforms.OpenURLWithFSSpec`
-    - ⚙️ `cache` - Deploy-time configurable keyword argument
+  - ⚙️ `cache` - Deploy-time configurable keyword argument
 - {class}`pangeo_forge_recipes.transforms.OpenWithXarray`
 - {class}`pangeo_forge_recipes.transforms.OpenWithKerchunk`
@@ -57,9 +57,9 @@ for this purpose and included in your recipe.
 ## Writers
 
 - {class}`pangeo_forge_recipes.transforms.StoreToZarr`
-    - ⚙️ `target_root` - Deploy-time configurable keyword argument
+  - ⚙️ `target_root` - Deploy-time configurable keyword argument
 - {class}`pangeo_forge_recipes.transforms.WriteCombinedReference`
-    - ⚙️ `target_root` - Deploy-time configurable keyword argument
+  - ⚙️ `target_root` - Deploy-time configurable keyword argument
 
 ## What's next
diff --git a/docs/contributing.md b/docs/contributing.md
index f6b21eee..f3a63ab9 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -7,11 +7,11 @@ Reporting bugs and requesting new features via GitHub Issues is a very valuable
 To open a new issue, head over to the relevant issue page:
 
 - :
-For anything related to {doc}`composition/index`.
+  For anything related to {doc}`composition/index`.
 - :
-For the {doc}`deployment/cli`.
+  For the {doc}`deployment/cli`.
 - :
-For the {doc}`deployment/action`.
+  For the {doc}`deployment/action`.
 
 ## Dev environment setup
@@ -39,7 +39,6 @@ Finally, create a new git remote pointing to the upstream repo:
 git remote add upstream git@github.com:pangeo-forge/pangeo-forge-recipes.git
 ```
 
-
 ### Create a virtual environment
 
 We strongly recommend creating an isolated virtual environment,
diff --git a/docs/deployment/cli.md b/docs/deployment/cli.md
index 7dcedb5a..cf3000c0 100644
--- a/docs/deployment/cli.md
+++ b/docs/deployment/cli.md
@@ -50,13 +50,14 @@ A recipe in your {doc}`feedstock ` can be deployed with the followin
 language: bash
 ---
 ```
+
 Where the variables have the following values assigned to them:
 
 - `REPO`: Path to the feedstock repo. This can be a
-local path or a URL to a GitHub repo.
+  local path or a URL to a GitHub repo.
 - `CONFIG_FILE`: Local path to the deployment [](#configuration-file).
 - `RECIPE_ID`: The `id` of the recipe you'd like to run as it appears
-in your feedstock's [](./feedstocks.md#metayaml).
+  in your feedstock's [](./feedstocks.md#metayaml).
 - `JOB_NAME`: A unique name for this deployment.
 
 ```{tip}
diff --git a/docs/deployment/index.md b/docs/deployment/index.md
index 9a4d09b0..c363f2ac 100644
--- a/docs/deployment/index.md
+++ b/docs/deployment/index.md
@@ -28,15 +28,15 @@ Users are welcome to use this native Beam deployment approach for their recipes
 Apache Beam (and therefore, Pangeo Forge) supports flexible deployment via "runners",
 which include:
 
-* [DirectRunner](https://beam.apache.org/documentation/runners/direct/):
-Useful for testing during recipe development and, in multithreaded mode, for certain production workloads.
-(Note that Apache Beam does _not_ recommend this runner for production.)
-* [FlinkRunner](https://beam.apache.org/documentation/runners/flink/):
-Executes pipelines using [Apache Flink](https://flink.apache.org/).
-* [DataflowRunner](https://beam.apache.org/documentation/runners/dataflow/):
-Uses the [Google Cloud Dataflow managed service](https://cloud.google.com/dataflow/service/dataflow-service-desc).
-* [DaskRunner](https://beam.apache.org/releases/pydoc/current/apache_beam.runners.dask.dask_runner.html):
-Executes pipelines via [Dask.distributed](https://distributed.dask.org/en/stable/).
+- [DirectRunner](https://beam.apache.org/documentation/runners/direct/):
+  Useful for testing during recipe development and, in multithreaded mode, for certain production workloads.
+  (Note that Apache Beam does _not_ recommend this runner for production.)
+- [FlinkRunner](https://beam.apache.org/documentation/runners/flink/):
+  Executes pipelines using [Apache Flink](https://flink.apache.org/).
+- [DataflowRunner](https://beam.apache.org/documentation/runners/dataflow/):
+  Uses the [Google Cloud Dataflow managed service](https://cloud.google.com/dataflow/service/dataflow-service-desc).
+- [DaskRunner](https://beam.apache.org/releases/pydoc/current/apache_beam.runners.dask.dask_runner.html):
+  Executes pipelines via [Dask.distributed](https://distributed.dask.org/en/stable/).
 
 When deploying with the CLI, the runner is specified via a [](cli.md#configuration-file).
diff --git a/docs/index.md b/docs/index.md
index ae4df6f8..5b81c13d 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -14,8 +14,8 @@ Pangeo Forge is made of three official components:
 - `pangeo-forge-runner` **Command Line Interface (CLI)** - A utility for managing the
   configuration and deployment of version-controlled recipes. See {doc}`deployment/cli` for details.
 - **Deploy Recipe Action** - A Github Action that wraps the CLI, providing an interface
-for configuration and deployment of recipes in response to to GitHub Event triggers.
-See {doc}`deployment/action` for details.
+  for configuration and deployment of recipes in response to GitHub Event triggers.
+  See {doc}`deployment/action` for details.
 
 A growing {doc}`ecosystem` of third-party extensions provide additional reusable
 components for customization of recipes.
@@ -56,7 +56,6 @@ transparency remain consistent over time, implementation details and project sco
 since publication.
 ```
 
-
 ## Frequently Asked Questions
 
 ### Is Pangeo Forge the right tool for my dataset?
@@ -90,13 +89,12 @@ Check out {doc}`contributing` for how to get started!
 Pangeo Forge is a community run effort with a variety of roles:
 
 - **Recipe developers** — Data engineers and enthusiasts who write recipes to define the data conversions.
-This can be anyone with a desire to create analysis ready cloud-optimized (ARCO) data. Explore
-{doc}`composition/index` for more on this role.
+  This can be anyone with a desire to create analysis-ready, cloud-optimized (ARCO) data. Explore
+  {doc}`composition/index` for more on this role.
 - **Data users** - Analysts, scientists, and domain experts who use the ARCO data produced by Pangeo Forge
-in their work and research.
+  in their work and research.
 - **Tooling developers** - Scientists and software developers who maintain and enhance the
-open-source code base which makes Pangeo Forge run. See {doc}`contributing` for more.
-
+  open-source code base which makes Pangeo Forge run. See {doc}`contributing` for more.
 
 ## Site Contents
diff --git a/pyproject.toml b/pyproject.toml
index 301fbbf9..b6536077 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,52 +9,52 @@ requires-python = ">=3.8"
 description = "Pipeline tools for building and publishing analysis ready datasets."
 readme = "README.md"
 authors = [
-    { name = "Ryan Abernathey", email = "rpa@ldeo.columbia.edu" }
+  { name = "Ryan Abernathey", email = "rpa@ldeo.columbia.edu" },
 ]
 classifiers = [
-    "Development Status :: 1 - Planning",
-    "License :: OSI Approved :: Apache Software License",
-    "Operating System :: OS Independent",
-    "Intended Audience :: Science/Research",
-    "Programming Language :: Python",
-    "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.8",
-    "Programming Language :: Python :: 3.9",
-    "Topic :: Scientific/Engineering",
+  "Development Status :: 1 - Planning",
+  "License :: OSI Approved :: Apache Software License",
+  "Operating System :: OS Independent",
+  "Intended Audience :: Science/Research",
+  "Programming Language :: Python",
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3.8",
+  "Programming Language :: Python :: 3.9",
+  "Topic :: Scientific/Engineering",
 ]
 license = { text = "Apache-2.0" }
 keywords = ["pangeo", "data"]
 dependencies = [
-    "apache-beam>=2.48",
-    "cftime",
-    "dask",
-    "fastparquet",
-    "fsspec[http]",
-    "h5netcdf",
-    "h5py",
-    "kerchunk!=0.2.6",
-    "netcdf4",
-    "numcodecs",
-    "xarray",
-    "zarr",
+  "apache-beam>=2.48",
+  "cftime",
+  "dask",
+  "fastparquet",
+  "fsspec[http]",
+  "h5netcdf",
+  "h5py",
+  "kerchunk!=0.2.6",
+  "netcdf4",
+  "numcodecs",
+  "xarray",
+  "zarr",
 ]
 
 [project.optional-dependencies]
 test = [
-    "click",
-    "pytest<8.0.0",
-    "pytest-cov",
-    "pytest-xdist",
-    "pytest-lazy-fixture",
-    "pytest-sugar",
-    "pytest-timeout",
-    "s3fs",
-    "gcsfs",
-    "scipy",
+  "click",
+  "pytest<8.0.0",
+  "pytest-cov",
+  "pytest-xdist",
+  "pytest-lazy-fixture",
+  "pytest-sugar",
+  "pytest-timeout",
+  "s3fs",
+  "gcsfs",
+  "scipy",
 ]
 minio = [
-    "docker",
+  "docker",
 ]
 
 [project.urls]
@@ -75,14 +75,40 @@ pangeo_forge_recipes = ["py.typed"]
 
 [tool.black]
 line-length = 100
 
-[tool.isort]
-known_first_party = "pangeo_forge_recipes"
-known_third_party = ["aiohttp", "apache_beam", "cftime", "click", "dask", "fsspec", "gcsfs", "kerchunk", "numpy", "packaging", "pandas", "pytest", "pytest_lazyfixture", "s3fs", "xarray", "zarr"]
-multi_line_output = 3
-include_trailing_comma = true
-force_grid_wrap = 0
-combine_as_imports = true
-line_length = 100
+[tool.ruff]
+target-version = "py310"
+builtins = ["ellipsis"]
+exclude = [".git", ".eggs", "build", "dist", "__pycache__"]
+line-length = 100
+
+[tool.ruff.lint]
+ignore = [
+  "E402", # module level import not at top of file
+  "E501", # line too long - let black worry about that
+  "E731", # do not assign a lambda expression, use a def
+  "UP038", # type union instead of tuple for isinstance etc
+]
+per-file-ignores = { "examples/runner-config/local.py" = ["F821"] }
+select = [
+  "F", # Pyflakes
+  "E", # Pycodestyle
+  "I", # isort
+  # "UP", # Pyupgrade
+  # "TID", # flake8-tidy-imports
+  "W",
+]
+# extend-safe-fixes = [
+#   "TID252", # absolute imports
+# ]
+fixable = ["I"]
+
+[tool.ruff.lint.isort]
+known-first-party = ["pangeo_forge_recipes"]
+combine-as-imports = true
+
+[tool.ruff.lint.flake8-tidy-imports]
+# Disallow all relative imports.
+ban-relative-imports = "all"
 
 [tool.pytest.ini_options]
 log_cli = false
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 1cb951fc..00000000
--- a/setup.cfg
+++ /dev/null
@@ -1,6 +0,0 @@
-# Everything here should be moved to pyproject.toml or similar
-
-[flake8]
-max-line-length = 100
-per-file-ignores =
-    examples/runner-config/local.py:F821
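The `docs/composition/file_patterns.md` changes above discuss `MergeDim`, `ConcatDim`, and `items()` in separate fragments. As an editorial aside, here is a minimal sketch that assembles those pieces end to end. It is not code from this diff: the helper name `make_url`, the URL template, and the `{time:02d}` zero-padding are assumptions modeled on the example URLs shown in the docs (e.g. `http://data-provider.org/data/humidity/humidity_10.txt`).

```python
# Illustrative sketch only -- not part of this diff. Assumes the example
# layout from docs/composition/file_patterns.md: two variables x ten files.
import apache_beam as beam

from pangeo_forge_recipes.patterns import ConcatDim, FilePattern, MergeDim


def make_url(variable, time):
    # Hypothetical URL template; the zero-padding is an assumption based on
    # the docs' example listing (e.g. .../humidity/humidity_10.txt).
    return f"http://data-provider.org/data/{variable}/{variable}_{time:02d}.txt"


variable_merge_dim = MergeDim("variable", ["temperature", "humidity"])
time_concat_dim = ConcatDim("time", keys=list(range(1, 11)))

# The format function's keyword arguments must match the combine dim names.
pattern = FilePattern(make_url, variable_merge_dim, time_concat_dim)

# items() yields the index:url pairs described in the docs.
for index, url in pattern.items():
    print(index, url)

# A recipe brings these pairs into a pipeline via Beam's Create:
with beam.Pipeline() as p:
    urls = p | beam.Create(pattern.items())
```

Run with the default DirectRunner this merely materializes the twenty index:url pairs; in a real recipe the resulting collection would be chained through transforms such as `OpenURLWithFSSpec`, `OpenWithXarray`, and `StoreToZarr`, as described in `docs/composition/styles.md`.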