Skip to content

Commit

Permalink
Release version 1.5.2, Merge pull request #262 from sentinel-hub/develop
Browse files Browse the repository at this point in the history
Release version 1.5.2
  • Loading branch information
zigaLuksic authored Aug 16, 2023
2 parents a66e96b + 2a0664e commit b24404d
Show file tree
Hide file tree
Showing 108 changed files with 2,404 additions and 1,880 deletions.
9 changes: 0 additions & 9 deletions .flake8

This file was deleted.

14 changes: 9 additions & 5 deletions .github/workflows/ci_action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ on:
branches:
- "main"
- "develop"
schedule:
- cron: "0 0 * * *"
workflow_call:

concurrency:
# This will cancel outdated runs on the same pull-request, but not runs for other triggers
Expand Down Expand Up @@ -63,6 +62,7 @@ jobs:
python-version:
- "3.8"
- "3.10"
- "3.11"
include:
# A flag marks whether full or partial tests should be run
# We don't run integration tests on pull requests from outside repos, because they don't have secrets
Expand All @@ -84,11 +84,11 @@ jobs:
run: |
sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable -y
sudo apt-get update
sudo apt-get install -y build-essential gdal-bin libgdal-dev graphviz proj-bin gcc libproj-dev libspatialindex-dev
sudo apt-get install -y build-essential gdal-bin libgdal-dev graphviz proj-bin gcc libproj-dev libspatialindex-dev libboost-all-dev
export CPLUS_INCLUDE_PATH=/usr/include/gdal
export C_INCLUDE_PATH=/usr/include/gdal
pip install -e .[DEV,ML]
pip install gdal==$(gdal-config --version | awk -F'[.]' '{print $1"."$2}')
pip install gdal==$(gdal-config --version)
- name: Run fast tests
if: ${{ !matrix.full_test_suite }}
Expand All @@ -100,7 +100,11 @@ jobs:
sentinelhub.config \
--sh_client_id "${{ secrets.SH_CLIENT_ID }}" \
--sh_client_secret "${{ secrets.SH_CLIENT_SECRET }}"
pytest -m "chain or not integration" --cov --cov-report=term --cov-report=xml
if [ ${{ github.event_name }} == 'push' ]; then
pytest -m "chain or not integration" --cov --cov-report=term --cov-report=xml
else
pytest -m "chain or not integration"
fi
- name: Upload code coverage
if: ${{ matrix.full_test_suite && github.event_name == 'push' }}
Expand Down
11 changes: 11 additions & 0 deletions .github/workflows/scheduler_caller.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
name: scheduled build caller

on:
schedule:
# Schedule events are triggered by whoever last changed the cron schedule
- cron: "5 0 * * *"

jobs:
call-workflow:
uses: sentinel-hub/eo-grow/.github/workflows/ci_action.yml@develop
secrets: inherit
35 changes: 6 additions & 29 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,48 +13,25 @@ repos:
- id: debug-statements

- repo: https://github.com/pre-commit/mirrors-prettier
rev: "v3.0.0-alpha.6"
rev: "v3.0.1"
hooks:
- id: prettier
exclude: "tests/(test_stats|test_project)/"
types_or: [json]

- repo: https://github.com/psf/black
rev: 23.3.0
rev: 23.7.0
hooks:
- id: black
language_version: python3

- repo: https://github.com/pycqa/isort
rev: 5.12.0
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: "v0.0.282"
hooks:
- id: isort
name: isort (python)

- repo: https://github.com/PyCQA/autoflake
rev: v2.0.2
hooks:
- id: autoflake
args:
[
--remove-all-unused-imports,
--in-place,
--ignore-init-module-imports,
]

- repo: https://github.com/pycqa/flake8
rev: 6.0.0
hooks:
- id: flake8
additional_dependencies:
- flake8-bugbear==23.2.13
- flake8-comprehensions==3.10.1
- flake8-simplify==0.19.3
- flake8-typing-imports==1.14.0
- id: ruff

- repo: https://github.com/nbQA-dev/nbQA
rev: 1.7.0
hooks:
- id: nbqa-black
- id: nbqa-isort
- id: nbqa-flake8
- id: nbqa-ruff
155 changes: 155 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
## [Version 1.5.2] - 2023-08-16

- Pipelines using a Ray cluster now add the cluster configuration file to the logs folder.
- The CLI command `eogrow-ray` no longer supports `--screen` and `--stop` commands.
- Changelog now also stored in the `CHANGELOG.md` file.
- Improved test-data generating pipeline.
- Switched from `flake8` and `isort` to `ruff`.
- Various minor improvements.

## [Version 1.5.1] - 2023-05-03

- Fix bug in `LoggingManager.Schema` where `Tuple[str]` was used instead of `Tuple[str, ...]` for certain fields, preventing parsing of correct configurations.


## [Version 1.5.0] - 2023-04-25

- (**code-breaking**) Simplified `RasterizePipeline` and improved rasterization of temporal vectors.
- (**code-breaking**) Area managers no longer offer AOI modification in the `area` parameter. It has been replaced with a simpler `filename` field. We added a rerouting parser, so old configs should work for a while longer.
- (**code-breaking**) Separated machine learning requirements to `ML` extra that you can install via `pip install eogrow[ML]`. These packages are only necessary for sampling, training, and prediction pipelines.
- Added `VectorImportPipeline` for adding vector features to EOPatches.
- Improved `ExportMapsPipeline` when working with large amounts of files, contributed by @aashishd.
- Config files are now uploaded to the cluster before being executed. This prevents issues with commands failing on very large configs.
- Added `restrict_types` validator that detects incompatible `FeatureType` inputs for fields of type `Feature`.
- Added `ensure_storage_key_presence` validator, which checks that the specified storage key is defined in the storage manager. Typos in storage keys will now be detected at validation.
- Storage managers now support a `filesystem_kwargs` parameter.
- Fixed bug where area managers would not filter the grid correctly if the grid was read from the cache.
- Logs to stdout are now colored and contain timestamps.
- Logging configs can now use `"..."` value to reference default packages for fields such as `pipeline_ignore_packages`.
- Pipelines can now be given custom names, which makes it easier to identify certain pipelines when searching for logs or when running them in config chains.
- Switched to a `pyproject.toml` based installation.
- Added new sections to documentation of the high level overview and a collection of commonly used patterns.
- Improved testing tools.
- Various minor improvements.


## [Version 1.4.0] - 2023-01-26

- (**code-breaking**) Large changes to area managers. See PR https://github.com/sentinel-hub/eo-grow/pull/168
* EOPatch manager functionality was merged to area managers. EOPatch managers were removed.
* Changes to area manager Schemas.
* Changes to area manager interface. Check the documentation for all the changes.
* Adjustments to Pipeline interface. See PR https://github.com/sentinel-hub/eo-grow/pull/168 for how most pipelines need to be adjusted.
* Improved filtration via list of EOPatch names.
- (**code-breaking**) Added `ZipMapPipeline` which replaces `MappingPipeline`.
- (**code-breaking**) Added `SplitGridPipeline` which replaces `SwitchGridsPipeline`.
- (**code-breaking**) Adjusted resize parameters in `ImportTiffPipeline` according to changes in `SpatialResizeTask` in new `eo-learn` version.
- Fixed issue with label encoder in prediction pipeline. Contributed by @ashishdhiman-tomtom
- Moved types to `eogrow.types` and deprecated `eogrow.utils.types`. Removed `Path` type alias.
- Added support for EOPatch names when using the `-t` flag.


## [Version 1.3.3] - 2022-11-17

- Added `ImportTiffPipeline` for importing a tiff file into EOPatches.
- `ExportMapsPipeline` now runs in parallel (single-machine only).
- Fixed issue where `ExportMapsPipeline` consumed increasing amounts of storage space.
- Area and eopatch managers for batch grids now warn the user if not linked correctly.
- Added `pyogrio` as a possible `geopandas` backend for IO (experimental).
- Add support for `geopandas` version 0.12.
- Improve types after `mypy` version 0.990.
- Removed `utils.enum` and old style of templating due to non-use.
- Other various improvements and clean-ups.


## [Version 1.3.2] - 2022-10-24

- Greatly improved `ExportMapsPipeline` and `IngestByocTilesPipeline`, which are now also able to export and ingest temporal BYOC collections
- Improved test suite for exporting maps and ingesting BYOC collections
- Fixed code according to newly exposed `eolearn.core` types
- Fixed broken github links in documentation
- Improvements to CI, added pre-commit hooks to the repository


## [Version 1.3.1] - 2022-08-31

- BYOC ingestion pipeline is better at handling CRS objects
- Because `pydantic` now type-checks default factories, two custom factories `list_factory` and `dict_factory` have been added, since using just `list` currently clashes with fields of kind `List[int]`.


## [Version 1.3.0] - 2022-08-30

- Added `IngestByocTiles` pipeline, which creates or updates a BYOC collection from maps exported via `ExportMapsPipeline`.
- Greatly improved `DataCollection` parser, which can now parse `DataCollectionSchema` objects instead of just names.
- Added tests for validator utility functions.
- New general validators `ensure_defined_together` and `ensure_exactly_one_defined` for verifying optional parameters.
- Documentation of `Schema` objects is now much more verbose.
- `ExportMapsPipeline` now saves maps into subfolders (per UTM zone).
- Fixed issue where `ExportMapPipeline` ignored `dtype` and `nodata` when merging.
- Improved handling of `aws_profile` parameter in storage managers.
- `RasterizePipeline` now has an additional `raster_shape` parameter.


## [Version 1.2.0] - 2022-07-27

- Fixed a bug in `BatchToEOPatchPipeline` where temporal dimension of some imported features could be reversed. Memory-optimization functionalities have been reverted.
- Improved the way `filesystem` object is passed to EOTasks in EOWorkflows. These changes are a consequence of changes in `eo-learn==1.2.0`.
- Added support for `aws_acl` parameter into `Storage` schema.
- Download pipelines now support an optional `size` parameter.
- Official support for Python `3.10`.
- Large changes in testing utilities. Statistics produced by `ContentTester` have been changed and are now more descriptive.
- Improvements in code-style checkers and CI.


## [Version 1.1.1] - 2022-06-14

- Support session sharing in download pipelines.
- Improved `BatchAreaManager` bounding boxes.
- Improve memory footprint of various pipelines.
- Disabled `skip_existing` and `eopatch_list` at validation time for pipelines that do not support filtration.
- Support for rasterization of temporal vector features from files.
- Docs are now built automatically and the type annotations are included in parameter descriptions, resulting in better readability.
- Many minor improvements and fixes in code, tests, and documentation.


## [Version 1.1.0] - 2022-03-05

- Large changes in config objects and schemas:
* replaced `Config` object with config utility functions `collect_configs_from_path`, `interpret_config_from_dict`, and `interpret_config_from_path`,
* pipeline and manager config objects are now `pydantic` schema classes, which are fully typed objects,
* removed `${env:variable}` from the config language.

- Changes in area managers:
* added `AreaManager.cache_grid` method,
* (**code-breaking**) improved functionalities of `BatchAreaManager`, instead of `tile_buffer` it now uses `tile_buffer_x` and `tile_buffer_y` config parameters,
* (**code-breaking**) improved `UtmZoneAreaManager`, replaced `patch_buffer` config parameter with `patch_buffer_x` and `patch_buffer_y` which now work with absolute instead of relative buffers,
* implemented grid transformation methods for `UtmZoneAreaManager` and `BatchAreaManager`.

- Other core improvements:
* added `EOGrowObject.from_raw_config` and `EOGrowObject.from_path` methods,
* fixed an issue in `EOPatchManager`,
* improvements of pipeline logging, logging handlers, and filters.

- Pipeline improvements:
* Implemented `SwitchGridPipeline` for converting data between tiling grids.
* Large updates of `BatchDownloadPipeline` with restructured config schema and additional functionalities.
* `BatchToEOPatchPipeline` now works with `input_folder_key` and `output_folder_key` instead of `folder_key` and has an option not to delete input data. A few issues in the pipeline were fixed and unit tests were added.
* Minor improvements of config parameters in `MergeSamplesPipeline` and prediction pipelines.
* Implemented `DummyDataPipeline` for generating data for unit tests.
- New tasks:
* `SpatialJoinTask` and `SpatialSliceTask` for spatial operations on EOPatches,
* `DummyRasterFeatureTask` and `DummyTimestampFeatureTask` for creating EOPatches with dummy data.
- Updates in utilities:
* added utilities for spatial operations and grid transformations,
* implemented `eogrow.utils.fs.LocalFolder` abstraction,
* renamed `get_patches_without_all_features` into `get_patches_with_missing_features` from `eogrow.utils.filter`,
* (**code-breaking**) updated `eogrow.utils.testing.run_and_test_pipeline` to work with a list of pipeline configs.
- Created the `eo-grow` package [documentation page](https://eo-grow.readthedocs.io/en/latest/).
- `eo-grow` is now a fully typed package. Added mypy and isort code checking to CI.
- Updated tutorial notebooks to work with the latest code.
- Many minor improvements and fixes in code, tests, and documentation.


## [Version 1.0.0] - 2022-02-10

First release of the `eo-grow` package.
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
# Include typehints in descriptions
autodoc_typehints = "description"

# Both the class and the __init__ methods docstring are concatenated and inserted.
# Both the class' and the __init__ method's docstring are concatenated and inserted.
autoclass_content = "both"

# Content is in the same order as in module
Expand Down
2 changes: 1 addition & 1 deletion eogrow/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""The main module of the eo-grow package."""

__version__ = "1.5.1"
__version__ = "1.5.2"
37 changes: 7 additions & 30 deletions eogrow/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import click

from .core.config import collect_configs_from_path, interpret_config_from_dict
from .core.logging import CLUSTER_FILE_LOCATION_ON_HEAD
from .core.schemas import build_schema_template
from .pipelines.testing import TestPipeline
from .utils.general import jsonify
Expand Down Expand Up @@ -69,26 +70,6 @@ def run_pipeline(config_path: str, cli_variables: Tuple[str, ...], test_patches:
@click.option(
"--start", "start_cluster", is_flag=True, type=bool, help="Starts the cluster if it is not currently running."
)
@click.option(
"--stop",
"stop_cluster",
is_flag=True,
type=bool,
help=(
"Stops the cluster if after running the pipeline. In order for this to work got to AWS console "
"-> IAM -> Roles -> select ray-autoscaler-v1 role and attach IAMReadOnlyAccess policy."
),
)
@click.option(
"--screen",
"use_screen",
is_flag=True,
type=bool,
help=(
"Run the cluster in a detached mode using screen software. Use Ctrl+A+D to detach any time, "
"even when running a pipeline or a Jupyter notebook. Use Ctrl+D to terminate the remote screen."
),
)
@click.option(
"--tmux",
"use_tmux",
Expand All @@ -102,8 +83,6 @@ def run_pipeline_on_cluster(
config_path: str,
cluster_yaml: str,
start_cluster: bool,
stop_cluster: bool,
use_screen: bool,
use_tmux: bool,
cli_variables: Tuple[str, ...],
test_patches: Tuple[int, ...],
Expand All @@ -119,26 +98,24 @@ def run_pipeline_on_cluster(
if start_cluster:
start_cluster_if_needed(cluster_yaml)

if stop_cluster and (use_screen or use_tmux):
raise NotImplementedError("It is not clear how to combine stop flag with either screen or tmux flag")

raw_configs = [interpret_config_from_dict(config) for config in collect_configs_from_path(config_path)]
remote_path = generate_cluster_config_path(config_path)

with NamedTemporaryFile(mode="w", delete=True, suffix=".json") as local_path:
json.dump(raw_configs, local_path)
local_path.flush() # without this the sync can happen before the file content is written

subprocess.run(f"ray rsync_up {cluster_yaml} {local_path.name!r} {remote_path!r}", shell=True)
subprocess.run(f"ray rsync_up {cluster_yaml} {cluster_yaml!r} {CLUSTER_FILE_LOCATION_ON_HEAD!r}", shell=True)

cmd = (
f"eogrow {remote_path}"
+ "".join(f' -v "{cli_var_spec}"' for cli_var_spec in cli_variables) # noqa B028
+ "".join(f' -v "{cli_var_spec}"' for cli_var_spec in cli_variables) # B028
+ "".join(f" -t {patch_index}" for patch_index in test_patches)
+ ("; " if stop_cluster else "") # Otherwise, ray will incorrectly prepare a command for stopping a cluster
)
flag_info = [("stop", stop_cluster), ("screen", use_screen), ("tmux", use_tmux)]
exec_flags = " ".join(f"--{flag_name}" for flag_name, use_flag in flag_info if use_flag)
exec_flags = "--tmux" if use_tmux else ""

subprocess.run(f"ray exec {exec_flags} {cluster_yaml} {cmd!r}", shell=True) # noqa B028
subprocess.run(f"ray exec {exec_flags} {cluster_yaml} {cmd!r}", shell=True) # B028


@click.command()
Expand Down
Loading

0 comments on commit b24404d

Please sign in to comment.