From 78314e3f1323e1e1c334ee39bb98d4f98f8e7d7f Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Wed, 2 Oct 2024 15:19:53 -0400
Subject: [PATCH 01/40] search s3 bucket&prefix for granules if none provided

---
 src/hyp3_srg/time_series.py | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/src/hyp3_srg/time_series.py b/src/hyp3_srg/time_series.py
index 6914574..7be459e 100644
--- a/src/hyp3_srg/time_series.py
+++ b/src/hyp3_srg/time_series.py
@@ -44,6 +44,24 @@ def get_s3_args(uri: str, dest_dir: Optional[Path] = None) -> None:
     return bucket, key, out_path
 
 
+def get_granules_from_s3(bucket: str, prefix: str = '') -> list[str]:
+    """Retrieve granule (zip files) uris from the given s3 bucket and prefix.
+
+    Args:
+        bucket: the s3 bucket
+        prefix: the path after the bucket and before the file
+
+    Returns:
+        uris: a list of uris to the zip files
+    """
+
+    bucket = bucket.replace('s3:', '').replace('/', '')
+    res = S3.list_objects(Bucket=bucket, Prefix=prefix)
+    keys = [item['Key'] for item in res['Contents'] if item['Key'].endswith('.zip')]
+    uris = ['/'.join(['s3://' + bucket, key]) for key in keys]
+    return uris
+
+
 def download_from_s3(uri: str, dest_dir: Optional[Path] = None) -> None:
     """Download a file from an S3 bucket
 
@@ -316,6 +334,14 @@ def time_series(
     if not sbas_dir.exists():
         mkdir(sbas_dir)
 
+    bucket_is_for_upload = True
+
+    if granules == []:
+        if bucket is None:
+            raise ValueError('Either a list of granules or a s3 bucket must be provided, but got neither.')
+        granules = get_granules_from_s3(bucket, bucket_prefix)
+        bucket_is_for_upload = False
+
     granule_names = load_products(granules)
     dem_path = dem.download_dem_for_srg(bounds, work_dir)
 
@@ -325,7 +351,7 @@ def time_series(
     create_time_series(work_dir=sbas_dir)
 
     zip_path = package_time_series(granule_names, bounds, work_dir)
-    if bucket:
+    if bucket_is_for_upload and bucket:
         upload_file_to_s3(zip_path, bucket, bucket_prefix)
 
     print(f'Finished time-series processing for {", ".join(granule_names)}!')
@@ -345,7 +371,7 @@ def main():
     )
     parser.add_argument('--bucket', help='AWS S3 bucket HyP3 for upload the final product(s)')
    parser.add_argument('--bucket-prefix', default='', help='Add a bucket prefix to product(s)')
-    parser.add_argument('granules', type=str.split, nargs='+', help='GSLC granules.')
+    parser.add_argument('granules', type=str.split, nargs='*', default='', help='GSLC granules.')
     args = parser.parse_args()
     args.granules = [item for sublist in args.granules for item in sublist]
     time_series(**args.__dict__)

From 981d7a947c62d766c0566d467686657d3fa421e8 Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Wed, 2 Oct 2024 15:21:59 -0400
Subject: [PATCH 02/40] updated changelog

---
 CHANGELOG.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0fe4e88..7f5f8f7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/)
 and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.8.1]
+
+### Changed
+* `time_series` will now search the s3 bucket (`--bucket` and `--bucket-prefix`) for granules if no granules were explicitly provided
+
 ## [0.8.0]
 
 ### Added

From 795b71ce4532448b0916e47f0d97c69123a6cf6d Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Wed, 2 Oct 2024 15:54:16 -0400
Subject: [PATCH 03/40] filter to only s1[ab]_iw_raw when searching s3

---
 src/hyp3_srg/time_series.py | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/hyp3_srg/time_series.py b/src/hyp3_srg/time_series.py
index 7be459e..319a4c8 100644
--- a/src/hyp3_srg/time_series.py
+++ b/src/hyp3_srg/time_series.py
@@ -4,6 +4,7 @@
 
 import argparse
 import logging
+import re
 import shutil
 from os import mkdir
 from pathlib import Path
@@ -54,10 +55,13 @@ def get_granules_from_s3(bucket: str, prefix: str = '') -> list[str]:
     Returns:
         uris: a list of uris to the zip files
     """
-
     bucket = bucket.replace('s3:', '').replace('/', '')
     res = S3.list_objects(Bucket=bucket, Prefix=prefix)
-    keys = [item['Key'] for item in res['Contents'] if item['Key'].endswith('.zip')]
+
+    def is_valid_key(key):
+        return (key.endswith('.zip') or key.endswith('.geo')) and re.search('S1[AB]_IW_RAW', key.split('/')[-1])
+
+    keys = [item['Key'] for item in res['Contents'] if is_valid_key(item['Key'])]
     uris = ['/'.join(['s3://' + bucket, key]) for key in keys]
     return uris
 
@@ -317,6 +321,8 @@ def time_series(
     bounds: list[float],
     bucket: str = None,
     bucket_prefix: str = '',
+    gslc_bucket: str = None,
+    gslc_bucket_prefix: str = '',
     work_dir: Optional[Path] = None,
 ) -> None:
     """Create and package a time series stack from a set of Sentinel-1 GSLCs.
@@ -326,6 +332,8 @@ def time_series(
         bounds: bounding box that was used to generate the GSLCs
         bucket: AWS S3 bucket for uploading the final product(s)
         bucket_prefix: Add a bucket prefix to the product(s)
+        gslc_bucket: AWS S3 bucket containing GSLCs for time-series processing
+        gslc_bucket_prefix: Path to GSLCs within gslc_bucket.
         work_dir: Working directory for processing
     """
     if work_dir is None:
@@ -334,13 +342,10 @@ def time_series(
     if not sbas_dir.exists():
         mkdir(sbas_dir)
 
-    bucket_is_for_upload = True
-
     if granules == []:
-        if bucket is None:
+        if gslc_bucket is None:
             raise ValueError('Either a list of granules or a s3 bucket must be provided, but got neither.')
-        granules = get_granules_from_s3(bucket, bucket_prefix)
-        bucket_is_for_upload = False
+        granules = get_granules_from_s3(gslc_bucket, gslc_bucket_prefix)
 
     granule_names = load_products(granules)
     dem_path = dem.download_dem_for_srg(bounds, work_dir)
@@ -351,7 +356,7 @@ def time_series(
     create_time_series(work_dir=sbas_dir)
 
     zip_path = package_time_series(granule_names, bounds, work_dir)
-    if bucket_is_for_upload and bucket:
+    if bucket:
         upload_file_to_s3(zip_path, bucket, bucket_prefix)
 
     print(f'Finished time-series processing for {", ".join(granule_names)}!')
@@ -371,6 +376,8 @@ def main():
     )
     parser.add_argument('--bucket', help='AWS S3 bucket HyP3 for upload the final product(s)')
     parser.add_argument('--bucket-prefix', default='', help='Add a bucket prefix to product(s)')
+    parser.add_argument('--gslc-bucket', help='AWS S3 bucket containing GSLCs to process')
+    parser.add_argument('--gslc-bucket-prefix', default='', help='Path to GSLCs within gslc-bucket.')
     parser.add_argument('granules', type=str.split, nargs='*', default='', help='GSLC granules.')
     args = parser.parse_args()
     args.granules = [item for sublist in args.granules for item in sublist]

From ef0d7d19eaf7da28eda04ea32634ad3b94a275ac Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Wed, 2 Oct 2024 15:57:20 -0400
Subject: [PATCH 04/40] raise error if both gslc-bucket and granules are provided

---
 src/hyp3_srg/time_series.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/hyp3_srg/time_series.py b/src/hyp3_srg/time_series.py
index 319a4c8..2a83fb0 100644
--- a/src/hyp3_srg/time_series.py
+++ b/src/hyp3_srg/time_series.py
@@ -342,6 +342,9 @@ def time_series(
     if not sbas_dir.exists():
         mkdir(sbas_dir)
 
+    if granules and gslc_bucket:
+        raise ValueError('One of a list of granules or a s3 bucket must be provided, but got both.')
+
     if granules == []:
         if gslc_bucket is None:
             raise ValueError('Either a list of granules or a s3 bucket must be provided, but got neither.')

From f17be666675ec36e40dcacf6ef97a8263fec8419 Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Wed, 2 Oct 2024 16:43:54 -0400
Subject: [PATCH 05/40] moved s3 functions to utils

---
 src/hyp3_srg/time_series.py | 46 ++++----------------------
 src/hyp3_srg/utils.py       | 52 +++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 41 deletions(-)

diff --git a/src/hyp3_srg/time_series.py b/src/hyp3_srg/time_series.py
index 2a83fb0..daa8006 100644
--- a/src/hyp3_srg/time_series.py
+++ b/src/hyp3_srg/time_series.py
@@ -12,51 +12,27 @@
 from shutil import copyfile
 from typing import Iterable, Optional
 
-from boto3 import client
 from hyp3lib.aws import upload_file_to_s3
 from hyp3lib.fetch import download_file as download_from_http
 
 from hyp3_srg import dem, utils
 
 
-S3 = client('s3')
 log = logging.getLogger(__name__)
 
 
-def get_s3_args(uri: str, dest_dir: Optional[Path] = None) -> None:
-    """Retrieve the arguments for downloading from an S3 bucket
-
-    Args:
-        uri: URI of the file to download
-        dest_dir: the directory to place the downloaded file in
-
-    Returns:
-        bucket: the s3 bucket to download from
-        key: the path to the file following the s3 bucket
-        out_path: the destination path of the file to download
-    """
-    if dest_dir is None:
-        dest_dir = Path.cwd()
-
-    simple_s3_uri = Path(uri.replace('s3://', ''))
-    bucket = simple_s3_uri.parts[0]
-    key = '/'.join(simple_s3_uri.parts[1:])
-    out_path = dest_dir / simple_s3_uri.parts[-1]
-    return bucket, key, out_path
-
-
-def get_granules_from_s3(bucket: str, prefix: str = '') -> list[str]:
+def get_gslc_uris_from_s3(bucket: str, prefix: str = '') -> list[str]:
     """Retrieve granule (zip files) uris from the given s3 bucket and prefix.
 
     Args:
-        bucket: the s3 bucket
+        bucket: the s3 bucket name
         prefix: the path after the bucket and before the file
 
     Returns:
         uris: a list of uris to the zip files
     """
     bucket = bucket.replace('s3:', '').replace('/', '')
-    res = S3.list_objects(Bucket=bucket, Prefix=prefix)
+    res = utils.s3_list_objects(bucket, prefix)
 
     def is_valid_key(key):
         return (key.endswith('.zip') or key.endswith('.geo')) and re.search('S1[AB]_IW_RAW', key.split('/')[-1])
@@ -66,18 +42,6 @@ def get_gslc_uris_from_s3(bucket: str, prefix: str = '') -> list[str]:
     keys = [item['Key'] for item in res['Contents'] if is_valid_key(item['Key'])]
     uris = ['/'.join(['s3://' + bucket, key]) for key in keys]
     return uris
 
 
-def download_from_s3(uri: str, dest_dir: Optional[Path] = None) -> None:
-    """Download a file from an S3 bucket
-
-    Args:
-        uri: URI of the file to download
-        dest_dir: the directory to place the downloaded file in
-    """
-    bucket, key, out_path = get_s3_args(uri, dest_dir)
-    S3.download_file(bucket, key, out_path)
-    return out_path
-
-
 def load_products(uris: Iterable[str], overwrite: bool = False):
     """Load the products from the provided URIs
 
@@ -96,7 +60,7 @@ def load_products(uris: Iterable[str], overwrite: bool = False):
         if product_exists and not overwrite:
             pass
         elif uri.startswith('s3'):
-            download_from_s3(uri, dest_dir=work_dir)
+            utils.download_from_s3(uri, dest_dir=work_dir)
         elif uri.startswith('http'):
             download_from_http(uri, directory=work_dir)
         elif len(Path(uri).parts) > 1:
@@ -348,7 +312,7 @@ def time_series(
     if granules == []:
         if gslc_bucket is None:
             raise ValueError('Either a list of granules or a s3 bucket must be provided, but got neither.')
-        granules = get_granules_from_s3(gslc_bucket, gslc_bucket_prefix)
+        granules = get_gslc_uris_from_s3(gslc_bucket, gslc_bucket_prefix)
 
     granule_names = load_products(granules)
     dem_path = dem.download_dem_for_srg(bounds, work_dir)
diff --git a/src/hyp3_srg/utils.py b/src/hyp3_srg/utils.py
index f708c26..5f3ebcc 100644
--- a/src/hyp3_srg/utils.py
+++ b/src/hyp3_srg/utils.py
@@ -8,6 +8,7 @@ from zipfile import ZipFile
 
 import asf_search
+from boto3 import client
 from s1_orbits import fetch_for_scene
 from shapely.geometry import Polygon, shape
 
@@ -227,3 +228,54 @@ def how_many_gpus():
     (param, err) = proc.communicate()
     ngpus = int(str(param, 'UTF-8').split()[0])
     return ngpus
+
+
+def get_s3_args(uri: str, dest_dir: Optional[Path] = None) -> None:
+    """Retrieve the arguments for downloading from an S3 bucket
+
+    Args:
+        uri: URI of the file to download
+        dest_dir: the directory to place the downloaded file in
+
+    Returns:
+        bucket: the s3 bucket to download from
+        key: the path to the file following the s3 bucket
+        out_path: the destination path of the file to download
+    """
+    if dest_dir is None:
+        dest_dir = Path.cwd()
+
+    simple_s3_uri = Path(uri.replace('s3://', ''))
+    bucket = simple_s3_uri.parts[0]
+    key = '/'.join(simple_s3_uri.parts[1:])
+    out_path = dest_dir / simple_s3_uri.parts[-1]
+    return bucket, key, out_path
+
+
+def s3_list_objects(bucket: str, prefix: str = '') -> dict:
+    """List objects in bucket at prefix
+
+    Args:
+        bucket: the simple s3 bucket name
+        prefix: the path within the bucket to search
+
+    Returns:
+        res: dictionary containing the response
+    """
+    S3 = client('s3')
+    bucket = bucket.replace('s3:', '').replace('/', '')
+    res = S3.list_objects(Bucket=bucket, Prefix=prefix)
+    return res
+
+
+def download_from_s3(uri: str, dest_dir: Optional[Path] = None) -> None:
+    """Download a file from an S3 bucket
+
+    Args:
+        uri: URI of the file to download
+        dest_dir: the directory to place the downloaded file in
+    """
+    S3 = client('s3')
+    bucket, key, out_path = get_s3_args(uri, dest_dir)
+    S3.download_file(bucket, key, out_path)
+    return out_path

From 1ef9f090af1277929a24cc3a7e9852dc29d9b62b Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Wed, 2 Oct 2024 16:44:38 -0400
Subject: [PATCH 06/40] added test for getting gslc uris from s3

---
 tests/test_time_series.py | 43 ++++++++++++++++++++++++++++++++-------
 1 file changed, 36 insertions(+), 7 deletions(-)

diff --git a/tests/test_time_series.py b/tests/test_time_series.py
index 49392b5..67153d2 100644
--- a/tests/test_time_series.py
+++ b/tests/test_time_series.py
@@ -1,6 +1,7 @@
 from pathlib import Path
+from unittest import mock
 
-from hyp3_srg import time_series
+from hyp3_srg import time_series, utils
 
 
 def test_create_time_series_product_name():
@@ -43,9 +44,37 @@ def test_get_size_from_dem(tmp_path):
     assert dem_width, dem_height == (1235, 873)
 
 
-def test_get_s3_args():
-    s3_uri_1 = 's3://foo/bar.zip'
-    s3_uri_2 = 's3://foo/bing/bong/bar.zip'
-    dest_dir = Path('output')
-    assert time_series.get_s3_args(s3_uri_1) == ('foo', 'bar.zip', Path.cwd() / "bar.zip")
-    assert time_series.get_s3_args(s3_uri_2, dest_dir) == ('foo', 'bing/bong/bar.zip', dest_dir / 'bar.zip')
+def test_get_gslc_uris_from_s3(monkeypatch):
+    bucket = 'bucket'
+    prefix = 'prefix'
+
+    mock_response = {
+        'Contents': [
+            {
+                'Key': f'{prefix}/S1A_IW_RAW_foo.zip'
+            },
+            {
+                'Key': f'{prefix}/prefibad_key.zip'
+            },
+            {
+                'Key': f'{prefix}/S1A_IW_RAW_foo.bad_extension'
+            },
+            {
+                'Key': f'{prefix}/S1B_IW_RAW_bar.geo'
+            }
+        ]
+    }
+
+    correct_uris = [
+        f's3://{bucket}/{prefix}/S1A_IW_RAW_foo.zip',
+        f's3://{bucket}/{prefix}/S1B_IW_RAW_bar.geo'
+    ]
+
+    with monkeypatch.context() as m:
+        mock_s3_list_objects = mock.Mock(return_value=mock_response)
+        m.setattr(utils, 's3_list_objects', mock_s3_list_objects)
+
+        uris = time_series.get_gslc_uris_from_s3(bucket, prefix)
+        assert uris == correct_uris
+        uris = time_series.get_gslc_uris_from_s3(f's3://{bucket}/', prefix)
+        assert uris == correct_uris

From 0905b010fdd1801d63ba57e3facdc80c03c9aa9c Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Wed, 2 Oct 2024 16:44:49 -0400
Subject: [PATCH 07/40] moved tests for s3 functions

---
 tests/test_utils.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index fad3bad..1f51aed 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -85,3 +85,11 @@ def test_call_stanford_module(monkeypatch):
         m.setenv('PROC_HOME', '.')
         utils.call_stanford_module('foo/bar.py', ['arg1', 'arg2'])
         mock_run.assert_called_once_with([Path('foo/bar.py'), 'arg1', 'arg2'], cwd=Path.cwd(), check=True)
+
+
+def test_get_s3_args():
+    s3_uri_1 = 's3://foo/bar.zip'
+    s3_uri_2 = 's3://foo/bing/bong/bar.zip'
+    dest_dir = Path('output')
+    assert utils.get_s3_args(s3_uri_1) == ('foo', 'bar.zip', Path.cwd() / "bar.zip")
+    assert utils.get_s3_args(s3_uri_2, dest_dir) == ('foo', 'bing/bong/bar.zip', dest_dir / 'bar.zip')

From 00a030329024cc7a8262809453448cc1ab18d1c8 Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Wed, 2 Oct 2024 16:45:40 -0400
Subject: [PATCH 08/40] removed unused import

---
 tests/test_time_series.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_time_series.py b/tests/test_time_series.py
index 67153d2..0865da2 100644
--- a/tests/test_time_series.py
+++ b/tests/test_time_series.py
@@ -1,4 +1,3 @@
-from pathlib import Path
 from unittest import mock
 
 from hyp3_srg import time_series, utils

From 723873aa7515115694b2dd653713eac6b1d0d12a Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Wed, 2 Oct 2024 16:47:50 -0400
Subject: [PATCH 09/40] corrected changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7f5f8f7..ab6084d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [0.8.1]
 
 ### Changed
-* `time_series` will now search the s3 bucket (`--bucket` and `--bucket-prefix`) for granules if no granules were explicitly provided
+* `time_series` can now search an s3 bucket (`--gslc-bucket` and `--gslc-bucket-prefix`) for granules if no granules are explicitly provided
 
 ## [0.8.0]

From 762f2408730337de66276c6ad505e6e2bd7c603f Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Wed, 2 Oct 2024 16:49:20 -0400
Subject: [PATCH 10/40] better changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab6084d..51c47cb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [0.8.1]
 
 ### Changed
-* `time_series` can now search an s3 bucket (`--gslc-bucket` and `--gslc-bucket-prefix`) for granules if no granules are explicitly provided
+* `time_series` can now search an s3 bucket for GSLCs (using `--gslc-bucket` and `--gslc-bucket-prefix`) if no GSLC granules are explicitly provided
 
 ## [0.8.0]

From d880b4768c4c4a85da4de712f4816cf24486cd8e Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Fri, 4 Oct 2024 14:43:21 -0800
Subject: [PATCH 11/40] simplify gslc s3 search interface, fix pycharm warnings

---
 src/hyp3_srg/time_series.py | 52 ++++++++++++++++++-------------------
 tests/test_time_series.py   |  2 +-
 2 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/src/hyp3_srg/time_series.py b/src/hyp3_srg/time_series.py
index daa8006..ceb8db5 100644
--- a/src/hyp3_srg/time_series.py
+++ b/src/hyp3_srg/time_series.py
@@ -74,26 +74,26 @@ def load_products(uris: Iterable[str], overwrite: bool = False):
     return granule_names
 
 
-def get_size_from_dem(dem_file: str) -> tuple[int]:
+def get_size_from_dem(dem_path: str) -> tuple[int, int]:
     """Get the length and width from a .rsc DEM file
 
     Args:
-        dem_file: path to the .rsc dem file.
+        dem_path: path to the .rsc dem file.
 
     Returns:
         dem_width, dem_length: tuple containing the dem width and dem length
     """
-    with open(dem_file) as dem:
-        width_line = dem.readline()
+    with open(dem_path) as dem_file:
+        width_line = dem_file.readline()
         dem_width = width_line.split()[1]
-        length_line = dem.readline()
+        length_line = dem_file.readline()
         dem_length = length_line.split()[1]
 
     return int(dem_width), int(dem_length)
 
 
 def generate_wrapped_interferograms(
-    looks: tuple[int], baselines: tuple[int], dem_shape: tuple[int], work_dir: Path
+    looks: tuple[int, int], baselines: tuple[int, int], dem_shape: tuple[int, int], work_dir: Path
 ) -> None:
     """Generates wrapped interferograms from GSLCs
 
@@ -113,7 +113,7 @@ def generate_wrapped_interferograms(
     utils.call_stanford_module('sentinel/ps_sbas_igrams.py', args=sbas_args, work_dir=work_dir)
 
 
-def unwrap_interferograms(dem_shape: tuple[int], unw_shape: tuple[int], work_dir: Path) -> None:
+def unwrap_interferograms(dem_shape: tuple[int, int], unw_shape: tuple[int, int], work_dir: Path) -> None:
     """Unwraps wrapped interferograms in parallel
 
     Args:
@@ -130,7 +130,7 @@ def unwrap_interferograms(dem_shape: tuple[int], unw_shape: tuple[int], work_dir
 
 
 def compute_sbas_velocity_solution(
-    threshold: float, do_tropo_correction: bool, unw_shape: tuple[int], work_dir: Path
+    threshold: float, do_tropo_correction: bool, unw_shape: tuple[int, int], work_dir: Path
 ) -> None:
     """Computes the sbas velocity solution from the unwrapped interferograms
 
@@ -153,11 +153,9 @@ def compute_sbas_velocity_solution(
         tropo_correct_args = ['unwlist', unw_width, unw_length]
         utils.call_stanford_module('int/tropocorrect.py', args=tropo_correct_args, work_dir=work_dir)
 
-    num_unw_files = 0
     with open(work_dir / 'unwlist', 'r') as unw_list:
         num_unw_files = len(unw_list.readlines())
 
-    num_slcs = 0
     with open(work_dir / 'geolist', 'r') as slc_list:
         num_slcs = len(slc_list.readlines())
 
@@ -166,8 +164,8 @@ def compute_sbas_velocity_solution(
 
 
 def create_time_series(
-    looks: tuple[int] = (10, 10),
-    baselines: tuple[int] = (1000, 1000),
+    looks: tuple[int, int] = (10, 10),
+    baselines: tuple[int, int] = (1000, 1000),
     threshold: float = 0.5,
     do_tropo_correction: bool = True,
     work_dir: Path | None = None,
@@ -184,7 +182,7 @@ def create_time_series(
     dem_shape = get_size_from_dem('elevation.dem.rsc')
     generate_wrapped_interferograms(looks=looks, baselines=baselines, dem_shape=dem_shape, work_dir=work_dir)
 
-    unw_shape = get_size_from_dem(work_dir / 'dem.rsc')
+    unw_shape = get_size_from_dem(str(work_dir / 'dem.rsc'))
     unwrap_interferograms(dem_shape=dem_shape, unw_shape=unw_shape, work_dir=work_dir)
 
     compute_sbas_velocity_solution(
@@ -199,7 +197,7 @@ def create_time_series_product_name(
     """Create a product name for the given granules.
 
     Args:
-        granules: list of the granule names
+        granule_names: list of the granule names
         bounds: bounding box that was used to generate the GSLCs
 
     Returns:
@@ -276,7 +274,7 @@ def package_time_series(
         'velocity',
     ]
     [shutil.copy(sbas_dir / f, product_path / f) for f in to_keep]
-    shutil.make_archive(product_path, 'zip', product_path)
+    shutil.make_archive(str(product_path), 'zip', product_path)
     return zip_path
 
 
@@ -285,7 +283,6 @@ def time_series(
     bounds: list[float],
     bucket: str = None,
     bucket_prefix: str = '',
-    gslc_bucket: str = None,
     gslc_bucket_prefix: str = '',
     work_dir: Optional[Path] = None,
 ) -> None:
@@ -296,8 +293,7 @@ def time_series(
         bounds: bounding box that was used to generate the GSLCs
         bucket: AWS S3 bucket for uploading the final product(s)
         bucket_prefix: Add a bucket prefix to the product(s)
-        gslc_bucket: AWS S3 bucket containing GSLCs for time-series processing
-        gslc_bucket_prefix: Path to GSLCs within gslc_bucket.
+        gslc_bucket_prefix: GSLCs are found at bucket_prefix/gslc_bucket_prefix within bucket
         work_dir: Working directory for processing
     """
     if work_dir is None:
@@ -306,13 +302,14 @@ def time_series(
     if not sbas_dir.exists():
         mkdir(sbas_dir)
 
-    if granules and gslc_bucket:
-        raise ValueError('One of a list of granules or a s3 bucket must be provided, but got both.')
+    if granules and gslc_bucket_prefix:
+        raise ValueError('One of a list of granules or a GSLC S3 bucket prefix must be provided, but got both.')
 
-    if granules == []:
-        if gslc_bucket is None:
-            raise ValueError('Either a list of granules or a s3 bucket must be provided, but got neither.')
-        granules = get_gslc_uris_from_s3(gslc_bucket, gslc_bucket_prefix)
+    if not granules:
+        if gslc_bucket_prefix is None:
+            raise ValueError('Either a list of granules or a GSLC S3 bucket prefix must be provided, but got neither.')
+        # TODO: check that bucket and bucket_prefix were passed
+        granules = get_gslc_uris_from_s3(bucket, f'{bucket_prefix}/{gslc_bucket_prefix}')
 
     granule_names = load_products(granules)
     dem_path = dem.download_dem_for_srg(bounds, work_dir)
@@ -343,8 +340,11 @@ def main():
     )
     parser.add_argument('--bucket', help='AWS S3 bucket HyP3 for upload the final product(s)')
     parser.add_argument('--bucket-prefix', default='', help='Add a bucket prefix to product(s)')
-    parser.add_argument('--gslc-bucket', help='AWS S3 bucket containing GSLCs to process')
-    parser.add_argument('--gslc-bucket-prefix', default='', help='Path to GSLCs within gslc-bucket.')
+    parser.add_argument(
+        '--gslc-bucket-prefix',
+        default='',
+        help='GSLCs are found at bucket-prefix/gslc-bucket_prefix within bucket'
+    )
     parser.add_argument('granules', type=str.split, nargs='*', default='', help='GSLC granules.')
     args = parser.parse_args()
     args.granules = [item for sublist in args.granules for item in sublist]
diff --git a/tests/test_time_series.py b/tests/test_time_series.py
index 0865da2..75302b0 100644
--- a/tests/test_time_series.py
+++ b/tests/test_time_series.py
@@ -39,7 +39,7 @@ def test_get_size_from_dem(tmp_path):
     rsc_path = tmp_path / 'elevation.dem.rsc'
     with open(rsc_path, 'w') as rsc_file:
         rsc_file.write(rsc_content.strip())
-    dem_width, dem_height = time_series.get_size_from_dem(dem_file=rsc_path)
+    dem_width, dem_height = time_series.get_size_from_dem(dem_path=rsc_path)
 
     assert dem_width, dem_height == (1235, 873)

From 5c04c22dcdaa577b6db2d0a5c9c22a4bfef4b73c Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Fri, 4 Oct 2024 15:45:26 -0800
Subject: [PATCH 12/40] temp hard-code granules sub-prefix, fix a typo

---
 src/hyp3_srg/back_projection.py | 4 ++++
 src/hyp3_srg/time_series.py     | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/hyp3_srg/back_projection.py b/src/hyp3_srg/back_projection.py
index 3b6d9a0..6afe7b3 100644
--- a/src/hyp3_srg/back_projection.py
+++ b/src/hyp3_srg/back_projection.py
@@ -164,6 +164,10 @@ def main():
         args.bounds = [float(item) for sublist in args.bounds for item in sublist]
         if len(args.bounds) != 4:
             parser.error('Bounds must have exactly 4 values: [min lon, min lat, max lon, max lat] in EPSG:4326.')
+
+    # TODO: don't hard-code this
+    args.bucket_prefix += '/granules'
+
     back_project(**args.__dict__)
 
 
diff --git a/src/hyp3_srg/time_series.py b/src/hyp3_srg/time_series.py
index ceb8db5..e09e7d7 100644
--- a/src/hyp3_srg/time_series.py
+++ b/src/hyp3_srg/time_series.py
@@ -343,7 +343,7 @@ def main():
     parser.add_argument(
         '--gslc-bucket-prefix',
         default='',
-        help='GSLCs are found at bucket-prefix/gslc-bucket_prefix within bucket'
+        help='GSLCs are found at bucket-prefix/gslc-bucket-prefix within bucket'
     )
     parser.add_argument('granules', type=str.split, nargs='*', default='', help='GSLC granules.')
     args = parser.parse_args()

From c7464cc0a51d19313af24302cc2635cd3b3b30e6 Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Fri, 4 Oct 2024 16:02:18 -0800
Subject: [PATCH 13/40] add --use-granules-from-s3 option

---
 src/hyp3_srg/back_projection.py |  2 +-
 src/hyp3_srg/time_series.py     | 22 ++++++++++------------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/src/hyp3_srg/back_projection.py b/src/hyp3_srg/back_projection.py
index 6afe7b3..e29a21c 100644
--- a/src/hyp3_srg/back_projection.py
+++ b/src/hyp3_srg/back_projection.py
@@ -165,7 +165,7 @@ def main():
         if len(args.bounds) != 4:
             parser.error('Bounds must have exactly 4 values: [min lon, min lat, max lon, max lat] in EPSG:4326.')
 
-    # TODO: don't hard-code this
+    # TODO: add a cli option to write granules to this sub-prefix
     args.bucket_prefix += '/granules'
 
     back_project(**args.__dict__)
diff --git a/src/hyp3_srg/time_series.py b/src/hyp3_srg/time_series.py
index e09e7d7..a92ba42 100644
--- a/src/hyp3_srg/time_series.py
+++ b/src/hyp3_srg/time_series.py
@@ -281,9 +281,9 @@ def package_time_series(
 def time_series(
     granules: Iterable[str],
     bounds: list[float],
+    use_granules_from_s3: bool,
     bucket: str = None,
     bucket_prefix: str = '',
-    gslc_bucket_prefix: str = '',
     work_dir: Optional[Path] = None,
 ) -> None:
     """Create and package a time series stack from a set of Sentinel-1 GSLCs.
@@ -291,9 +291,9 @@ def time_series(
     Args:
         granules: List of Sentinel-1 GSLCs
         bounds: bounding box that was used to generate the GSLCs
+        use_granules_from_s3: Whether to download input granules from S3
         bucket: AWS S3 bucket for uploading the final product(s)
         bucket_prefix: Add a bucket prefix to the product(s)
-        gslc_bucket_prefix: GSLCs are found at bucket_prefix/gslc_bucket_prefix within bucket
         work_dir: Working directory for processing
     """
     if work_dir is None:
@@ -302,14 +302,16 @@ def time_series(
     if not sbas_dir.exists():
         mkdir(sbas_dir)
 
-    if granules and gslc_bucket_prefix:
-        raise ValueError('One of a list of granules or a GSLC S3 bucket prefix must be provided, but got both.')
+    # TODO: check this at cli parsing
+    if granules and use_granules_from_s3:
+        raise ValueError('granules must not be provided with --use-granules-from-s3')
 
+    # TODO: check this at cli parsing
     if not granules:
-        if gslc_bucket_prefix is None:
-            raise ValueError('Either a list of granules or a GSLC S3 bucket prefix must be provided, but got neither.')
+        if not use_granules_from_s3:
+            raise ValueError('--use-granules-from-s3 must be used if granules not provided')
         # TODO: check that bucket and bucket_prefix were passed
-        granules = get_gslc_uris_from_s3(bucket, f'{bucket_prefix}/{gslc_bucket_prefix}')
+        granules = get_gslc_uris_from_s3(bucket, f'{bucket_prefix}/granules')
 
     granule_names = load_products(granules)
     dem_path = dem.download_dem_for_srg(bounds, work_dir)
@@ -340,11 +342,7 @@ def main():
     )
     parser.add_argument('--bucket', help='AWS S3 bucket HyP3 for upload the final product(s)')
     parser.add_argument('--bucket-prefix', default='', help='Add a bucket prefix to product(s)')
-    parser.add_argument(
-        '--gslc-bucket-prefix',
-        default='',
-        help='GSLCs are found at bucket-prefix/gslc-bucket-prefix within bucket'
-    )
+    parser.add_argument('--use-granules-from-s3', type=bool, action='store_true')
     parser.add_argument('granules', type=str.split, nargs='*', default='', help='GSLC granules.')
     args = parser.parse_args()
     args.granules = [item for sublist in args.granules for item in sublist]

From 8c84b234fe959b3751c60a5837309bf0ef57cb25 Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Fri, 4 Oct 2024 17:08:38 -0800
Subject: [PATCH 14/40] remove unexpected kwarg

---
 src/hyp3_srg/time_series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hyp3_srg/time_series.py b/src/hyp3_srg/time_series.py
index a92ba42..e43fcb2 100644
--- a/src/hyp3_srg/time_series.py
+++ b/src/hyp3_srg/time_series.py
@@ -342,7 +342,7 @@ def main():
     )
     parser.add_argument('--bucket', help='AWS S3 bucket HyP3 for upload the final product(s)')
     parser.add_argument('--bucket-prefix', default='', help='Add a bucket prefix to product(s)')
-    parser.add_argument('--use-granules-from-s3', type=bool, action='store_true')
+    parser.add_argument('--use-granules-from-s3', action='store_true')
     parser.add_argument('granules', type=str.split, nargs='*', default='', help='GSLC granules.')
     args = parser.parse_args()
     args.granules = [item for sublist in args.granules for item in sublist]

From 09e153135a1d4d50d18fdf0495bf902bc08e5e3a Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Mon, 7 Oct 2024 09:47:21 -0800
Subject: [PATCH 15/40] update time_series --bounds interface to match
 back_projection

---
 src/hyp3_srg/time_series.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/hyp3_srg/time_series.py b/src/hyp3_srg/time_series.py
index e43fcb2..f8badb4 100644
--- a/src/hyp3_srg/time_series.py
+++ b/src/hyp3_srg/time_series.py
@@ -337,15 +337,23 @@ def main():
         S1A_IW_RAW__0SDV_20231229T134404_20231229T134436_051870_064437_5F38.geo
     """
     parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument(
-        '--bounds', default=None, type=float, nargs=4, help='Bounding box that was used to generate the GSLCs'
-    )
     parser.add_argument('--bucket', help='AWS S3 bucket HyP3 for upload the final product(s)')
     parser.add_argument('--bucket-prefix', default='', help='Add a bucket prefix to product(s)')
+    parser.add_argument(
+        '--bounds',
+        default=None,
+        type=str.split,
+        nargs='+',
+        help='DEM extent bbox in EPSG:4326: [min_lon, min_lat, max_lon, max_lat].'
+    )
     parser.add_argument('--use-granules-from-s3', action='store_true')
     parser.add_argument('granules', type=str.split, nargs='*', default='', help='GSLC granules.')
     args = parser.parse_args()
     args.granules = [item for sublist in args.granules for item in sublist]
+    if args.bounds is not None:
+        args.bounds = [float(item) for sublist in args.bounds for item in sublist]
+        if len(args.bounds) != 4:
+            parser.error('Bounds must have exactly 4 values: [min lon, min lat, max lon, max lat] in EPSG:4326.')
     time_series(**args.__dict__)

From f06485cf63e09cb7982791bb5b343c40858e65c2 Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Wed, 9 Oct 2024 15:44:09 -0800
Subject: [PATCH 16/40] finish implementing gslc prefix option

---
 src/hyp3_srg/back_projection.py | 14 ++++++++++++--
 src/hyp3_srg/time_series.py     | 30 ++++++++++++++++++------------
 2 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/src/hyp3_srg/back_projection.py b/src/hyp3_srg/back_projection.py
index e29a21c..a62336b 100644
--- a/src/hyp3_srg/back_projection.py
+++ b/src/hyp3_srg/back_projection.py
@@ -149,6 +149,14 @@ def main():
     parser.add_argument('--earthdata-password', default=None, help="Password for NASA's EarthData")
     parser.add_argument('--bucket', help='AWS S3 bucket HyP3 for upload the final product(s)')
     parser.add_argument('--bucket-prefix', default='', help='Add a bucket prefix to product(s)')
+    parser.add_argument(
+        '--use-gslc-prefix',
+        action='store_true',
+        help=(
+            'Upload GSLC granules to a subprefix located within the bucket and prefix given by the'
+            ' --bucket and --bucket-prefix options'
+        )
+    )
     parser.add_argument('--gpu', default=False, action='store_true', help='Use the GPU-based version of the workflow.')
     parser.add_argument(
         '--bounds',
@@ -159,14 +167,16 @@ def main():
     )
     parser.add_argument('granules', type=str.split, nargs='+', help='Level-0 S1 granule(s) to back-project.')
     args = parser.parse_args()
+
     args.granules = [item for sublist in args.granules for item in sublist]
+
     if args.bounds is not None:
         args.bounds = [float(item) for sublist in args.bounds for item in sublist]
         if len(args.bounds) != 4:
             parser.error('Bounds must have exactly 4 values: [min lon, min lat, max lon, max lat] in EPSG:4326.')
 
-    # TODO: add a cli option to write granules to this sub-prefix
-    args.bucket_prefix += '/granules'
+    if args.use_gslc_prefix:
+        args.bucket_prefix += '/GSLC_granules'
 
     back_project(**args.__dict__)
diff --git a/src/hyp3_srg/time_series.py b/src/hyp3_srg/time_series.py
index f8badb4..dcace59 100644
--- a/src/hyp3_srg/time_series.py
+++ b/src/hyp3_srg/time_series.py
@@ -281,7 +281,7 @@ def package_time_series(
 def time_series(
     granules: Iterable[str],
     bounds: list[float],
-    use_granules_from_s3: bool,
+    use_gslc_prefix: bool,
     bucket: str = None,
     bucket_prefix: str = '',
     work_dir: Optional[Path] = None,
@@ -291,7 +291,7 @@ def time_series(
     Args:
         granules: List of Sentinel-1 GSLCs
         bounds: bounding box that was used to generate the GSLCs
-        use_granules_from_s3: Whether to download input granules from S3
+        use_gslc_prefix: Whether to download input granules from S3
         bucket: AWS S3 bucket for uploading the final product(s)
         bucket_prefix: Add a bucket prefix to the product(s)
         work_dir: Working directory for processing
@@ -302,16 +302,15 @@ def time_series(
     if not sbas_dir.exists():
         mkdir(sbas_dir)
 
-    # TODO: check this at cli parsing
-    if granules and use_granules_from_s3:
-        raise ValueError('granules must not be provided with --use-granules-from-s3')
+    if not (granules or use_gslc_prefix):
+        raise ValueError('use_gslc_prefix must be True if granules not provided')
 
-    # TODO: check this at cli parsing
-    if not granules:
-        if not use_granules_from_s3:
-            raise ValueError('--use-granules-from-s3 must be used if granules not provided')
-        # TODO: check that bucket and bucket_prefix were passed
-        granules = get_gslc_uris_from_s3(bucket, f'{bucket_prefix}/granules')
+    if use_gslc_prefix:
+        if granules:
+            raise ValueError('granules must not be provided if use_gslc_prefix is True')
+        if not (bucket and bucket_prefix):
+            raise ValueError('bucket and bucket_prefix must be given if use_gslc_prefix is True')
+        granules = get_gslc_uris_from_s3(bucket, f'{bucket_prefix}/GSLC_granules')
 
     granule_names = load_products(granules)
     dem_path = dem.download_dem_for_srg(bounds, work_dir)
@@ -346,7 +345,14 @@ def main():
         nargs='+',
         help='DEM extent bbox in EPSG:4326: [min_lon, min_lat, max_lon, max_lat].'
     )
-    parser.add_argument('--use-granules-from-s3', action='store_true')
+    parser.add_argument(
+        '--use-gslc-prefix',
+        action='store_true',
+        help=(
+            'Download GSLC input granules from a subprefix located within the bucket and prefix given by the'
+            ' --bucket and --bucket-prefix options'
+        )
+    )
     parser.add_argument('granules', type=str.split, nargs='*', default='', help='GSLC granules.')
     args = parser.parse_args()
     args.granules = [item for sublist in args.granules for item in sublist]

From 0081c55bf7b126eeee1ec0bdacd2ea5f6253a8e2 Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Wed, 9 Oct 2024 15:53:47 -0800
Subject: [PATCH 17/40] check if bucket and bucket_prefix given in
 back_projection workflow

---
 src/hyp3_srg/back_projection.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/hyp3_srg/back_projection.py b/src/hyp3_srg/back_projection.py
index a62336b..d5226bc 100644
--- a/src/hyp3_srg/back_projection.py
+++ b/src/hyp3_srg/back_projection.py
@@ -92,6 +92,7 @@ def back_project(
     earthdata_password: str = None,
     bucket: str = None,
     bucket_prefix: str = '',
+    use_gslc_prefix: bool = False,
     work_dir: Optional[Path] = None,
     gpu: bool = False,
 ):
@@ -104,9 +105,15 @@ def back_project(
         earthdata_password: Password for NASA's EarthData service
         bucket: AWS S3 bucket for uploading the final product(s)
         bucket_prefix: Add a bucket prefix to the product(s)
+        use_gslc_prefix: Upload GSLCs to a subprefix
         work_dir: Working directory for processing
         gpu: Use the GPU-based version of the workflow
     """
+    if use_gslc_prefix:
+        if not (bucket and bucket_prefix):
+            raise ValueError('bucket and bucket_prefix must be given if use_gslc_prefix is True')
+        bucket_prefix += '/GSLC_granules'
+
     utils.set_creds('EARTHDATA', earthdata_username, earthdata_password)
     if work_dir is None:
         work_dir = Path.cwd()
@@ -167,12 +174,7 @@ def main():
     )
     parser.add_argument('granules', type=str.split, nargs='+', help='Level-0 S1 granule(s) to back-project.')
     args = parser.parse_args()
-
     args.granules = [item for sublist in args.granules for item in sublist]
-
     if args.bounds is not None:
         args.bounds = [float(item) for sublist in args.bounds for item in sublist]
         if len(args.bounds) != 4:
             parser.error('Bounds must have exactly 4 values: [min lon, min lat, max lon, max lat] in EPSG:4326.')
 
-    if args.use_gslc_prefix:
-        args.bucket_prefix += '/GSLC_granules'
-
     back_project(**args.__dict__)

From e50a7e22b3f846e1ff14a80db2b1e8d327769e5c Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Wed, 9 Oct 2024 15:55:04 -0800
Subject: [PATCH 18/40] newlines

---
 src/hyp3_srg/back_projection.py | 2 ++
 src/hyp3_srg/time_series.py     | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/src/hyp3_srg/back_projection.py b/src/hyp3_srg/back_projection.py
index d5226bc..8fd5c70 100644
--- a/src/hyp3_srg/back_projection.py
+++ b/src/hyp3_srg/back_projection.py
@@ -174,7 +174,9 @@ def main():
     )
     parser.add_argument('granules', type=str.split, nargs='+', help='Level-0 S1 granule(s) to back-project.')
     args = parser.parse_args()
+
     args.granules = [item for sublist in args.granules for item in sublist]
+
     if args.bounds is not None:
         args.bounds = [float(item) for sublist in args.bounds for item in sublist]
         if len(args.bounds) != 4:
diff --git a/src/hyp3_srg/time_series.py b/src/hyp3_srg/time_series.py
index dcace59..27b0ba2 100644
--- a/src/hyp3_srg/time_series.py
+++ b/src/hyp3_srg/time_series.py
@@ -355,11 +355,14 @@ def main():
     )
     parser.add_argument('granules', type=str.split, nargs='*', default='', help='GSLC granules.')
     args = parser.parse_args()
+
     args.granules = [item for sublist in args.granules for item in sublist]
+
     if args.bounds is not None:
         args.bounds = [float(item) for sublist in args.bounds for item in sublist]
         if len(args.bounds) != 4:
             parser.error('Bounds must have exactly 4 values: [min lon, min lat, max lon, max lat] in EPSG:4326.')
+
     time_series(**args.__dict__)

From a781acf5d158f455ba9fd8735c172b0209ddddf1 Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Fri, 11 Oct 2024 14:48:14 -0400
Subject: [PATCH 19/40] rename dockerfile for test purposes

---
 Dockerfile                       | 34 ++++++++++++++++++++++++------
 Dockerfile.gpu => Dockerfile.cpu | 34 ++++++--------------------------
 2 files changed, 34 insertions(+), 34 deletions(-)
 rename Dockerfile.gpu => Dockerfile.cpu (61%)

diff --git a/Dockerfile b/Dockerfile
index 08efa49..e4e579a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,12 @@
-FROM condaforge/mambaforge:latest as builder
+FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 as builder
 
-ENV USEGPU=false
+# FIXME: should be able to find this dynamically
+ARG GPU_ARCH=89
+
+# GPU_ARCH and USEGPU environment variable used by build_proc.sh
 ENV FFTW_LIB=/usr/lib/x86_64-linux-gnu/libfftw3f.a
+ENV GPU_ARCH=${GPU_ARCH}
+ENV USEGPU=true
 ENV DEBIAN_FRONTEND=noninteractive
 
 RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl git build-essential gfortran libfftw3-dev && \
@@ -12,12 +17,12 @@ COPY . /hyp3-srg/
 COPY ./scripts/build_proc.sh ./srg
 RUN cd /srg && ./build_proc.sh && cd /
 
-FROM condaforge/mambaforge:latest as runner
+FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 as runner
 
 # For opencontainers label definitions, see:
 # https://github.com/opencontainers/image-spec/blob/master/annotations.md
 LABEL org.opencontainers.image.title="HyP3 SRG"
-LABEL org.opencontainers.image.description="HyP3 plugin for Stanford Radar Group SAR processing"
+LABEL org.opencontainers.image.description="HyP3 plugin for Stanford Radar Group Processor SAR processing"
 LABEL org.opencontainers.image.vendor="Alaska Satellite Facility"
 LABEL org.opencontainers.image.authors="ASF Tools Team "
 LABEL org.opencontainers.image.licenses="BSD-3-Clause"
@@ -27,13 +32,30 @@ LABEL org.opencontainers.image.documentation="https://hyp3-docs.asf.alaska.edu"
 
 ARG CONDA_UID=1000
 ARG CONDA_GID=1000
+ARG MINIFORGE_NAME=Miniforge3
+ARG MINIFORGE_VERSION=24.3.0-0
 
-ENV PROC_HOME=/srg
+ENV CONDA_DIR=/opt/conda
+ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
+ENV PATH=${CONDA_DIR}/bin:${PATH}
 ENV PYTHONDONTWRITEBYTECODE=true
+ENV PROC_HOME=/srg
 ENV MYHOME=/home/conda
 ENV DEBIAN_FRONTEND=noninteractive
 
-RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl gfortran && \
+# Conda setup
+RUN apt-get update && apt-get install --no-install-recommends --yes wget bzip2 ca-certificates git > /dev/null && \
+    wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \
+    /bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \
+    rm /tmp/miniforge.sh && \
+    conda clean --tarballs --index-cache --packages --yes && \
+    find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \
+    find ${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \
+    conda clean --force-pkgs-dirs --all --yes && \
+    echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \
+    echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc
+
+RUN apt-get install -y --no-install-recommends unzip vim curl gfortran && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 
 RUN groupadd -g "${CONDA_GID}" --system conda && \
diff --git a/Dockerfile.gpu b/Dockerfile.cpu
similarity index 61%
rename from Dockerfile.gpu
rename to Dockerfile.cpu
index e4e579a..08efa49 100644
--- a/Dockerfile.gpu
+++ b/Dockerfile.cpu
@@ -1,12 +1,7 @@
-FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 as builder
+FROM condaforge/mambaforge:latest as builder
 
-# FIXME: should be able to find this dynamically
-ARG GPU_ARCH=89
-
-# GPU_ARCH and USEGPU environment variable used by build_proc.sh
+ENV USEGPU=false
 ENV FFTW_LIB=/usr/lib/x86_64-linux-gnu/libfftw3f.a
-ENV GPU_ARCH=${GPU_ARCH}
-ENV USEGPU=true
 ENV DEBIAN_FRONTEND=noninteractive
 
 RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl git build-essential gfortran libfftw3-dev && \
@@ -17,12 +12,12 @@ COPY . /hyp3-srg/
 COPY ./scripts/build_proc.sh ./srg
 RUN cd /srg && ./build_proc.sh && cd /
 
-FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 as runner
+FROM condaforge/mambaforge:latest as runner
 
 # For opencontainers label definitions, see:
 # https://github.com/opencontainers/image-spec/blob/master/annotations.md
 LABEL org.opencontainers.image.title="HyP3 SRG"
-LABEL org.opencontainers.image.description="HyP3 plugin for Stanford Radar Group Processor SAR processing"
+LABEL org.opencontainers.image.description="HyP3 plugin for Stanford Radar Group SAR processing"
 LABEL org.opencontainers.image.vendor="Alaska Satellite Facility"
 LABEL org.opencontainers.image.authors="ASF Tools Team "
 LABEL org.opencontainers.image.licenses="BSD-3-Clause"
@@ -32,30 +27,13 @@ LABEL org.opencontainers.image.documentation="https://hyp3-docs.asf.alaska.edu"
 
 ARG CONDA_UID=1000
 ARG CONDA_GID=1000
-ARG MINIFORGE_NAME=Miniforge3
-ARG MINIFORGE_VERSION=24.3.0-0
 
-ENV CONDA_DIR=/opt/conda
-ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
-ENV PATH=${CONDA_DIR}/bin:${PATH}
-ENV PYTHONDONTWRITEBYTECODE=true
 ENV PROC_HOME=/srg
+ENV PYTHONDONTWRITEBYTECODE=true
 ENV MYHOME=/home/conda
 ENV DEBIAN_FRONTEND=noninteractive
 
-# Conda setup
-RUN apt-get update && apt-get install --no-install-recommends --yes wget bzip2 ca-certificates git > /dev/null && \
-    wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \
-    /bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \
-    rm /tmp/miniforge.sh && \
-    conda clean --tarballs --index-cache --packages --yes && \
-    find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \
-    find ${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \
-    conda clean --force-pkgs-dirs --all --yes && \
-    echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \
-    echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc
-
-RUN apt-get install -y --no-install-recommends unzip vim curl gfortran && \
+RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl gfortran && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 
 RUN groupadd -g "${CONDA_GID}" --system conda && \

From b8dc431f4d2d9c14a5b8fb9457966aec0d119f11 Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Fri, 11 Oct 2024 15:27:28 -0800
Subject: [PATCH 20/40] pin python to <3.13 due to hyp3lib bug

---
 environment.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/environment.yml b/environment.yml
index 81ac848..febc7a4 100644
--- a/environment.yml
+++ b/environment.yml
@@ -3,7 +3,9 @@ channels:
   - conda-forge
   - nodefaults
 dependencies:
-  - python>=3.10
+  # TODO: hyp3lib fails to import `cgi` which has been removed in Python 3.13: https://docs.python.org/3/library/cgi.html
+  #   we should decide how to resolve this
+  - python>=3.10,<3.13
   - pip
   # For packaging, and testing
   - flake8
@@ -19,4 +21,4 @@ dependencies:
   - asf_search
   # For running
   - hyp3lib>=3,<4
-  - s1_orbits
\ No newline at end of file
+  - s1_orbits

From 512de27d1693a54e79bc132319b099e0c02433b4 Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Mon, 14 Oct 2024 13:43:34 -0400
Subject: [PATCH 21/40] testing multiple docker build

---
 .github/workflows/test-and-build.yml | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test-and-build.yml b/.github/workflows/test-and-build.yml
index b5b3963..3700169 100644
--- a/.github/workflows/test-and-build.yml
+++ b/.github/workflows/test-and-build.yml
@@ -25,14 +25,28 @@ jobs:
     with:
       python_version: "3.10"
 
-  call-docker-ghcr-workflow:
+  call-docker-ghcr-workflow-cpu:
     needs: call-version-info-workflow
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@v0.11.2
+    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@specify-docker-file
     with:
-      version_tag: ${{ needs.call-version-info-workflow.outputs.version_tag }}
+      version_tag: ${{ needs.call-version-info-workflow.outputs.version_tag }}.cpu
       release_branch: main
       develop_branch: develop
       user: tools-bot
+      file: Dockerfile.cpu
+    secrets:
+      USER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+  call-docker-ghcr-workflow-gpu:
+    needs: call-version-info-workflow
+    # Docs: https://github.com/ASFHyP3/actions
+    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@specify-docker-file
+    with:
+      version_tag: ${{ needs.call-version-info-workflow.outputs.version_tag }}.gpu
+      release_branch: main
+      develop_branch: develop
+      user: tools-bot
+      file: Dockerfile
     secrets:
       USER_TOKEN: ${{ secrets.GITHUB_TOKEN }}

From f3d9a3e06025f05ef6bc39f9a017fbc5a9f14347 Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Mon, 14 Oct 2024 13:48:37 -0400
Subject: [PATCH 22/40] fixed grammar in comment

---
 src/hyp3_srg/dem.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hyp3_srg/dem.py b/src/hyp3_srg/dem.py
index d092b19..2a7223b 100644
--- a/src/hyp3_srg/dem.py
+++ b/src/hyp3_srg/dem.py
@@ -32,7 +32,7 @@ def download_dem_for_srg(bounds: list[float], work_dir: Optional[Path]):
 
     Args:
         bounds: The bounds of the extent of the desired DEM - [min_lon, min_lat, max_lon, max_lat].
-        work_dir: The directory to save create the DEM in
+        work_dir: The directory to save the DEM in
 
     Returns:
         The path to the downloaded DEM

From 9cf7e11e36401b3cc7582d23491bd6d695fa645e Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Mon, 14 Oct 2024 11:15:58 -0800
Subject: [PATCH 23/40] fail on particular input granule

---
 src/hyp3_srg/back_projection.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/hyp3_srg/back_projection.py b/src/hyp3_srg/back_projection.py
index 8fd5c70..ccd187f 100644
--- a/src/hyp3_srg/back_projection.py
+++ b/src/hyp3_srg/back_projection.py
@@ -109,6 +109,8 @@ def back_project(
         work_dir: Working directory for processing
         gpu: Use the GPU-based version of the workflow
     """
+    assert granules[0] != 'S1A_IW_RAW__0SDV_20240723T020812_20240723T020844_054882_06AF26_2CE5'
+
     if use_gslc_prefix:
         if not (bucket and bucket_prefix):
             raise ValueError('bucket and bucket_prefix must be given if use_gslc_prefix is True')

From 4a8d440a0c8cdb1506ccf39bae9c15c0b90acd22 Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Mon, 14 Oct 2024 11:27:00 -0800
Subject: [PATCH 24/40] add todo

---
 src/hyp3_srg/back_projection.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/hyp3_srg/back_projection.py b/src/hyp3_srg/back_projection.py
index ccd187f..2da1f9e 100644
--- a/src/hyp3_srg/back_projection.py
+++ b/src/hyp3_srg/back_projection.py
@@ -109,6 +109,7 @@ def back_project(
         work_dir: Working directory for processing
         gpu: Use the GPU-based version of the workflow
     """
+    # TODO: remove this
     assert granules[0] != 'S1A_IW_RAW__0SDV_20240723T020812_20240723T020844_054882_06AF26_2CE5'
 
     if use_gslc_prefix:

From 183bd4842037e5e78f2bf3dbfde63993db3149be Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Mon, 14 Oct 2024 11:46:57 -0800
Subject: [PATCH 25/40] pin to actions develop

---
 .github/workflows/test-and-build.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-and-build.yml b/.github/workflows/test-and-build.yml
index 3700169..fd23633 100644
--- a/.github/workflows/test-and-build.yml
+++ b/.github/workflows/test-and-build.yml
@@ -28,7 +28,7 @@ jobs:
   call-docker-ghcr-workflow-cpu:
     needs: call-version-info-workflow
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@specify-docker-file
+    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@develop # TODO: pin to latest release
     with:
       version_tag: ${{ needs.call-version-info-workflow.outputs.version_tag }}.cpu
       release_branch: main
@@ -41,7 +41,7 @@ jobs:
   call-docker-ghcr-workflow-gpu:
     needs: call-version-info-workflow
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@specify-docker-file
+    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@develop # TODO: pin to latest release
    with:
      version_tag: ${{ needs.call-version-info-workflow.outputs.version_tag }}.gpu
      release_branch: main

From 6f7866da8c7276ee0ca0e0f912452c2399c03116 Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Mon, 14 Oct 2024 14:06:44 -0800
Subject: [PATCH 26/40] remove failure for particular granule

---
 src/hyp3_srg/back_projection.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/hyp3_srg/back_projection.py b/src/hyp3_srg/back_projection.py
index 2da1f9e..8fd5c70 100644
--- a/src/hyp3_srg/back_projection.py
+++ b/src/hyp3_srg/back_projection.py
@@ -109,9 +109,6 @@ def back_project(
         work_dir: Working directory for processing
         gpu: Use the GPU-based version of the workflow
     """
-    # TODO: remove this
-    assert granules[0] != 'S1A_IW_RAW__0SDV_20240723T020812_20240723T020844_054882_06AF26_2CE5'
-
     if use_gslc_prefix:
         if not (bucket and bucket_prefix):
             raise ValueError('bucket and bucket_prefix must be given if use_gslc_prefix is True')

From ad1adc30676de5535659dbef32471ab92812948f Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Tue, 15 Oct 2024 12:11:49 -0400
Subject: [PATCH 27/40] renamed docker files

---
 .github/workflows/test-and-build.yml |  4 ++--
 Dockerfile                           | 34 +++++-----------------------
 Dockerfile.cpu => Dockerfile.gpu     | 34 ++++++++++++++++++++++------
 3 files changed, 36 insertions(+), 36 deletions(-)
 rename Dockerfile.cpu => Dockerfile.gpu (61%)

diff --git a/.github/workflows/test-and-build.yml b/.github/workflows/test-and-build.yml
index 3700169..5bfc435 100644
--- a/.github/workflows/test-and-build.yml
+++ b/.github/workflows/test-and-build.yml
@@ -34,7 +34,7 @@ jobs:
       release_branch: main
       develop_branch: develop
       user: tools-bot
-      file: Dockerfile.cpu
+      file: Dockerfile
     secrets:
       USER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
 
@@ -47,6 +47,6 @@ jobs:
       release_branch: main
       develop_branch: develop
       user: tools-bot
-      file: Dockerfile
+      file: Dockerfile.gpu
     secrets:
       USER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/Dockerfile b/Dockerfile
index e4e579a..08efa49 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,12 +1,7 @@
-FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 as builder
+FROM condaforge/mambaforge:latest as builder
 
-# FIXME: should be able to find this dynamically
-ARG GPU_ARCH=89
-
-# GPU_ARCH and USEGPU environment variable used by build_proc.sh
+ENV USEGPU=false
 ENV FFTW_LIB=/usr/lib/x86_64-linux-gnu/libfftw3f.a
-ENV GPU_ARCH=${GPU_ARCH}
-ENV USEGPU=true
 ENV DEBIAN_FRONTEND=noninteractive
 
 RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl git build-essential gfortran libfftw3-dev && \
@@ -17,12 +12,12 @@ COPY . /hyp3-srg/
 COPY ./scripts/build_proc.sh ./srg
 RUN cd /srg && ./build_proc.sh && cd /
 
-FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 as runner
+FROM condaforge/mambaforge:latest as runner
 
 # For opencontainers label definitions, see:
 # https://github.com/opencontainers/image-spec/blob/master/annotations.md
 LABEL org.opencontainers.image.title="HyP3 SRG"
-LABEL org.opencontainers.image.description="HyP3 plugin for Stanford Radar Group Processor SAR processing"
+LABEL org.opencontainers.image.description="HyP3 plugin for Stanford Radar Group SAR processing"
 LABEL org.opencontainers.image.vendor="Alaska Satellite Facility"
 LABEL org.opencontainers.image.authors="ASF Tools Team "
 LABEL org.opencontainers.image.licenses="BSD-3-Clause"
@@ -32,30 +27,13 @@ LABEL org.opencontainers.image.documentation="https://hyp3-docs.asf.alaska.edu"
 
 ARG CONDA_UID=1000
 ARG CONDA_GID=1000
-ARG MINIFORGE_NAME=Miniforge3
-ARG MINIFORGE_VERSION=24.3.0-0
 
-ENV CONDA_DIR=/opt/conda
-ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
-ENV PATH=${CONDA_DIR}/bin:${PATH}
-ENV PYTHONDONTWRITEBYTECODE=true
 ENV PROC_HOME=/srg
+ENV PYTHONDONTWRITEBYTECODE=true
 ENV MYHOME=/home/conda
 ENV DEBIAN_FRONTEND=noninteractive
 
-# Conda setup
-RUN apt-get update && apt-get install --no-install-recommends --yes wget bzip2 ca-certificates git > /dev/null && \
-    wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \
-    /bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \
-    rm /tmp/miniforge.sh && \
-    conda clean --tarballs --index-cache --packages --yes && \
-    find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \
-    find ${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \
-    conda clean --force-pkgs-dirs --all --yes && \
-    echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \
-    echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc
-
-RUN apt-get install -y --no-install-recommends unzip vim curl gfortran && \
+RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl gfortran && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 
 RUN groupadd -g "${CONDA_GID}" --system conda && \
diff --git a/Dockerfile.cpu b/Dockerfile.gpu
similarity index 61%
rename from Dockerfile.cpu
rename to Dockerfile.gpu
index 08efa49..e4e579a 100644
--- a/Dockerfile.cpu
+++ b/Dockerfile.gpu
@@ -1,7 +1,12 @@
-FROM condaforge/mambaforge:latest as builder
+FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 as builder
 
-ENV USEGPU=false
+# FIXME: should be able to find this dynamically
+ARG GPU_ARCH=89
+
+# GPU_ARCH and USEGPU environment variable used by build_proc.sh
 ENV FFTW_LIB=/usr/lib/x86_64-linux-gnu/libfftw3f.a
+ENV GPU_ARCH=${GPU_ARCH}
+ENV USEGPU=true
 ENV DEBIAN_FRONTEND=noninteractive
 
 RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl git build-essential gfortran libfftw3-dev && \
@@ -12,12 +17,12 @@ COPY . /hyp3-srg/
 COPY ./scripts/build_proc.sh ./srg
 RUN cd /srg && ./build_proc.sh && cd /
 
-FROM condaforge/mambaforge:latest as runner
+FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 as runner
 
 # For opencontainers label definitions, see:
 # https://github.com/opencontainers/image-spec/blob/master/annotations.md
 LABEL org.opencontainers.image.title="HyP3 SRG"
-LABEL org.opencontainers.image.description="HyP3 plugin for Stanford Radar Group SAR processing"
+LABEL org.opencontainers.image.description="HyP3 plugin for Stanford Radar Group Processor SAR processing"
 LABEL org.opencontainers.image.vendor="Alaska Satellite Facility"
 LABEL org.opencontainers.image.authors="ASF Tools Team "
 LABEL org.opencontainers.image.licenses="BSD-3-Clause"
@@ -27,13 +32,30 @@ LABEL org.opencontainers.image.documentation="https://hyp3-docs.asf.alaska.edu"
 
 ARG CONDA_UID=1000
 ARG CONDA_GID=1000
+ARG MINIFORGE_NAME=Miniforge3
+ARG MINIFORGE_VERSION=24.3.0-0
 
-ENV PROC_HOME=/srg
+ENV CONDA_DIR=/opt/conda
+ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
+ENV PATH=${CONDA_DIR}/bin:${PATH}
 ENV PYTHONDONTWRITEBYTECODE=true
+ENV PROC_HOME=/srg
 ENV MYHOME=/home/conda
 ENV DEBIAN_FRONTEND=noninteractive
 
-RUN apt-get update && apt-get install -y --no-install-recommends unzip vim curl gfortran && \
+# Conda setup
+RUN apt-get update && apt-get install --no-install-recommends --yes wget bzip2 ca-certificates git > /dev/null && \
+    wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \
+    /bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \
+    rm /tmp/miniforge.sh && \
+    conda clean --tarballs --index-cache --packages --yes && \
+    find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \
+    find ${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \
+    conda clean --force-pkgs-dirs --all --yes && \
+    echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \
+    echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc
+
+RUN apt-get install -y --no-install-recommends unzip vim curl gfortran && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 
 RUN groupadd -g "${CONDA_GID}" --system conda && \

From 36a0f05f20ef2adefdde129d7978f4d1bb56a772 Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Tue, 15 Oct 2024 10:13:44 -0800
Subject: [PATCH 28/40] pin docker action to latest release

---
 .github/workflows/test-and-build.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-and-build.yml b/.github/workflows/test-and-build.yml
index 8f6e275..c2a4467 100644
--- a/.github/workflows/test-and-build.yml
+++ b/.github/workflows/test-and-build.yml
@@ -28,7 +28,7 @@ jobs:
   call-docker-ghcr-workflow-cpu:
     needs: call-version-info-workflow
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@develop # TODO: pin to latest release
+    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@0.12.0
     with:
       version_tag: ${{ needs.call-version-info-workflow.outputs.version_tag }}.cpu
       release_branch: main
@@ -41,7 +41,7 @@ jobs:
   call-docker-ghcr-workflow-gpu:
     needs: call-version-info-workflow
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@develop # TODO: pin to latest release
+    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@0.12.0
     with:
       version_tag: ${{ needs.call-version-info-workflow.outputs.version_tag }}.gpu
       release_branch: main

From 604dea67b5abf5c8a189efb0e1468a099e0d3670 Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Tue, 15 Oct 2024 10:32:59 -0800
Subject: [PATCH 29/40] update changelog, revert python pin

---
 CHANGELOG.md    | 10 +++++++++-
 environment.yml |  2 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 51c47cb..db6d762 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,8 +8,16 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [0.8.1]
 
+### Added
+* A `--use-gslc-prefix` option has been added to the `back_projection` and `time_series` workflows:
+  * This option causes `back_projection` to upload the GSLC outputs to a `GSLC_granules/` subprefix located within the S3 bucket and prefix given by the `--bucket` and `--bucket-prefix` options.
+  * This option causes `time_series` to download the GSLC inputs from the `GSLC_granules/` subprefix located within the bucket and prefix given by the `--bucket` and `--bucket-prefix` options.
+
 ### Changed
-* `time_series` can now search an s3 bucket for GSLCs (using `--gslc-bucket` and `--gslc-bucket-prefix`) if no GSLC granules are explicitly provided
+* Releases and test deployments now trigger a Docker build for both a GPU-based image and a CPU-based image. The GPU image tag ends with `.gpu` and the CPU image tag ends with `.cpu`.
+
+### Fixed
+* Fixed the parsing for the `--bounds` option for `time_series`.

 ## [0.8.0]

diff --git a/environment.yml b/environment.yml
index febc7a4..7c00306 100644
--- a/environment.yml
+++ b/environment.yml
@@ -5,7 +5,7 @@ channels:
 dependencies:
   # TODO: hyp3lib fails to import `cgi` which has been removed in Python 3.13: https://docs.python.org/3/library/cgi.html
   #       we should decide how to resolve this
-  - python>=3.10,<3.13
+  - python>=3.10
   - pip
   # For packaging, and testing
   - flake8

From 5ab1d46d9891ff48776cc658d19b3ddba06d080c Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Tue, 15 Oct 2024 10:34:07 -0800
Subject: [PATCH 30/40] changelog version

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index db6d762..dcdc315 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,7 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/)
 and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

-## [0.8.1]
+## [0.9.0]

 ### Added
 * A `--use-gslc-prefix` option has been added to the `back_projection` and `time_series` workflows:

From 54f8e6baae517055be39ca329f16a6a00111a91e Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Tue, 15 Oct 2024 10:35:02 -0800
Subject: [PATCH 31/40] remove old todo

---
 environment.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/environment.yml b/environment.yml
index 7c00306..24403dc 100644
--- a/environment.yml
+++ b/environment.yml
@@ -3,8 +3,6 @@ channels:
   - conda-forge
   - nodefaults
 dependencies:
-  # TODO: hyp3lib fails to import `cgi` which has been removed in Python 3.13: https://docs.python.org/3/library/cgi.html
-  #       we should decide how to resolve this
   - python>=3.10
   - pip

From 180f1b5b70985a5e66d09a4b622478c18c644acd Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Tue, 15 Oct 2024 10:37:41 -0800
Subject: [PATCH 32/40] changelog tweak

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dcdc315..0de5635 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ## [0.9.0]

 ### Added
-* A `--use-gslc-prefix` option has been added to the `back_projection` and `time_series` workflows:
+* A new `--use-gslc-prefix` option has been added to the `back_projection` and `time_series` workflows:
   * This option causes `back_projection` to upload the GSLC outputs to a `GSLC_granules/` subprefix located within the S3 bucket and prefix given by the `--bucket` and `--bucket-prefix` options.
   * This option causes `time_series` to download the GSLC inputs from the `GSLC_granules/` subprefix located within the bucket and prefix given by the `--bucket` and `--bucket-prefix` options.

From f4bb3b8e63b22ace306be0586a62c859d5641c56 Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Tue, 15 Oct 2024 14:52:06 -0400
Subject: [PATCH 33/40] remove references to manually pushing gpu image

---
 README.md | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 231e6bd..6b2fb51 100644
--- a/README.md
+++ b/README.md
@@ -77,21 +77,17 @@ aws ssm get-parameters --names /aws/service/ecs/optimized-ami/amazon-linux-2/gpu
 ### GPU Docker Container
 Once you have a compute environment set up as described above, you can build the GPU version of the container by running:
 ```bash
-docker build --build-arg="GPU_ARCH={YOUR_ARCH}" -t ghcr.io/asfhyp3/hyp3-srg:{RELEASE}.gpu -f Dockerfile.gpu .
+docker build --build-arg="GPU_ARCH={YOUR_ARCH}" -t srg -f Dockerfile.gpu .
 ```
 You can get the correct value of `GPU_ARCH` (the GPU's compute capability version) by running `nvidia-smi` on the instance to obtain the GPU type, then cross-referencing this information with NVIDIA's [GPU type compute capability list](https://developer.nvidia.com/cuda-gpus). For a g6.2xlarge instance, this would be:
 ```bash
-docker --build-arg="GPU_ARCH=89" -t ghcr.io/asfhyp3/hyp3-srg:{RELEASE}.gpu -f Dockerfile.gpu .
+docker build --build-arg="GPU_ARCH=89" -t srg -f Dockerfile.gpu .
 ```
 The compute capability version will always be the same for a given instance type, so you will only need to look this up once per instance type. The default value for this argument is `89` - the correct value for g6.2xlarge instances.
 **THE COMPUTE CAPABILITY VERSION MUST MATCH ON BOTH THE BUILDING AND RUNNING MACHINE!**
-The value of `RELEASE` can be obtained from the git tags.
-
-You can push a manual container to HyP3-SRG's container repository by following [this guide](https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry#pushing-container-images).
-
 ### EC2 Setup
 > [!CAUTION]
 > Running the docker container on an Amazon Linux 2023 Deep Learning AMI appears to succeed, but it produces all-zero outputs. Work is ongoing to determine what is causing this issue. For now, we recommend using option 2.3.

 When running on an EC2 instance, the following setup is recommended:
    3. Use the latest AWS ECS-optimized GPU AMI (`aws ssm get-parameters --names /aws/service/ecs/optimized-ami/amazon-linux-2/gpu/recommended --region us-west-2`)
 3. Build the GPU docker container with the correct compute capability version (see section above). To determine this value, run `nvidia-smi` on the instance to obtain the GPU type, then cross-reference this information with NVIDIA's [GPU type compute capability list](https://developer.nvidia.com/cuda-gpus). For a g6.2xlarge instance, this would be:
 ```bash
-docker --build-arg="GPU_ARCH=89" -t ghcr.io/asfhyp3/hyp3-srg:{RELEASE}.gpu -f Dockerfile.gpu .
+docker build --build-arg="GPU_ARCH=89" -t srg -f Dockerfile.gpu .
 ```
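A note on the `--use-gslc-prefix` handoff described in the changelog entries above: both workflows derive a shared `GSLC_granules/` location from the same `--bucket` and `--bucket-prefix` values, so no extra coordination is needed to point `time_series` at the GSLCs that `back_projection` produced. Below is a minimal sketch of that convention, assuming boto3; the helper names `upload_gslc` and `list_gslc_uris` are hypothetical and are not functions from this codebase.

```python
from pathlib import Path

import boto3

S3 = boto3.client('s3')
GSLC_SUBPREFIX = 'GSLC_granules'  # the shared subprefix named in the changelog entry


def upload_gslc(zip_path: str, bucket: str, bucket_prefix: str) -> None:
    # back_projection side: upload the GSLC under <bucket_prefix>/GSLC_granules/
    key = f'{bucket_prefix}/{GSLC_SUBPREFIX}/{Path(zip_path).name}'
    S3.upload_file(zip_path, bucket, key)


def list_gslc_uris(bucket: str, bucket_prefix: str) -> list[str]:
    # time_series side: list the same subprefix back as s3:// URIs
    prefix = f'{bucket_prefix}/{GSLC_SUBPREFIX}/'
    res = S3.list_objects(Bucket=bucket, Prefix=prefix)
    return [f's3://{bucket}/{item["Key"]}' for item in res.get('Contents', [])]
```

The design choice the changelog describes is that a single flag, rather than separate `--gslc-bucket`/`--gslc-bucket-prefix` options, keeps both workflows anchored to one S3 location.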

From aa2a42739c48ab35b6a42123120d06aa12c70646 Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Tue, 15 Oct 2024 15:18:40 -0400
Subject: [PATCH 34/40] removed unnecessary checks

---
 src/hyp3_srg/time_series.py | 3 +--
 tests/test_time_series.py   | 6 ------
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/hyp3_srg/time_series.py b/src/hyp3_srg/time_series.py
index 27b0ba2..d9759e6 100644
--- a/src/hyp3_srg/time_series.py
+++ b/src/hyp3_srg/time_series.py
@@ -31,11 +31,10 @@ def get_gslc_uris_from_s3(bucket: str, prefix: str = '') -> list[str]:
     Returns:
         uris: a list of uris to the zip files
     """
-    bucket = bucket.replace('s3:', '').replace('/', '')
     res = utils.s3_list_objects(bucket, prefix)

     def is_valid_key(key):
-        return (key.endswith('.zip') or key.endswith('.geo')) and re.search('S1[AB]_IW_RAW', key.split('/')[-1])
+        return key.endswith('.zip') and re.search('S1[AB]_IW_RAW', key.split('/')[-1])

     keys = [item['Key'] for item in res['Contents'] if is_valid_key(item['Key'])]
     uris = ['/'.join(['s3://' + bucket, key]) for key in keys]

diff --git a/tests/test_time_series.py b/tests/test_time_series.py
index 75302b0..f862b04 100644
--- a/tests/test_time_series.py
+++ b/tests/test_time_series.py
@@ -58,15 +58,11 @@ def test_get_gslc_uris_from_s3(monkeypatch):
             {
                 'Key': f'{prefix}/S1A_IW_RAW_foo.bad_extension'
             },
-            {
-                'Key': f'{prefix}/S1B_IW_RAW_bar.geo'
-            }
         ]
     }

     correct_uris = [
         f's3://{bucket}/{prefix}/S1A_IW_RAW_foo.zip',
-        f's3://{bucket}/{prefix}/S1B_IW_RAW_bar.geo'
     ]

     with monkeypatch.context() as m:
@@ -75,5 +71,3 @@ def test_get_gslc_uris_from_s3(monkeypatch):

         uris = time_series.get_gslc_uris_from_s3(bucket, prefix)
         assert uris == correct_uris
-
-        uris = time_series.get_gslc_uris_from_s3(f's3://{bucket}/', prefix)
-        assert uris == correct_uris

From 4a656d5d71f8d0806835fb67416e69094396faea Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Tue, 15 Oct 2024 15:22:07 -0400
Subject: [PATCH 35/40] move s3 to global scope

---
 src/hyp3_srg/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hyp3_srg/utils.py b/src/hyp3_srg/utils.py
index 5f3ebcc..f879231 100644
--- a/src/hyp3_srg/utils.py
+++ b/src/hyp3_srg/utils.py
@@ -13,6 +13,7 @@
 from shapely.geometry import Polygon, shape


+S3 = client('s3')
 log = logging.getLogger(__name__)
 EARTHDATA_HOST = 'urs.earthdata.nasa.gov'

@@ -275,7 +276,6 @@ def download_from_s3(uri: str, dest_dir: Optional[Path] = None) -> None:
         uri: URI of the file to download
         dest_dir: the directory to place the downloaded file in
     """
-    S3 = client('s3')
     bucket, key, out_path = get_s3_args(uri, dest_dir)
     S3.download_file(bucket, key, out_path)
     return out_path

From 27af46a03a8151f08565ab9d8a495f47f2ee1135 Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Tue, 15 Oct 2024 15:48:23 -0400
Subject: [PATCH 36/40] simplified search s3 function and removed test

---
 src/hyp3_srg/time_series.py |  6 +-----
 tests/test_time_series.py   | 30 ------------------------------
 2 files changed, 1 insertion(+), 35 deletions(-)

diff --git a/src/hyp3_srg/time_series.py b/src/hyp3_srg/time_series.py
index d9759e6..6fd4280 100644
--- a/src/hyp3_srg/time_series.py
+++ b/src/hyp3_srg/time_series.py
@@ -32,11 +32,7 @@ def get_gslc_uris_from_s3(bucket: str, prefix: str = '') -> list[str]:
         uris: a list of uris to the zip files
     """
     res = utils.s3_list_objects(bucket, prefix)
-
-    def is_valid_key(key):
-        return key.endswith('.zip') and re.search('S1[AB]_IW_RAW', key.split('/')[-1])
-
-    keys = [item['Key'] for item in res['Contents'] if is_valid_key(item['Key'])]
+    keys = [item['Key'] for item in res['Contents']]
     uris = ['/'.join(['s3://' + bucket, key]) for key in keys]
     return uris

diff --git a/tests/test_time_series.py b/tests/test_time_series.py
index f862b04..2c9bc7e 100644
--- a/tests/test_time_series.py
+++ b/tests/test_time_series.py
@@ -41,33 +41,3 @@ def test_get_size_from_dem(tmp_path):
         rsc_file.write(rsc_content.strip())
     dem_width, dem_height = time_series.get_size_from_dem(dem_path=rsc_path)
     assert dem_width, dem_height == (1235, 873)
-
-
-def test_get_gslc_uris_from_s3(monkeypatch):
-    bucket = 'bucket'
-    prefix = 'prefix'
-
-    mock_response = {
-        'Contents': [
-            {
-                'Key': f'{prefix}/S1A_IW_RAW_foo.zip'
-            },
-            {
-                'Key': f'{prefix}/prefibad_key.zip'
-            },
-            {
-                'Key': f'{prefix}/S1A_IW_RAW_foo.bad_extension'
-            },
-        ]
-    }
-
-    correct_uris = [
-        f's3://{bucket}/{prefix}/S1A_IW_RAW_foo.zip',
-    ]
-
-    with monkeypatch.context() as m:
-        mock_s3_list_objects = mock.Mock(return_value=mock_response)
-        m.setattr(utils, 's3_list_objects', mock_s3_list_objects)
-
-        uris = time_series.get_gslc_uris_from_s3(bucket, prefix)
-        assert uris == correct_uris

From 62a4f82c3e30d2010be17362cdcebaf9b1d9ecb0 Mon Sep 17 00:00:00 2001
From: Andrew Player
Date: Tue, 15 Oct 2024 15:54:37 -0400
Subject: [PATCH 37/40] Update src/hyp3_srg/utils.py

Co-authored-by: Jake Herrmann
---
 src/hyp3_srg/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/hyp3_srg/utils.py b/src/hyp3_srg/utils.py
index f879231..eb976f9 100644
--- a/src/hyp3_srg/utils.py
+++ b/src/hyp3_srg/utils.py
@@ -263,7 +263,6 @@ def s3_list_objects(bucket: str, prefix: str = '') -> dict:
     Returns:
         res: dictionary containing the response
     """
-    S3 = client('s3')
     bucket = bucket.replace('s3:', '').replace('/', '')
     res = S3.list_objects(Bucket=bucket, Prefix=prefix)
     return res

From c2eed4124a0f24c8deefbca65b3f521720e0d8ef Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Tue, 15 Oct 2024 14:10:33 -0800
Subject: [PATCH 38/40] imports

---
 src/hyp3_srg/time_series.py | 1 -
 tests/test_time_series.py   | 4 +---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/hyp3_srg/time_series.py b/src/hyp3_srg/time_series.py
index 6fd4280..79526da 100644
--- a/src/hyp3_srg/time_series.py
+++ b/src/hyp3_srg/time_series.py
@@ -4,7 +4,6 @@

 import argparse
 import logging
-import re
 import shutil
 from os import mkdir
 from pathlib import Path

diff --git a/tests/test_time_series.py b/tests/test_time_series.py
index 2c9bc7e..bd0e519 100644
--- a/tests/test_time_series.py
+++ b/tests/test_time_series.py
@@ -1,6 +1,4 @@
-from unittest import mock
-
-from hyp3_srg import time_series, utils
+from hyp3_srg import time_series


 def test_create_time_series_product_name():

From 46dd802e71e04bcaf2632448adaaec2eb075ee23 Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Tue, 15 Oct 2024 14:14:38 -0800
Subject: [PATCH 39/40] fix actions tags

---
 .github/workflows/test-and-build.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-and-build.yml b/.github/workflows/test-and-build.yml
index c2a4467..83c823f 100644
--- a/.github/workflows/test-and-build.yml
+++ b/.github/workflows/test-and-build.yml
@@ -28,7 +28,7 @@ jobs:
   call-docker-ghcr-workflow-cpu:
     needs: call-version-info-workflow
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@0.12.0
+    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@v0.12.0
     with:
       version_tag: ${{ needs.call-version-info-workflow.outputs.version_tag }}.cpu
       release_branch: main
@@ -41,7 +41,7 @@ jobs:
   call-docker-ghcr-workflow-gpu:
     needs: call-version-info-workflow
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@0.12.0
+    uses: ASFHyP3/actions/.github/workflows/reusable-docker-ghcr.yml@v0.12.0
     with:
       version_tag: ${{ needs.call-version-info-workflow.outputs.version_tag }}.gpu
       release_branch: main

From 7e82cc70384d7b9987de42862430a24374e68a82 Mon Sep 17 00:00:00 2001
From: Jake Herrmann
Date: Tue, 15 Oct 2024 14:16:56 -0800
Subject: [PATCH 40/40] python 3.13

---
 .github/workflows/test-and-build.yml | 2 +-
 pyproject.toml                       | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/test-and-build.yml b/.github/workflows/test-and-build.yml
index 83c823f..0226065 100644
--- a/.github/workflows/test-and-build.yml
+++ b/.github/workflows/test-and-build.yml
@@ -17,7 +17,7 @@ jobs:
     with:
       local_package_name: hyp3_srg
       python_versions: >-
-        ["3.10", "3.11", "3.12"]
+        ["3.10", "3.11", "3.12", "3.13"]

   call-version-info-workflow:
     # Docs: https://github.com/ASFHyP3/actions

diff --git a/pyproject.toml b/pyproject.toml
index 6404d46..5b222ff 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,8 @@ classifiers=[
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
 ]
 dependencies = [
     "hyp3lib>=3,<4",
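A closing note on the S3 listing that patches 34-37 settle on: `s3_list_objects` makes a single `list_objects` call, and boto3 caps that response at 1,000 keys, so a sufficiently large GSLC stack under one prefix would be silently truncated. For typical stack sizes the single call is fine; if it ever matters, a pagination-safe variant is straightforward. The sketch below is not part of the patch series - it uses boto3's real paginator API, but the function name is hypothetical:

```python
import boto3

S3 = boto3.client('s3')


def s3_list_objects_paginated(bucket: str, prefix: str = '') -> dict:
    """Hypothetical pagination-safe variant of s3_list_objects."""
    bucket = bucket.replace('s3:', '').replace('/', '')  # same normalization as the helper
    contents = []
    paginator = S3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        contents.extend(page.get('Contents', []))  # 'Contents' is absent on empty pages
    return {'Contents': contents}
```

Returning a `{'Contents': ...}` dict keeps the sketch drop-in compatible with callers that index `res['Contents']`, as `get_gslc_uris_from_s3` does after patch 36.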