From d8fe719f31b2d224c1f1b0346104fe6823d7928d Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Wed, 15 May 2024 13:56:45 -0800 Subject: [PATCH 01/58] Update tag-version.yml --- .github/workflows/tag-version.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/tag-version.yml b/.github/workflows/tag-version.yml index f7097006..5c3c528f 100644 --- a/.github/workflows/tag-version.yml +++ b/.github/workflows/tag-version.yml @@ -7,7 +7,6 @@ on: jobs: call-bump-version-workflow: - needs: deploy uses: ASFHyP3/actions/.github/workflows/reusable-bump-version.yml@v0.11.0 secrets: USER_TOKEN: ${{ secrets.TOOLS_BOT_PAK }} From f02552e59a3bb7ba050ed41f1d47dca3a0ed7dd4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 May 2024 18:39:30 +0000 Subject: [PATCH 02/58] --- updated-dependencies: - dependency-name: hyp3-sdk dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- requirements-its_live_monitoring.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-its_live_monitoring.txt b/requirements-its_live_monitoring.txt index 4fd8a39e..c3b13a70 100644 --- a/requirements-its_live_monitoring.txt +++ b/requirements-its_live_monitoring.txt @@ -1,5 +1,5 @@ geopandas==0.14.4 -hyp3-sdk==6.1.0 +hyp3-sdk==6.2.0 pandas==2.2.2 pystac-client==0.7.7 requests==2.31.0 From 4d4c48ec620231b93ceec2d5abea5d905177d748 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Thu, 23 May 2024 17:14:41 -0800 Subject: [PATCH 03/58] add a function to get the data coverage from https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l1c/items/, and modify qualifies_for_sentinel2_processing function --- its_live_monitoring/src/sentinel2.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index a3a220ec..a9923f30 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -11,6 +11,7 @@ import pystac import pystac_client import requests +import json from shapely.geometry import shape from constants import MAX_CLOUD_COVER_PERCENT, MAX_PAIR_SEPARATION_IN_DAYS @@ -45,6 +46,16 @@ def get_sentinel2_stac_item(scene: str) -> pystac.Item: # noqa: D103 return item +def get_data_coverage(item: pystac.Item) -> float: + # MMM_MSIXXX_YYYYMMDDHHMMSS_Nxxyy_ROOO_Txxxxx_.SAFE + MMM, MSIXXX, YYYYMMDDHHMMSS, Nxxyy, ROOO, Txxxxx, prod_discriminator = item.id.split('_') + id = f'{MMM}_{Txxxxx[1:]}_{YYYYMMDDHHMMSS[:8]}_0_L1C' + URL = f'https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l1c/items/{id}' + response = requests.get(URL) + response_data = json.loads(response.text) + return response_data['properties']['sentinel:data_coverage'] + + def qualifies_for_sentinel2_processing( item: pystac.item.Item, max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, log_level: int = logging.DEBUG ) -> bool: @@ -93,6 +104,9 @@ def qualifies_for_sentinel2_processing( log.log(log_level, f'{item.id} disqualifies for processing because it has too much cloud cover') return False + if get_data_coverage(item) <= 0.7: + log.log(log_level, f'{item.id} disqualifies for processing because its data coverage is too small') + log.log(log_level, f'{item.id} qualifies for processing') return True From 5294861f51dbe193ffe291189830255dd14de049 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Fri, 24 May 2024 11:49:37 -0800 Subject: [PATCH 04/58] add get_data_coverage function and modify qualifies_for_sentinel2_processing function --- its_live_monitoring/src/constants.py | 1 + its_live_monitoring/src/sentinel2.py | 22 +++++++++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/its_live_monitoring/src/constants.py b/its_live_monitoring/src/constants.py index 2ab82c03..efbf6ff6 100644 --- a/its_live_monitoring/src/constants.py +++ b/its_live_monitoring/src/constants.py @@ -2,3 +2,4 @@ MAX_PAIR_SEPARATION_IN_DAYS = 544 MAX_CLOUD_COVER_PERCENT = 60 +MAX_DATA_COVER_PERCENTAGE = 70 diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index a9923f30..e5716e1b 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -1,6 +1,5 @@ """Functions to support Sentinel-2 processing.""" -import json import logging import os from datetime import timedelta @@ -14,7 +13,7 @@ import json from shapely.geometry import shape -from constants import MAX_CLOUD_COVER_PERCENT, MAX_PAIR_SEPARATION_IN_DAYS +from constants import MAX_CLOUD_COVER_PERCENT, MAX_PAIR_SEPARATION_IN_DAYS, MAX_DATA_COVER_PERCENTAGE SENTINEL2_CATALOG_API = 'https://catalogue.dataspace.copernicus.eu/stac' @@ -52,8 +51,20 @@ def get_data_coverage(item: pystac.Item) -> float: id = f'{MMM}_{Txxxxx[1:]}_{YYYYMMDDHHMMSS[:8]}_0_L1C' URL = f'https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l1c/items/{id}' response = requests.get(URL) - response_data = json.loads(response.text) - return response_data['properties']['sentinel:data_coverage'] + if response.status_code != 200: + yyyy, mm, dd = YYYYMMDDHHMMSS[:4], YYYYMMDDHHMMSS[4:6], YYYYMMDDHHMMSS[6:8] + # https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/50/R/LR/2021/6/8/0/tileInfo.json + substr = f'{Txxxxx[1:3]}/{Txxxxx[3]}/{Txxxxx[4:6]}/{yyyy}/{str(int(mm))}/{str(int(dd))}/0' + URL2 = f'https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/{substr}/tileInfo.json' + response = requests.get(URL2) + if response.status_code != 200: + return None + else: + dic = response.json() + return dic['dataCoveragePercentage'] + else: + response_data = json.loads(response.text) + return response_data['properties']['sentinel:data_coverage'] def qualifies_for_sentinel2_processing( @@ -104,8 +115,9 @@ def qualifies_for_sentinel2_processing( log.log(log_level, f'{item.id} disqualifies for processing because it has too much cloud cover') return False - if get_data_coverage(item) <= 0.7: + if get_data_coverage(item) and get_data_coverage(item) <= MAX_DATA_COVER_PERCENTAGE: log.log(log_level, f'{item.id} disqualifies for processing because its data coverage is too small') + return False log.log(log_level, f'{item.id} qualifies for processing') return True From 98dee8da2cdd1c08ffb603135f37dcbb1bfee1e3 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Fri, 24 May 2024 12:12:53 -0800 Subject: [PATCH 05/58] modify style --- its_live_monitoring/src/sentinel2.py | 29 +++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index e5716e1b..1944ed5b 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -1,5 +1,4 @@ """Functions to support Sentinel-2 processing.""" - import logging import os from datetime import timedelta @@ -46,17 +45,25 @@ def get_sentinel2_stac_item(scene: str) -> pystac.Item: # noqa: D103 def get_data_coverage(item: pystac.Item) -> float: - # MMM_MSIXXX_YYYYMMDDHHMMSS_Nxxyy_ROOO_Txxxxx_.SAFE - MMM, MSIXXX, YYYYMMDDHHMMSS, Nxxyy, ROOO, Txxxxx, prod_discriminator = item.id.split('_') - id = f'{MMM}_{Txxxxx[1:]}_{YYYYMMDDHHMMSS[:8]}_0_L1C' - URL = f'https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l1c/items/{id}' - response = requests.get(URL) + """Get the data cover percentage of the scene + + Args: + item: STAC item of the desired Sentinel-2 scene + + Returns: + data cover percentage of the scene + """ + mmm, msixxx, yyyymmddhhmmss, nxxyy, rooo, txxxxx, prod_discriminator = item.id.split('_') + id = f'{mmm}_{txxxxx[1:]}_{yyyymmddhhmmss[:8]}_0_L1C' + url = f'https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l1c/items/{id}' + + response = requests.get(url) if response.status_code != 200: - yyyy, mm, dd = YYYYMMDDHHMMSS[:4], YYYYMMDDHHMMSS[4:6], YYYYMMDDHHMMSS[6:8] - # https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/50/R/LR/2021/6/8/0/tileInfo.json - substr = f'{Txxxxx[1:3]}/{Txxxxx[3]}/{Txxxxx[4:6]}/{yyyy}/{str(int(mm))}/{str(int(dd))}/0' - URL2 = f'https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/{substr}/tileInfo.json' - response = requests.get(URL2) + yyyy, mm, dd = yyyymmddhhmmss[:4], yyyymmddhhmmss[4:6], yyyymmddhhmmss[6:8] + substr = f'{txxxxx[1:3]}/{txxxxx[3]}/{txxxxx[4:6]}/{yyyy}/{str(int(mm))}/{str(int(dd))}/0' + url2 = f'https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/{substr}/tileInfo.json' + + response = requests.get(url2) if response.status_code != 200: return None else: From 09dc7c9aa9a8027ca5633866808ffd80867367bb Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Fri, 24 May 2024 12:15:35 -0800 Subject: [PATCH 06/58] modify style --- its_live_monitoring/src/sentinel2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 1944ed5b..eeb63569 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -45,13 +45,13 @@ def get_sentinel2_stac_item(scene: str) -> pystac.Item: # noqa: D103 def get_data_coverage(item: pystac.Item) -> float: - """Get the data cover percentage of the scene + """Get the data cover percentage of the scene. Args: - item: STAC item of the desired Sentinel-2 scene + item: STAC item of the desired Sentinel-2 scene. Returns: - data cover percentage of the scene + data cover percentage of the scene. """ mmm, msixxx, yyyymmddhhmmss, nxxyy, rooo, txxxxx, prod_discriminator = item.id.split('_') id = f'{mmm}_{txxxxx[1:]}_{yyyymmddhhmmss[:8]}_0_L1C' From 527a10220c9508668c0f11a9584e904772fb12ae Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Fri, 24 May 2024 12:27:58 -0800 Subject: [PATCH 07/58] modify style --- its_live_monitoring/src/sentinel2.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index eeb63569..85a5665d 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -1,4 +1,6 @@ """Functions to support Sentinel-2 processing.""" + +import json import logging import os from datetime import timedelta @@ -9,10 +11,9 @@ import pystac import pystac_client import requests -import json from shapely.geometry import shape -from constants import MAX_CLOUD_COVER_PERCENT, MAX_PAIR_SEPARATION_IN_DAYS, MAX_DATA_COVER_PERCENTAGE +from constants import MAX_CLOUD_COVER_PERCENT, MAX_DATA_COVER_PERCENTAGE, MAX_PAIR_SEPARATION_IN_DAYS SENTINEL2_CATALOG_API = 'https://catalogue.dataspace.copernicus.eu/stac' From 2bede0e2de881af6c2e5757dd9c2b41c5e488a40 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Fri, 24 May 2024 12:31:41 -0800 Subject: [PATCH 08/58] update CHANGELOG.md --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61befe78..6eb0c351 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.5.2] + +### Added +- data_coverage > Max_DATA_PERCENTAGE condition in the qualifies_for_sentinel2_processing function. + ## [0.5.1] ### Fixed From ab8c969c93327935569002676f5cfa7c4ed17b8c Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Fri, 24 May 2024 16:18:35 -0800 Subject: [PATCH 09/58] modify get_data_coverage function --- its_live_monitoring/src/constants.py | 2 +- its_live_monitoring/src/main.py | 26 ++++++++++------ its_live_monitoring/src/sentinel2.py | 44 ++++++++++------------------ 3 files changed, 33 insertions(+), 39 deletions(-) diff --git a/its_live_monitoring/src/constants.py b/its_live_monitoring/src/constants.py index efbf6ff6..71261931 100644 --- a/its_live_monitoring/src/constants.py +++ b/its_live_monitoring/src/constants.py @@ -2,4 +2,4 @@ MAX_PAIR_SEPARATION_IN_DAYS = 544 MAX_CLOUD_COVER_PERCENT = 60 -MAX_DATA_COVER_PERCENTAGE = 70 +MIN_DATA_COVER_PERCENT = 70 diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index a3eba6e0..d6fe5add 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -6,12 +6,13 @@ import os import sys from datetime import timedelta +from typing import Optional import geopandas as gpd import hyp3_sdk as sdk import pandas as pd -from constants import MAX_CLOUD_COVER_PERCENT, MAX_PAIR_SEPARATION_IN_DAYS +from constants import MAX_CLOUD_COVER_PERCENT, MIN_DATA_COVER_PERCENT, MAX_PAIR_SEPARATION_IN_DAYS from landsat import ( get_landsat_pairs_for_reference_scene, get_landsat_stac_item, @@ -86,12 +87,12 @@ def submit_pairs_for_processing(pairs: gpd.GeoDataFrame) -> sdk.Batch: # noqa: return jobs -def process_scene( - scene: str, - max_pair_separation: timedelta = timedelta(days=MAX_PAIR_SEPARATION_IN_DAYS), - max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, - submit: bool = True, -) -> sdk.Batch: +def process_scene(scene: str, + max_pair_separation: timedelta = timedelta(days=MAX_PAIR_SEPARATION_IN_DAYS), + max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, + min_data_cover: int = MIN_DATA_COVER_PERCENT, + s2_tile_path: str = None, + submit: bool = True,) -> sdk.Batch: """Trigger Landsat processing for a scene. Args: @@ -99,6 +100,8 @@ def process_scene( max_pair_separation: How many days back from a reference scene's acquisition date to search for secondary scenes. max_cloud_cover: The maximum percent a Landsat scene can be covered by clouds. + min_data_cover: The minimum percent of data coverage of the sentinel-2 scene + s2_tile_path: The path for the tile of the sentinel2 scene. submit: Submit pairs to HyP3 for processing. Returns: @@ -107,7 +110,7 @@ def process_scene( pairs = None if scene.startswith('S2'): reference = get_sentinel2_stac_item(f'{scene}.SAFE') - if qualifies_for_sentinel2_processing(reference, max_cloud_cover, logging.INFO): + if qualifies_for_sentinel2_processing(reference, s2_tile_path, max_cloud_cover, min_data_cover, logging.INFO): # hyp3-its-live will pull scenes from Google Cloud; ensure the new scene is there before processing # Note: Time between attempts is controlled by they SQS VisibilityTimout _ = raise_for_missing_in_google_cloud(scene) @@ -158,7 +161,12 @@ def lambda_handler(event: dict, context: object) -> dict: body = json.loads(record['body']) message = json.loads(body['Message']) product_id = 'landsat_product_id' if 'landsat_product_id' in message.keys() else 'name' - _ = process_scene(message[product_id]) + + s2_tile_path = None + if message[product_id].startswith('S2'): + s2_tile_path = message['tiles'][0]['path'] + + _ = process_scene(message[product_id], s2_tile_path=s2_tile_path) except Exception: log.exception(f'Could not process message {record["messageId"]}') batch_item_failures.append({'itemIdentifier': record['messageId']}) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 85a5665d..001b5be2 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -13,7 +13,7 @@ import requests from shapely.geometry import shape -from constants import MAX_CLOUD_COVER_PERCENT, MAX_DATA_COVER_PERCENTAGE, MAX_PAIR_SEPARATION_IN_DAYS +from constants import MAX_CLOUD_COVER_PERCENT, MIN_DATA_COVER_PERCENT, MAX_PAIR_SEPARATION_IN_DAYS SENTINEL2_CATALOG_API = 'https://catalogue.dataspace.copernicus.eu/stac' @@ -45,44 +45,29 @@ def get_sentinel2_stac_item(scene: str) -> pystac.Item: # noqa: D103 return item -def get_data_coverage(item: pystac.Item) -> float: - """Get the data cover percentage of the scene. - - Args: - item: STAC item of the desired Sentinel-2 scene. - - Returns: - data cover percentage of the scene. - """ - mmm, msixxx, yyyymmddhhmmss, nxxyy, rooo, txxxxx, prod_discriminator = item.id.split('_') - id = f'{mmm}_{txxxxx[1:]}_{yyyymmddhhmmss[:8]}_0_L1C' - url = f'https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l1c/items/{id}' +def _get_data_coverage(s2_tile_path: str) -> float: + url = f'https://roda.sentinel-hub.com/sentinel-s2-l1c/{s2_tile_path}/tileInfo.json' response = requests.get(url) if response.status_code != 200: - yyyy, mm, dd = yyyymmddhhmmss[:4], yyyymmddhhmmss[4:6], yyyymmddhhmmss[6:8] - substr = f'{txxxxx[1:3]}/{txxxxx[3]}/{txxxxx[4:6]}/{yyyy}/{str(int(mm))}/{str(int(dd))}/0' - url2 = f'https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/{substr}/tileInfo.json' - - response = requests.get(url2) - if response.status_code != 200: - return None - else: - dic = response.json() - return dic['dataCoveragePercentage'] + return None else: - response_data = json.loads(response.text) - return response_data['properties']['sentinel:data_coverage'] + res_dic = response.json() + return res_dic['dataCoveragePercentage'] def qualifies_for_sentinel2_processing( - item: pystac.item.Item, max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, log_level: int = logging.DEBUG -) -> bool: + item: pystac.item.Item, s2_tile_path: str, + max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, + min_data_cover: int = MIN_DATA_COVER_PERCENT, + log_level: int = logging.DEBUG) -> bool: """Determines whether a scene is a valid Sentinel-2 product for processing. Args: - item: STAC item of the desired Sentinel-2 scene + item: STAC item of the desired Sentinel-2 scene. + s2_tile_path: The path of the tile of sentinel-2 scene. max_cloud_cover: The maximum allowable percentage of cloud cover. + min_data_cover: The minimum allowable percentage of data cover. log_level: The logging level Returns: @@ -123,7 +108,8 @@ def qualifies_for_sentinel2_processing( log.log(log_level, f'{item.id} disqualifies for processing because it has too much cloud cover') return False - if get_data_coverage(item) and get_data_coverage(item) <= MAX_DATA_COVER_PERCENTAGE: + data_cover_percentage = _get_data_coverage(s2_tile_path) + if data_cover_percentage is None or data_cover_percentage <= min_data_cover: log.log(log_level, f'{item.id} disqualifies for processing because its data coverage is too small') return False From f18cbb4822d2593c20d504da96456458f37d003e Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Fri, 24 May 2024 16:21:46 -0800 Subject: [PATCH 10/58] ruff check --fix to fix errors --- its_live_monitoring/src/main.py | 3 +-- its_live_monitoring/src/sentinel2.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index d6fe5add..c6eaa577 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -6,13 +6,12 @@ import os import sys from datetime import timedelta -from typing import Optional import geopandas as gpd import hyp3_sdk as sdk import pandas as pd -from constants import MAX_CLOUD_COVER_PERCENT, MIN_DATA_COVER_PERCENT, MAX_PAIR_SEPARATION_IN_DAYS +from constants import MAX_CLOUD_COVER_PERCENT, MAX_PAIR_SEPARATION_IN_DAYS, MIN_DATA_COVER_PERCENT from landsat import ( get_landsat_pairs_for_reference_scene, get_landsat_stac_item, diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 001b5be2..f603e027 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -13,7 +13,7 @@ import requests from shapely.geometry import shape -from constants import MAX_CLOUD_COVER_PERCENT, MIN_DATA_COVER_PERCENT, MAX_PAIR_SEPARATION_IN_DAYS +from constants import MAX_CLOUD_COVER_PERCENT, MAX_PAIR_SEPARATION_IN_DAYS, MIN_DATA_COVER_PERCENT SENTINEL2_CATALOG_API = 'https://catalogue.dataspace.copernicus.eu/stac' From 276e047377b64bd5b1c3a0314842da63b2fb1e70 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Fri, 24 May 2024 16:28:47 -0800 Subject: [PATCH 11/58] modify style --- its_live_monitoring/src/main.py | 14 ++++++++------ its_live_monitoring/src/sentinel2.py | 6 ++++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index c6eaa577..dd259d11 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -86,12 +86,14 @@ def submit_pairs_for_processing(pairs: gpd.GeoDataFrame) -> sdk.Batch: # noqa: return jobs -def process_scene(scene: str, - max_pair_separation: timedelta = timedelta(days=MAX_PAIR_SEPARATION_IN_DAYS), - max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, - min_data_cover: int = MIN_DATA_COVER_PERCENT, - s2_tile_path: str = None, - submit: bool = True,) -> sdk.Batch: +def process_scene( + scene: str, + max_pair_separation: timedelta = timedelta(days=MAX_PAIR_SEPARATION_IN_DAYS), + max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, + min_data_cover: int = MIN_DATA_COVER_PERCENT, + s2_tile_path: str = None, + submit: bool = True,)\ + -> sdk.Batch: """Trigger Landsat processing for a scene. Args: diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index f603e027..f3c7f654 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -57,10 +57,12 @@ def _get_data_coverage(s2_tile_path: str) -> float: def qualifies_for_sentinel2_processing( - item: pystac.item.Item, s2_tile_path: str, + item: pystac.item.Item, + s2_tile_path: str, max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, min_data_cover: int = MIN_DATA_COVER_PERCENT, - log_level: int = logging.DEBUG) -> bool: + log_level: int = logging.DEBUG)\ + -> bool: """Determines whether a scene is a valid Sentinel-2 product for processing. Args: From 33f31822a3b70eecefa6c8734aa99c3ab5a94d4d Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Fri, 24 May 2024 16:34:22 -0800 Subject: [PATCH 12/58] modify style --- its_live_monitoring/src/main.py | 14 +++++++------- its_live_monitoring/src/sentinel2.py | 12 ++++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index dd259d11..e9355574 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -87,13 +87,13 @@ def submit_pairs_for_processing(pairs: gpd.GeoDataFrame) -> sdk.Batch: # noqa: def process_scene( - scene: str, - max_pair_separation: timedelta = timedelta(days=MAX_PAIR_SEPARATION_IN_DAYS), - max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, - min_data_cover: int = MIN_DATA_COVER_PERCENT, - s2_tile_path: str = None, - submit: bool = True,)\ - -> sdk.Batch: + scene: str, + max_pair_separation: timedelta = timedelta(days=MAX_PAIR_SEPARATION_IN_DAYS), + max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, + min_data_cover: int = MIN_DATA_COVER_PERCENT, + s2_tile_path: str = None, + submit: bool = True, +) -> sdk.Batch: """Trigger Landsat processing for a scene. Args: diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index f3c7f654..ce58d4dc 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -57,12 +57,12 @@ def _get_data_coverage(s2_tile_path: str) -> float: def qualifies_for_sentinel2_processing( - item: pystac.item.Item, - s2_tile_path: str, - max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, - min_data_cover: int = MIN_DATA_COVER_PERCENT, - log_level: int = logging.DEBUG)\ - -> bool: + item: pystac.item.Item, + s2_tile_path: str, + max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, + min_data_cover: int = MIN_DATA_COVER_PERCENT, + log_level: int = logging.DEBUG, +) -> bool: """Determines whether a scene is a valid Sentinel-2 product for processing. Args: From c1ed0bd97147d1120e956182f88f6befaef54fe3 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Fri, 24 May 2024 16:57:43 -0800 Subject: [PATCH 13/58] modify argument s2_tile_path in qualifies_for_sentinel2_processing function --- its_live_monitoring/src/sentinel2.py | 15 +++++---------- tests/its_live_monitoring/test_sentinel2.py | 2 ++ 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index ce58d4dc..2d79c02e 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -56,13 +56,7 @@ def _get_data_coverage(s2_tile_path: str) -> float: return res_dic['dataCoveragePercentage'] -def qualifies_for_sentinel2_processing( - item: pystac.item.Item, - s2_tile_path: str, - max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, - min_data_cover: int = MIN_DATA_COVER_PERCENT, - log_level: int = logging.DEBUG, -) -> bool: +def qualifies_for_sentinel2_processing(item: pystac.item.Item, s2_tile_path: str = None, max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, min_data_cover: int = MIN_DATA_COVER_PERCENT, log_level: int = logging.DEBUG,) -> bool: """Determines whether a scene is a valid Sentinel-2 product for processing. Args: @@ -110,9 +104,10 @@ def qualifies_for_sentinel2_processing( log.log(log_level, f'{item.id} disqualifies for processing because it has too much cloud cover') return False - data_cover_percentage = _get_data_coverage(s2_tile_path) - if data_cover_percentage is None or data_cover_percentage <= min_data_cover: - log.log(log_level, f'{item.id} disqualifies for processing because its data coverage is too small') + if s2_tile_path: + data_cover_percentage = _get_data_coverage(s2_tile_path) + if data_cover_percentage is None or data_cover_percentage <= min_data_cover: + log.log(log_level, f'{item.id} disqualifies for processing because its data coverage is too small') return False log.log(log_level, f'{item.id} qualifies for processing') diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index 78bae82f..947cccaf 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -54,9 +54,11 @@ def test_qualifies_for_processing(pystac_item_factory): } collection = 'SENTINEL-2' + good_item = pystac_item_factory( id='XXX_XXXL1C_XXXX_XXXX_XXXX', datetime=datetime.now(), properties=properties, collection=collection ) + # s2_tile_path = f'tiles/19/D/EE/2024/4/30/1' assert sentinel2.qualifies_for_sentinel2_processing(good_item) From c451e30cf0da848d49f1cd5e81d96d223904c61d Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Fri, 24 May 2024 18:32:56 -0800 Subject: [PATCH 14/58] modify test_sentinel2.py --- its_live_monitoring/src/sentinel2.py | 2 +- tests/its_live_monitoring/test_sentinel2.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 2d79c02e..8d1c96e1 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -150,7 +150,7 @@ def get_sentinel2_pairs_for_reference_scene( log.debug(f'{item.id} disqualifies because it is from a different tile than the reference scene') continue - if not qualifies_for_sentinel2_processing(item, max_cloud_cover): + if not qualifies_for_sentinel2_processing(item, max_cloud_cover=max_cloud_cover): continue items.append(item) diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index 947cccaf..55908729 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -8,7 +8,6 @@ import sentinel2 - @responses.activate def test_raise_for_missing_in_google_cloud(): existing_scene = 'S2B_MSIL1C_20200315T152259_N0209_R039_T13CES_20200315T181115' From 2f7c6ed055a4b253bddd17b2c8c19ba7c2e1fa0a Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Tue, 28 May 2024 08:49:45 -0800 Subject: [PATCH 15/58] modify the code style --- tests/its_live_monitoring/test_sentinel2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index 55908729..947cccaf 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -8,6 +8,7 @@ import sentinel2 + @responses.activate def test_raise_for_missing_in_google_cloud(): existing_scene = 'S2B_MSIL1C_20200315T152259_N0209_R039_T13CES_20200315T181115' From 7d5b9defb5d82492fbfd5700080aad18bc0a3e87 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Tue, 28 May 2024 08:57:20 -0800 Subject: [PATCH 16/58] modify code style --- its_live_monitoring/src/sentinel2.py | 10 +++++++--- tests/its_live_monitoring/test_sentinel2.py | 1 - 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 8d1c96e1..721334b7 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -52,11 +52,15 @@ def _get_data_coverage(s2_tile_path: str) -> float: if response.status_code != 200: return None else: - res_dic = response.json() - return res_dic['dataCoveragePercentage'] + return response.json() -def qualifies_for_sentinel2_processing(item: pystac.item.Item, s2_tile_path: str = None, max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, min_data_cover: int = MIN_DATA_COVER_PERCENT, log_level: int = logging.DEBUG,) -> bool: +def qualifies_for_sentinel2_processing( + item: pystac.item.Item, s2_tile_path: str = None, + max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, + min_data_cover: int = MIN_DATA_COVER_PERCENT, + log_level: int = logging.DEBUG, +) -> bool: """Determines whether a scene is a valid Sentinel-2 product for processing. Args: diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index 947cccaf..ce67a802 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -54,7 +54,6 @@ def test_qualifies_for_processing(pystac_item_factory): } collection = 'SENTINEL-2' - good_item = pystac_item_factory( id='XXX_XXXL1C_XXXX_XXXX_XXXX', datetime=datetime.now(), properties=properties, collection=collection ) From 7f8d7ec7fe0a0101cea5393bfdee1f295a8ae289 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Tue, 28 May 2024 10:05:48 -0800 Subject: [PATCH 17/58] cutover to earth-search v1; adjust s3 cloud cover percent; add min data coverage check --- CHANGELOG.md | 2 +- its_live_monitoring/src/constants.py | 5 -- its_live_monitoring/src/landsat.py | 11 +-- its_live_monitoring/src/main.py | 43 ++--------- its_live_monitoring/src/sentinel2.py | 111 +++++++++++++-------------- 5 files changed, 67 insertions(+), 105 deletions(-) delete mode 100644 its_live_monitoring/src/constants.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 6eb0c351..55b60959 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.5.2] ### Added -- data_coverage > Max_DATA_PERCENTAGE condition in the qualifies_for_sentinel2_processing function. +- Sentinel-2 will now be disqualified from processing if they do not have enough data coverage. ## [0.5.1] diff --git a/its_live_monitoring/src/constants.py b/its_live_monitoring/src/constants.py deleted file mode 100644 index 71261931..00000000 --- a/its_live_monitoring/src/constants.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Constants that are shared between multiple files.""" - -MAX_PAIR_SEPARATION_IN_DAYS = 544 -MAX_CLOUD_COVER_PERCENT = 60 -MIN_DATA_COVER_PERCENT = 70 diff --git a/its_live_monitoring/src/landsat.py b/its_live_monitoring/src/landsat.py index 7d5696cc..7ce804ff 100644 --- a/its_live_monitoring/src/landsat.py +++ b/its_live_monitoring/src/landsat.py @@ -11,8 +11,6 @@ import pystac import pystac_client -from constants import MAX_CLOUD_COVER_PERCENT, MAX_PAIR_SEPARATION_IN_DAYS - LANDSAT_CATALOG_API = 'https://landsatlook.usgs.gov/stac-server' LANDSAT_CATALOG = pystac_client.Client.open(LANDSAT_CATALOG_API) @@ -20,6 +18,9 @@ LANDSAT_COLLECTION = LANDSAT_CATALOG.get_collection(LANDSAT_COLLECTION_NAME) LANDSAT_TILES_TO_PROCESS = json.loads((Path(__file__).parent / 'landsat_tiles_to_process.json').read_text()) +LANDSAT_MAX_PAIR_SEPARATION_IN_DAYS = 544 +LANDSAT_MAX_CLOUD_COVER_PERCENT = 60 + log = logging.getLogger('its_live_monitoring') log.setLevel(os.environ.get('LOGGING_LEVEL', 'INFO')) @@ -35,7 +36,7 @@ def get_landsat_stac_item(scene: str) -> pystac.Item: # noqa: D103 def qualifies_for_landsat_processing( - item: pystac.item.Item, max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, log_level: int = logging.DEBUG + item: pystac.item.Item, max_cloud_cover: int = LANDSAT_MAX_CLOUD_COVER_PERCENT, log_level: int = logging.DEBUG ) -> bool: """Determines whether a scene is a valid Landsat product for processing. @@ -77,8 +78,8 @@ def qualifies_for_landsat_processing( def get_landsat_pairs_for_reference_scene( reference: pystac.item.Item, - max_pair_separation: timedelta = timedelta(days=MAX_PAIR_SEPARATION_IN_DAYS), - max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, + max_pair_separation: timedelta = timedelta(days=LANDSAT_MAX_PAIR_SEPARATION_IN_DAYS), + max_cloud_cover: int = LANDSAT_MAX_CLOUD_COVER_PERCENT, ) -> gpd.GeoDataFrame: """Generate potential ITS_LIVE velocity pairs for a given Landsat scene. diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index e9355574..58e66210 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -5,13 +5,11 @@ import logging import os import sys -from datetime import timedelta import geopandas as gpd import hyp3_sdk as sdk import pandas as pd -from constants import MAX_CLOUD_COVER_PERCENT, MAX_PAIR_SEPARATION_IN_DAYS, MIN_DATA_COVER_PERCENT from landsat import ( get_landsat_pairs_for_reference_scene, get_landsat_stac_item, @@ -88,21 +86,12 @@ def submit_pairs_for_processing(pairs: gpd.GeoDataFrame) -> sdk.Batch: # noqa: def process_scene( scene: str, - max_pair_separation: timedelta = timedelta(days=MAX_PAIR_SEPARATION_IN_DAYS), - max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, - min_data_cover: int = MIN_DATA_COVER_PERCENT, - s2_tile_path: str = None, submit: bool = True, ) -> sdk.Batch: """Trigger Landsat processing for a scene. Args: scene: Reference Landsat scene name to build pairs for. - max_pair_separation: How many days back from a reference scene's acquisition date to search for secondary - scenes. - max_cloud_cover: The maximum percent a Landsat scene can be covered by clouds. - min_data_cover: The minimum percent of data coverage of the sentinel-2 scene - s2_tile_path: The path for the tile of the sentinel2 scene. submit: Submit pairs to HyP3 for processing. Returns: @@ -110,17 +99,18 @@ def process_scene( """ pairs = None if scene.startswith('S2'): - reference = get_sentinel2_stac_item(f'{scene}.SAFE') - if qualifies_for_sentinel2_processing(reference, s2_tile_path, max_cloud_cover, min_data_cover, logging.INFO): + # Fixme: will throw if wrong collection! + reference = get_sentinel2_stac_item(scene) + if qualifies_for_sentinel2_processing(reference, logging.INFO): # hyp3-its-live will pull scenes from Google Cloud; ensure the new scene is there before processing # Note: Time between attempts is controlled by they SQS VisibilityTimout _ = raise_for_missing_in_google_cloud(scene) - pairs = get_sentinel2_pairs_for_reference_scene(reference, max_pair_separation, max_cloud_cover) + pairs = get_sentinel2_pairs_for_reference_scene(reference) else: reference = get_landsat_stac_item(scene) - if qualifies_for_landsat_processing(reference, max_cloud_cover, logging.INFO): - pairs = get_landsat_pairs_for_reference_scene(reference, max_pair_separation, max_cloud_cover) + if qualifies_for_landsat_processing(reference, logging.INFO): + pairs = get_landsat_pairs_for_reference_scene(reference) if pairs is None: return sdk.Batch() @@ -162,12 +152,7 @@ def lambda_handler(event: dict, context: object) -> dict: body = json.loads(record['body']) message = json.loads(body['Message']) product_id = 'landsat_product_id' if 'landsat_product_id' in message.keys() else 'name' - - s2_tile_path = None - if message[product_id].startswith('S2'): - s2_tile_path = message['tiles'][0]['path'] - - _ = process_scene(message[product_id], s2_tile_path=s2_tile_path) + _ = process_scene(message[product_id]) except Exception: log.exception(f'Could not process message {record["messageId"]}') batch_item_failures.append({'itemIdentifier': record['messageId']}) @@ -178,18 +163,6 @@ def main() -> None: """Command Line wrapper around `process_scene`.""" parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('reference', help='Reference Landsat scene name to build pairs for') - parser.add_argument( - '--max-pair-separation', - type=int, - default=MAX_PAIR_SEPARATION_IN_DAYS, - help="How many days back from a reference scene's acquisition date to search for secondary scenes", - ) - parser.add_argument( - '--max-cloud-cover', - type=int, - default=MAX_CLOUD_COVER_PERCENT, - help='The maximum percent a Landsat scene can be covered by clouds', - ) parser.add_argument('--submit', action='store_true', help='Submit pairs to HyP3 for processing') parser.add_argument('-v', '--verbose', action='store_true', help='Turn on verbose logging') args = parser.parse_args() @@ -199,7 +172,7 @@ def main() -> None: log.setLevel(logging.DEBUG) log.debug(' '.join(sys.argv)) - _ = process_scene(args.reference, timedelta(days=args.max_pair_separation), args.max_cloud_cover, args.submit) + _ = process_scene(args.reference, submit=args.submit) if __name__ == '__main__': diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 721334b7..aa056d97 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -11,17 +11,19 @@ import pystac import pystac_client import requests -from shapely.geometry import shape -from constants import MAX_CLOUD_COVER_PERCENT, MAX_PAIR_SEPARATION_IN_DAYS, MIN_DATA_COVER_PERCENT - -SENTINEL2_CATALOG_API = 'https://catalogue.dataspace.copernicus.eu/stac' +SENTINEL2_CATALOG_API = 'https://earth-search.aws.element84.com/v1/' SENTINEL2_CATALOG = pystac_client.Client.open(SENTINEL2_CATALOG_API) -SENTINEL2_COLLECTION_NAME = 'SENTINEL-2' +SENTINEL2_COLLECTION_NAME = 'sentinel-2-l1c' SENTINEL2_COLLECTION = SENTINEL2_CATALOG.get_collection(SENTINEL2_COLLECTION_NAME) SENTINEL2_TILES_TO_PROCESS = json.loads((Path(__file__).parent / 'sentinel2_tiles_to_process.json').read_text()) +SENTINEL2_MAX_PAIR_SEPARATION_IN_DAYS = 544 +SENTINEL2_MIN_PAIR_SEPARATION_IN_DAYS = 5 +SENTINEL2_MAX_CLOUD_COVER_PERCENT = 70 +SENTINEL2_MIN_DATA_COVERAGE = 70 + log = logging.getLogger('its_live_monitoring') log.setLevel(os.environ.get('LOGGING_LEVEL', 'INFO')) @@ -35,39 +37,43 @@ def raise_for_missing_in_google_cloud(scene_name: str) -> None: # noqa: D103 response.raise_for_status() +def add_data_coverage_to_item(item: pystac.Item) -> pystac.Item: # noqa: D103 + tile_info_path = item.assets['tileinfo_metadata'].href.rstrip('s3://') + + response = requests.get(f'https://roda.sentinel-hub.com/{tile_info_path}') + response.raise_for_status() + + item.properties['s2:data_coverage'] = response.json()['dataCoveragePercentage'] + return item + + def get_sentinel2_stac_item(scene: str) -> pystac.Item: # noqa: D103 - item = SENTINEL2_COLLECTION.get_item(scene) - if item is None: + results = SENTINEL2_CATALOG.search(collections=[SENTINEL2_COLLECTION_NAME], query=[f's2:product_uri={scene}.SAFE']) + + items = [item for page in results.pages() for item in page] + + if (n_items := len(items)) != 1: raise ValueError( - f'Scene {scene} not found in Sentinel-2 STAC collection: ' + f'{n_items} for {scene} found in Sentinel-2 STAC collection: ' f'{SENTINEL2_CATALOG_API}/collections/{SENTINEL2_COLLECTION_NAME}' ) - return item - -def _get_data_coverage(s2_tile_path: str) -> float: - url = f'https://roda.sentinel-hub.com/sentinel-s2-l1c/{s2_tile_path}/tileInfo.json' + item = items[0] + item = add_data_coverage_to_item(item) - response = requests.get(url) - if response.status_code != 200: - return None - else: - return response.json() + return item def qualifies_for_sentinel2_processing( - item: pystac.item.Item, s2_tile_path: str = None, - max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, - min_data_cover: int = MIN_DATA_COVER_PERCENT, - log_level: int = logging.DEBUG, + item: pystac.Item, + max_cloud_cover: int = SENTINEL2_MAX_CLOUD_COVER_PERCENT, + log_level: int = logging.DEBUG, ) -> bool: """Determines whether a scene is a valid Sentinel-2 product for processing. Args: item: STAC item of the desired Sentinel-2 scene. - s2_tile_path: The path of the tile of sentinel-2 scene. max_cloud_cover: The maximum allowable percentage of cloud cover. - min_data_cover: The minimum allowable percentage of data cover. log_level: The logging level Returns: @@ -77,7 +83,7 @@ def qualifies_for_sentinel2_processing( log.log(log_level, f'{item.id} disqualifies for processing because it is from the wrong collection') return False - if item.id.split('_')[3] == 'N0500': + if item.properties['s2:product_uri'].split('_')[3] == 'N0500': # Reprocessing activity: https://sentinels.copernicus.eu/web/sentinel/technical-guides/sentinel-2-msi/copernicus-sentinel-2-collection-1-availability-status # Naming convention: https://sentinels.copernicus.eu/web/sentinel/user-guides/sentinel-2-msi/naming-convention # Processing baselines: https://sentinels.copernicus.eu/web/sentinel/technical-guides/sentinel-2-msi/processing-baseline @@ -88,11 +94,12 @@ def qualifies_for_sentinel2_processing( ) return False - if not item.properties['productType'].endswith('1C'): + product_type = item.properties['s2:product_uri'].split('_')[3] + if not product_type.endswith('L1C'): log.log(log_level, f'{item.id} disqualifies for processing because it is the wrong product type.') return False - if item.properties['instrumentShortName'] != 'MSI': + if not product_type.startswith('MSI'): log.log(log_level, f'{item.id} disqualifies for processing because it was not imaged with the right instrument') return False @@ -108,56 +115,42 @@ def qualifies_for_sentinel2_processing( log.log(log_level, f'{item.id} disqualifies for processing because it has too much cloud cover') return False - if s2_tile_path: - data_cover_percentage = _get_data_coverage(s2_tile_path) - if data_cover_percentage is None or data_cover_percentage <= min_data_cover: - log.log(log_level, f'{item.id} disqualifies for processing because its data coverage is too small') - return False - log.log(log_level, f'{item.id} qualifies for processing') return True def get_sentinel2_pairs_for_reference_scene( - reference: pystac.item.Item, - max_pair_separation: timedelta = timedelta(days=MAX_PAIR_SEPARATION_IN_DAYS), - max_cloud_cover: int = MAX_CLOUD_COVER_PERCENT, + reference: pystac.Item, + max_pair_separation: timedelta = timedelta(days=SENTINEL2_MAX_PAIR_SEPARATION_IN_DAYS), + min_pair_separation: timedelta = timedelta(days=SENTINEL2_MIN_PAIR_SEPARATION_IN_DAYS), + max_cloud_cover: int = SENTINEL2_MAX_CLOUD_COVER_PERCENT, ) -> gpd.GeoDataFrame: """Generate potential ITS_LIVE velocity pairs for a given Sentinel-2 scene. Args: - reference: STAC item of the Sentinel-2 reference scene to find pairs for - max_pair_separation: How many days back from a reference scene's acquisition date to search for secondary scenes + reference: STAC item of the Sentinel-2 reference scene to find secondary scenes for + max_pair_separation: How many days back from a reference scene's acquisition date to start searching for + secondary scenes + min_pair_separation: How many days back from a reference scene's acquisition date to stop searching for + secondary scenes max_cloud_cover: The maximum percent of the secondary scene that can be covered by clouds Returns: A DataFrame with all potential pairs for a sentinel-2 reference scene. Metadata in the columns will be for the *secondary* scene unless specified otherwise. """ - # Sentinel-2 tiles overlap by 10 km, so searching by bbox or geometry, will pull in results from multiple tiles. - # Since tiles are all 110 km in their UTM zone, they will be at least 1 deg x 1 deg in lat lon. - # Thus, drawing a 0.25 degree square around a tile centroid should limit the search to a single tile. - search_bbox = shape(reference.geometry).centroid.buffer(0.25, cap_style='square').bounds - results = SENTINEL2_CATALOG.search( collections=[reference.collection_id], - bbox=search_bbox, - datetime=[reference.datetime - max_pair_separation, reference.datetime - timedelta(seconds=1)], - limit=1000, - method='GET', + query=[ + f'grid:code={reference.properties["grid:code"]}', + f'eo:cloud_cover<={SENTINEL2_MAX_CLOUD_COVER_PERCENT}', + ], + datetime=[reference.datetime - max_pair_separation, reference.datetime - min_pair_separation], ) - items = [] - for page in results.pages(): - for item in page: - if item.properties['tileId'] != reference.properties['tileId']: - log.debug(f'{item.id} disqualifies because it is from a different tile than the reference scene') - continue - - if not qualifies_for_sentinel2_processing(item, max_cloud_cover=max_cloud_cover): - continue - - items.append(item) + items = [ + item for page in results.pages() for item in page if qualifies_for_sentinel2_processing(item, max_cloud_cover) + ] log.debug(f'Found {len(items)} secondary scenes for {reference.id}') if len(items) == 0: @@ -166,9 +159,9 @@ def get_sentinel2_pairs_for_reference_scene( features = [] for item in items: feature = item.to_dict() - feature['properties']['reference'] = reference.id.rstrip('.SAFE') + feature['properties']['reference'] = reference.properties['s2:product_uri'].rstrip('.SAFE') feature['properties']['reference_acquisition'] = reference.datetime - feature['properties']['secondary'] = item.id.rstrip('.SAFE') + feature['properties']['secondary'] = item.properties['s2:product_uri'].rstrip('.SAFE') features.append(feature) df = gpd.GeoDataFrame.from_features(features) From 736e193121a25cdc513b1ad92a4af7dc3a6265ad Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Tue, 28 May 2024 11:29:04 -0800 Subject: [PATCH 18/58] modify one line in add_data_coverage_to_item function --- its_live_monitoring/src/sentinel2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index aa056d97..bf7e1edd 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -38,7 +38,7 @@ def raise_for_missing_in_google_cloud(scene_name: str) -> None: # noqa: D103 def add_data_coverage_to_item(item: pystac.Item) -> pystac.Item: # noqa: D103 - tile_info_path = item.assets['tileinfo_metadata'].href.rstrip('s3://') + tile_info_path = item.assets['tileinfo_metadata'].href[5:] response = requests.get(f'https://roda.sentinel-hub.com/{tile_info_path}') response.raise_for_status() From e2c23d74da4a21405342e95394f2272beefbf4a6 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Tue, 28 May 2024 14:56:24 -0500 Subject: [PATCH 19/58] add data coverage at end of qualifies to support secondaries --- its_live_monitoring/src/sentinel2.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index aa056d97..f5b48c1a 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -59,7 +59,6 @@ def get_sentinel2_stac_item(scene: str) -> pystac.Item: # noqa: D103 ) item = items[0] - item = add_data_coverage_to_item(item) return item @@ -79,11 +78,13 @@ def qualifies_for_sentinel2_processing( Returns: A bool that is True if the scene qualifies for Sentinel-2 processing, else False. """ + if item.collection_id != SENTINEL2_COLLECTION_NAME: log.log(log_level, f'{item.id} disqualifies for processing because it is from the wrong collection') return False - if item.properties['s2:product_uri'].split('_')[3] == 'N0500': + product_uri_split = item.properties['s2:product_uri'].split('_') + if product_uri_split[3] == 'N0500': # Reprocessing activity: https://sentinels.copernicus.eu/web/sentinel/technical-guides/sentinel-2-msi/copernicus-sentinel-2-collection-1-availability-status # Naming convention: https://sentinels.copernicus.eu/web/sentinel/user-guides/sentinel-2-msi/naming-convention # Processing baselines: https://sentinels.copernicus.eu/web/sentinel/technical-guides/sentinel-2-msi/processing-baseline @@ -94,27 +95,33 @@ def qualifies_for_sentinel2_processing( ) return False - product_type = item.properties['s2:product_uri'].split('_')[3] + product_type = product_uri_split[1] if not product_type.endswith('L1C'): log.log(log_level, f'{item.id} disqualifies for processing because it is the wrong product type.') return False - if not product_type.startswith('MSI'): + if 'msi' not in item.properties['instruments']: log.log(log_level, f'{item.id} disqualifies for processing because it was not imaged with the right instrument') return False - if item.properties['tileId'] not in SENTINEL2_TILES_TO_PROCESS: + grid_square = item.properties['grid:code'][5:] + if grid_square not in SENTINEL2_TILES_TO_PROCESS: log.log(log_level, f'{item.id} disqualifies for processing because it is not from a tile containing land-ice') return False - if item.properties.get('cloudCover', -1) < 0: + if item.properties.get('eo:cloud_cover', -1) < 0: log.log(log_level, f'{item.id} disqualifies for processing because cloud coverage is unknown') return False - if item.properties['cloudCover'] > max_cloud_cover: + if item.properties['eo:cloud_cover'] > max_cloud_cover: log.log(log_level, f'{item.id} disqualifies for processing because it has too much cloud cover') return False + item = add_data_coverage_to_item(item) + if item.properties['s2:data_coverage'] <= SENTINEL2_MIN_DATA_COVERAGE: + log.log(log_level, f'{item.id} disqualifies for processing because it has too little data coverage.') + return False + log.log(log_level, f'{item.id} qualifies for processing') return True From 5b14acbc5c8656084d5888f639f5fb1f5b95c579 Mon Sep 17 00:00:00 2001 From: jiangzhu Date: Tue, 28 May 2024 15:15:36 -0800 Subject: [PATCH 20/58] modify code style --- its_live_monitoring/src/sentinel2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 8013f37c..56fe2509 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -78,7 +78,6 @@ def qualifies_for_sentinel2_processing( Returns: A bool that is True if the scene qualifies for Sentinel-2 processing, else False. """ - if item.collection_id != SENTINEL2_COLLECTION_NAME: log.log(log_level, f'{item.id} disqualifies for processing because it is from the wrong collection') return False From 3444447920741c0d6f41106d67f9db536c0bbb7f Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 29 May 2024 13:41:07 -0500 Subject: [PATCH 21/58] fixed tests to work with new stac api --- tests/its_live_monitoring/test_sentinel2.py | 100 ++++++++++++-------- 1 file changed, 59 insertions(+), 41 deletions(-) diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index ce67a802..d81213b4 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -2,6 +2,7 @@ from datetime import datetime from unittest.mock import MagicMock, patch +import pystac import pytest import requests import responses @@ -26,38 +27,51 @@ def test_raise_for_missing_in_google_cloud(): def test_get_sentinel2_stac_item(pystac_item_factory): - scene = 'S2B_MSIL1C_20200315T152259_N0209_R039_T13CES_20200315T181115.SAFE' + scene = 'S2B_13CES_20200315_0_L1C' properties = { - 'tileId': '13CES', - 'cloudCover': 28.188400000000005, - 'productType': 'S2MSI1C', - 'instrumentShortName': 'MSI', + 'grid:code': 'MGRS-13CES', + 'eo:cloud_cover': 28.188400000000005, + 's2:product_type': 'S2MSI1C', + 's2:data_coverage': 75, + 'instruments': ['msi'], } - collection = 'SENTINEL-2' + collection = 'sentinel-2-l1c' date_time = '2020-03-15T15:22:59.024Z' expected_item = pystac_item_factory(id=scene, datetime=date_time, properties=properties, collection=collection) - with patch('sentinel2.SENTINEL2_COLLECTION', MagicMock()): - sentinel2.SENTINEL2_COLLECTION.get_item.return_value = expected_item + class MockItemSearch: + def __init__(self, item: pystac.item.Item): + self.items = [item] if item else [] + + def pages(self): + return [self.items] + + with patch('sentinel2.SENTINEL2_CATALOG', MagicMock()): + sentinel2.SENTINEL2_CATALOG.search.return_value = MockItemSearch(expected_item) item = sentinel2.get_sentinel2_stac_item(scene) assert item.collection_id == collection assert item.properties == properties + with patch('sentinel2.SENTINEL2_CATALOG', MagicMock()): + sentinel2.SENTINEL2_CATALOG.search.return_value = MockItemSearch(None) + with pytest.raises(ValueError): + item = sentinel2.get_sentinel2_stac_item(scene) + def test_qualifies_for_processing(pystac_item_factory): properties = { - 'tileId': '19DEE', - 'cloudCover': 30, - 'productType': 'S2MSI1C', - 'instrumentShortName': 'MSI', + 'grid:code': 'MGRS-19DEE', + 'eo:cloud_cover': 30, + 's2:product_uri': 'S2B_MSIL1C_20240528T000000_N0510_R110_T22TCR_20240528T000000', + 's2:product_type': 'S2MSI1C', + 's2:data_coverage': 75, + 'instruments': ['msi'], } - collection = 'SENTINEL-2' - + collection = 'sentinel-2-l1c' good_item = pystac_item_factory( id='XXX_XXXL1C_XXXX_XXXX_XXXX', datetime=datetime.now(), properties=properties, collection=collection ) - # s2_tile_path = f'tiles/19/D/EE/2024/4/30/1' assert sentinel2.qualifies_for_sentinel2_processing(good_item) @@ -66,56 +80,58 @@ def test_qualifies_for_processing(pystac_item_factory): assert not sentinel2.qualifies_for_sentinel2_processing(item) item = deepcopy(good_item) - item.properties['productType'] = 'S2MSI2A' + item.properties['s2:product_type'] = 'S2MSI2A' assert not sentinel2.qualifies_for_sentinel2_processing(item) item = deepcopy(good_item) - item.properties['instrumentShortName'] = 'MIS' + item.properties['instruments'] = ['mis'] assert not sentinel2.qualifies_for_sentinel2_processing(item) item = deepcopy(good_item) - item.properties['tileId'] = '30BZZ' + item.properties['grid:code'] = 'MGRS-30BZZ' assert not sentinel2.qualifies_for_sentinel2_processing(item) item = deepcopy(good_item) - del item.properties['cloudCover'] + del item.properties['eo:cloud_cover'] assert not sentinel2.qualifies_for_sentinel2_processing(item) item = deepcopy(good_item) - item.properties['cloudCover'] = -1 + item.properties['eo:cloud_cover'] = -1 assert not sentinel2.qualifies_for_sentinel2_processing(item) item = deepcopy(good_item) - item.properties['cloudCover'] = 0 + item.properties['eo:cloud_cover'] = 0 assert sentinel2.qualifies_for_sentinel2_processing(item) item = deepcopy(good_item) - item.properties['cloudCover'] = 1 + item.properties['eo:cloud_cover'] = 1 assert sentinel2.qualifies_for_sentinel2_processing(item) item = deepcopy(good_item) - item.properties['cloudCover'] = sentinel2.MAX_CLOUD_COVER_PERCENT - 1 + item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT - 1 assert sentinel2.qualifies_for_sentinel2_processing(item) item = deepcopy(good_item) - item.properties['cloudCover'] = sentinel2.MAX_CLOUD_COVER_PERCENT + item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT assert sentinel2.qualifies_for_sentinel2_processing(item) item = deepcopy(good_item) - item.properties['cloudCover'] = sentinel2.MAX_CLOUD_COVER_PERCENT + 1 + item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT + 1 assert not sentinel2.qualifies_for_sentinel2_processing(item) def test_get_sentinel2_pairs_for_reference_scene(pystac_item_factory): - scene = 'S2B_MSIL1C_20240430T143829_N0510_R139_T22TCR_20240430T162923.SAFE' + scene = 'S2B_22TCR_20240528_0_L1C' properties = { - 'cloudCover': 28.1884, - 'tileId': '13CES', - 'productType': 'S2MSI1C', - 'instrumentShortName': 'MSI', + 'eo:cloud_cover': 28.1884, + 'grid:code': 'MGRS-13CES', + 's2:product_uri': 'S2B_MSIL1C_20240528T000000_N0510_R110_T22TCR_20240528T000000.SAFE', + 's2:product_type': 'S2MSI1C', + 's2:data_coverage': 75, + 'instruments': ['msi'], } - collection = 'SENTINEL-2' - date_time = '2024-04-30T14:38:29.024Z' + collection = 'sentinel-2-l1c' + date_time = '2024-05-28T00:00:00.000Z' geometry = { 'type': 'Polygon', 'coordinates': [ @@ -154,16 +170,15 @@ def test_get_sentinel2_pairs_for_reference_scene(pystac_item_factory): ref_item = pystac_item_factory( id=scene, datetime=date_time, properties=properties, collection=collection, geometry=geometry ) - sec_scenes = [ - 'S2B_MSIL1C_20240130T000000_N0510_R139_T22TCR_20240430T000000', - 'S2A_MSIL1C_20230824T000000_N0510_R139_T22TCR_20230824T000000', - 'S2B_MSIL1C_20220101T000000_N0510_R139_T22TCR_20220101T000000', + 'S2B_22TCR_20240528_0_L1C', + 'S2B_22TCR_20230528_0_L1C', + 'S2B_22TCR_20210528_0_L1C', ] sec_date_times = [ - '2024-01-30T00:00:00.000Z', - '2023-08-24T00:00:00.000Z', - '2022-01-01T00:00:00.000Z', + '2024-05-28T00:00:00.000Z', + '2023-05-28T00:00:00.000Z', + '2021-05-28T00:00:00.000Z', ] sec_items = [] for scene, date_time in zip(sec_scenes, sec_date_times): @@ -174,6 +189,9 @@ def test_get_sentinel2_pairs_for_reference_scene(pystac_item_factory): sentinel2.SENTINEL2_CATALOG.search().pages.return_value = (sec_items,) df = sentinel2.get_sentinel2_pairs_for_reference_scene(ref_item) - assert (df['tileId'] == ref_item.properties['tileId']).all() - assert (df['instrumentShortName'] == ref_item.properties['instrumentShortName']).all() + print(df.to_string()) + + assert (df['grid:code'] == ref_item.properties['grid:code']).all() + for instrument in df['instruments']: + assert instrument == ref_item.properties['instruments'] assert (df['reference_acquisition'] == ref_item.datetime).all() From 0298adc6e42aa375c26938c86a0f01c6f3a4092a Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 29 May 2024 13:41:39 -0500 Subject: [PATCH 22/58] fixed to use LANDSAT_MAX... rather than MAX... --- tests/its_live_monitoring/test_landsat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/its_live_monitoring/test_landsat.py b/tests/its_live_monitoring/test_landsat.py index 48bff7fb..7575a3dc 100644 --- a/tests/its_live_monitoring/test_landsat.py +++ b/tests/its_live_monitoring/test_landsat.py @@ -83,15 +83,15 @@ def test_qualifies_for_processing(pystac_item_factory): assert landsat.qualifies_for_landsat_processing(item) item = deepcopy(good_item) - item.properties['landsat:cloud_cover_land'] = landsat.MAX_CLOUD_COVER_PERCENT - 1 + item.properties['landsat:cloud_cover_land'] = landsat.LANDSAT_MAX_CLOUD_COVER_PERCENT - 1 assert landsat.qualifies_for_landsat_processing(item) item = deepcopy(good_item) - item.properties['landsat:cloud_cover_land'] = landsat.MAX_CLOUD_COVER_PERCENT + item.properties['landsat:cloud_cover_land'] = landsat.LANDSAT_MAX_CLOUD_COVER_PERCENT assert landsat.qualifies_for_landsat_processing(item) item = deepcopy(good_item) - item.properties['landsat:cloud_cover_land'] = landsat.MAX_CLOUD_COVER_PERCENT + 1 + item.properties['landsat:cloud_cover_land'] = landsat.LANDSAT_MAX_CLOUD_COVER_PERCENT + 1 assert not landsat.qualifies_for_landsat_processing(item) item = deepcopy(good_item) From bc8a36cab162154f8dad6036c1fa6f5f816401e1 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 29 May 2024 13:42:01 -0500 Subject: [PATCH 23/58] proper parameters for new stac api and docstrings --- its_live_monitoring/src/sentinel2.py | 33 +++++++++++++++++++++------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 8013f37c..8ff07f36 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -29,6 +29,11 @@ def raise_for_missing_in_google_cloud(scene_name: str) -> None: # noqa: D103 + """Raises a 'requests.HTTPError' if the scene is not in Google Cloud yet. + + Args: + scene_name: The scene to check for in Google Cloud. + """ root_url = 'https://storage.googleapis.com/gcp-public-data-sentinel-2/tiles' tile = f'{scene_name[39:41]}/{scene_name[41:42]}/{scene_name[42:44]}' @@ -38,6 +43,15 @@ def raise_for_missing_in_google_cloud(scene_name: str) -> None: # noqa: D103 def add_data_coverage_to_item(item: pystac.Item) -> pystac.Item: # noqa: D103 + """Adds the data coverage percentange (amount of the tile covered by valid data) to the item + as a property - 's2:data_coverage'. Raises 'requests.HTTPError' if the tile info metadata does not exist. + + Args: + item: The desired stac item to add data coverage too. + + Returns: + item: The stac item with data coverage added. + """ tile_info_path = item.assets['tileinfo_metadata'].href[5:] response = requests.get(f'https://roda.sentinel-hub.com/{tile_info_path}') @@ -48,18 +62,22 @@ def add_data_coverage_to_item(item: pystac.Item) -> pystac.Item: # noqa: D103 def get_sentinel2_stac_item(scene: str) -> pystac.Item: # noqa: D103 - results = SENTINEL2_CATALOG.search(collections=[SENTINEL2_COLLECTION_NAME], query=[f's2:product_uri={scene}.SAFE']) + """Retrieves a STAC item from the Sentinel-2 L1C Collection, throws ValueError if none found. - items = [item for page in results.pages() for item in page] + Args: + scene: The element84 scene name for the desired stac item. + Returns: + item: The desired stac item. + """ + results = SENTINEL2_CATALOG.search(collections=[SENTINEL2_COLLECTION_NAME], query=[f's2:product_uri={scene}.SAFE']) + items = [item for page in results.pages() for item in page] if (n_items := len(items)) != 1: raise ValueError( f'{n_items} for {scene} found in Sentinel-2 STAC collection: ' f'{SENTINEL2_CATALOG_API}/collections/{SENTINEL2_COLLECTION_NAME}' ) - item = items[0] - return item @@ -78,7 +96,6 @@ def qualifies_for_sentinel2_processing( Returns: A bool that is True if the scene qualifies for Sentinel-2 processing, else False. """ - if item.collection_id != SENTINEL2_COLLECTION_NAME: log.log(log_level, f'{item.id} disqualifies for processing because it is from the wrong collection') return False @@ -95,8 +112,7 @@ def qualifies_for_sentinel2_processing( ) return False - product_type = product_uri_split[1] - if not product_type.endswith('L1C'): + if not item.properties['s2:product_type'].endswith('1C'): log.log(log_level, f'{item.id} disqualifies for processing because it is the wrong product type.') return False @@ -117,7 +133,8 @@ def qualifies_for_sentinel2_processing( log.log(log_level, f'{item.id} disqualifies for processing because it has too much cloud cover') return False - item = add_data_coverage_to_item(item) + if 's2:data_coverage' not in item.properties.keys(): + item = add_data_coverage_to_item(item) if item.properties['s2:data_coverage'] <= SENTINEL2_MIN_DATA_COVERAGE: log.log(log_level, f'{item.id} disqualifies for processing because it has too little data coverage.') return False From ca993f33189de34c8f2192a5503d9de1d83126c2 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 29 May 2024 13:47:13 -0500 Subject: [PATCH 24/58] ruff --- its_live_monitoring/src/sentinel2.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 8ff07f36..3abc0605 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -43,8 +43,9 @@ def raise_for_missing_in_google_cloud(scene_name: str) -> None: # noqa: D103 def add_data_coverage_to_item(item: pystac.Item) -> pystac.Item: # noqa: D103 - """Adds the data coverage percentange (amount of the tile covered by valid data) to the item - as a property - 's2:data_coverage'. Raises 'requests.HTTPError' if the tile info metadata does not exist. + """Adds the amount of the tile covered by valid data as the property 's2:data_coverage'. + + Raises 'requests.HTTPError' if no tile info metadata can be found. Args: item: The desired stac item to add data coverage too. From 2e1d092c7d9d3e71c94beb09d1ebbb9858b1177c Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 29 May 2024 13:51:04 -0500 Subject: [PATCH 25/58] removed print statement --- its_live_monitoring/src/message.json | 28 +++++++++++++++++++++ tests/its_live_monitoring/test_sentinel2.py | 2 -- 2 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 its_live_monitoring/src/message.json diff --git a/its_live_monitoring/src/message.json b/its_live_monitoring/src/message.json new file mode 100644 index 00000000..eff0765d --- /dev/null +++ b/its_live_monitoring/src/message.json @@ -0,0 +1,28 @@ +{ + "name": "S2B_MSIL1C_20201123T163619_N0500_R083_T16TDM_20230303T190246", + "id": "64c72310-c669-4396-a9aa-577ebd2933cf", + "path": "products/2020/11/23/S2B_MSIL1C_20201123T163619_N0500_R083_T16TDM_20230303T190246", + "timestamp": "2020-11-23T16: 36: 19.024Z", + "datatakeIdentifier": "GS2B_20201123T163619_019414_N05.00", + "sciHubIngestion": "2023-05-08T14: 12: 51.599Z", + "s3Ingestion": "2024-05-24T18: 13: 05.626Z", + "tiles": [ + { + "path": "tiles/16/T/DM/2020/11/23/1", + "timestamp": "2020-11-23T16: 42: 01.120Z", + "utmZone": 16, + "latitudeBand": "T", + "gridSquare": "DM", + "datastrip": { + "id": "S2B_OPER_MSI_L1C_DS_S2RP_20230303T190246_S20201123T163959_N05.00", + "path": "products/2020/11/23/S2B_MSIL1C_20201123T163619_N0500_R083_T16TDM_20230303T190246/datastrip/0" + } + } + ], + "datastrips": [ + { + "id": "S2B_OPER_MSI_L1C_DS_S2RP_20230303T190246_S20201123T163959_N05.00", + "path": "products/2020/11/23/S2B_MSIL1C_20201123T163619_N0500_R083_T16TDM_20230303T190246/datastrip/0" + } + ] +} \ No newline at end of file diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index d81213b4..7c49390b 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -189,8 +189,6 @@ def test_get_sentinel2_pairs_for_reference_scene(pystac_item_factory): sentinel2.SENTINEL2_CATALOG.search().pages.return_value = (sec_items,) df = sentinel2.get_sentinel2_pairs_for_reference_scene(ref_item) - print(df.to_string()) - assert (df['grid:code'] == ref_item.properties['grid:code']).all() for instrument in df['instruments']: assert instrument == ref_item.properties['instruments'] From 8bfef2fa8e13aacac264829868a2e391194129e9 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 29 May 2024 14:23:08 -0500 Subject: [PATCH 26/58] adjest product uris to be correct --- tests/its_live_monitoring/test_sentinel2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index 7c49390b..6b2764c8 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -33,6 +33,7 @@ def test_get_sentinel2_stac_item(pystac_item_factory): 'eo:cloud_cover': 28.188400000000005, 's2:product_type': 'S2MSI1C', 's2:data_coverage': 75, + 's2:product_uri': 'S2B_MSIL1C_20200315T152259_N0209_R039_T13CES_20200315T181115.SAFE', 'instruments': ['msi'], } collection = 'sentinel-2-l1c' @@ -63,7 +64,7 @@ def test_qualifies_for_processing(pystac_item_factory): properties = { 'grid:code': 'MGRS-19DEE', 'eo:cloud_cover': 30, - 's2:product_uri': 'S2B_MSIL1C_20240528T000000_N0510_R110_T22TCR_20240528T000000', + 's2:product_uri': 'S2B_MSIL1C_20240528T000000_N0510_R110_T22TCR_20240528T000000.SAFE', 's2:product_type': 'S2MSI1C', 's2:data_coverage': 75, 'instruments': ['msi'], From 348e62037c28e59bfafc2acf0eb0e2d012af83f7 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Wed, 29 May 2024 14:30:25 -0500 Subject: [PATCH 27/58] delete message example --- its_live_monitoring/src/message.json | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 its_live_monitoring/src/message.json diff --git a/its_live_monitoring/src/message.json b/its_live_monitoring/src/message.json deleted file mode 100644 index eff0765d..00000000 --- a/its_live_monitoring/src/message.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "name": "S2B_MSIL1C_20201123T163619_N0500_R083_T16TDM_20230303T190246", - "id": "64c72310-c669-4396-a9aa-577ebd2933cf", - "path": "products/2020/11/23/S2B_MSIL1C_20201123T163619_N0500_R083_T16TDM_20230303T190246", - "timestamp": "2020-11-23T16: 36: 19.024Z", - "datatakeIdentifier": "GS2B_20201123T163619_019414_N05.00", - "sciHubIngestion": "2023-05-08T14: 12: 51.599Z", - "s3Ingestion": "2024-05-24T18: 13: 05.626Z", - "tiles": [ - { - "path": "tiles/16/T/DM/2020/11/23/1", - "timestamp": "2020-11-23T16: 42: 01.120Z", - "utmZone": 16, - "latitudeBand": "T", - "gridSquare": "DM", - "datastrip": { - "id": "S2B_OPER_MSI_L1C_DS_S2RP_20230303T190246_S20201123T163959_N05.00", - "path": "products/2020/11/23/S2B_MSIL1C_20201123T163619_N0500_R083_T16TDM_20230303T190246/datastrip/0" - } - } - ], - "datastrips": [ - { - "id": "S2B_OPER_MSI_L1C_DS_S2RP_20230303T190246_S20201123T163959_N05.00", - "path": "products/2020/11/23/S2B_MSIL1C_20201123T163619_N0500_R083_T16TDM_20230303T190246/datastrip/0" - } - ] -} \ No newline at end of file From 5a7a990b3afa73afc3bbed696e4700d1bf70823f Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 31 May 2024 13:25:15 -0500 Subject: [PATCH 28/58] added tests for get_data_coverage_for_item --- tests/its_live_monitoring/test_sentinel2.py | 109 +++++++++++++------- 1 file changed, 71 insertions(+), 38 deletions(-) diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index 6b2764c8..145384f2 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -32,7 +32,6 @@ def test_get_sentinel2_stac_item(pystac_item_factory): 'grid:code': 'MGRS-13CES', 'eo:cloud_cover': 28.188400000000005, 's2:product_type': 'S2MSI1C', - 's2:data_coverage': 75, 's2:product_uri': 'S2B_MSIL1C_20200315T152259_N0209_R039_T13CES_20200315T181115.SAFE', 'instruments': ['msi'], } @@ -66,7 +65,6 @@ def test_qualifies_for_processing(pystac_item_factory): 'eo:cloud_cover': 30, 's2:product_uri': 'S2B_MSIL1C_20240528T000000_N0510_R110_T22TCR_20240528T000000.SAFE', 's2:product_type': 'S2MSI1C', - 's2:data_coverage': 75, 'instruments': ['msi'], } collection = 'sentinel-2-l1c' @@ -74,51 +72,55 @@ def test_qualifies_for_processing(pystac_item_factory): id='XXX_XXXL1C_XXXX_XXXX_XXXX', datetime=datetime.now(), properties=properties, collection=collection ) - assert sentinel2.qualifies_for_sentinel2_processing(good_item) + with patch('sentinel2.get_data_coverage_for_item', (lambda x: 75.0)): + assert sentinel2.qualifies_for_sentinel2_processing(good_item) - item = deepcopy(good_item) - item.collection_id = 'foo' - assert not sentinel2.qualifies_for_sentinel2_processing(item) + item = deepcopy(good_item) + item.collection_id = 'foo' + assert not sentinel2.qualifies_for_sentinel2_processing(item) - item = deepcopy(good_item) - item.properties['s2:product_type'] = 'S2MSI2A' - assert not sentinel2.qualifies_for_sentinel2_processing(item) + item = deepcopy(good_item) + item.properties['s2:product_type'] = 'S2MSI2A' + assert not sentinel2.qualifies_for_sentinel2_processing(item) - item = deepcopy(good_item) - item.properties['instruments'] = ['mis'] - assert not sentinel2.qualifies_for_sentinel2_processing(item) + item = deepcopy(good_item) + item.properties['instruments'] = ['mis'] + assert not sentinel2.qualifies_for_sentinel2_processing(item) - item = deepcopy(good_item) - item.properties['grid:code'] = 'MGRS-30BZZ' - assert not sentinel2.qualifies_for_sentinel2_processing(item) + item = deepcopy(good_item) + item.properties['grid:code'] = 'MGRS-30BZZ' + assert not sentinel2.qualifies_for_sentinel2_processing(item) - item = deepcopy(good_item) - del item.properties['eo:cloud_cover'] - assert not sentinel2.qualifies_for_sentinel2_processing(item) + item = deepcopy(good_item) + del item.properties['eo:cloud_cover'] + assert not sentinel2.qualifies_for_sentinel2_processing(item) - item = deepcopy(good_item) - item.properties['eo:cloud_cover'] = -1 - assert not sentinel2.qualifies_for_sentinel2_processing(item) + item = deepcopy(good_item) + item.properties['eo:cloud_cover'] = -1 + assert not sentinel2.qualifies_for_sentinel2_processing(item) - item = deepcopy(good_item) - item.properties['eo:cloud_cover'] = 0 - assert sentinel2.qualifies_for_sentinel2_processing(item) + item = deepcopy(good_item) + item.properties['eo:cloud_cover'] = 0 + assert sentinel2.qualifies_for_sentinel2_processing(item) - item = deepcopy(good_item) - item.properties['eo:cloud_cover'] = 1 - assert sentinel2.qualifies_for_sentinel2_processing(item) + item = deepcopy(good_item) + item.properties['eo:cloud_cover'] = 1 + assert sentinel2.qualifies_for_sentinel2_processing(item) - item = deepcopy(good_item) - item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT - 1 - assert sentinel2.qualifies_for_sentinel2_processing(item) + item = deepcopy(good_item) + item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT - 1 + assert sentinel2.qualifies_for_sentinel2_processing(item) - item = deepcopy(good_item) - item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT - assert sentinel2.qualifies_for_sentinel2_processing(item) + item = deepcopy(good_item) + item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT + assert sentinel2.qualifies_for_sentinel2_processing(item) - item = deepcopy(good_item) - item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT + 1 - assert not sentinel2.qualifies_for_sentinel2_processing(item) + item = deepcopy(good_item) + item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT + 1 + assert not sentinel2.qualifies_for_sentinel2_processing(item) + + with patch('sentinel2.get_data_coverage_for_item', (lambda x: 50.0)): + assert not sentinel2.qualifies_for_sentinel2_processing(good_item) def test_get_sentinel2_pairs_for_reference_scene(pystac_item_factory): @@ -128,7 +130,6 @@ def test_get_sentinel2_pairs_for_reference_scene(pystac_item_factory): 'grid:code': 'MGRS-13CES', 's2:product_uri': 'S2B_MSIL1C_20240528T000000_N0510_R110_T22TCR_20240528T000000.SAFE', 's2:product_type': 'S2MSI1C', - 's2:data_coverage': 75, 'instruments': ['msi'], } collection = 'sentinel-2-l1c' @@ -188,9 +189,41 @@ def test_get_sentinel2_pairs_for_reference_scene(pystac_item_factory): with patch('sentinel2.SENTINEL2_CATALOG', MagicMock()): sentinel2.SENTINEL2_CATALOG.search().pages.return_value = (sec_items,) - df = sentinel2.get_sentinel2_pairs_for_reference_scene(ref_item) + with patch('sentinel2.get_data_coverage_for_item', (lambda x: 75.0)): + df = sentinel2.get_sentinel2_pairs_for_reference_scene(ref_item) assert (df['grid:code'] == ref_item.properties['grid:code']).all() for instrument in df['instruments']: assert instrument == ref_item.properties['instruments'] assert (df['reference_acquisition'] == ref_item.datetime).all() + + +@responses.activate +def test_get_data_coverage_for_item(pystac_item_factory): + tile_path = 'sentinel-s2-l1c/tiles/13/C/ES/2024/5/28/0/tileInfo.json' + assets = { + 'tileinfo_metadata': pystac.Asset(href=f's3://{tile_path}') + } + item = pystac_item_factory( + id='scene_name', + datetime='2024-05-28T00:00:00.000Z', + properties={}, + collection='collection', + assets=assets + ) + url = f'https://roda.sentinel-hub.com/{tile_path}' + with responses.RequestsMock() as rsps: + rsps.add( + responses.GET, + url, + json={'dataCoveragePercentage': 99.0}, + status=200 + ) + assert sentinel2.get_data_coverage_for_item(item) == 99.0 + rsps.add( + responses.GET, + url, + status=404 + ) + with pytest.raises(requests.HTTPError): + sentinel2.get_data_coverage_for_item(item) \ No newline at end of file From 9971ea823d1ab780892af0e9fef6a819ca2b5b9e Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 31 May 2024 13:25:38 -0500 Subject: [PATCH 29/58] changed add_data_coverage_to_item to get_data_coverage_for_item --- its_live_monitoring/src/sentinel2.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 3abc0605..e6ec5f61 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -28,7 +28,7 @@ log.setLevel(os.environ.get('LOGGING_LEVEL', 'INFO')) -def raise_for_missing_in_google_cloud(scene_name: str) -> None: # noqa: D103 +def raise_for_missing_in_google_cloud(scene_name: str) -> None: """Raises a 'requests.HTTPError' if the scene is not in Google Cloud yet. Args: @@ -42,8 +42,8 @@ def raise_for_missing_in_google_cloud(scene_name: str) -> None: # noqa: D103 response.raise_for_status() -def add_data_coverage_to_item(item: pystac.Item) -> pystac.Item: # noqa: D103 - """Adds the amount of the tile covered by valid data as the property 's2:data_coverage'. +def get_data_coverage_for_item(item: pystac.Item) -> pystac.Item: + """Gets the percentage of the tile covered by valid data. Raises 'requests.HTTPError' if no tile info metadata can be found. @@ -51,18 +51,18 @@ def add_data_coverage_to_item(item: pystac.Item) -> pystac.Item: # noqa: D103 item: The desired stac item to add data coverage too. Returns: - item: The stac item with data coverage added. + data_coverage: The data coverage percentage as a float. """ tile_info_path = item.assets['tileinfo_metadata'].href[5:] response = requests.get(f'https://roda.sentinel-hub.com/{tile_info_path}') response.raise_for_status() + data_coverage = response.json()['dataCoveragePercentage'] - item.properties['s2:data_coverage'] = response.json()['dataCoveragePercentage'] - return item + return data_coverage -def get_sentinel2_stac_item(scene: str) -> pystac.Item: # noqa: D103 +def get_sentinel2_stac_item(scene: str) -> pystac.Item: """Retrieves a STAC item from the Sentinel-2 L1C Collection, throws ValueError if none found. Args: @@ -134,9 +134,7 @@ def qualifies_for_sentinel2_processing( log.log(log_level, f'{item.id} disqualifies for processing because it has too much cloud cover') return False - if 's2:data_coverage' not in item.properties.keys(): - item = add_data_coverage_to_item(item) - if item.properties['s2:data_coverage'] <= SENTINEL2_MIN_DATA_COVERAGE: + if get_data_coverage_for_item(item) <= SENTINEL2_MIN_DATA_COVERAGE: log.log(log_level, f'{item.id} disqualifies for processing because it has too little data coverage.') return False From 6a76d8b973cb01cbc0edc9cd5e7bb917758fcccd Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 31 May 2024 13:25:49 -0500 Subject: [PATCH 30/58] added assets to pystac_item_factory --- tests/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 67d4300c..2822fe45 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,7 @@ def create_pystac_item( collection: str, geometry: dict | None = None, bbox: list | None = None, + assets: dict = None ) -> pystac.item.Item: if isinstance(datetime, str): datetime = date_parser(datetime) @@ -27,6 +28,7 @@ def create_pystac_item( datetime=datetime, properties=properties, collection=collection, + assets=assets ) return expected_item From 4b99077ada21739de3ba191338b317ffd7b1ffdb Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 31 May 2024 13:26:24 -0500 Subject: [PATCH 31/58] ruff --- tests/conftest.py | 4 ++-- tests/its_live_monitoring/test_sentinel2.py | 25 +++++---------------- 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 2822fe45..a40d2ba2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,7 +16,7 @@ def create_pystac_item( collection: str, geometry: dict | None = None, bbox: list | None = None, - assets: dict = None + assets: dict = None, ) -> pystac.item.Item: if isinstance(datetime, str): datetime = date_parser(datetime) @@ -28,7 +28,7 @@ def create_pystac_item( datetime=datetime, properties=properties, collection=collection, - assets=assets + assets=assets, ) return expected_item diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index 145384f2..c98e7d2d 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -201,29 +201,14 @@ def test_get_sentinel2_pairs_for_reference_scene(pystac_item_factory): @responses.activate def test_get_data_coverage_for_item(pystac_item_factory): tile_path = 'sentinel-s2-l1c/tiles/13/C/ES/2024/5/28/0/tileInfo.json' - assets = { - 'tileinfo_metadata': pystac.Asset(href=f's3://{tile_path}') - } + assets = {'tileinfo_metadata': pystac.Asset(href=f's3://{tile_path}')} item = pystac_item_factory( - id='scene_name', - datetime='2024-05-28T00:00:00.000Z', - properties={}, - collection='collection', - assets=assets + id='scene_name', datetime='2024-05-28T00:00:00.000Z', properties={}, collection='collection', assets=assets ) url = f'https://roda.sentinel-hub.com/{tile_path}' with responses.RequestsMock() as rsps: - rsps.add( - responses.GET, - url, - json={'dataCoveragePercentage': 99.0}, - status=200 - ) + rsps.add(responses.GET, url, json={'dataCoveragePercentage': 99.0}, status=200) assert sentinel2.get_data_coverage_for_item(item) == 99.0 - rsps.add( - responses.GET, - url, - status=404 - ) + rsps.add(responses.GET, url, status=404) with pytest.raises(requests.HTTPError): - sentinel2.get_data_coverage_for_item(item) \ No newline at end of file + sentinel2.get_data_coverage_for_item(item) From 0dd1e70b3ee8c1fd1c0a6b476323d1771ebd097f Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 31 May 2024 13:28:31 -0500 Subject: [PATCH 32/58] updated changelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 55b60959..eed46b51 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.5.2] ### Added -- Sentinel-2 will now be disqualified from processing if they do not have enough data coverage. +- Sentinel-2 products will now be disqualified from processing if they do not have enough data coverage. +### Changed +- Switched from Dataspace's Sentinel-2 STAC API to Element84's ## [0.5.1] From c848455e800d1a3afda567f3cc7866825268ccc0 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 31 May 2024 13:28:52 -0500 Subject: [PATCH 33/58] removed fixme --- its_live_monitoring/src/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index 58e66210..a80bdf68 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -99,7 +99,6 @@ def process_scene( """ pairs = None if scene.startswith('S2'): - # Fixme: will throw if wrong collection! reference = get_sentinel2_stac_item(scene) if qualifies_for_sentinel2_processing(reference, logging.INFO): # hyp3-its-live will pull scenes from Google Cloud; ensure the new scene is there before processing From be8912871ec1afbd4162f24e25348d5d1422daab Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 31 May 2024 14:02:10 -0500 Subject: [PATCH 34/58] ensure that get_data_coverage_for_item returns float --- its_live_monitoring/src/sentinel2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index e6ec5f61..66bcc4bb 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -42,7 +42,7 @@ def raise_for_missing_in_google_cloud(scene_name: str) -> None: response.raise_for_status() -def get_data_coverage_for_item(item: pystac.Item) -> pystac.Item: +def get_data_coverage_for_item(item: pystac.Item) -> float: """Gets the percentage of the tile covered by valid data. Raises 'requests.HTTPError' if no tile info metadata can be found. @@ -57,7 +57,7 @@ def get_data_coverage_for_item(item: pystac.Item) -> pystac.Item: response = requests.get(f'https://roda.sentinel-hub.com/{tile_info_path}') response.raise_for_status() - data_coverage = response.json()['dataCoveragePercentage'] + data_coverage = float(response.json()['dataCoveragePercentage']) return data_coverage From 557e304532ae62fb4e55ffa299ba54cefa9f76e5 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Fri, 31 May 2024 14:18:08 -0500 Subject: [PATCH 35/58] remove cast to float --- its_live_monitoring/src/sentinel2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 66bcc4bb..c4756b8a 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -57,7 +57,7 @@ def get_data_coverage_for_item(item: pystac.Item) -> float: response = requests.get(f'https://roda.sentinel-hub.com/{tile_info_path}') response.raise_for_status() - data_coverage = float(response.json()['dataCoveragePercentage']) + data_coverage = response.json()['dataCoveragePercentage'] return data_coverage From be2739450e076c587d2e976db18109f1e14865a0 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 12 Jun 2024 14:01:56 -0800 Subject: [PATCH 36/58] Fix `qualifies_for_sentinel2_processing` kwarg --- its_live_monitoring/src/main.py | 2 +- its_live_monitoring/src/sentinel2.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index a80bdf68..c2502b3f 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -100,7 +100,7 @@ def process_scene( pairs = None if scene.startswith('S2'): reference = get_sentinel2_stac_item(scene) - if qualifies_for_sentinel2_processing(reference, logging.INFO): + if qualifies_for_sentinel2_processing(reference, log_level=logging.INFO): # hyp3-its-live will pull scenes from Google Cloud; ensure the new scene is there before processing # Note: Time between attempts is controlled by they SQS VisibilityTimout _ = raise_for_missing_in_google_cloud(scene) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index c4756b8a..2ac40bba 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -172,7 +172,8 @@ def get_sentinel2_pairs_for_reference_scene( ) items = [ - item for page in results.pages() for item in page if qualifies_for_sentinel2_processing(item, max_cloud_cover) + item for page in results.pages() for item in page + if qualifies_for_sentinel2_processing(item, max_cloud_cover=max_cloud_cover) ] log.debug(f'Found {len(items)} secondary scenes for {reference.id}') From 852fea74bc5396d1b7c12bcad0f07960bc20c032 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 12 Jun 2024 14:12:52 -0800 Subject: [PATCH 37/58] apply ruff --- its_live_monitoring/src/sentinel2.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 2ac40bba..9f83124c 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -172,7 +172,9 @@ def get_sentinel2_pairs_for_reference_scene( ) items = [ - item for page in results.pages() for item in page + item + for page in results.pages() + for item in page if qualifies_for_sentinel2_processing(item, max_cloud_cover=max_cloud_cover) ] From 5a43d8656d7a7f5562272e4e4fd446895084bff5 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 12 Jun 2024 14:25:54 -0800 Subject: [PATCH 38/58] skip data coverage if http error --- its_live_monitoring/src/sentinel2.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 9f83124c..ac0e606b 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -56,7 +56,12 @@ def get_data_coverage_for_item(item: pystac.Item) -> float: tile_info_path = item.assets['tileinfo_metadata'].href[5:] response = requests.get(f'https://roda.sentinel-hub.com/{tile_info_path}') - response.raise_for_status() + try: + response.raise_for_status() + except requests.HTTPError as e: + # TODO: what to do in this case? + print(e) + return 0 data_coverage = response.json()['dataCoveragePercentage'] return data_coverage From 17a3438d073f079bd69f047f08c366f8676de516 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 12 Jun 2024 14:50:08 -0800 Subject: [PATCH 39/58] check if item relative orbit matches reference relative orbit --- its_live_monitoring/src/sentinel2.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index ac0e606b..de2355b8 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -89,6 +89,7 @@ def get_sentinel2_stac_item(scene: str) -> pystac.Item: def qualifies_for_sentinel2_processing( item: pystac.Item, + reference: pystac.Item = None, max_cloud_cover: int = SENTINEL2_MAX_CLOUD_COVER_PERCENT, log_level: int = logging.DEBUG, ) -> bool: @@ -96,12 +97,24 @@ def qualifies_for_sentinel2_processing( Args: item: STAC item of the desired Sentinel-2 scene. + reference: STAC item of the Sentinel-2 reference scene. max_cloud_cover: The maximum allowable percentage of cloud cover. log_level: The logging level Returns: A bool that is True if the scene qualifies for Sentinel-2 processing, else False. """ + if reference is not None: + # TODO: is relative orbit always at slice [33:37]? + reference_relative_orbit = reference.properties['s2:product_uri'][33:37] + item_relative_orbit = item.properties['s2:product_uri'][33:37] + if item_relative_orbit != reference_relative_orbit: + log.log( + log_level, + f'{item.id} disqualifies for processing because its relative orbit ({item_relative_orbit}) ' + f'does not match that of the reference scene ({reference_relative_orbit})' + ) + if item.collection_id != SENTINEL2_COLLECTION_NAME: log.log(log_level, f'{item.id} disqualifies for processing because it is from the wrong collection') return False @@ -180,7 +193,7 @@ def get_sentinel2_pairs_for_reference_scene( item for page in results.pages() for item in page - if qualifies_for_sentinel2_processing(item, max_cloud_cover=max_cloud_cover) + if qualifies_for_sentinel2_processing(item, reference=reference, max_cloud_cover=max_cloud_cover) ] log.debug(f'Found {len(items)} secondary scenes for {reference.id}') From 4feee23a4d4180577c067cf4df984523a18b112a Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 12 Jun 2024 14:55:55 -0800 Subject: [PATCH 40/58] add missing return --- its_live_monitoring/src/sentinel2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index de2355b8..dd806db8 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -114,6 +114,7 @@ def qualifies_for_sentinel2_processing( f'{item.id} disqualifies for processing because its relative orbit ({item_relative_orbit}) ' f'does not match that of the reference scene ({reference_relative_orbit})' ) + return False if item.collection_id != SENTINEL2_COLLECTION_NAME: log.log(log_level, f'{item.id} disqualifies for processing because it is from the wrong collection') From 3f4f9a9a351a9283418f16a365d7942eb4043352 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Wed, 12 Jun 2024 15:03:24 -0800 Subject: [PATCH 41/58] ruff --- its_live_monitoring/src/sentinel2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index dd806db8..1e3415c2 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -112,7 +112,7 @@ def qualifies_for_sentinel2_processing( log.log( log_level, f'{item.id} disqualifies for processing because its relative orbit ({item_relative_orbit}) ' - f'does not match that of the reference scene ({reference_relative_orbit})' + f'does not match that of the reference scene ({reference_relative_orbit})', ) return False From 4c1a44789b412220a413af75509d3baf7078f71e Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 13 Jun 2024 12:02:21 -0500 Subject: [PATCH 42/58] persistent session and rel orb split --- its_live_monitoring/src/sentinel2.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 1e3415c2..9503cd3f 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -24,6 +24,8 @@ SENTINEL2_MAX_CLOUD_COVER_PERCENT = 70 SENTINEL2_MIN_DATA_COVERAGE = 70 +SESSION = requests.Session() + log = logging.getLogger('its_live_monitoring') log.setLevel(os.environ.get('LOGGING_LEVEL', 'INFO')) @@ -42,11 +44,9 @@ def raise_for_missing_in_google_cloud(scene_name: str) -> None: response.raise_for_status() -def get_data_coverage_for_item(item: pystac.Item) -> float: +def get_data_coverage_for_item(item: pystac.Item, log_level: int = logging.DEBUG) -> float: """Gets the percentage of the tile covered by valid data. - Raises 'requests.HTTPError' if no tile info metadata can be found. - Args: item: The desired stac item to add data coverage too. @@ -55,12 +55,11 @@ def get_data_coverage_for_item(item: pystac.Item) -> float: """ tile_info_path = item.assets['tileinfo_metadata'].href[5:] - response = requests.get(f'https://roda.sentinel-hub.com/{tile_info_path}') + response = SESSION.get(f'https://roda.sentinel-hub.com/{tile_info_path}') try: response.raise_for_status() except requests.HTTPError as e: - # TODO: what to do in this case? - print(e) + log.log(log_level, f'Data coverage could not be found for {item.id} due to {e}') return 0 data_coverage = response.json()['dataCoveragePercentage'] @@ -105,14 +104,13 @@ def qualifies_for_sentinel2_processing( A bool that is True if the scene qualifies for Sentinel-2 processing, else False. """ if reference is not None: - # TODO: is relative orbit always at slice [33:37]? - reference_relative_orbit = reference.properties['s2:product_uri'][33:37] - item_relative_orbit = item.properties['s2:product_uri'][33:37] + reference_relative_orbit = reference.properties['s2:product_uri'].split('_')[4] + item_relative_orbit = item.properties['s2:product_uri'].split('_')[4] if item_relative_orbit != reference_relative_orbit: log.log( log_level, f'{item.id} disqualifies for processing because its relative orbit ({item_relative_orbit}) ' - f'does not match that of the reference scene ({reference_relative_orbit})', + f'does not match that of the reference scene ({reference_relative_orbit}).', ) return False From 8560891edfc5d099b3c3579432ae2d9f2490999b Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 13 Jun 2024 09:31:59 -0800 Subject: [PATCH 43/58] enforce kwargs for landsat and sentinel --- its_live_monitoring/src/landsat.py | 8 ++++++-- its_live_monitoring/src/main.py | 2 +- its_live_monitoring/src/sentinel2.py | 2 ++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/its_live_monitoring/src/landsat.py b/its_live_monitoring/src/landsat.py index 7ce804ff..07a5e37f 100644 --- a/its_live_monitoring/src/landsat.py +++ b/its_live_monitoring/src/landsat.py @@ -36,7 +36,7 @@ def get_landsat_stac_item(scene: str) -> pystac.Item: # noqa: D103 def qualifies_for_landsat_processing( - item: pystac.item.Item, max_cloud_cover: int = LANDSAT_MAX_CLOUD_COVER_PERCENT, log_level: int = logging.DEBUG + item: pystac.item.Item, *, max_cloud_cover: int = LANDSAT_MAX_CLOUD_COVER_PERCENT, log_level: int = logging.DEBUG ) -> bool: """Determines whether a scene is a valid Landsat product for processing. @@ -78,6 +78,7 @@ def qualifies_for_landsat_processing( def get_landsat_pairs_for_reference_scene( reference: pystac.item.Item, + *, max_pair_separation: timedelta = timedelta(days=LANDSAT_MAX_PAIR_SEPARATION_IN_DAYS), max_cloud_cover: int = LANDSAT_MAX_CLOUD_COVER_PERCENT, ) -> gpd.GeoDataFrame: @@ -103,7 +104,10 @@ def get_landsat_pairs_for_reference_scene( ) items = [ - item for page in results.pages() for item in page if qualifies_for_landsat_processing(item, max_cloud_cover) + item + for page in results.pages() + for item in page + if qualifies_for_landsat_processing(item, max_cloud_cover=max_cloud_cover) ] log.debug(f'Found {len(items)} secondary scenes for {reference.id}') diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index c2502b3f..e04f3412 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -108,7 +108,7 @@ def process_scene( else: reference = get_landsat_stac_item(scene) - if qualifies_for_landsat_processing(reference, logging.INFO): + if qualifies_for_landsat_processing(reference, log_level=logging.INFO): pairs = get_landsat_pairs_for_reference_scene(reference) if pairs is None: diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 9f83124c..7c31e433 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -84,6 +84,7 @@ def get_sentinel2_stac_item(scene: str) -> pystac.Item: def qualifies_for_sentinel2_processing( item: pystac.Item, + *, max_cloud_cover: int = SENTINEL2_MAX_CLOUD_COVER_PERCENT, log_level: int = logging.DEBUG, ) -> bool: @@ -144,6 +145,7 @@ def qualifies_for_sentinel2_processing( def get_sentinel2_pairs_for_reference_scene( reference: pystac.Item, + *, max_pair_separation: timedelta = timedelta(days=SENTINEL2_MAX_PAIR_SEPARATION_IN_DAYS), min_pair_separation: timedelta = timedelta(days=SENTINEL2_MIN_PAIR_SEPARATION_IN_DAYS), max_cloud_cover: int = SENTINEL2_MAX_CLOUD_COVER_PERCENT, From 4f8bc63f0ef21c5e130771e387baa1359dcc1ca7 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 13 Jun 2024 09:33:30 -0800 Subject: [PATCH 44/58] use function parameter not global constant --- its_live_monitoring/src/sentinel2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 7c31e433..95cb1332 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -168,7 +168,7 @@ def get_sentinel2_pairs_for_reference_scene( collections=[reference.collection_id], query=[ f'grid:code={reference.properties["grid:code"]}', - f'eo:cloud_cover<={SENTINEL2_MAX_CLOUD_COVER_PERCENT}', + f'eo:cloud_cover<={max_cloud_cover}', ], datetime=[reference.datetime - max_pair_separation, reference.datetime - min_pair_separation], ) From 4415aa5f244b9db6668243d267457ba50dcd13b8 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 13 Jun 2024 12:40:21 -0500 Subject: [PATCH 45/58] remove suffix rather than rstrip --- its_live_monitoring/src/sentinel2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 9503cd3f..78f7a4e9 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -202,9 +202,9 @@ def get_sentinel2_pairs_for_reference_scene( features = [] for item in items: feature = item.to_dict() - feature['properties']['reference'] = reference.properties['s2:product_uri'].rstrip('.SAFE') + feature['properties']['reference'] = reference.properties['s2:product_uri'].removesuffix('.SAFE') feature['properties']['reference_acquisition'] = reference.datetime - feature['properties']['secondary'] = item.properties['s2:product_uri'].rstrip('.SAFE') + feature['properties']['secondary'] = item.properties['s2:product_uri'].removesuffix('.SAFE') features.append(feature) df = gpd.GeoDataFrame.from_features(features) From 8921e183b00a1c437c256671871866cd3bff1df9 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 13 Jun 2024 12:55:06 -0500 Subject: [PATCH 46/58] updated tests for rel orbit qualification --- tests/its_live_monitoring/test_sentinel2.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index c98e7d2d..659bd710 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -119,6 +119,13 @@ def test_qualifies_for_processing(pystac_item_factory): item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT + 1 assert not sentinel2.qualifies_for_sentinel2_processing(item) + item = deepcopy(good_item) + reference = deepcopy(good_item) + assert sentinel2.qualifies_for_sentinel2_processing(item, reference=reference) + + item.properties['s2:product_uri'] = 'S2B_MSIL1C_20240528T000000_N0510_R100_T22TCR_20240528T000000.SAFE' + assert not sentinel2.qualifies_for_sentinel2_processing(item, reference=reference) + with patch('sentinel2.get_data_coverage_for_item', (lambda x: 50.0)): assert not sentinel2.qualifies_for_sentinel2_processing(good_item) @@ -210,5 +217,4 @@ def test_get_data_coverage_for_item(pystac_item_factory): rsps.add(responses.GET, url, json={'dataCoveragePercentage': 99.0}, status=200) assert sentinel2.get_data_coverage_for_item(item) == 99.0 rsps.add(responses.GET, url, status=404) - with pytest.raises(requests.HTTPError): - sentinel2.get_data_coverage_for_item(item) + assert sentinel2.get_data_coverage_for_item(item) == 0 From 8d9a85791fc9d4930538178a1f464c76c79e788a Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 13 Jun 2024 12:55:22 -0500 Subject: [PATCH 47/58] added comment --- its_live_monitoring/src/sentinel2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 78f7a4e9..4949b116 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -59,6 +59,7 @@ def get_data_coverage_for_item(item: pystac.Item, log_level: int = logging.DEBUG try: response.raise_for_status() except requests.HTTPError as e: + # Exiting when a secondary doesn't have tileinfo would be bad, so we return 0 to disqualify it. log.log(log_level, f'Data coverage could not be found for {item.id} due to {e}') return 0 data_coverage = response.json()['dataCoveragePercentage'] From 84f13811112ebb8abaf89c5e80835298056bfd22 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 13 Jun 2024 12:59:57 -0500 Subject: [PATCH 48/58] added arg to docstring for ruff --- its_live_monitoring/src/sentinel2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 4949b116..91ccddb8 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -49,6 +49,7 @@ def get_data_coverage_for_item(item: pystac.Item, log_level: int = logging.DEBUG Args: item: The desired stac item to add data coverage too. + log_level: The logging level. Returns: data_coverage: The data coverage percentage as a float. From 91f8b215555fdf466c6e477fd686fa750c9c5662 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 13 Jun 2024 10:10:32 -0800 Subject: [PATCH 49/58] changelog --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eed46b51..5651b57e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,10 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.5.2] -### Added -- Sentinel-2 products will now be disqualified from processing if they do not have enough data coverage. ### Changed -- Switched from Dataspace's Sentinel-2 STAC API to Element84's +- Sentinel-2 products are now disqualified from processing if they do not have enough data coverage. +- Sentinel-2 products are now disqualified from processing if the secondary scene's relative orbit does not match that of the reference scene. +- Switched from Dataspace's Sentinel-2 STAC API to Element84's. ## [0.5.1] From 8b218c71a3c014ce0e64aa2384a409bc4c7116e5 Mon Sep 17 00:00:00 2001 From: Jake Herrmann Date: Thu, 13 Jun 2024 10:22:26 -0800 Subject: [PATCH 50/58] Update its_live_monitoring/src/sentinel2.py Co-authored-by: Andrew Player --- its_live_monitoring/src/sentinel2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index bb05a30c..ce719ee7 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -99,7 +99,7 @@ def qualifies_for_sentinel2_processing( Args: item: STAC item of the desired Sentinel-2 scene. - reference: STAC item of the Sentinel-2 reference scene. + reference: STAC item of the Sentinel-2 reference scene for optional relative orbit comparison. max_cloud_cover: The maximum allowable percentage of cloud cover. log_level: The logging level From 9c25ad59e4c497835c515e3788e2dbf1e17a9532 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 13 Jun 2024 13:45:55 -0500 Subject: [PATCH 51/58] replace s3 with roda to fix 403 issue --- its_live_monitoring/src/sentinel2.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index ce719ee7..7d5fee20 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -44,7 +44,7 @@ def raise_for_missing_in_google_cloud(scene_name: str) -> None: response.raise_for_status() -def get_data_coverage_for_item(item: pystac.Item, log_level: int = logging.DEBUG) -> float: +def get_data_coverage_for_item(item: pystac.Item, log_level: int = logging.INFO) -> float: """Gets the percentage of the tile covered by valid data. Args: @@ -54,15 +54,10 @@ def get_data_coverage_for_item(item: pystac.Item, log_level: int = logging.DEBUG Returns: data_coverage: The data coverage percentage as a float. """ - tile_info_path = item.assets['tileinfo_metadata'].href[5:] - - response = SESSION.get(f'https://roda.sentinel-hub.com/{tile_info_path}') - try: - response.raise_for_status() - except requests.HTTPError as e: - # Exiting when a secondary doesn't have tileinfo would be bad, so we return 0 to disqualify it. - log.log(log_level, f'Data coverage could not be found for {item.id} due to {e}') - return 0 + tile_info_path = item.assets['tileinfo_metadata'].href.replace('s3://', 'https://roda.sentinel-hub.com/') + + response = SESSION.get(tile_info_path) + response.raise_for_status() data_coverage = response.json()['dataCoveragePercentage'] return data_coverage From 44c17508b269ea6c1fecfc040abb4acfee90d8f7 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 13 Jun 2024 13:48:50 -0500 Subject: [PATCH 52/58] add raises to data_coverage test --- tests/its_live_monitoring/test_sentinel2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index 659bd710..17cbb177 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -217,4 +217,5 @@ def test_get_data_coverage_for_item(pystac_item_factory): rsps.add(responses.GET, url, json={'dataCoveragePercentage': 99.0}, status=200) assert sentinel2.get_data_coverage_for_item(item) == 99.0 rsps.add(responses.GET, url, status=404) - assert sentinel2.get_data_coverage_for_item(item) == 0 + with pytest.raises(requests.HTTPError): + sentinel2.get_data_coverage_for_item(item) From be5204acc0cc9007abfef950f278ada51680d31c Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 13 Jun 2024 14:01:36 -0500 Subject: [PATCH 53/58] add roda vs s3 data coverage check test --- tests/its_live_monitoring/test_sentinel2.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index 17cbb177..628e4aca 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -209,13 +209,17 @@ def test_get_sentinel2_pairs_for_reference_scene(pystac_item_factory): def test_get_data_coverage_for_item(pystac_item_factory): tile_path = 'sentinel-s2-l1c/tiles/13/C/ES/2024/5/28/0/tileInfo.json' assets = {'tileinfo_metadata': pystac.Asset(href=f's3://{tile_path}')} - item = pystac_item_factory( + item_s3 = pystac_item_factory( id='scene_name', datetime='2024-05-28T00:00:00.000Z', properties={}, collection='collection', assets=assets ) + item_roda = deepcopy(item_s3) + item_roda.assets = {'tileinfo_metadata': pystac.Asset(href=f'https://roda.sentinel-hub.com/{tile_path}')} url = f'https://roda.sentinel-hub.com/{tile_path}' with responses.RequestsMock() as rsps: rsps.add(responses.GET, url, json={'dataCoveragePercentage': 99.0}, status=200) - assert sentinel2.get_data_coverage_for_item(item) == 99.0 + assert sentinel2.get_data_coverage_for_item(item_s3) == 99.0 + assert sentinel2.get_data_coverage_for_item(item_roda) == 99.0 rsps.add(responses.GET, url, status=404) with pytest.raises(requests.HTTPError): - sentinel2.get_data_coverage_for_item(item) + sentinel2.get_data_coverage_for_item(item_s3) + sentinel2.get_data_coverage_for_item(item_roda) \ No newline at end of file From 79842316f80c37c32e954c50e13c54d0f5e29fb5 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 13 Jun 2024 14:03:36 -0500 Subject: [PATCH 54/58] separate pytests.raises --- tests/its_live_monitoring/test_sentinel2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index 628e4aca..dc47ea94 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -222,4 +222,5 @@ def test_get_data_coverage_for_item(pystac_item_factory): rsps.add(responses.GET, url, status=404) with pytest.raises(requests.HTTPError): sentinel2.get_data_coverage_for_item(item_s3) + with pytest.raises(requests.HTTPError): sentinel2.get_data_coverage_for_item(item_roda) \ No newline at end of file From 203cf9b827a8336ca8ee730a6c2a8d2d5a5eaacf Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 13 Jun 2024 14:03:57 -0500 Subject: [PATCH 55/58] EOF newline --- tests/its_live_monitoring/test_sentinel2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index dc47ea94..40632d77 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -223,4 +223,4 @@ def test_get_data_coverage_for_item(pystac_item_factory): with pytest.raises(requests.HTTPError): sentinel2.get_data_coverage_for_item(item_s3) with pytest.raises(requests.HTTPError): - sentinel2.get_data_coverage_for_item(item_roda) \ No newline at end of file + sentinel2.get_data_coverage_for_item(item_roda) From b5bc260cb128073a9e012b8be35e65daef415f08 Mon Sep 17 00:00:00 2001 From: Andrew Player Date: Thu, 13 Jun 2024 14:06:47 -0500 Subject: [PATCH 56/58] remove unused logging arg --- its_live_monitoring/src/sentinel2.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 7d5fee20..044e5886 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -44,12 +44,11 @@ def raise_for_missing_in_google_cloud(scene_name: str) -> None: response.raise_for_status() -def get_data_coverage_for_item(item: pystac.Item, log_level: int = logging.INFO) -> float: +def get_data_coverage_for_item(item: pystac.Item) -> float: """Gets the percentage of the tile covered by valid data. Args: item: The desired stac item to add data coverage too. - log_level: The logging level. Returns: data_coverage: The data coverage percentage as a float. From 13ef2d5356b1e82205354a79b4921fd35038ac83 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 13 Jun 2024 16:54:56 -0800 Subject: [PATCH 57/58] rename integration files for S2 changes and log ESA ID instead of E84 ID --- its_live_monitoring/src/sentinel2.py | 34 ++++++++++++------- ...> sentinel2-not-enough-data-coverage.json} | 0 ...nel2-valid-2.json => sentinel2-valid.json} | 0 3 files changed, 21 insertions(+), 13 deletions(-) rename tests/integration/{sentinel2-valid-1.json => sentinel2-not-enough-data-coverage.json} (100%) rename tests/integration/{sentinel2-valid-2.json => sentinel2-valid.json} (100%) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index 044e5886..a6fa9ef9 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -100,23 +100,25 @@ def qualifies_for_sentinel2_processing( Returns: A bool that is True if the scene qualifies for Sentinel-2 processing, else False. """ + item_scene_id = item.properties['s2:product_uri'].removesuffix('.SAFE') + if reference is not None: reference_relative_orbit = reference.properties['s2:product_uri'].split('_')[4] - item_relative_orbit = item.properties['s2:product_uri'].split('_')[4] + item_relative_orbit = item_scene_id.split('_')[4] if item_relative_orbit != reference_relative_orbit: log.log( log_level, - f'{item.id} disqualifies for processing because its relative orbit ({item_relative_orbit}) ' + f'{item_scene_id} disqualifies for processing because its relative orbit ({item_relative_orbit}) ' f'does not match that of the reference scene ({reference_relative_orbit}).', ) return False if item.collection_id != SENTINEL2_COLLECTION_NAME: - log.log(log_level, f'{item.id} disqualifies for processing because it is from the wrong collection') + log.log(log_level, f'{item_scene_id} disqualifies for processing because it is from the wrong collection') return False - product_uri_split = item.properties['s2:product_uri'].split('_') - if product_uri_split[3] == 'N0500': + processing_baseline = item_scene_id.split('_')[3] + if processing_baseline == 'N0500': # Reprocessing activity: https://sentinels.copernicus.eu/web/sentinel/technical-guides/sentinel-2-msi/copernicus-sentinel-2-collection-1-availability-status # Naming convention: https://sentinels.copernicus.eu/web/sentinel/user-guides/sentinel-2-msi/naming-convention # Processing baselines: https://sentinels.copernicus.eu/web/sentinel/technical-guides/sentinel-2-msi/processing-baseline @@ -128,31 +130,37 @@ def qualifies_for_sentinel2_processing( return False if not item.properties['s2:product_type'].endswith('1C'): - log.log(log_level, f'{item.id} disqualifies for processing because it is the wrong product type.') + log.log(log_level, f'{item_scene_id} disqualifies for processing because it is the wrong product type.') return False if 'msi' not in item.properties['instruments']: - log.log(log_level, f'{item.id} disqualifies for processing because it was not imaged with the right instrument') + log.log( + log_level, + f'{item_scene_id} disqualifies for processing because it was not imaged with the right instrument', + ) return False grid_square = item.properties['grid:code'][5:] if grid_square not in SENTINEL2_TILES_TO_PROCESS: - log.log(log_level, f'{item.id} disqualifies for processing because it is not from a tile containing land-ice') + log.log( + log_level, + f'{item_scene_id} disqualifies for processing because it is not from a tile containing land-ice', + ) return False if item.properties.get('eo:cloud_cover', -1) < 0: - log.log(log_level, f'{item.id} disqualifies for processing because cloud coverage is unknown') + log.log(log_level, f'{item_scene_id} disqualifies for processing because cloud coverage is unknown') return False if item.properties['eo:cloud_cover'] > max_cloud_cover: - log.log(log_level, f'{item.id} disqualifies for processing because it has too much cloud cover') + log.log(log_level, f'{item_scene_id} disqualifies for processing because it has too much cloud cover') return False if get_data_coverage_for_item(item) <= SENTINEL2_MIN_DATA_COVERAGE: - log.log(log_level, f'{item.id} disqualifies for processing because it has too little data coverage.') + log.log(log_level, f'{item_scene_id} disqualifies for processing because it has too little data coverage.') return False - log.log(log_level, f'{item.id} qualifies for processing') + log.log(log_level, f'{item_scene_id} qualifies for processing') return True @@ -193,7 +201,7 @@ def get_sentinel2_pairs_for_reference_scene( if qualifies_for_sentinel2_processing(item, reference=reference, max_cloud_cover=max_cloud_cover) ] - log.debug(f'Found {len(items)} secondary scenes for {reference.id}') + log.debug(f'Found {len(items)} secondary scenes for {reference.properties['s2:product_uri']}') if len(items) == 0: return gpd.GeoDataFrame({'reference': [], 'secondary': []}) diff --git a/tests/integration/sentinel2-valid-1.json b/tests/integration/sentinel2-not-enough-data-coverage.json similarity index 100% rename from tests/integration/sentinel2-valid-1.json rename to tests/integration/sentinel2-not-enough-data-coverage.json diff --git a/tests/integration/sentinel2-valid-2.json b/tests/integration/sentinel2-valid.json similarity index 100% rename from tests/integration/sentinel2-valid-2.json rename to tests/integration/sentinel2-valid.json From f7da5cadea77c4af01e8f1965689a0aa742503a8 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 13 Jun 2024 20:21:40 -0800 Subject: [PATCH 58/58] make s2 qualification function take relative orbit instead of entire reference item --- its_live_monitoring/src/sentinel2.py | 19 ++++++++++--------- tests/its_live_monitoring/test_sentinel2.py | 6 ++---- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/its_live_monitoring/src/sentinel2.py b/its_live_monitoring/src/sentinel2.py index a6fa9ef9..0404fba4 100644 --- a/its_live_monitoring/src/sentinel2.py +++ b/its_live_monitoring/src/sentinel2.py @@ -85,7 +85,7 @@ def get_sentinel2_stac_item(scene: str) -> pystac.Item: def qualifies_for_sentinel2_processing( item: pystac.Item, *, - reference: pystac.Item = None, + relative_orbit: str = None, max_cloud_cover: int = SENTINEL2_MAX_CLOUD_COVER_PERCENT, log_level: int = logging.DEBUG, ) -> bool: @@ -93,7 +93,7 @@ def qualifies_for_sentinel2_processing( Args: item: STAC item of the desired Sentinel-2 scene. - reference: STAC item of the Sentinel-2 reference scene for optional relative orbit comparison. + relative_orbit: scene must be from this relative orbit if provided. max_cloud_cover: The maximum allowable percentage of cloud cover. log_level: The logging level @@ -102,14 +102,13 @@ def qualifies_for_sentinel2_processing( """ item_scene_id = item.properties['s2:product_uri'].removesuffix('.SAFE') - if reference is not None: - reference_relative_orbit = reference.properties['s2:product_uri'].split('_')[4] + if relative_orbit is not None: item_relative_orbit = item_scene_id.split('_')[4] - if item_relative_orbit != reference_relative_orbit: + if item_relative_orbit != relative_orbit: log.log( log_level, f'{item_scene_id} disqualifies for processing because its relative orbit ({item_relative_orbit}) ' - f'does not match that of the reference scene ({reference_relative_orbit}).', + f'does not match the required relative orbit ({relative_orbit}).', ) return False @@ -194,21 +193,23 @@ def get_sentinel2_pairs_for_reference_scene( datetime=[reference.datetime - max_pair_separation, reference.datetime - min_pair_separation], ) + reference_scene_id = reference.properties['s2:product_uri'].removesuffix('.SAFE') + reference_orbit = reference_scene_id.split('_')[4] items = [ item for page in results.pages() for item in page - if qualifies_for_sentinel2_processing(item, reference=reference, max_cloud_cover=max_cloud_cover) + if qualifies_for_sentinel2_processing(item, relative_orbit=reference_orbit, max_cloud_cover=max_cloud_cover) ] - log.debug(f'Found {len(items)} secondary scenes for {reference.properties['s2:product_uri']}') + log.debug(f'Found {len(items)} secondary scenes for {reference_scene_id}') if len(items) == 0: return gpd.GeoDataFrame({'reference': [], 'secondary': []}) features = [] for item in items: feature = item.to_dict() - feature['properties']['reference'] = reference.properties['s2:product_uri'].removesuffix('.SAFE') + feature['properties']['reference'] = reference_scene_id feature['properties']['reference_acquisition'] = reference.datetime feature['properties']['secondary'] = item.properties['s2:product_uri'].removesuffix('.SAFE') features.append(feature) diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index 40632d77..9ba503b5 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -120,11 +120,9 @@ def test_qualifies_for_processing(pystac_item_factory): assert not sentinel2.qualifies_for_sentinel2_processing(item) item = deepcopy(good_item) - reference = deepcopy(good_item) - assert sentinel2.qualifies_for_sentinel2_processing(item, reference=reference) + assert sentinel2.qualifies_for_sentinel2_processing(item, relative_orbit='R110') - item.properties['s2:product_uri'] = 'S2B_MSIL1C_20240528T000000_N0510_R100_T22TCR_20240528T000000.SAFE' - assert not sentinel2.qualifies_for_sentinel2_processing(item, reference=reference) + assert not sentinel2.qualifies_for_sentinel2_processing(item, relative_orbit='R100') with patch('sentinel2.get_data_coverage_for_item', (lambda x: 50.0)): assert not sentinel2.qualifies_for_sentinel2_processing(good_item)