From d46a65c01ad475e2df1fade816b969ebf8a3b4a9 Mon Sep 17 00:00:00 2001 From: Ivica Matic Date: Mon, 13 Nov 2023 12:40:15 +0000 Subject: [PATCH 01/23] cicd: fixed docker image name --- .github/workflows/docker-build-stage.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docker-build-stage.yaml b/.github/workflows/docker-build-stage.yaml index 2c2d404..6021009 100644 --- a/.github/workflows/docker-build-stage.yaml +++ b/.github/workflows/docker-build-stage.yaml @@ -9,8 +9,7 @@ on: jobs: docker-build-push-dev: - # runs-on: ubuntu-latest - runs-on: spatialdays-self-hosted-runner-1 + runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v3 @@ -32,7 +31,7 @@ jobs: with: context: . file: Dockerfile - cache-from: type=registry,ref=${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accesability-scanner:cache - cache-to: type=registry,ref=${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accesability-scanner:cache,mode=max + cache-from: type=registry,ref=${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accessability-scanner:cache + cache-to: type=registry,ref=${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accessability-scanner:cache,mode=max push: true - tags: ${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accesability-scanner:${{ env.SHORT_SHA }} \ No newline at end of file + tags: ${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accessability-scanner:${{ env.SHORT_SHA }} \ No newline at end of file From 62e7bcdaba11044e11d7c798092e49242656dd5e Mon Sep 17 00:00:00 2001 From: Ivica Matic Date: Mon, 13 Nov 2023 13:01:41 +0000 Subject: [PATCH 02/23] feat: added log --- database.py | 10 ++++++---- docker-compose.yaml | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/database.py b/database.py index 6e85124..a7c2eca 100644 --- a/database.py +++ b/database.py @@ -78,7 +78,7 @@ def store_collection_in_database( .first() ) if collection_db_entry 
is None: - logging.debug(f"Adding {_catalog_url} {_collection_id} to the database") + logger.debug(f"Adding {_catalog_url} {_collection_id} to the database") collection_db_entry = Collection() collection_db_entry.catalog_url = _catalog_url collection_db_entry.collection_id = _collection_id @@ -91,19 +91,21 @@ def store_collection_in_database( collection_db_entry.mpc_token_obtaining_url = _mpc_token_obtaining_url session.add(collection_db_entry) session.commit() - logging.info(f"Added {_catalog_url} {_collection_id} to the database") + logger.info(f"Added {_catalog_url} {_collection_id} to the database") else: - logging.debug(f"Updating {_catalog_url} {_collection_id} in the database") + logger.debug(f"Updating {_catalog_url} {_collection_id} in the database") collection_db_entry.http_downloadable = _http_downloadable collection_db_entry.requires_token = _requires_token collection_db_entry.is_from_mpc = _is_from_mpc collection_db_entry.mpc_token_obtaining_url = _mpc_token_obtaining_url session.commit() - logging.info(f"Updated {_catalog_url} {_collection_id} in the database") + logger.info(f"Updated {_catalog_url} {_collection_id} in the database") if __name__ == '__main__': plugin_enable_statement = sa.text("CREATE EXTENSION IF NOT EXISTS postgis;") with engine.connect() as conn: conn.execute(plugin_enable_statement) + logger.info("Enabled postgis extension") + print("Enabled postgis extension") base.metadata.create_all(engine) diff --git a/docker-compose.yaml b/docker-compose.yaml index 69ac3db..f990397 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -47,7 +47,7 @@ services: APP_HOST: "0.0.0.0" APP_PORT: "5000" APP_DEBUG: "True" - command: ["python", "database.py"] + command: ["/bin/sh", "-c", "sleep 5 && python database.py"] db: image: postgis/postgis:16-master From 4c7cd49b759c30328b79fc53829d5587e85dc644 Mon Sep 17 00:00:00 2001 From: Ivica Matic Date: Mon, 13 Nov 2023 14:09:25 +0000 Subject: [PATCH 03/23] cicd: fixed command --- database.py | 5 
++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/database.py b/database.py index a7c2eca..06fffa9 100644 --- a/database.py +++ b/database.py @@ -104,8 +104,11 @@ def store_collection_in_database( if __name__ == '__main__': plugin_enable_statement = sa.text("CREATE EXTENSION IF NOT EXISTS postgis;") - with engine.connect() as conn: + with engine.begin() as conn: conn.execute(plugin_enable_statement) logger.info("Enabled postgis extension") print("Enabled postgis extension") + # commit the changes + + base.metadata.create_all(engine) From 5d2cc72abff78a152819d778049c29228bdabf26 Mon Sep 17 00:00:00 2001 From: Ivica Matic Date: Mon, 13 Nov 2023 15:04:26 +0000 Subject: [PATCH 04/23] fix: converted polygon to multipolygon before storing in the database --- docker-compose.yaml | 28 ++++++++++++++-------------- scrape.py | 7 ++++++- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index f990397..7c39333 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -19,20 +19,20 @@ services: # volumes: # - .:/app - # scrape: - # build: . - # depends_on: - # - db - # environment: - # DATABASE_HOST: "db" - # DATABASE_PORT: "5432" - # DATABASE_NAME: "stacaccessibility_db" - # DATABASE_USER: "postgres" - # DATABASE_PASSWORD: "postgres" - # APP_HOST: "0.0.0.0" - # APP_PORT: "5000" - # APP_DEBUG: "True" - # command: ["python", "scrape.py"] + scrape: + build: . + depends_on: + - db + environment: + DATABASE_HOST: "db" + DATABASE_PORT: "5432" + DATABASE_NAME: "stacaccessibility_db" + DATABASE_USER: "postgres" + DATABASE_PASSWORD: "postgres" + APP_HOST: "0.0.0.0" + APP_PORT: "5000" + APP_DEBUG: "True" + command: ["python", "scrape.py"] dbinit: build: . 
diff --git a/scrape.py b/scrape.py index e4b56a7..4cfff20 100644 --- a/scrape.py +++ b/scrape.py @@ -199,8 +199,13 @@ def check_if_sas_token_is_present_for_collection_on_mpc(_collection_id: str) -> token_present, token_url = check_if_sas_token_is_present_for_collection_on_mpc( results_collection_id) if token_present: - mpc_token_obtaining_url = token_url + mpc_token_obtaining_url = token_url + + # convert shapely_multipolygon_envelope to MultiPolygon + shapely_multipolygon_envelope = shapely.geometry.multipolygon.MultiPolygon( + [shapely_multipolygon_envelope] + ) store_collection_in_database( results_catalog_url, results_collection_id, From e29452757c1b4729da24c764d410592755953ca3 Mon Sep 17 00:00:00 2001 From: Ivica Matic Date: Mon, 13 Nov 2023 15:41:41 +0000 Subject: [PATCH 05/23] fix: converting poygon to multi polygon only if neccesarry --- scrape.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scrape.py b/scrape.py index 4cfff20..dd223d4 100644 --- a/scrape.py +++ b/scrape.py @@ -202,10 +202,12 @@ def check_if_sas_token_is_present_for_collection_on_mpc(_collection_id: str) -> mpc_token_obtaining_url = token_url - # convert shapely_multipolygon_envelope to MultiPolygon - shapely_multipolygon_envelope = shapely.geometry.multipolygon.MultiPolygon( - [shapely_multipolygon_envelope] - ) + # convert shapely_multipolygon_envelope to MultiPolygon if it is not multipolygon + if not isinstance(shapely_multipolygon_envelope, shapely.geometry.multipolygon.MultiPolygon): + shapely_multipolygon_envelope = shapely.geometry.multipolygon.MultiPolygon( + [shapely_multipolygon_envelope] + ) + store_collection_in_database( results_catalog_url, results_collection_id, From 3cf6ec6409583c4ea6f631ef5b2124f9ee4f6975 Mon Sep 17 00:00:00 2001 From: Ivica Matic Date: Mon, 13 Nov 2023 16:24:18 +0000 Subject: [PATCH 06/23] cicd: fix typo --- .github/workflows/docker-build-stage.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/.github/workflows/docker-build-stage.yaml b/.github/workflows/docker-build-stage.yaml index 6021009..d4cf9f5 100644 --- a/.github/workflows/docker-build-stage.yaml +++ b/.github/workflows/docker-build-stage.yaml @@ -31,7 +31,7 @@ jobs: with: context: . file: Dockerfile - cache-from: type=registry,ref=${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accessability-scanner:cache - cache-to: type=registry,ref=${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accessability-scanner:cache,mode=max + cache-from: type=registry,ref=${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accessibility-scanner:cache + cache-to: type=registry,ref=${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accessibility-scanner:cache,mode=max push: true - tags: ${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accessability-scanner:${{ env.SHORT_SHA }} \ No newline at end of file + tags: ${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accessibility-scanner:${{ env.SHORT_SHA }} \ No newline at end of file From aa9dd9a33eeb5a1a8c4307dceea3675cddab4ae4 Mon Sep 17 00:00:00 2001 From: James Date: Wed, 15 Nov 2023 11:58:24 +0000 Subject: [PATCH 07/23] feat: experimental collection returner --- server.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/server.py b/server.py index a1f330e..919043c 100644 --- a/server.py +++ b/server.py @@ -10,6 +10,10 @@ from database import session, Collection from urllib.parse import urljoin +from shapely.geometry import shape +from flask import request, jsonify +from urllib.parse import urljoin + load_dotenv() APP_HOST = os.getenv("APP_HOST", "0.0.0.0") APP_PORT = os.getenv("APP_PORT", "5000") @@ -28,32 +32,27 @@ def healthz(): # in geojson format and filter the database for available collections @app.route("/get_collections", methods=["POST"]) def get_collections(): - aoi = flask.request.json.get("aoi", None) + data = request.json + aoi = data.get("aoi") if not aoi: - # 
send 400 bad request with message that aoi is required - return {"error": "aoi is required"}, 400 - - catalog_url = flask.request.json.get("catalog_url", None) - collection_id = flask.request.json.get("collection_id", None) - aoi_shapely = shapely.geometry.shape(aoi) + return {"error": "aoi is required"}, 400 + + aoi_shapely = shape(aoi) collections = session.query(Collection).filter( ga.functions.ST_Intersects( Collection.spatial_extent, ga.shape.from_shape(aoi_shapely, srid=4326) - ), + ) ) - if catalog_url: - collections = collections.filter(Collection.catalog_url == catalog_url) - - if collection_id: - collections = collections.filter(Collection.collection_id == collection_id) - + # Apply filters directly from request data + for key, value in data.items(): + if key in Collection.__table__.columns and value is not None: + collections = collections.filter(getattr(Collection, key) == value) collections = collections.all() results = {} for i in collections: - aoi_as_shapely = shapely.geometry.shape(aoi) - aoi_as_geojson = json.loads(shapely.to_geojson(aoi_as_shapely)) + aoi_as_geojson = json.loads(shapely.geometry.mapping(aoi_shapely)) results[i.collection_id] = { "catalog_url": i.catalog_url, "http_downloadable": i.http_downloadable, @@ -63,7 +62,7 @@ def get_collections(): "collection_stac_url": urljoin(i.catalog_url, f"collections/{i.collection_id}"), "aoi": aoi_as_geojson, } - return flask.jsonify(results), 200 + return jsonify(results), 200 if __name__ == "__main__": From 777b9cf0e8002db833f4ba89db18933e97253f97 Mon Sep 17 00:00:00 2001 From: James Date: Wed, 15 Nov 2023 12:08:50 +0000 Subject: [PATCH 08/23] fix: declaring CORS --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index a44932f..bf4ec23 100644 --- a/server.py +++ b/server.py @@ -17,7 +17,7 @@ APP_DEBUG = os.getenv("APP_DEBUG", "True") == "True" app = flask.Flask(__name__) -app = CORS(app) +CORS(app) # Create /healthz endpoint From 
d0ec9ff57192d53140d60fdbdf8a4f487cd2dfb4 Mon Sep 17 00:00:00 2001 From: Ivica Matic Date: Mon, 20 Nov 2023 10:52:21 +0000 Subject: [PATCH 09/23] cicd: added gunicorn hosting --- Dockerfile | 2 +- requirements.txt | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3210b9d..3cd0292 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,4 +17,4 @@ COPY . . EXPOSE 5000 # Run the application -CMD ["python", "server.py"] +CMD ["gunicorn", "server:app"] diff --git a/requirements.txt b/requirements.txt index 5dbc3f9..8ded6cc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,11 +8,12 @@ click-plugins==1.1.1 cligj==0.7.2 fiona==1.9.5 Flask==3.0.0 -Flask-Cors==3.0.10 +Flask-Cors==4.0.0 Flask-SQLAlchemy==3.1.1 GeoAlchemy2==0.14.2 geopandas==0.14.0 greenlet==3.0.1 +gunicorn==21.2.0 idna==3.4 itsdangerous==2.1.2 Jinja2==3.1.2 From bbac5e7c0606b6e73e044cce454666ce979c7f8b Mon Sep 17 00:00:00 2001 From: James Date: Mon, 20 Nov 2023 11:26:55 +0000 Subject: [PATCH 10/23] feat: add endpoint with slash --- server.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server.py b/server.py index 919043c..ebeb8eb 100644 --- a/server.py +++ b/server.py @@ -31,11 +31,12 @@ def healthz(): # Make a POST endpoint which will take catalog_url and aoi # in geojson format and filter the database for available collections @app.route("/get_collections", methods=["POST"]) +@app.route("/get_collections/", methods=["POST"]) def get_collections(): data = request.json aoi = data.get("aoi") if not aoi: - return {"error": "aoi is required"}, 400 + return {"error": "aoi is required"}, 400 aoi_shapely = shape(aoi) collections = session.query(Collection).filter( From 9aeb5d9595f35731d59616f09bb86375adf28b6c Mon Sep 17 00:00:00 2001 From: Ivica Matic Date: Mon, 20 Nov 2023 11:27:58 +0000 Subject: [PATCH 11/23] cicd: updated dockerfile cmd --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile 
index 3cd0292..3e8effa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,4 +17,4 @@ COPY . . EXPOSE 5000 # Run the application -CMD ["gunicorn", "server:app"] +CMD ["gunicorn", "server:app", "-b", "0.0.0.0:8000", "-w", "4"] From f5cce0c5178152d6b811f9036a470862dee0cb0a Mon Sep 17 00:00:00 2001 From: James Date: Mon, 20 Nov 2023 11:29:55 +0000 Subject: [PATCH 12/23] revert: change geojson responsse --- server.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server.py b/server.py index ebeb8eb..19a7efe 100644 --- a/server.py +++ b/server.py @@ -53,7 +53,8 @@ def get_collections(): results = {} for i in collections: - aoi_as_geojson = json.loads(shapely.geometry.mapping(aoi_shapely)) + aoi_as_shapely = shapely.geometry.shape(aoi) + aoi_as_geojson = json.loads(shapely.to_geojson(aoi_as_shapely)) results[i.collection_id] = { "catalog_url": i.catalog_url, "http_downloadable": i.http_downloadable, From d1f3044b2d209bb45f5f36d05bf4f149d6feca95 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 20 Nov 2023 11:36:56 +0000 Subject: [PATCH 13/23] feat: remove aoi from response --- server.py | 1 - 1 file changed, 1 deletion(-) diff --git a/server.py b/server.py index 19a7efe..0d08254 100644 --- a/server.py +++ b/server.py @@ -62,7 +62,6 @@ def get_collections(): "is_from_mpc": i.is_from_mpc, "mpc_token_obtaining_url": i.mpc_token_obtaining_url, "collection_stac_url": urljoin(i.catalog_url, f"collections/{i.collection_id}"), - "aoi": aoi_as_geojson, } return jsonify(results), 200 From c4ba00fd404c137a4363f5afb24600c01847acb7 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 20 Nov 2023 11:37:32 +0000 Subject: [PATCH 14/23] feat: remove aoi parsing in response loop --- server.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/server.py b/server.py index 0d08254..9e23632 100644 --- a/server.py +++ b/server.py @@ -53,8 +53,6 @@ def get_collections(): results = {} for i in collections: - aoi_as_shapely = shapely.geometry.shape(aoi) - aoi_as_geojson = 
json.loads(shapely.to_geojson(aoi_as_shapely)) results[i.collection_id] = { "catalog_url": i.catalog_url, "http_downloadable": i.http_downloadable, From 8417cf69665ae5cc1d65ceb377685f5dfe7aba84 Mon Sep 17 00:00:00 2001 From: Ivica Matic Date: Mon, 20 Nov 2023 12:01:38 +0000 Subject: [PATCH 15/23] cicd: updated docker compose --- docker-compose.yaml | 56 ++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 31 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 7c39333..b0e3fc6 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,26 +1,10 @@ version: "3" services: - # web: - # build: . - # ports: - # - "5005:5005" - # depends_on: - # - db - # environment: - # DATABASE_HOST: "db" - # DATABASE_PORT: "5432" - # DATABASE_NAME: "stacaccessibility_db" - # DATABASE_USER: "postgres" - # DATABASE_PASSWORD: "postgres" - # APP_HOST: "0.0.0.0" - # APP_PORT: "5005" - # APP_DEBUG: "True" - # volumes: - # - .:/app - - scrape: + web: build: . + ports: + - "8000:8000" depends_on: - db environment: @@ -30,9 +14,27 @@ services: DATABASE_USER: "postgres" DATABASE_PASSWORD: "postgres" APP_HOST: "0.0.0.0" - APP_PORT: "5000" + APP_PORT: "8000" APP_DEBUG: "True" - command: ["python", "scrape.py"] + volumes: + - .:/app + command: ["python", "server.py"] + + + # scrape: + # build: . + # depends_on: + # - db + # environment: + # DATABASE_HOST: "db" + # DATABASE_PORT: "5432" + # DATABASE_NAME: "stacaccessibility_db" + # DATABASE_USER: "postgres" + # DATABASE_PASSWORD: "postgres" + # APP_HOST: "0.0.0.0" + # APP_PORT: "8000" + # APP_DEBUG: "True" + # command: ["python", "scrape.py"] dbinit: build: . 
@@ -45,9 +47,9 @@ services: DATABASE_USER: "postgres" DATABASE_PASSWORD: "postgres" APP_HOST: "0.0.0.0" - APP_PORT: "5000" + APP_PORT: "8000" APP_DEBUG: "True" - command: ["/bin/sh", "-c", "sleep 5 && python database.py"] + command: ["python", "database.py"] db: image: postgis/postgis:16-master @@ -58,12 +60,4 @@ services: ports: - "15432:5432" - # db: - # image: postgres:13 - # environment: - # POSTGRES_USER: "postgres" - # POSTGRES_PASSWORD: "postgres" - # POSTGRES_DB: "stacaccessibility_db" - # ports: - # - "15432:5432" From c68afdbdf0a833451ecc91dffe7c993d1b9bbed1 Mon Sep 17 00:00:00 2001 From: Ivica Matic Date: Mon, 20 Nov 2023 12:02:16 +0000 Subject: [PATCH 16/23] cicd: added aoi key back into the payload --- server.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server.py b/server.py index dd1842b..d96b9e1 100644 --- a/server.py +++ b/server.py @@ -55,6 +55,8 @@ def get_collections(): results = {} for i in collections: + aoi_as_shapely = ga.shape.to_shape(i.spatial_extent) + aoi_as_geojson = json.loads(shapely.to_geojson(aoi_as_shapely)) results[i.collection_id] = { "catalog_url": i.catalog_url, "http_downloadable": i.http_downloadable, @@ -62,8 +64,9 @@ def get_collections(): "is_from_mpc": i.is_from_mpc, "mpc_token_obtaining_url": i.mpc_token_obtaining_url, "collection_stac_url": urljoin(i.catalog_url, f"collections/{i.collection_id}"), + "aoi": aoi_as_geojson, } - return jsonify(results), 200 + return flask.jsonify(results), 200 if __name__ == "__main__": From 4a44038cfc81ca21373d3c248a9824ad59a64c02 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 20 Nov 2023 12:13:34 +0000 Subject: [PATCH 17/23] feat: added mpc further filtering --- server.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/server.py b/server.py index dd1842b..4460d83 100644 --- a/server.py +++ b/server.py @@ -6,7 +6,7 @@ import flask from flask_cors import CORS from dotenv import load_dotenv -import shapely + import geoalchemy2 as ga 
from database import session, Collection from urllib.parse import urljoin @@ -37,6 +37,7 @@ def healthz(): def get_collections(): data = request.json aoi = data.get("aoi") + is_available_from_mpc = data.get("is_available_from_mpc") if not aoi: return {"error": "aoi is required"}, 400 @@ -51,6 +52,12 @@ def get_collections(): for key, value in data.items(): if key in Collection.__table__.columns and value is not None: collections = collections.filter(getattr(Collection, key) == value) + + if is_available_from_mpc: + collections = collections.filter( + (Collection.is_from_mpc == False) | + ((Collection.is_from_mpc == True) & (Collection.mpc_token_obtaining_url != None)) + ) collections = collections.all() results = {} From af1156fd3c351262ed39283ee50006654fa92158 Mon Sep 17 00:00:00 2001 From: Ivica Matic Date: Mon, 20 Nov 2023 12:13:52 +0000 Subject: [PATCH 18/23] cicd: reformated --- server.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server.py b/server.py index d96b9e1..07c6c92 100644 --- a/server.py +++ b/server.py @@ -63,7 +63,9 @@ def get_collections(): "requires_token": i.requires_token, "is_from_mpc": i.is_from_mpc, "mpc_token_obtaining_url": i.mpc_token_obtaining_url, - "collection_stac_url": urljoin(i.catalog_url, f"collections/{i.collection_id}"), + "collection_stac_url": urljoin( + i.catalog_url, f"collections/{i.collection_id}" + ), "aoi": aoi_as_geojson, } return flask.jsonify(results), 200 From 8d34ce80291ef4299759d6cb4a095c290eaac961 Mon Sep 17 00:00:00 2001 From: Ivica Matic Date: Mon, 20 Nov 2023 12:23:15 +0000 Subject: [PATCH 19/23] cicd: added is_available_from_mpc (WIP) --- scrape.py | 51 +++++++++++++++++++++++++++++++-------------------- server.py | 14 +++++++++----- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/scrape.py b/scrape.py index dd223d4..1166e18 100644 --- a/scrape.py +++ b/scrape.py @@ -25,9 +25,9 @@ def find_first_downloadable_asset_key(_assets: dict) -> str: for asset_key, asset_info in 
_assets.items(): asset_key_href = asset_info["href"].lower() if ( - asset_key_href.endswith(".tif") - or asset_key_href.endswith(".tiff") - or asset_key_href.endswith(".nc") + asset_key_href.endswith(".tif") + or asset_key_href.endswith(".tiff") + or asset_key_href.endswith(".nc") ): return asset_key # If no asset with specific extensions is found, return the first asset key @@ -56,7 +56,9 @@ def check_if_stac_item_is_http_downloadable(_stac_item: dict) -> bool: return False -def check_if_stac_item_is_http_directly_downloadable_without_token(_stac_item: dict) -> bool: +def check_if_stac_item_is_http_directly_downloadable_without_token( + _stac_item: dict, +) -> bool: """ Check if a STAC item is downloadable using http without a token or some signing mechanism. @@ -98,14 +100,12 @@ def check_if_sas_token_is_present_for_collection_on_mpc(_collection_id: str) -> Returns: Tuple of (True/False, URL to obtain the SAS token) """ - logger.info( - f"Checking if collection {_collection_id} has available token" + logger.info(f"Checking if collection {_collection_id} has available token") + token_check_url = ( + f"https://planetarycomputer.microsoft.com/api/sas/v1/token/{_collection_id}" ) - token_check_url = f"https://planetarycomputer.microsoft.com/api/sas/v1/token/{_collection_id}" try: - token_check_response = safe_request( - "GET", token_check_url - ) + token_check_response = safe_request("GET", token_check_url) token_check_response.raise_for_status() if token_check_response.status_code == 200: return True, token_check_url @@ -188,26 +188,37 @@ def check_if_sas_token_is_present_for_collection_on_mpc(_collection_id: str) -> if "planetarycomputer" in results_catalog_url: is_from_mpc = True - if check_if_stac_item_is_http_downloadable(response_json["features"][0]): + if check_if_stac_item_is_http_downloadable( + response_json["features"][0] + ): http_downloadable = True if check_if_stac_item_is_http_directly_downloadable_without_token( - response_json["features"][0]): + 
response_json["features"][0] + ): http_downloadable = True requires_token = False else: if "planetarycomputer" in results_catalog_url: - token_present, token_url = check_if_sas_token_is_present_for_collection_on_mpc( - results_collection_id) + ( + token_present, + token_url, + ) = check_if_sas_token_is_present_for_collection_on_mpc( + results_collection_id + ) if token_present: - mpc_token_obtaining_url = token_url - + mpc_token_obtaining_url = token_url # convert shapely_multipolygon_envelope to MultiPolygon if it is not multipolygon - if not isinstance(shapely_multipolygon_envelope, shapely.geometry.multipolygon.MultiPolygon): - shapely_multipolygon_envelope = shapely.geometry.multipolygon.MultiPolygon( - [shapely_multipolygon_envelope] + if not isinstance( + shapely_multipolygon_envelope, + shapely.geometry.multipolygon.MultiPolygon, + ): + shapely_multipolygon_envelope = ( + shapely.geometry.multipolygon.MultiPolygon( + [shapely_multipolygon_envelope] + ) ) - + store_collection_in_database( results_catalog_url, results_collection_id, diff --git a/server.py b/server.py index 582e4db..c6d9b22 100644 --- a/server.py +++ b/server.py @@ -11,8 +11,9 @@ from database import session, Collection from urllib.parse import urljoin +from shapely import to_geojson from shapely.geometry import shape -from flask import request, jsonify +from flask import request from urllib.parse import urljoin load_dotenv() @@ -52,18 +53,21 @@ def get_collections(): for key, value in data.items(): if key in Collection.__table__.columns and value is not None: collections = collections.filter(getattr(Collection, key) == value) - + if is_available_from_mpc: collections = collections.filter( - (Collection.is_from_mpc == False) | - ((Collection.is_from_mpc == True) & (Collection.mpc_token_obtaining_url != None)) + (Collection.is_from_mpc == False) + | ( + (Collection.is_from_mpc == True) + & (Collection.mpc_token_obtaining_url != None) + ) ) collections = collections.all() results = {} for i in 
collections: aoi_as_shapely = ga.shape.to_shape(i.spatial_extent) - aoi_as_geojson = json.loads(shapely.to_geojson(aoi_as_shapely)) + aoi_as_geojson = json.loads(to_geojson(aoi_as_shapely)) results[i.collection_id] = { "catalog_url": i.catalog_url, "http_downloadable": i.http_downloadable, From 0f7da5cb7c6a10cb258143c537aba9d2954b6c3a Mon Sep 17 00:00:00 2001 From: James Date: Tue, 21 Nov 2023 11:10:08 +0000 Subject: [PATCH 20/23] feat: update server --- scrape.py | 1 - server.py | 54 ++++++++++++++++++++++++++++++------------------------ 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/scrape.py b/scrape.py index 1166e18..b85e76b 100644 --- a/scrape.py +++ b/scrape.py @@ -27,7 +27,6 @@ def find_first_downloadable_asset_key(_assets: dict) -> str: if ( asset_key_href.endswith(".tif") or asset_key_href.endswith(".tiff") - or asset_key_href.endswith(".nc") ): return asset_key # If no asset with specific extensions is found, return the first asset key diff --git a/server.py b/server.py index c6d9b22..1531c4a 100644 --- a/server.py +++ b/server.py @@ -8,6 +8,7 @@ from dotenv import load_dotenv import geoalchemy2 as ga +from sqlalchemy import or_, and_ from database import session, Collection from urllib.parse import urljoin @@ -36,39 +37,44 @@ def healthz(): @app.route("/get_collections", methods=["POST"]) @app.route("/get_collections/", methods=["POST"]) def get_collections(): - data = request.json + data = request.get_json() aoi = data.get("aoi") - is_available_from_mpc = data.get("is_available_from_mpc") - if not aoi: - return {"error": "aoi is required"}, 400 + public = data.get("public") + mpc_with_token = data.get("mpc_with_token") aoi_shapely = shape(aoi) - collections = session.query(Collection).filter( - ga.functions.ST_Intersects( - Collection.spatial_extent, ga.shape.from_shape(aoi_shapely, srid=4326) + collections = ( + session.query(Collection) + .filter( + ga.functions.ST_Intersects( + Collection.spatial_extent, 
ga.shape.from_shape(aoi_shapely, srid=4326) + ) ) + .distinct() ) - # Apply filters directly from request data - for key, value in data.items(): - if key in Collection.__table__.columns and value is not None: - collections = collections.filter(getattr(Collection, key) == value) - - if is_available_from_mpc: - collections = collections.filter( - (Collection.is_from_mpc == False) - | ( - (Collection.is_from_mpc == True) - & (Collection.mpc_token_obtaining_url != None) + conditions = [] + if public or mpc_with_token: + if public: + conditions.append( + and_( + Collection.http_downloadable == True, + Collection.requires_token == False, + ) + ) + if mpc_with_token: + conditions.append( + and_(Collection.requires_token == True, Collection.is_from_mpc == True) ) - ) - collections = collections.all() - results = {} - for i in collections: + collections = collections.filter(or_(*conditions)) + collection_results = collections.all() + + response_data = {} + for i in collection_results: aoi_as_shapely = ga.shape.to_shape(i.spatial_extent) aoi_as_geojson = json.loads(to_geojson(aoi_as_shapely)) - results[i.collection_id] = { + response_data[i.collection_id] = { "catalog_url": i.catalog_url, "http_downloadable": i.http_downloadable, "requires_token": i.requires_token, @@ -79,7 +85,7 @@ def get_collections(): ), "aoi": aoi_as_geojson, } - return flask.jsonify(results), 200 + return flask.jsonify(response_data), 200 if __name__ == "__main__": From 06f4f968ae2d9206e692a33f4a7f7c3aa3af644d Mon Sep 17 00:00:00 2001 From: James Date: Tue, 21 Nov 2023 13:07:52 +0000 Subject: [PATCH 21/23] revert: reintroduce nc as available asset --- scrape.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scrape.py b/scrape.py index b85e76b..1166e18 100644 --- a/scrape.py +++ b/scrape.py @@ -27,6 +27,7 @@ def find_first_downloadable_asset_key(_assets: dict) -> str: if ( asset_key_href.endswith(".tif") or asset_key_href.endswith(".tiff") + or asset_key_href.endswith(".nc") ): return asset_key # If 
no asset with specific extensions is found, return the first asset key From 496aa3b3eac4b66ed4a594783fceb6cfa0599cb1 Mon Sep 17 00:00:00 2001 From: James Date: Wed, 22 Nov 2023 12:49:54 +0000 Subject: [PATCH 22/23] feat: ammend server return --- server.py | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/server.py b/server.py index 1531c4a..ff4c1d5 100644 --- a/server.py +++ b/server.py @@ -64,27 +64,31 @@ def get_collections(): ) if mpc_with_token: conditions.append( - and_(Collection.requires_token == True, Collection.is_from_mpc == True) + and_( + Collection.requires_token == True, + Collection.is_from_mpc == True, + bool(Collection.mpc_token_obtaining_url != ""), + ) ) collections = collections.filter(or_(*conditions)) - collection_results = collections.all() - - response_data = {} + collection_results = collections.all() + + response_data = [] for i in collection_results: - aoi_as_shapely = ga.shape.to_shape(i.spatial_extent) - aoi_as_geojson = json.loads(to_geojson(aoi_as_shapely)) - response_data[i.collection_id] = { - "catalog_url": i.catalog_url, - "http_downloadable": i.http_downloadable, - "requires_token": i.requires_token, - "is_from_mpc": i.is_from_mpc, - "mpc_token_obtaining_url": i.mpc_token_obtaining_url, - "collection_stac_url": urljoin( - i.catalog_url, f"collections/{i.collection_id}" - ), - "aoi": aoi_as_geojson, - } + response_data.append( + { + "collection_id": i.collection_id, + "catalog_url": i.catalog_url, + "http_downloadable": i.http_downloadable, + "requires_token": i.requires_token, + "is_from_mpc": i.is_from_mpc, + "mpc_token_obtaining_url": i.mpc_token_obtaining_url, + "collection_stac_url": urljoin( + i.catalog_url, f"collections/{i.collection_id}" + ), + } + ) return flask.jsonify(response_data), 200 From 888abeeb78e0851069e140b8a4333b08a31d9d71 Mon Sep 17 00:00:00 2001 From: James Hinton <63542818+james-hinton@users.noreply.github.com> Date: Wed, 27 Mar 2024 15:29:39 +0000 
Subject: [PATCH 23/23] Create README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..0071d9e --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# STAC Accessibility Scanner + +Loops through publicly available STAC Catalogs to find the collections which are openly accessible. Otherwise most of the STAC Portal Frontend collections are not accessible.