Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merge staging into main #6

Merged
merged 29 commits into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
22530d1
Merge branch 'main' into staging
Nov 13, 2023
d46a65c
cicd: fixed docker image name
Nov 13, 2023
62e7bcd
feat: added log
Nov 13, 2023
4c7cd49
cicd: fixed command
Nov 13, 2023
5d2cc72
fix: converted polygon to multipolygon before storing in the database
Nov 13, 2023
e294527
fix: converting poygon to multi polygon only if neccesarry
Nov 13, 2023
3cf6ec6
cicd: fix typo
Nov 13, 2023
aa9dd9a
feat: experimental collection returner
james-hinton Nov 15, 2023
4382cc4
Merge pull request #1 from SpatialDays/main
james-hinton Nov 15, 2023
777b9cf
fix: declaring CORS
james-hinton Nov 15, 2023
d0ec9ff
cicd: added gunicorn hosting
Nov 20, 2023
bbac5e7
feat: add endpoint with slash
james-hinton Nov 20, 2023
9aeb5d9
cicd: updated dockerfile cmd
Nov 20, 2023
f5cce0c
revert: change geojson responsse
james-hinton Nov 20, 2023
d1f3044
feat: remove aoi from response
james-hinton Nov 20, 2023
c4ba00f
feat: remove aoi parsing in response loop
james-hinton Nov 20, 2023
1a7e151
Merge pull request #2 from SpatialDays/server-updates
Nov 20, 2023
8417cf6
cicd: updated docker compose
Nov 20, 2023
c68afdb
cicd: added aoi key back into the payload
Nov 20, 2023
4a44038
feat: added mpc further filtering
james-hinton Nov 20, 2023
af1156f
cicd: reformated
Nov 20, 2023
87020e3
Merge remote-tracking branch 'origin/staging' into add-mpc-available-…
Nov 20, 2023
8d34ce8
cicd: added is_available_from_mpc (WIP)
Nov 20, 2023
0f7da5c
feat: update server
james-hinton Nov 21, 2023
4cb9903
Merge pull request #3 from SpatialDays/add-mpc-available-key-into-pay…
james-hinton Nov 21, 2023
06f4f96
revert: reintroduce nc as available asset
james-hinton Nov 21, 2023
7fa3860
Merge pull request #4 from SpatialDays/add-mpc-available-key-into-pay…
james-hinton Nov 21, 2023
496aa3b
feat: ammend server return
james-hinton Nov 22, 2023
888abee
Create README.md
james-hinton Mar 27, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions .github/workflows/docker-build-stage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ on:

jobs:
docker-build-push-dev:
# runs-on: ubuntu-latest
runs-on: spatialdays-self-hosted-runner-1
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
Expand All @@ -32,7 +31,7 @@ jobs:
with:
context: .
file: Dockerfile
cache-from: type=registry,ref=${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accesability-scanner:cache
cache-to: type=registry,ref=${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accesability-scanner:cache,mode=max
cache-from: type=registry,ref=${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accessibility-scanner:cache
cache-to: type=registry,ref=${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accessibility-scanner:cache,mode=max
push: true
tags: ${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accesability-scanner:${{ env.SHORT_SHA }}
tags: ${{ secrets.EO_PROJ_STAGING_DOCKER_REGISTRY_URL }}/stac-accessibility-scanner:${{ env.SHORT_SHA }}
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ COPY . .
EXPOSE 5000

# Run the application
CMD ["python", "server.py"]
CMD ["gunicorn", "server:app", "-b", "0.0.0.0:8000", "-w", "4"]
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# STAC Accessibility Scanner

Loops through publicly available STAC Catalogs to find the collections which are openly accessible. Otherwise, most of the STAC Portal Frontend collections are not accessible.
15 changes: 10 additions & 5 deletions database.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def store_collection_in_database(
.first()
)
if collection_db_entry is None:
logging.debug(f"Adding {_catalog_url} {_collection_id} to the database")
logger.debug(f"Adding {_catalog_url} {_collection_id} to the database")
collection_db_entry = Collection()
collection_db_entry.catalog_url = _catalog_url
collection_db_entry.collection_id = _collection_id
Expand All @@ -91,19 +91,24 @@ def store_collection_in_database(
collection_db_entry.mpc_token_obtaining_url = _mpc_token_obtaining_url
session.add(collection_db_entry)
session.commit()
logging.info(f"Added {_catalog_url} {_collection_id} to the database")
logger.info(f"Added {_catalog_url} {_collection_id} to the database")
else:
logging.debug(f"Updating {_catalog_url} {_collection_id} in the database")
logger.debug(f"Updating {_catalog_url} {_collection_id} in the database")
collection_db_entry.http_downloadable = _http_downloadable
collection_db_entry.requires_token = _requires_token
collection_db_entry.is_from_mpc = _is_from_mpc
collection_db_entry.mpc_token_obtaining_url = _mpc_token_obtaining_url
session.commit()
logging.info(f"Updated {_catalog_url} {_collection_id} in the database")
logger.info(f"Updated {_catalog_url} {_collection_id} in the database")


if __name__ == '__main__':
plugin_enable_statement = sa.text("CREATE EXTENSION IF NOT EXISTS postgis;")
with engine.connect() as conn:
with engine.begin() as conn:
conn.execute(plugin_enable_statement)
logger.info("Enabled postgis extension")
print("Enabled postgis extension")
# commit the changes


base.metadata.create_all(engine)
48 changes: 21 additions & 27 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,23 +1,25 @@
version: "3"

services:
# web:
# build: .
# ports:
# - "5005:5005"
# depends_on:
# - db
# environment:
# DATABASE_HOST: "db"
# DATABASE_PORT: "5432"
# DATABASE_NAME: "stacaccessibility_db"
# DATABASE_USER: "postgres"
# DATABASE_PASSWORD: "postgres"
# APP_HOST: "0.0.0.0"
# APP_PORT: "5005"
# APP_DEBUG: "True"
# volumes:
# - .:/app
web:
build: .
ports:
- "8000:8000"
depends_on:
- db
environment:
DATABASE_HOST: "db"
DATABASE_PORT: "5432"
DATABASE_NAME: "stacaccessibility_db"
DATABASE_USER: "postgres"
DATABASE_PASSWORD: "postgres"
APP_HOST: "0.0.0.0"
APP_PORT: "8000"
APP_DEBUG: "True"
volumes:
- .:/app
command: ["python", "server.py"]


# scrape:
# build: .
Expand All @@ -30,7 +32,7 @@ services:
# DATABASE_USER: "postgres"
# DATABASE_PASSWORD: "postgres"
# APP_HOST: "0.0.0.0"
# APP_PORT: "5000"
# APP_PORT: "8000"
# APP_DEBUG: "True"
# command: ["python", "scrape.py"]

Expand All @@ -45,7 +47,7 @@ services:
DATABASE_USER: "postgres"
DATABASE_PASSWORD: "postgres"
APP_HOST: "0.0.0.0"
APP_PORT: "5000"
APP_PORT: "8000"
APP_DEBUG: "True"
command: ["python", "database.py"]

Expand All @@ -58,12 +60,4 @@ services:
ports:
- "15432:5432"

# db:
# image: postgres:13
# environment:
# POSTGRES_USER: "postgres"
# POSTGRES_PASSWORD: "postgres"
# POSTGRES_DB: "stacaccessibility_db"
# ports:
# - "15432:5432"

3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ click-plugins==1.1.1
cligj==0.7.2
fiona==1.9.5
Flask==3.0.0
Flask-Cors==3.0.10
Flask-Cors==4.0.0
Flask-SQLAlchemy==3.1.1
GeoAlchemy2==0.14.2
geopandas==0.14.0
greenlet==3.0.1
gunicorn==21.2.0
idna==3.4
itsdangerous==2.1.2
Jinja2==3.1.2
Expand Down
46 changes: 32 additions & 14 deletions scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ def find_first_downloadable_asset_key(_assets: dict) -> str:
for asset_key, asset_info in _assets.items():
asset_key_href = asset_info["href"].lower()
if (
asset_key_href.endswith(".tif")
or asset_key_href.endswith(".tiff")
or asset_key_href.endswith(".nc")
asset_key_href.endswith(".tif")
or asset_key_href.endswith(".tiff")
or asset_key_href.endswith(".nc")
):
return asset_key
# If no asset with specific extensions is found, return the first asset key
Expand Down Expand Up @@ -56,7 +56,9 @@ def check_if_stac_item_is_http_downloadable(_stac_item: dict) -> bool:
return False


def check_if_stac_item_is_http_directly_downloadable_without_token(_stac_item: dict) -> bool:
def check_if_stac_item_is_http_directly_downloadable_without_token(
_stac_item: dict,
) -> bool:
"""
Check if a STAC item is downloadable using http without a token or some signing mechanism.

Expand Down Expand Up @@ -98,14 +100,12 @@ def check_if_sas_token_is_present_for_collection_on_mpc(_collection_id: str) ->
Returns: Tuple of (True/False, URL to obtain the SAS token)

"""
logger.info(
f"Checking if collection {_collection_id} has available token"
logger.info(f"Checking if collection {_collection_id} has available token")
token_check_url = (
f"https://planetarycomputer.microsoft.com/api/sas/v1/token/{_collection_id}"
)
token_check_url = f"https://planetarycomputer.microsoft.com/api/sas/v1/token/{_collection_id}"
try:
token_check_response = safe_request(
"GET", token_check_url
)
token_check_response = safe_request("GET", token_check_url)
token_check_response.raise_for_status()
if token_check_response.status_code == 200:
return True, token_check_url
Expand Down Expand Up @@ -188,19 +188,37 @@ def check_if_sas_token_is_present_for_collection_on_mpc(_collection_id: str) ->
if "planetarycomputer" in results_catalog_url:
is_from_mpc = True

if check_if_stac_item_is_http_downloadable(response_json["features"][0]):
if check_if_stac_item_is_http_downloadable(
response_json["features"][0]
):
http_downloadable = True
if check_if_stac_item_is_http_directly_downloadable_without_token(
response_json["features"][0]):
response_json["features"][0]
):
http_downloadable = True
requires_token = False
else:
if "planetarycomputer" in results_catalog_url:
token_present, token_url = check_if_sas_token_is_present_for_collection_on_mpc(
results_collection_id)
(
token_present,
token_url,
) = check_if_sas_token_is_present_for_collection_on_mpc(
results_collection_id
)
if token_present:
mpc_token_obtaining_url = token_url

# convert shapely_multipolygon_envelope to MultiPolygon if it is not multipolygon
if not isinstance(
shapely_multipolygon_envelope,
shapely.geometry.multipolygon.MultiPolygon,
):
shapely_multipolygon_envelope = (
shapely.geometry.multipolygon.MultiPolygon(
[shapely_multipolygon_envelope]
)
)

store_collection_in_database(
results_catalog_url,
results_collection_id,
Expand Down
92 changes: 58 additions & 34 deletions server.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,24 @@
import flask
from flask_cors import CORS
from dotenv import load_dotenv
import shapely

import geoalchemy2 as ga
from sqlalchemy import or_, and_
from database import session, Collection
from urllib.parse import urljoin

from shapely import to_geojson
from shapely.geometry import shape
from flask import request
from urllib.parse import urljoin

load_dotenv()
APP_HOST = os.getenv("APP_HOST", "0.0.0.0")
APP_PORT = os.getenv("APP_PORT", "5000")
APP_DEBUG = os.getenv("APP_DEBUG", "True") == "True"

app = flask.Flask(__name__)
app = CORS(app)
CORS(app)


# Create /healthz endpoint
Expand All @@ -29,43 +35,61 @@ def healthz():
# Make a POST endpoint which will take an aoi in GeoJSON format, plus optional
# `public` and `mpc_with_token` flags, and filter the database for available collections
@app.route("/get_collections", methods=["POST"])
@app.route("/get_collections/", methods=["POST"])
def get_collections():
aoi = flask.request.json.get("aoi", None)
if not aoi:
# send 400 bad request with message that aoi is required
return {"error": "aoi is required"}, 400

catalog_url = flask.request.json.get("catalog_url", None)
collection_id = flask.request.json.get("collection_id", None)
aoi_shapely = shapely.geometry.shape(aoi)
collections = session.query(Collection).filter(
ga.functions.ST_Intersects(
Collection.spatial_extent, ga.shape.from_shape(aoi_shapely, srid=4326)
),
)
data = request.get_json()
aoi = data.get("aoi")
public = data.get("public")
mpc_with_token = data.get("mpc_with_token")

if catalog_url:
collections = collections.filter(Collection.catalog_url == catalog_url)
aoi_shapely = shape(aoi)
collections = (
session.query(Collection)
.filter(
ga.functions.ST_Intersects(
Collection.spatial_extent, ga.shape.from_shape(aoi_shapely, srid=4326)
)
)
.distinct()
)

if collection_id:
collections = collections.filter(Collection.collection_id == collection_id)
conditions = []
if public or mpc_with_token:
if public:
conditions.append(
and_(
Collection.http_downloadable == True,
Collection.requires_token == False,
)
)
if mpc_with_token:
conditions.append(
and_(
Collection.requires_token == True,
Collection.is_from_mpc == True,
bool(Collection.mpc_token_obtaining_url != ""),
)
)

collections = collections.all()
collections = collections.filter(or_(*conditions))
collection_results = collections.all()

results = {}
for i in collections:
aoi_as_shapely = shapely.geometry.shape(aoi)
aoi_as_geojson = json.loads(shapely.to_geojson(aoi_as_shapely))
results[i.collection_id] = {
"catalog_url": i.catalog_url,
"http_downloadable": i.http_downloadable,
"requires_token": i.requires_token,
"is_from_mpc": i.is_from_mpc,
"mpc_token_obtaining_url": i.mpc_token_obtaining_url,
"collection_stac_url": urljoin(i.catalog_url, f"collections/{i.collection_id}"),
"aoi": aoi_as_geojson,
}
return flask.jsonify(results), 200
response_data = []
for i in collection_results:
response_data.append(
{
"collection_id": i.collection_id,
"catalog_url": i.catalog_url,
"http_downloadable": i.http_downloadable,
"requires_token": i.requires_token,
"is_from_mpc": i.is_from_mpc,
"mpc_token_obtaining_url": i.mpc_token_obtaining_url,
"collection_stac_url": urljoin(
i.catalog_url, f"collections/{i.collection_id}"
),
}
)
return flask.jsonify(response_data), 200


if __name__ == "__main__":
Expand Down
Loading