From 6e4a56c44383d77dfd4f0aaaa9c5e339728e8bd0 Mon Sep 17 00:00:00 2001 From: Jennifer Tran <12633533+botanical@users.noreply.github.com> Date: Thu, 8 Feb 2024 12:37:12 -0800 Subject: [PATCH] feature: notebook to reconcile collection metadata (#99) * Add collection metadata reconciliation notebook * Update collections with summaries information * Use glob instead of os.listdir * Revert summaries addition * Revert spacing on json files * Update reconciliation notebook to not update collection json files * Update description of cells and remove print statements * Add cell output from test run in dev * Update cell descriptions, run against dev once more --------- Co-authored-by: Jennifer Tran Co-authored-by: j08lue --- ...6_daily_GISS-E2-1-G_tas_kerchunk_DEMO.json | 8 +- .../collection-metadata-reconciliation.ipynb | 419 ++++++++++++++++++ 2 files changed, 420 insertions(+), 7 deletions(-) create mode 100644 transformation-scripts/collection-metadata-reconciliation.ipynb diff --git a/ingestion-data/collections/combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO.json b/ingestion-data/collections/combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO.json index 1d15d74..a99db23 100644 --- a/ingestion-data/collections/combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO.json +++ b/ingestion-data/collections/combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO.json @@ -98,11 +98,5 @@ "host" ] } - ], - "summaries": { - "datetime": [ - "1950-01-01T12:00:00Z", - "2014-12-31T12:00:00Z" - ] - } + ] } \ No newline at end of file diff --git a/transformation-scripts/collection-metadata-reconciliation.ipynb b/transformation-scripts/collection-metadata-reconciliation.ipynb new file mode 100644 index 0000000..3f2d956 --- /dev/null +++ b/transformation-scripts/collection-metadata-reconciliation.ipynb @@ -0,0 +1,419 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook to Reconcile Collection Metadata" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "This notebook reconciles the collections in `/ingestion-data/collections` and retrieves the `summaries` value for each collection from the API, merges it to the existing collection in `veda-data` and posts the new collection to the API." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "import json\n", + "import requests\n", + "from cognito_client import CognitoClient" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell retrieves collection JSON files from `/ingestion-data/collections/` and save collectionIds to a list." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "json_file_paths = glob.glob(\"../ingestion-data/collections/*.json\")\n", + "\n", + "file_paths_and_collection_ids = [\n", + " {\"filePath\": file_path, \"collectionId\": data[\"id\"]}\n", + " for file_path in json_file_paths\n", + " if \"id\" in (data := json.load(open(file_path, \"r\")))\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set the testing mode to `True` when testing and `False` otherwise. When the testing mode is `True`, the notebook will be set to run against `dev` endpoints." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "testing_mode = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Have your Cognito `username` and `password` ready to set up Cognito Client to retrieve a token that will be used to access the STAC Ingestor API." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "dev_endpoint = \"https://dev.delta-backend.com/\"\n", + "dev_client_id = \"CHANGE ME\"\n", + "dev_user_pool_id = \"CHANGE ME\"\n", + "dev_identity_pool_id = \"CHANGE ME\"\n", + "\n", + "staging_endpoint = \"https://staging-stac.delta-backend.com/\"\n", + "staging_client_id = \"CHANGE ME\"\n", + "staging_user_pool_id = \"CHANGE ME\"\n", + "staging_identity_pool_id = \"CHANGE ME\"\n", + "\n", + "ingestor_staging_url = \"https://ig9v64uky8.execute-api.us-west-2.amazonaws.com/staging/\"\n", + "ingestor_dev_url = \"https://dev.delta-backend.com/\"\n", + "\n", + "if testing_mode:\n", + " STAC_INGESTOR_API = ingestor_dev_url\n", + " VEDA_STAC_API = dev_endpoint\n", + "else:\n", + " STAC_INGESTOR_API = ingestor_staging_url\n", + " VEDA_STAC_API = staging_endpoint\n", + "\n", + "client = CognitoClient(\n", + " client_id=dev_client_id if testing_mode else staging_client_id,\n", + " user_pool_id=dev_user_pool_id if testing_mode else staging_user_pool_id,\n", + " identity_pool_id=dev_identity_pool_id if testing_mode else staging_identity_pool_id,\n", + ")\n", + "_ = client.login()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell sets up headers for requests." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "TOKEN = client.access_token\n", + "authorization_header = f\"Bearer {TOKEN}\"\n", + "headers = {\n", + " \"Authorization\": authorization_header,\n", + " \"content-type\": \"application/json\",\n", + " \"accept\": \"application/json\",\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell defines the functions that will be used to consolidate `summaries` and `links` to reconcile the collection metadata." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "def post_reconciled_collection(collection, collection_id):\n", + " collection_url = f\"{STAC_INGESTOR_API}api/stac/collections/{collection_id}\"\n", + " ingest_url = f\"{STAC_INGESTOR_API}api/ingest/collections\"\n", + "\n", + " try:\n", + " response = requests.post(ingest_url, json=collection, headers=headers)\n", + " response.raise_for_status()\n", + " if response.status_code == 201:\n", + " print(\n", + " f\"Request was successful. Find the updated collection at {collection_url}\"\n", + " )\n", + " else:\n", + " print(\n", + " f\"Updating {collection_id} failed. Request failed with status code: {response.status_code}\"\n", + " )\n", + " except requests.RequestException as e:\n", + " print(\n", + " f\"Updating {collection_id} failed. An error occurred during the request: {e}\"\n", + " )\n", + " except Exception as e:\n", + " print(\n", + " f\"An unexpected error occurred while trying to update {collection_id}: {e}\"\n", + " )\n", + "\n", + "\n", + "def merge_summaries(existing_summaries, retrieved_summaries):\n", + " merged_summaries_dict = existing_summaries.copy()\n", + "\n", + " if retrieved_summaries:\n", + " for key, value in retrieved_summaries.items():\n", + " merged_summaries_dict.setdefault(key, value)\n", + "\n", + " return merged_summaries_dict\n", + "\n", + "\n", + "def retain_external_links(existing_links, retrieved_links):\n", + " unique_hrefs = set(link.get(\"href\") for link in existing_links)\n", + " additional_external_links = [\n", + " link\n", + " for link in retrieved_links\n", + " if link.get(\"rel\") == \"external\" and link.get(\"href\") not in unique_hrefs\n", + " ]\n", + "\n", + " retained_links = existing_links + additional_external_links\n", + " return retained_links" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell loops through `file_paths_and_collection_ids` to retrieve 
`summaries` and `links` information for each existing collection and publish the updated collection to the target ingestion `api/ingest/collections` endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/campfire-lst-night-diff\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/OMI_trno2-COG\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-global-da-tws\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_1B2b_Natural_Gas_Processing\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/grdi-shdi-raster\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/conus-reach\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_1B1a_Coal_Mining_Underground\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/ndvi_diff_Ian_2022-09-30_2022-09-05\n", + "An error occurred for collectionId entropy_difference_2022-09-05_2022-09-30: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/entropy_difference_2022-09-05_2022-09-30\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/snow-projections-median-585\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/social-vulnerability-index-socioeconomic\n", + "Request was successful.
Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_4B_Manure_Management\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/geoglam\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/OMSO2PCA-COG\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-global-da-tws-trend\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_2B5_Petrochemical_Production\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_1B2b_Natural_Gas_Production\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/facebook_population_density\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-tws-nonstationarity-index\n", + "An error occurred for collectionId modis-albedo-wsa-diff-2015-2022: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/modis-albedo-wsa-diff-2015-2022\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/social-vulnerability-index-overall\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/ceos-co2-flux-budgets-mean\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/grdi-v1-raster\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_6A_Landfills_Industrial\n", + "Request was successful. 
Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_6B_Wastewater_Treatment_Domestic\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/snow-projections-median-245\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_6D_Composting\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/ecco-surface-height-change\n", + "Updating hls-l30-002-ej-reprocessed failed. An error occurred during the request: 422 Client Error: Unprocessable Entity for url: https://dev.delta-backend.com/api/ingest/collections\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/houston-aod\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/ls8-covid-19-example-data\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/IS2SITMOGR4-cog\n", + "An error occurred for collectionId hls-bais2-v2: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/hls-bais2-v2\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/recovery-proxy-maps-covid-19\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-global-da-swe\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_2C2_Ferroalloy_Production\n", + "An error occurred for collectionId campfire-ndvi-diff: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/campfire-ndvi-diff\n", + "Request was successful. 
Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-global-da-gpp-trend\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/nightlights-500m-daily\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_4A_Enteric_Fermentation\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/eis_fire_newfirepix\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/barc-thomasfire\n", + "An error occurred for collectionId cygnss-watermask-ucar: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/cygnss-watermask-ucar\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/nightlights-hd-1band\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/grdi-cdr-raster\n", + "An error occurred for collectionId modis-lst-night-diff-2015-2022: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/modis-lst-night-diff-2015-2022\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-global-da-gpp\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-global-da-evap\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/houston-lst-day\n", + "Request was successful. 
Find the updated collection at https://dev.delta-backend.com/api/stac/collections/landsat-c2l2-sr-antarctic-glaciers-pine-island\n", + "An error occurred for collectionId cygnss-watermask-ucberkeley: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/cygnss-watermask-ucberkeley\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-monthly-emissions_1B2b_Natural_Gas_Production\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-global-da-qs\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-global-da-qsb\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/blue-tarp-planetscope\n", + "An error occurred for collectionId landsat-c2l2-sr-lakes-aral-sea: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/landsat-c2l2-sr-lakes-aral-sea\n", + "An error occurred for collectionId damage_probability_2022-10-03: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/damage_probability_2022-10-03\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/caldor-fire-burn-severity\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/houston-lst-diff\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/co2-mean\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/nceo_africa_2017\n", + "Request was successful. 
Find the updated collection at https://dev.delta-backend.com/api/stac/collections/grdi-vnl-slope-raster\n", + "An error occurred for collectionId disturbance-probability-percentile: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/disturbance-probability-percentile\n", + "An error occurred for collectionId landsat-c2l2-sr-lakes-tonle-sap: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/landsat-c2l2-sr-lakes-tonle-sap\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/disalexi-etsuppression\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/social-vulnerability-index-overall-nopop\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/houston-aod-diff\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-monthly-emissions_1A_Combustion_Stationary\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/nightlights-hd-monthly\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/social-vulnerability-index-household-nopop\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/slowdown-proxy-covid-19-changing-landscapes\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-daily-emissions_5_Forest_Fires\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/ceos-co2-flux-budgets\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/eis_fire_fireline\n", + "Request was successful. 
Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_6A_Landfills_Municipal\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/houston-landcover\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/frp-max-thomasfire\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/social-vulnerability-index-minority\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/grdi-imr-raster\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/landsat-c2l2-sr-lakes-lake-balaton\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/modis-fire-anomalies-diff-covid-19-changing-landscape\n", + "An error occurred for collectionId landsat-nighttime-thermal: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/landsat-nighttime-thermal\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_1B2b_Natural_Gas_Transmission\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-global-da-totalprecip\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_6B_Wastewater_Treatment_Industrial\n", + "An error occurred for collectionId hls-bais2: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/hls-bais2\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-tws-anomaly\n", + "Request was successful. 
Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_1B2a_Petroleum\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-monthly-emissions_4F_Field_Burning\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/CMIP585-winter-median-ta\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_5_Forest_Fires\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/grdi-vnl-raster\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/snow-projections-diff-245\n", + "An error occurred for collectionId campfire-nlcd: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/campfire-nlcd\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_1B2b_Natural_Gas_Distribution\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/togo-agriculture-covid-19\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/snow-projections-diff-585\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/houston-ndvi\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/eis_fire_perimeter\n", + "Request was successful. 
Find the updated collection at https://dev.delta-backend.com/api/stac/collections/CMIP245-winter-median-ta\n", + "An error occurred for collectionId campfire-albedo-wsa-diff: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/campfire-albedo-wsa-diff\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/houston-lst-night\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/co2-diff\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/blue-tarp-detection\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-etsuppression\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_1B1a_Coal_Mining_Surface\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/caldor-fire-behavior\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-tvegsuppression\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/modis-annual-lai-2003-2020\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_1A_Combustion_Mobile\n", + "Updating hls-s30-002-ej-reprocessed failed. An error occurred during the request: 422 Client Error: Unprocessable Entity for url: https://dev.delta-backend.com/api/ingest/collections\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-monthly-emissions_4B_Manure_Management\n", + "Request was successful. 
Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-global-da-streamflow\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/pzd-anomaly-covid-19\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/houston-urbanization\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/social-vulnerability-index-housing\n", + "An error occurred for collectionId modis-lst-day-diff-2015-2022: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/modis-lst-day-diff-2015-2022\n", + "An error occurred for collectionId hls-ndvi: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/hls-ndvi\n", + "An error occurred for collectionId modis_diff: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/modis_diff\n", + "An error occurred for collectionId landsat-c2l2-sr-lakes-vanern: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/landsat-c2l2-sr-lakes-vanern\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-global-da-gws\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/no2-monthly-diff\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/CMIP245-winter-median-pr\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_1A_Combustion_Stationary\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/social-vulnerability-index-socioeconomic-nopop\n", + "Request was successful. 
Find the updated collection at https://dev.delta-backend.com/api/stac/collections/landsat-c2l2-sr-antarctic-glaciers-thwaites\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/bangladesh-landcover-2001-2020\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/mtbs-burn-severity\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_4C_Rice_Cultivation\n", + "An error occurred for collectionId landsat-c2l2-sr-lakes-lake-biwa: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/landsat-c2l2-sr-lakes-lake-biwa\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-monthly-emissions_1B2a_Petroleum\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/MO_NPP_npp_vgpm\n", + "An error occurred for collectionId fldas-soil-moisture-anomalies: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/fldas-soil-moisture-anomalies\n", + "An error occurred for collectionId modis-ndvi-diff-2015-2022: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/modis-ndvi-diff-2015-2022\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_1B1a_Abandoned_Coal\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/no2-monthly\n", + "An error occurred for collectionId oco2-geos-l3-daily: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/oco2-geos-l3-daily\n", + "Request was successful. 
Find the updated collection at https://dev.delta-backend.com/api/stac/collections/social-vulnerability-index-housing-nopop\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/lis-tws-trend\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/sport-lis-vsm0_100cm-percentile\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-monthly-emissions_4C_Rice_Cultivation\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/social-vulnerability-index-minority-nopop\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/social-vulnerability-index-household\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/grdi-filled-missing-values-count\n", + "An error occurred for collectionId hls-swir-falsecolor-composite: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/hls-swir-falsecolor-composite\n", + "An error occurred for collectionId combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk_DEMO\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/grdi-v1-built\n", + "Request was successful. Find the updated collection at https://dev.delta-backend.com/api/stac/collections/EPA-annual-emissions_4F_Field_Burning\n", + "An error occurred for collectionId campfire-lst-day-diff: 404 Client Error: Not Found for url: https://dev.delta-backend.com/api/stac/collections/campfire-lst-day-diff\n", + "Request was successful. 
Find the updated collection at https://dev.delta-backend.com/api/stac/collections/CMIP585-winter-median-pr\n" + ] + } + ], + "source": [ + "for item in file_paths_and_collection_ids:\n", + " collection_id = item[\"collectionId\"]\n", + " file_path = item[\"filePath\"]\n", + "\n", + " if VEDA_STAC_API == dev_endpoint:\n", + " url = f\"{VEDA_STAC_API}api/stac/collections/{collection_id}\"\n", + " elif VEDA_STAC_API == staging_endpoint:\n", + " url = f\"{VEDA_STAC_API}collections/{collection_id}\"\n", + "\n", + " try:\n", + " response = requests.get(url, headers=headers)\n", + " response.raise_for_status()\n", + " json_response = response.json()\n", + "\n", + " retrieved_summaries = json_response.get(\"summaries\", {})\n", + " retrieved_links = json_response.get(\"links\", {})\n", + "\n", + " with open(file_path, \"r\", encoding=\"utf-8\") as file:\n", + " collection = json.load(file)\n", + "\n", + " existing_summaries = collection.get(\"summaries\", {})\n", + " existing_links = collection.get(\"links\", {})\n", + "\n", + " collection[\"summaries\"] = merge_summaries(\n", + " existing_summaries, retrieved_summaries\n", + " )\n", + " collection[\"links\"] = retain_external_links(existing_links, retrieved_links)\n", + "\n", + " # Publish the updated collection to the target ingestion `api/ingest/collections` endpoint\n", + " post_reconciled_collection(collection, collection_id)\n", + "\n", + " except requests.RequestException as e:\n", + " print(f\"An error occurred for collectionId {collection_id}: {e}\")\n", + " except Exception as e:\n", + " print(f\"An unexpected error occurred for collectionId {collection_id}: {e}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", +
"version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}