NASA-IMPACT · botanical · Oct 31, 2024 · Oct 31, 2024 · Nov 4, 2024 · Nov 4, 2024
@@ -1,31 +1,36 @@
 # This GitHub Actions workflow automates the process of
 # publishing dataset collections to a staging environment
-# and creating a pull request (PR) in the veda-config repository
-# with the dataset configuration.
 # It is triggered by a pull request to the main branch
 # that modifies any files within the ingestion-data/dataset-config/ directory
 # The workflow includes steps to
 #   - publish the datasets,
-#   - create a PR in veda-config repository,
 #   - constantly updates the status of the workflow in the PR comment
 
-name: Publish collection to staging and create dataset config PR
+name: Publish collection to staging
 
 on:
   pull_request:
-    branches:
-      - main
+    branches: ['main', 'feat/automated-staging-publish']
-    branches: ['main', 'feat/automated-staging-publish']
+    branches: ['main']
-    branches: ['main', 'feat/automated-staging-publish']
+    branches: ['main']
+    # `closed` has to be listed explicitly: without `types`, pull_request only
+    # fires for opened, synchronize and reopened, so the `closed` + merged
+    # condition on the publish-to-prod-on-pr-merge job could never be true
+    types: [opened, synchronize, reopened, closed]
     paths:
       # Run the workflow only if files inside this path are updated
-      - ingestion-data/dataset-config/*
+      # - ingestion-data/staging/dataset-config/*
+      - ingestion-data/testing/dataset-config/*
+
+  push:
+    branches:
+      - main
+
+permissions:
+  pull-requests: write
+  contents: read
 
 jobs:
   dataset-publication-and-configuration:
-    permissions:
-      pull-requests: write
-      contents: read
+    if: ${{ github.event_name == 'pull_request' && (github.event.action == 'synchronize' || github.event.action == 'opened') }}  # skip push events and any other pull_request action
     runs-on: ubuntu-latest
-
+    environment: staging
+    outputs:  # read by create-mdx-files through needs.<job>.outputs.publishedCollections
+      publishedCollections: ${{ steps.publish-collections.outputs.success_collections }}
     steps:
       - uses: actions/checkout@v4
 
@@ -38,9 +43,8 @@ jobs:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
           WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
-          body='### Workflow Status
-          **Starting workflow...** [View action run]($WORKFLOW_URL)
-          '
+          body="### Workflow Status
+          **Starting workflow...** [View action run]($WORKFLOW_URL)"
 
           # Get the PR number
           PR_NUMBER=${{ github.event.pull_request.number }}
@@ -61,21 +65,30 @@ jobs:
 
           echo "COMMENT_ID=$COMMENT_ID" >> $GITHUB_OUTPUT
 
-      # Find only the updated files (file that differ from base)
+      # Find only the newly added files
       # Only .json files
       # The files are outputted to GITHUB_OUTPUT, which can be used in subsequent steps
-      - name: Get updated files
+      - name: Get newly added files
         id: changed-files
-        uses: tj-actions/changed-files@v44
+        uses: tj-actions/changed-files@v45  # NOTE(review): third-party action referenced by a mutable tag; consider pinning to a full commit SHA
         with:
           files: |
             **.json
+      # Log every path from the changed-files `added_files` output, one per line
+      # (the value is split on whitespace by the unquoted expansion)
+      - name: List all newly added files
+        env:
+          ADDED_FILES: ${{ steps.changed-files.outputs.added_files }}
+        run: |
+          for added_file in ${ADDED_FILES}; do
+            echo "${added_file} was added"
+          done
+
       # Uses service client creds to get token
       # No username/password needed
       - name: Get auth token
         id: get-token
         run: |
           response=$(curl -X POST \
             ${{ vars.STAGING_COGNITO_DOMAIN }}/oauth2/token \
             -H "Content-Type: application/x-www-form-urlencoded" \
@@ -92,10 +105,10 @@ jobs:
       # Used by other steps
       # If none of the requests are successful, workflow fails
       # Updates the PR comment with status of collection publication
-      - name: Publish all updated collections
+      - name: Publish all newly added collections to staging
         id: publish-collections
         env:
-          ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
+          ADDED_FILES: ${{ steps.changed-files.outputs.added_files }}
           WORKFLOWS_URL: ${{ vars.STAGING_WORKFLOWS_URL }}
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           AUTH_TOKEN: ${{ steps.get-token.outputs.ACCESS_TOKEN }}
@@ -120,7 +133,7 @@ jobs:
           status_message='### Collection Publication Status
           '
 
-          for file in "${ALL_CHANGED_FILES[@]}"; do
+          # ADDED_FILES is a plain whitespace-separated string, not a bash array:
+          # "${ADDED_FILES[@]}" would expand to ONE word holding every path, so
+          # the -f check below would fail whenever more than one file is added.
+          # Unquoted expansion word-splits it, same as the listing step above.
+          for file in ${ADDED_FILES}; do
             echo $file
             if [ -f "$file" ]; then
               dataset_config=$(jq '.' "$file")
@@ -143,7 +156,7 @@ jobs:
                 all_failed=false
               else
                 echo "$collection_id failed to publish ❌"
-                status_message+="- **$collection_id**: Failed to publish ❌
+                status_message+="- **$collection_id**: Failed to publish. Error code $status_code. ❌
                 "
               fi
             else
@@ -168,115 +181,61 @@ jobs:
           $status_message"
           gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
 
-      # Update PR comment
-      - name: Update PR comment for PR creation
-        if: success()
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
-        run: |
-          CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
-          UPDATED_BODY="$CURRENT_BODY
-
-          **Creating a PR in veda-config...**"
-          gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
-
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
           python-version: '3.9'
           cache: 'pip'
 
-      # Creates a slim dataset mdx file for each collection based on the dataset config json
-      - name: Create dataset mdx for given collections
-        env:
-          PUBLISHED_COLLECTION_FILES: ${{ steps.publish-collections.outputs.success_collections }}
-        run: |
-          pip install -r scripts/requirements.txt
-          for file in "${PUBLISHED_COLLECTION_FILES[@]}"
-          do
-            python3 scripts/mdx.py "$file"
-          done
-
-      - name: Set up Git
-        run: |
-          git config --global user.name "github-actions[bot]"
-          git config --global user.email "github-actions[bot]@users.noreply.github.com"
-
-      - name: Clone `veda-config`
-        env:
-          VEDA_CONFIG_GH_TOKEN: ${{ secrets.VEDA_CONFIG_GH_TOKEN }}
-        run: git clone https://${{ env.VEDA_CONFIG_GH_TOKEN }}@github.com/${{ vars.VEDA_CONFIG_REPO_ORG }}/${{ vars.VEDA_CONFIG_REPO_NAME }}.git
-
-      # Creates a PR in veda-config with the following changes:
-      # 1. the mdx files for all published collections
-      # 2. updates the stac/raster urls in .env file
-      # This step needs a GH_TOKEN that has permissions to create a PR in veda-config
-      - name: Create PR with changes
-        id: create-pr
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          VEDA_CONFIG_GH_TOKEN: ${{ secrets.VEDA_CONFIG_GH_TOKEN }}
-          COMMENT_ID: ${{ steps.publish-collections.outputs.COMMENT_ID }}
-          PUBLISHED_COLLECTION_FILES: ${{ steps.publish-collections.outputs.success_collections }}
-        run: |
-          files_string=$(IFS=$'\n'; echo "${PUBLISHED_COLLECTION_FILES[*]}")
-          hash=$(echo -n "$files_string" | md5sum | cut -d ' ' -f 1)
-          NEW_BRANCH="add-dataset-$hash"
-          cd ${{ vars.VEDA_CONFIG_REPO_NAME }}
-          git fetch origin
-          if git ls-remote --exit-code --heads origin $NEW_BRANCH; then
-            git push origin --delete $NEW_BRANCH
-          fi
-          git checkout -b $NEW_BRANCH
-
-          # Update the env vars to staging based on env vars
-          sed -i "s|${{ vars.ENV_FROM }}|${{ vars.ENV_TO }}|g" .env
-          cp -r ../datasets/* datasets/
-          git add .
-          git commit -m "Add dataset(s)"
-          git push origin $NEW_BRANCH
-          PR_URL=$(GITHUB_TOKEN=$VEDA_CONFIG_GH_TOKEN gh pr create -H $NEW_BRANCH -B develop --title 'Add dataset [Automated workflow]' --body-file <(echo "Add datasets (Automatically created by Github action)"))
-
-          echo "PR_URL=$PR_URL" >> $GITHUB_OUTPUT
-          echo "PR creation succeeded"
-
-      # Updates the comment with a link to the above PR
-      - name: Update PR comment with PR creation result
-        if: success()
+      # If the workflow fails at any point, the PR comment will be updated
+      - name: Update PR comment on overall workflow failure
+        if: failure()
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
         run: |
-          PR_URL=${{ steps.create-pr.outputs.PR_URL }}
+          WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
           CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
           UPDATED_BODY="$CURRENT_BODY
 
-          **A PR has been created with the dataset configuration: 🗺️ [PR link]($PR_URL)**"
+          ** ❌ The workflow run failed. [See logs here]($WORKFLOW_URL)**"
           gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
 
-      - name: Update PR comment on PR creation failure
-        if: failure() && steps.create-pr.outcome == 'failure'
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
-        run: |
-          CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
-          UPDATED_BODY="$CURRENT_BODY
+  create-mdx-files:
+    runs-on: ubuntu-latest
+    needs: dataset-publication-and-configuration
+    steps:
+      # Hand the job output to the script through an environment variable rather
+      # than interpolating it with ${{ }}, so its contents (derived from files in
+      # the pull request) are never parsed as shell code
+      - name: Use output from dataset-publication-and-configuration
+        env:
+          PUBLISHED_COLLECTIONS: ${{ needs.dataset-publication-and-configuration.outputs.publishedCollections }}
 
-          **Failed ❌ to create a PR with the dataset configuration. 😔 **"
-          gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
+        run: |
+          echo "The output from the previous step is: $PUBLISHED_COLLECTIONS"
 
-      # If the workflow fails at any point, the PR comment will be updated
-      - name: Update PR comment on overall workflow failure
-        if: failure() && steps.create-pr.outcome != 'failure'
+      # Creates a slim dataset mdx file for each collection based on the dataset config json
+      - name: Create dataset mdx for given collections
         env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
+          PUBLISHED_COLLECTION_FILES: ${{ needs.dataset-publication-and-configuration.outputs.publishedCollections }}
+        run: echo "NO-OP step"
+        # run: |
+        #   pip install -r scripts/requirements.txt
+        #   for file in "${PUBLISHED_COLLECTION_FILES[@]}"
+        #   do
+        #     python3 scripts/mdx.py "$file"
+        #   done
+
+  open-veda-config-pr-notification:
+    runs-on: ubuntu-latest
+    needs: create-mdx-files  # starts only after the create-mdx-files job has finished successfully
+    steps:
+      - name: Notify user to open a new PR  # only writes a reminder to the job log
         run: |
-          WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
-          CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
-          UPDATED_BODY="$CURRENT_BODY
+          echo "The action is complete. Please open a new Pull Request in veda-config for the changes."
 
-          ** ❌ The workflow run failed. [See logs here]($WORKFLOW_URL)**"
-          gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
+  # Placeholder for production publication: runs only for a pull_request
+  # `closed` event whose pull request was merged, and currently just echoes.
+  # NOTE(review): the workflow's pull_request trigger must include the `closed`
+  # activity type for this condition to ever be true - confirm the `on:` block.
+  publish-to-prod-on-pr-merge:
+    if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        # Same major version as the checkout in dataset-publication-and-configuration
+        uses: actions/checkout@v4
+
+      - name: Publish to production on PR merge
+        run: echo "NO-OP. This step runs when a PR is merged."
@@ -0,0 +1,31 @@
+{
+    "collection": "hls-swir-falsecolor-composite-TEST",
+    "title": "HLS SWIR FalseColor Composite",
+    "spatial_extent": {
+        "xmin": -156.75,
+        "ymin": 20.80,
+        "xmax": -156.55,
+        "ymax": 20.94
+    },
+    "temporal_extent": {
+        "startdate": "2023-08-08T00:00:00Z",
+        "enddate": "2023-08-08T23:59:59Z"
+    },
+    "data_type": "cog",
+    "license": "CC0-1.0",
+    "description": "HLS falsecolor composite imagery using Bands 12, 8A, and 4.",
+    "is_periodic": false,
+    "time_density": "day",
+    "sample_files": [
+        "s3://veda-data-store-staging/maui-fire/Lahaina_HLS_2023-08-08_SWIR_falsecolor_cog.tif",
+        "s3://veda-data-store-staging/maui-fire/Lahaina_HLS_2023-08-13_SWIR_falsecolor_cog.tif"
+    ],
+    "discovery_items": [
+        {
+            "discovery": "s3",
+            "prefix": "maui-fire/",
+            "bucket": "veda-data-store-staging",
+            "filename_regex": "(.*)SWIR_falsecolor(.*)\\.tif$"
+        }
+    ]
+}