feat: add automated publishing workflow with no-ops (#185)

* feat: add automated publishing workflow with no-ops * fix: caching * Update .github/workflows/pr.yml Co-authored-by: Alexandra Kirk <[email protected]> * Update .github/workflows/pr.yml Co-authored-by: Alexandra Kirk <[email protected]> * Update .github/workflows/pr.yml Co-authored-by: Alexandra Kirk <[email protected]> * Update .github/workflows/pr.yml Co-authored-by: Alexandra Kirk <[email protected]> * Update .github/workflows/pr.yml Co-authored-by: Alexandra Kirk <[email protected]> --------- Co-authored-by: Alexandra Kirk <[email protected]>
NASA-IMPACT · Nov 20, 2024 · 4d38db5 · 4d38db5
1 parent 951f9f0
commit 4d38db5
Show file tree

Hide file tree

Showing 2 changed files with 275 additions and 0 deletions.
diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml
@@ -0,0 +1,244 @@
+# This GitHub Actions workflow automates the process of
+# publishing dataset collections to a staging environment.
+# It is triggered by a pull request to the main branch
+# that modifies any files within the ingestion-data/testing/dataset-config/ directory.
+# The workflow includes steps to
+#   - publish the datasets,
+#   - continuously update the status of the workflow in the PR comment
+
+name: Publish collection to staging
+
+on:
+  pull_request:
+    branches: ['main']
+    # `closed` is not one of the default pull_request activity types
+    # (opened, synchronize, reopened). It has to be listed explicitly so that
+    # jobs gated on `github.event.action == 'closed'` can run on PR merge.
+    types:
+      - opened
+      - synchronize
+      - reopened
+      - closed
+    paths:
+      # Run the workflow only if files inside this path are updated
+      # - ingestion-data/staging/dataset-config/*
+      - ingestion-data/testing/dataset-config/*
+
+  push:
+    branches:
+      - main
+
+# Token permissions for this workflow:
+#   - pull-requests: write -> create/edit the status comment on the PR
+#   - contents: read       -> check out the repository
+permissions:
+  pull-requests: write
+  contents: read
+
+jobs:
+  # Publishes the dataset configs added in the pull request.
+  # The job output `publishedCollections` forwards the `success_collections`
+  # output of the `publish-collections` step to dependent jobs.
+  publish-new-datasets:
+    runs-on: ubuntu-latest
+    environment: staging
+    # Run only for pull requests that were just opened or received new commits
+    if: >-
+      github.event_name == 'pull_request' &&
+      (github.event.action == 'opened' || github.event.action == 'synchronize')
+    outputs:
+      publishedCollections: ${{ steps.publish-collections.outputs.success_collections }}
+    steps:
+      - uses: actions/checkout@v4
+
+      # Keeps a single "### Workflow Status" comment on the PR:
+      # the comment is created on the first run and overwritten on later runs,
+      # so the PR does not collect one status comment per run.
+      # The comment id is exposed as the step output COMMENT_ID.
+      - name: Initialize PR comment with workflow start
+        id: init-comment
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          PR_NUMBER=${{ github.event.pull_request.number }}
+          WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+          pr_comments="repos/${{ github.repository }}/issues/${PR_NUMBER}/comments"
+
+          body="### Workflow Status
+          **Starting workflow...** [View action run]($WORKFLOW_URL)"
+
+          # Id of the first comment that already carries the status heading;
+          # empty when the PR has no such comment yet
+          COMMENT_ID=$(gh api "$pr_comments" \
+            --jq '[.[] | select(.body | contains("### Workflow Status")) | .id] | .[0] // empty')
+
+          if [ -n "$COMMENT_ID" ]; then
+            # Comment exists, overwrite it with the fresh status
+            gh api repos/${{ github.repository }}/issues/comments/$COMMENT_ID -X PATCH -f body="$body"
+          else
+            # No status comment yet, create one and remember its id
+            COMMENT_ID=$(gh api "$pr_comments" -f body="$body" --jq '.id')
+          fi
+
+          echo "COMMENT_ID=$COMMENT_ID" >> $GITHUB_OUTPUT
+
+      # Find only the newly added files
+      # Only .json files
+      # The files are outputted to GITHUB_OUTPUT, which can be used in subsequent steps
+      # (later steps in this job read `steps.changed-files.outputs.added_files`)
+      # NOTE(review): the `**.json` filter is not restricted to the
+      # dataset-config directory - confirm that is intended.
+      # NOTE(review): third-party action referenced by a mutable tag; consider
+      # pinning it to a full commit SHA.
+      - name: Get newly added files
+        id: changed-files
+        uses: tj-actions/changed-files@v45
+        with:
+          files: |
+            **.json
+
+      # Logs every added file reported by the changed-files step, one per line
+      - name: List all newly added files
+        env:
+          ADDED_FILES: ${{ steps.changed-files.outputs.added_files }}
+        run: |
+          # Left unquoted on purpose so the list is split on whitespace
+          for added in $ADDED_FILES; do
+            printf '%s was added\n' "$added"
+          done
+
+      # Uses service client creds to get token (client_credentials grant)
+      # No username/password needed
+      # The token is exposed as the step output ACCESS_TOKEN and is masked in logs.
+      # The step fails when the token endpoint does not return an access token.
+      - name: Get auth token
+        id: get-token
+        env:
+          # Passed through the environment so the secret is not interpolated
+          # into the script text
+          COGNITO_DOMAIN: ${{ vars.STAGING_COGNITO_DOMAIN }}
+          CLIENT_ID: ${{ vars.STAGING_CLIENT_ID }}
+          CLIENT_SECRET: ${{ secrets.STAGING_CLIENT_SECRET }}
+        run: |
+          response=$(curl -sS -X POST \
+            "$COGNITO_DOMAIN/oauth2/token" \
+            -H "Content-Type: application/x-www-form-urlencoded" \
+            -d "grant_type=client_credentials" \
+            -d "client_id=$CLIENT_ID" \
+            -d "client_secret=$CLIENT_SECRET"
+          )
+
+          # `// empty` turns a missing token into an empty string instead of
+          # the literal "null", which would pass later emptiness checks
+          access_token=$(echo "$response" | jq -r '.access_token // empty' 2>/dev/null || true)
+
+          if [ -z "$access_token" ]; then
+            echo "Failed to obtain an access token from the token endpoint"
+            exit 1
+          fi
+
+          # Mask the token before it can appear in any log output
+          echo "::add-mask::$access_token"
+          echo "ACCESS_TOKEN=$access_token" >> "$GITHUB_OUTPUT"
+
+      # Makes request to /dataset/publish endpoint, once per newly added file
+      # Outputs only files that were successfully published
+      # (comma-separated, step output `success_collections`); used by other steps
+      # If none of the requests are successful, workflow fails
+      # If the PR adds no files, nothing is published and the step succeeds
+      # Updates the PR comment with status of collection publication,
+      # including the case where every publication failed
+      - name: Publish all newly added collections to staging
+        id: publish-collections
+        env:
+          ADDED_FILES: ${{ steps.changed-files.outputs.added_files }}
+          WORKFLOWS_URL: ${{ vars.STAGING_WORKFLOWS_URL }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          AUTH_TOKEN: ${{ steps.get-token.outputs.ACCESS_TOKEN }}
+          COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
+        run: |
+          if [ -z "$WORKFLOWS_URL" ]; then
+            echo "WORKFLOWS_URL is not set"
+            exit 1
+          fi
+
+          if [ -z "$AUTH_TOKEN" ]; then
+            echo "AUTH_TOKEN is not set"
+            exit 1
+          fi
+
+          publish_url="${WORKFLOWS_URL%/}/dataset/publish"
+          comment_endpoint="/repos/${{ github.repository }}/issues/comments/$COMMENT_ID"
+
+          # Appends the given markdown to the PR status comment
+          append_to_comment() {
+            local current_body
+            current_body=$(gh api "$comment_endpoint" --jq '.body')
+            gh api -X PATCH "$comment_endpoint" -f body="${current_body}"$'\n\n'"$1"
+          }
+
+          # ADDED_FILES arrives as one whitespace-separated string, not a bash
+          # array; split it into a real array before iterating
+          files=( $ADDED_FILES )
+
+          if [ "${#files[@]}" -eq 0 ]; then
+            echo "No newly added files to publish"
+            append_to_comment "### Collection Publication Status"$'\n'"No newly added dataset config files were found in this pull request."
+            echo "success_collections=" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          # Track successful publications
+          all_failed=true
+          success_collections=()
+          status_message="### Collection Publication Status"$'\n'
+
+          for file in "${files[@]}"; do
+            echo "$file"
+            if [ ! -f "$file" ]; then
+              echo "File $file does not exist"
+              exit 1
+            fi
+
+            dataset_config=$(jq '.' "$file")
+            collection_id=$(jq -r '.collection' "$file")
+
+            # -w prints only the HTTP status code; the response body is written
+            # to response.txt. `|| true` records a transport error as a failed
+            # publication (code 000) instead of aborting the remaining files.
+            status_code=$(curl -s -w "%{http_code}" -o response.txt -X POST "$publish_url" \
+              -H "Content-Type: application/json" \
+              -H "Authorization: Bearer $AUTH_TOKEN" \
+              -d "$dataset_config" || true)
+
+            # Update status message based on response code
+            if [ "$status_code" = "200" ] || [ "$status_code" = "201" ]; then
+              echo "$collection_id successfully published ✅"
+              status_message+="- **$collection_id**: Successfully published ✅"$'\n'
+              success_collections+=("$file")
+              all_failed=false
+            else
+              echo "$collection_id failed to publish ❌"
+              status_message+="- **$collection_id**: Failed to publish. Error code $status_code. ❌"$'\n'
+            fi
+          done
+
+          # Update the PR comment before deciding the step result, so that
+          # per-collection failures are reported even when everything failed
+          append_to_comment "$status_message"
+
+          # Exit workflow if all the requests fail
+          if [ "$all_failed" = true ]; then
+            echo "All collections failed to publish."
+            exit 1
+          fi
+
+          # Output only successful collections to be used in subsequent steps
+          echo "success_collections=$(IFS=','; echo "${success_collections[*]}")" >> "$GITHUB_OUTPUT"
+
+      # Installs Python 3.9 on the runner
+      # (version is quoted so YAML keeps it a string rather than a float)
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.9'
+      # Caches the directory named by `env.pythonLocation`; the cache key
+      # changes whenever the contents of requirements.txt change.
+      # NOTE(review): `pythonLocation` is presumably exported by the
+      # setup-python step above - confirm.
+      # NOTE(review): no later step in this job visibly installs packages or
+      # runs Python - confirm these two steps are still needed here.
+      - uses: actions/cache@v4
+        with:
+          path: ${{ env.pythonLocation }}
+          key: ${{ env.pythonLocation }}-pip-${{ hashFiles('requirements.txt') }}
+
+      # If any earlier step of this job fails, a failure notice with a link to
+      # the run logs is appended to the PR status comment
+      - name: Update PR comment on overall workflow failure
+        if: failure()
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
+        run: |
+          # Without a comment id (e.g. the init step itself failed) there is
+          # no comment to update
+          if [ -z "$COMMENT_ID" ]; then
+            echo "No PR comment to update"
+            exit 0
+          fi
+
+          WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+          comment_endpoint="/repos/${{ github.repository }}/issues/comments/$COMMENT_ID"
+
+          CURRENT_BODY=$(gh api "$comment_endpoint" --jq '.body')
+          # No space after the opening `**`: with a space the markers are not
+          # treated as emphasis and render as literal asterisks
+          UPDATED_BODY="$CURRENT_BODY
+
+          **❌ The workflow run failed. [See logs here]($WORKFLOW_URL)**"
+          gh api -X PATCH "$comment_endpoint" -f body="$UPDATED_BODY"
+
+  # Runs after publish-new-datasets and receives the files it published
+  # through that job's `publishedCollections` output.
+  # The `needs.<job_id>` key must match the id of the job listed in `needs`.
+  create-mdx-files:
+    runs-on: ubuntu-latest
+    needs: publish-new-datasets
+    steps:
+      - name: Use output from publish-new-datasets
+        env:
+          # Passed via env so file names are not interpolated into the script
+          PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }}
+        run: |
+          echo "The output from the previous step is: $PUBLISHED_COLLECTION_FILES"
+
+      # Creates a slim dataset mdx file for each collection based on the dataset config json
+      # Currently a no-op; the intended implementation is kept below.
+      # PUBLISHED_COLLECTION_FILES is a comma-separated string, so it has to be
+      # split before it can be iterated.
+      - name: Create dataset mdx for given collections
+        env:
+          PUBLISHED_COLLECTION_FILES: ${{ needs.publish-new-datasets.outputs.publishedCollections }}
+        run: echo "NO-OP step"
+        # run: |
+        #   pip install -r scripts/requirements.txt
+        #   IFS=',' read -r -a files <<< "$PUBLISHED_COLLECTION_FILES"
+        #   for file in "${files[@]}"
+        #   do
+        #     python3 scripts/mdx.py "$file"
+        #   done
+
+  # Placeholder job: runs after create-mdx-files and only prints a message
+  open-veda-config-pr:
+    needs: create-mdx-files
+    runs-on: ubuntu-latest
+    steps:
+      - name: Open veda-config PR
+        run: >-
+          echo "NO-OP. Placeholder for future job that will open a Pull Request
+          in veda-config for a dashboard preview for the new/changed datasets."
+
+  # Placeholder job for publishing to production once a pull request is merged.
+  # Its condition only matches `pull_request` events with action `closed`, so
+  # the `closed` activity type must be enabled on the workflow trigger for
+  # this job to ever run.
+  publish-to-prod-on-pr-merge:
+    if: ${{ github.event_name == 'pull_request' && github.event.action == 'closed' && github.event.pull_request.merged == true }}
+    runs-on: ubuntu-latest
+    steps:
+      # Same checkout major version as the rest of this workflow
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Publish to production on PR merge
+        run: echo "NO-OP. This step runs when a PR is merged."
diff --git a/ingestion-data/testing/dataset-config/test.json b/ingestion-data/testing/dataset-config/test.json
@@ -0,0 +1,31 @@
+{
+    "collection": "hls-swir-falsecolor-composite-TEST",
+    "title": "HLS SWIR FalseColor Composite",
+    "spatial_extent": {
+        "xmin": -156.75,
+        "ymin": 20.80,
+        "xmax": -156.55,
+        "ymax": 20.94
+    },
+    "temporal_extent": {
+        "startdate": "2023-08-08T00:00:00Z",
+        "enddate": "2023-08-08T23:59:59Z"
+    },
+    "data_type": "cog",
+    "license": "CC0-1.0",
+    "description": "HLS falsecolor composite imagery using Bands 12, 8A, and 4.",
+    "is_periodic": false,
+    "time_density": "day",
+    "sample_files": [
+        "s3://veda-data-store-staging/maui-fire/Lahaina_HLS_2023-08-08_SWIR_falsecolor_cog.tif",
+        "s3://veda-data-store-staging/maui-fire/Lahaina_HLS_2023-08-13_SWIR_falsecolor_cog.tif"
+    ],
+    "discovery_items": [
+        {
+            "discovery": "s3",
+            "prefix": "maui-fire/",
+            "bucket": "veda-data-store-staging",
+            "filename_regex": "(.*)SWIR_falsecolor(.*).tif$"
+        }
+    ]
+}