name: dispatch job

on:
  workflow_dispatch:
    inputs:
      repo:
        description: 'The HTTPS GitHub URL for the recipe feedstock'
        required: true
      ref:
        description: 'The tag or branch to target in your recipe repo'
        required: true
        default: 'main'
      feedstock_subdir:
        description: 'The subdir of the feedstock directory in the repo'
        required: true
        default: 'feedstock'
      spark_params:
        description: 'Space-delimited --conf values: https://docs.aws.amazon.com/emr/latest/EMR-Serverless-UserGuide/jobs-spark.html'
        required: true
        default: '--conf spark.executor.cores=16 --conf spark.executor.memory=60G --conf spark.executor.memoryOverhead=60G --conf spark.driver.memory=10G --conf spark.driver.memoryOverhead=4G --conf spark.shuffle.file.buffer=64k --conf spark.default.parallelism=1280 --conf spark.emr-serverless.executor.disk=200G --conf spark.hadoop.hive.metastore.client.factory.class=com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory --conf spark.emr-serverless.driverEnv.JAVA_HOME=/usr/lib/jvm/java-17-amazon-corretto.x86_64/ --conf spark.executorEnv.JAVA_HOME=/usr/lib/jvm/java-17-amazon-corretto.x86_64/'
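        # A lighter-weight example override (hypothetical sizing, not a recommendation);
        # any spark.* property EMR Serverless accepts can be passed the same way:
        #   --conf spark.executor.cores=4 --conf spark.executor.memory=16G --conf spark.driver.memory=4G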
      job_name:
        description: 'Name the EMR job'
        required: true

permissions:
  id-token: write # This is required for requesting the JWT
  contents: read # This is required for actions/checkout
jobs:
  name-job:
    runs-on: ubuntu-latest
    outputs:
      repo_name: ${{ steps.string_manipulation.outputs.result }}
    steps:
      - name: manipulate strings
        id: string_manipulation
        run: |
          repo_name=$(basename -s .git "${{ github.event.inputs.repo }}")
          echo "result=$repo_name" >> "$GITHUB_OUTPUT"

  run-job:
    if: contains('["ranchodeluxe","abarciauskas-bgse","norlandrhagen","sharkinsspatial","moradology","thodson-usgs"]', github.actor)
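    # note: contains() does plain substring matching against the JSON-style string
    # above, so an actor whose login is a substring of a listed name would also pass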
    name: kickoff job ${{ needs.name-job.outputs.repo_name }}@${{ github.event.inputs.ref }}
    needs: name-job
    environment: veda-smce
    outputs:
      job_name: ${{ steps.report_ids.outputs.job_name }}
      job_id: ${{ steps.report_ids.outputs.job_id }}
    runs-on: ubuntu-latest
    steps:
      - name: checkout repository
        uses: actions/checkout@v3
      - name: configure aws credentials
        uses: aws-actions/configure-aws-credentials@v3
        with:
          role-to-assume: arn:aws:iam::444055461661:role/github-actions-role-eodc
          role-session-name: veda-pforge-run-job
          role-duration-seconds: 3600
          aws-region: us-west-2
      - name: set up python 3.10 and cache pip deps
        uses: actions/setup-python@v3
        with:
          python-version: '3.10'
          cache: 'pip' # caching pip dependencies
      - run: pip install -r .github/workflows/requirements.txt
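      # requirements.txt is not shown in this file; it is assumed to pin what
      # submit_spark_job.py imports (e.g. boto3 for the EMR Serverless API)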
      - name: echo inputs to user
        run: |
          echo "Manually triggered workflow:" \
            "${{ github.event.inputs.repo }}" \
            "${{ github.event.inputs.ref }}" \
            "${{ github.event.inputs.feedstock_subdir }}" \
            "${{ github.event.inputs.spark_params }}"
      - name: install zip
        uses: montudor/action-zip@v1
      - name: submit job to emr serverless
        id: executejob
        continue-on-error: true
        run: |
          # TODO: make submit_spark_job.py or some other config.py checkout, build env and package on s3
          # before submission
          python .github/workflows/submit_spark_job.py \
            --name=${{ github.event.inputs.job_name }} \
            --application-id="00firgpmjusj5e0l" \
            --execution-role-arn="arn:aws:iam::444055461661:role/veda-data-reader-dev" \
            --entry-point="s3://veda-pforge-emr-input-scripts-v4/runwrapper.py" \
            --entry-point-arguments="${{ github.event.inputs.repo }} ${{ github.event.inputs.ref }} ${{ github.event.inputs.feedstock_subdir }}" \
            --spark-submit-parameters="${{ github.event.inputs.spark_params }}"
        env:
          REPO: ${{ github.event.inputs.repo }}
          REF: ${{ github.event.inputs.ref }}
          FEEDSTOCK_SUBDIR: ${{ github.event.inputs.feedstock_subdir }}
          SPARK_PARAMS: ${{ github.event.inputs.spark_params }}
          JOB_NAME: ${{ github.event.inputs.job_name }}
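      # submit_spark_job.py itself is not in this file; it is assumed to wrap the
      # boto3 EMR Serverless client, roughly:
      #   boto3.client("emr-serverless").start_job_run(
      #       name=..., applicationId=..., executionRoleArn=...,
      #       jobDriver={"sparkSubmit": {"entryPoint": ...,
      #                                  "entryPointArguments": [...],
      #                                  "sparkSubmitParameters": ...}})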
      - name: cleanup if submission failed
        if: steps.executejob.outcome == 'failure'
        run: |
          echo "The previous command failed. Running cleanup logic..."
          # force GH action to show failed result
          exit 128
      - name: echo job metadata
        id: report_ids
        run: |
          echo '############ CW DASHBOARD ################'
          echo "https://us-west-2.console.aws.amazon.com/cloudwatch/home?region=us-west-2#dashboards/dashboard/veda-pangeo-forge-emr-jobs"
          echo '############ JOB ID and JOB DASHBOARD ################'
          python .github/workflows/submit_spark_job.py \
            --name="whatever" \
            --application-id="00firgpmjusj5e0l" \
            --execution-role-arn="arn:aws:iam::444055461661:role/veda-data-reader-dev" \
            --entry-point="s3://veda-pforge-emr-input-scripts-v4/runwrapper.py" \
            --workflow="getjob"

  # monitor-job:
  #   runs-on: ubuntu-latest
  #   name: monitor job ${{ needs.name-job.outputs.repo_name }}@${{ github.event.inputs.ref }}
  #   environment: veda-smce
  #   needs: [name-job, run-job]
  #   steps:
  #     - name: Configure AWS credentials
  #       uses: aws-actions/configure-aws-credentials@v3
  #       with:
  #         role-to-assume: arn:aws:iam::444055461661:role/github-actions-role-eodc
  #         role-session-name: veda-pforge-monitor-job
  #         role-duration-seconds: 43200 # note this has to match our timeout-minutes below for monitoring
  #         aws-region: us-west-2