-
Notifications
You must be signed in to change notification settings - Fork 2
133 lines (123 loc) · 5.33 KB
/
job-runner.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# Manually-dispatched workflow that submits a recipe feedstock as a Spark job
# to EMR Serverless. All inputs are supplied by the human triggering the run.
name: dispatch job
on:
  workflow_dispatch:
    inputs:
      repo:
        description: 'The https github url for the recipe feedstock'
        required: true
      ref:
        description: 'The tag or branch to target in your recipe repo'
        required: true
        default: 'main'
      feedstock_subdir:
        description: 'The subdir of the feedstock directory in the repo'
        required: true
        default: 'feedstock'
      parallelism:
        description: 'Number of partitions to divide the Spark RDD into (usually equals [num-of-executors]*[num-of-vcpus])'
        required: true
        default: '1280'
      job_name:
        description: 'Name the EMR job'
        required: true
permissions:
  id-token: write # This is required for requesting the JWT
  contents: read # This is required for actions/checkout
jobs:
  # Derives a short repo name (e.g. "my-feedstock" from ".../my-feedstock.git")
  # so downstream jobs can use it in their display names.
  name-job:
    runs-on: ubuntu-latest
    outputs:
      repo_name: ${{ steps.string_manipulation.outputs.result }}
    steps:
      - name: manipulate strings
        id: string_manipulation
        # Pass the user-supplied URL through an env var instead of interpolating
        # it into the script body — hardens against shell injection from
        # workflow_dispatch inputs.
        env:
          REPO_URL: ${{ github.event.inputs.repo }}
        run: |
          repo_name=$(basename -s .git "$REPO_URL")
          echo "result=$repo_name" >> $GITHUB_OUTPUT
run-job:
if: contains('["ranchodeluxe","abarciauskas-bgse", "norlandrhagen", "sharkinsspatial", "moradology", "thodson-usgs"]', github.actor)
name: kickoff job ${{ needs.name-job.outputs.repo_name }}@${{ github.event.inputs.ref }}
needs: name-job
environment: veda-smce
outputs:
job_name: ${{ steps.report_ids.outputs.job_name }}
job_id: ${{ steps.report_ids.outputs.job_id }}
runs-on: ubuntu-latest
steps:
- name: checkout repository
uses: actions/checkout@v3
- name: configure aws credentials
uses: aws-actions/configure-aws-credentials@v3
with:
role-to-assume: arn:aws:iam::444055461661:role/github-actions-role-eodc
role-session-name: veda-pforge-run-job
role-duration-seconds: 3600
aws-region: us-west-2
- name: set up python 3.10
uses: actions/setup-python@v3
with:
python-version: '3.10'
- name: echo inputs to user
run: |
echo "Manually triggered workflow": \
${{ github.event.inputs.repo }} \
${{ github.event.inputs.ref }} \
${{ github.event.inputs.feedstock_subdir}} \
${{ github.event.inputs.parallelism }}
- name: submit job to EMR serverless
id: executejob
continue-on-error: true
run: |
# TODO: make submit_spark_job.py or some other config.py checkout, build env and package on s3
# before submission
pip install boto3
python .github/workflows/submit_spark_job.py \
--name=${{ github.event.inputs.job_name }} \
--application-id="00firgpmjusj5e0l" \
--execution-role-arn="arn:aws:iam::444055461661:role/veda-data-reader-dev" \
--entry-point="s3://veda-pforge-emr-input-scripts-v4/runwrapper.py" \
--entry-point-arguments="${{ github.event.inputs.repo }} ${{ github.event.inputs.ref }} ${{ github.event.inputs.feedstock_subdir }}" \
--spark-submit-parameters="--conf spark.executor.cores=16 --conf spark.executor.memory=60G --conf spark.executor.memoryOverhead=60G --conf spark.driver.memory=10G --conf spark.driver.memoryOverhead=4G --conf spark.shuffle.file.buffer=64k --conf spark.default.parallelism=${{ github.event.inputs.ref }} --conf spark.emr-serverless.executor.disk=200G"
env:
REPO: ${{ github.event.inputs.repo }}
REF: ${{ github.event.inputs.ref }}
FEEDSTOCK_SUBDIR: ${{ github.event.inputs.feedstock_subdir }}
PARALLELISM_OPTION: ${{ github.event.inputs.parallelism }}
JOB_NAME: ${{ github.event.inputs.job_name }}
- name: cleanup if submission failed
if: steps.executejob.outcome == 'failure'
run: |
echo "The previous command failed. Running cleanup logic..."
# force GH action to show failed result
exit 128
#
# - name: echo JobID, JobName, FlinkDashboard to user
# id: report_ids
# run: |
# # TODO: we also need to report historyserver URL and flink dashboard URL
# # but this also requires us to think how we're going to have a thin
# # layer of authentication around these services so they aren't totally public
# echo '############ RECIPE JOB NAME ################'
# echo $RECIPE_JOB_NAME
# echo '############ FLINK JOB NAME ################'
# echo $JOB_NAME
# echo "job_name=$JOB_NAME" >> $GITHUB_OUTPUT
# echo '############ JOB ID ################'
# echo $JOB_ID
# echo "job_id=$JOB_ID" >> $GITHUB_OUTPUT
# echo '############ FLINK DASHBOARD ################'
# echo $FLINK_DASH
# echo "flink_dash=$FLINK_DASH" >> $GITHUB_OUTPUT
# monitor-job:
# runs-on: ubuntu-latest
# name: monitor job ${{ needs.name-job.outputs.repo_name }}@${{ github.event.inputs.ref }}
# environment: veda-smce
# needs: [name-job, run-job]
# steps:
# - name: Configure AWS credentials
# uses: aws-actions/configure-aws-credentials@v3
# with:
# role-to-assume: arn:aws:iam::444055461661:role/github-actions-role-eodc
# role-session-name: veda-pforge-monitor-job
# role-duration-seconds: 43200 # note this has to match our timeout-minutes below for monitoring
# aws-region: us-west-2