# job-runner.yaml
name: dispatch job
on:
  workflow_dispatch:
    inputs:
      repo:
        description: 'The HTTPS GitHub URL for the recipe feedstock'
        required: true
      ref:
        description: 'The tag or branch to target in your recipe repo'
        required: true
        default: 'main'
      feedstock_subdir:
        description: 'The subdir of the feedstock directory in the repo'
        required: true
        default: 'feedstock'
      parallelism:
        description: 'Number of partitions to divide the Spark RDD into (usually equals [num-of-executors]*[num-of-vcpus])'
        required: true
        default: '1280'
      job_name:
        description: 'Name for the EMR job'
        required: true
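# For reference, a workflow_dispatch run with these inputs can be triggered from the
# GitHub CLI; the values below are illustrative placeholders, not defaults:
#
#   gh workflow run job-runner.yaml \
#     -f repo=https://github.com/<owner>/<feedstock-repo> \
#     -f ref=main \
#     -f feedstock_subdir=feedstock \
#     -f parallelism=1280 \
#     -f job_name=my-emr-job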
permissions:
  id-token: write # This is required for requesting the JWT
  contents: read  # This is required for actions/checkout
jobs:
  name-job:
    runs-on: ubuntu-latest
    outputs:
      repo_name: ${{ steps.string_manipulation.outputs.result }}
    steps:
      - name: manipulate strings
        id: string_manipulation
        run: |
          # e.g. "https://github.com/<owner>/my-feedstock.git" -> "my-feedstock"
          repo_name=$(basename -s .git "${{ github.event.inputs.repo }}")
          echo "result=$repo_name" >> $GITHUB_OUTPUT
  run-job:
    # allow-list of GitHub users permitted to dispatch this job
    if: contains('["ranchodeluxe","abarciauskas-bgse", "norlandrhagen", "sharkinsspatial", "moradology", "thodson-usgs"]', github.actor)
    name: kickoff job ${{ needs.name-job.outputs.repo_name }}@${{ github.event.inputs.ref }}
    needs: name-job
    environment: veda-smce
    outputs:
      # NOTE: the report_ids step that sets these is currently commented out below,
      # so these outputs will be empty until it is restored
      job_name: ${{ steps.report_ids.outputs.job_name }}
      job_id: ${{ steps.report_ids.outputs.job_id }}
    runs-on: ubuntu-latest
    steps:
      - name: checkout repository
        uses: actions/checkout@v3
      - name: configure aws credentials
        uses: aws-actions/configure-aws-credentials@v3
        with:
          role-to-assume: arn:aws:iam::444055461661:role/github-actions-role-eodc
          role-session-name: veda-pforge-run-job
          role-duration-seconds: 3600
          aws-region: us-west-2
      - name: set up python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: '3.10'
      - name: echo inputs to user
        run: |
          echo "Manually triggered workflow:" \
            ${{ github.event.inputs.repo }} \
            ${{ github.event.inputs.ref }} \
            ${{ github.event.inputs.feedstock_subdir }} \
            ${{ github.event.inputs.parallelism }}
      - name: pip install requirements
        run: |
          pip install boto3 tenacity
      - name: submit job to EMR serverless
        id: executejob
        continue-on-error: true
        run: |
          # TODO: make submit_spark_job.py or some other config.py checkout, build env and package on s3
          # before submission
          python .github/workflows/submit_spark_job.py \
            --name=${{ github.event.inputs.job_name }} \
            --application-id="00firgpmjusj5e0l" \
            --execution-role-arn="arn:aws:iam::444055461661:role/veda-data-reader-dev" \
            --entry-point="s3://veda-pforge-emr-input-scripts-v4/runwrapper.py" \
            --entry-point-arguments="${{ github.event.inputs.repo }} ${{ github.event.inputs.ref }} ${{ github.event.inputs.feedstock_subdir }}" \
            --spark-submit-parameters="--conf spark.executor.cores=16 --conf spark.executor.memory=60G --conf spark.executor.memoryOverhead=60G --conf spark.driver.memory=10G --conf spark.driver.memoryOverhead=4G --conf spark.shuffle.file.buffer=64k --conf spark.default.parallelism=${{ github.event.inputs.parallelism }} --conf spark.emr-serverless.executor.disk=200G"
        env:
          REPO: ${{ github.event.inputs.repo }}
          REF: ${{ github.event.inputs.ref }}
          FEEDSTOCK_SUBDIR: ${{ github.event.inputs.feedstock_subdir }}
          PARALLELISM_OPTION: ${{ github.event.inputs.parallelism }}
          JOB_NAME: ${{ github.event.inputs.job_name }}
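      # submit_spark_job.py is not shown here; a minimal sketch of what it is assumed
      # to do (wrap boto3's EMR Serverless start_job_run call, with the CLI flags above
      # mapped to its arguments) might look like:
      #
      #   import boto3
      #   client = boto3.client("emr-serverless", region_name="us-west-2")
      #   response = client.start_job_run(
      #       applicationId=args.application_id,
      #       executionRoleArn=args.execution_role_arn,
      #       name=args.name,
      #       jobDriver={
      #           "sparkSubmit": {
      #               "entryPoint": args.entry_point,
      #               "entryPointArguments": args.entry_point_arguments.split(),
      #               "sparkSubmitParameters": args.spark_submit_parameters,
      #           }
      #       },
      #   )
      #
      # (argument parsing, tenacity retries, and error handling omitted)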
      - name: cleanup if submission failed
        if: steps.executejob.outcome == 'failure'
        run: |
          echo "The previous command failed. Running cleanup logic..."
          # force GH action to show failed result
          exit 128
      #
      # - name: echo JobID, JobName, FlinkDashboard to user
      #   id: report_ids
      #   run: |
      #     # TODO: we also need to report historyserver URL and flink dashboard URL
      #     # but this also requires us to think how we're going to have a thin
      #     # layer of authentication around these services so they aren't totally public
      #     echo '############ RECIPE JOB NAME ################'
      #     echo $RECIPE_JOB_NAME
      #     echo '############ FLINK JOB NAME ################'
      #     echo $JOB_NAME
      #     echo "job_name=$JOB_NAME" >> $GITHUB_OUTPUT
      #     echo '############ JOB ID ################'
      #     echo $JOB_ID
      #     echo "job_id=$JOB_ID" >> $GITHUB_OUTPUT
      #     echo '############ FLINK DASHBOARD ################'
      #     echo $FLINK_DASH
      #     echo "flink_dash=$FLINK_DASH" >> $GITHUB_OUTPUT

  # monitor-job:
  #   runs-on: ubuntu-latest
  #   name: monitor job ${{ needs.name-job.outputs.repo_name }}@${{ github.event.inputs.ref }}
  #   environment: veda-smce
  #   needs: [name-job, run-job]
  #   steps:
  #     - name: Configure AWS credentials
  #       uses: aws-actions/configure-aws-credentials@v3
  #       with:
  #         role-to-assume: arn:aws:iam::444055461661:role/github-actions-role-eodc
  #         role-session-name: veda-pforge-monitor-job
  #         role-duration-seconds: 43200 # note this has to match our timeout-minutes below for monitoring
  #         aws-region: us-west-2
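  #     # A polling step is not included above; a minimal sketch (the job id wiring and
  #     # step name are assumptions, not part of the original workflow) might look like:
  #     - name: poll EMR serverless job state
  #       timeout-minutes: 720 # matches role-duration-seconds above (43200s)
  #       run: |
  #         while true; do
  #           state=$(aws emr-serverless get-job-run \
  #             --application-id "00firgpmjusj5e0l" \
  #             --job-run-id "${{ needs.run-job.outputs.job_id }}" \
  #             --query 'jobRun.state' --output text)
  #           echo "job state: $state"
  #           case "$state" in
  #             SUCCESS) exit 0 ;;
  #             FAILED|CANCELLED) exit 1 ;;
  #           esac
  #           sleep 60
  #         done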