forked from douglaz/ignition-core
-
Notifications
You must be signed in to change notification settings - Fork 7
/
remote_hook.sh
executable file
·131 lines (106 loc) · 5.28 KB
/
remote_hook.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/bin/bash
#
# Remote job hook: prepares a per-job control directory, then launches the
# requested job (spark-shell, Zeppelin, Jupyter, or a spark-submit run).
#
# Usage:
#   remote_hook.sh JOB_NAME JOB_DATE JOB_TAG JOB_USER CONTROL_DIR \
#                  SPARK_MEM_PARAM USE_YARN NOTIFY_ON_ERRORS DRIVER_HEAP_SIZE \
#                  [extra job arguments...]
#
# Any arguments after the ninth are left in "$@" and forwarded to the job
# by the spark-submit invocation further down.

# We suppose we are in a subdirectory of the root project
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Required positional parameters. JOB_NAME must be set and non-empty (":?");
# the others only have to be set ("?"), so an empty string is accepted.
JOB_NAME="${1:?Please give a Job Name}"
JOB_DATE="${2?Please give the Job Date}"
JOB_TAG="${3?Please give the Job Tag}"
JOB_USER="${4?Please give the Job User}"
CONTROL_DIR="${5?Please give the Control Directory}"
SPARK_MEM_PARAM="${6?Please give the Job Memory Size to use}"
USE_YARN="${7?Please tell if we should use YARN (yes/no)}"
NOTIFY_ON_ERRORS="${8?Please tell if we will notify on errors (yes/no)}"
DRIVER_HEAP_SIZE="${9?Please tell driver heap size to use}"
shift 9

JOB_WITH_TAG=${JOB_NAME}.${JOB_TAG}
JOB_CONTROL_DIR="${CONTROL_DIR}/${JOB_WITH_TAG}"
MY_USER=$(whoami)

# Avoids problems when another user created our control dir
sudo mkdir -p "${JOB_CONTROL_DIR}"
sudo chown "${MY_USER}" "${JOB_CONTROL_DIR}"

RUNNING_FILE="${JOB_CONTROL_DIR}/RUNNING"
# This should be the first thing in the script to avoid the wait remote job thinking we died
echo $$ > "${RUNNING_FILE}"

# Let us read the spark home even when the image doesn't give us the permission
sudo chmod o+rx /home/ec2-user
sudo chmod -R o+rx /home/ec2-user/spark

mkdir -p /media/tmp/spark-events
#######################################
# Records a failure for the current job and terminates the script.
# Globals:   JOB_CONTROL_DIR (read), NOTIFY_ON_ERRORS (read)
# Arguments: $1 - failure description
# Outputs:   "Exiting because: <description>" on stdout; the description is
#            also written to ${JOB_CONTROL_DIR}/FAILURE
# Returns:   never returns; always exits with status 1
#######################################
notify_error_and_exit() {
  local description="${1}"

  # printf instead of echo so a description such as "-n" or "-e" is written
  # verbatim rather than being interpreted as an echo option.
  printf 'Exiting because: %s\n' "${description}"
  printf '%s\n' "${description}" > "${JOB_CONTROL_DIR}/FAILURE"

  # Do not notify if NOTIFY_ON_ERRORS is different from yes
  if [[ "${NOTIFY_ON_ERRORS}" == 'yes' ]]; then
    # TODO: create some generic method to notify on errors
    :
  fi
  exit 1
}
#######################################
# Prints the first argument that names an existing filesystem entry.
# Arguments: $@ - candidate paths, checked in order
# Outputs:   the first existing candidate on stdout, exactly as given;
#            nothing when no candidate exists
# Returns:   0 in both cases (callers test for empty output)
#######################################
get_first_present() {
  local candidate
  for candidate in "$@"; do
    if [[ -e "${candidate}" ]]; then
      # Quoted so paths containing whitespace or glob characters are
      # emitted unchanged instead of being split and re-expanded.
      printf '%s\n' "${candidate}"
      break
    fi
  done
}
#######################################
# Cleanup handler: deletes every *.jar in the job control directory and
# then the RUNNING marker file.
# Globals: JOB_CONTROL_DIR (read), RUNNING_FILE (read)
#######################################
on_trap_exit() {
  local stale
  # RUNNING_FILE is deliberately last so it is removed after the jars.
  for stale in "${JOB_CONTROL_DIR}"/*.jar "${RUNNING_FILE}"; do
    rm -f "${stale}"
  done
}
#######################################
# Installs Zeppelin 0.8.0 into ./zeppelin (only when that directory does not
# exist yet) and then runs it in the foreground on port 8081.
# Globals:   JOB_MASTER, JAR_PATH, SPARK_MEM_PARAM (read);
#            MASTER, ZEPPELIN_PORT, SPARK_HOME, SPARK_SUBMIT_OPTIONS (exported)
# Outputs:   tar's file listing goes to /tmp/zeppelin_install.log
# Returns:   exits via notify_error_and_exit when the download, the
#            extraction, or the resulting installation is unusable
#######################################
install_and_run_zeppelin() {
  if [[ ! -d "zeppelin" ]]; then
    # A failed download must not leave an empty zeppelin/ directory behind:
    # its mere existence would make every later run skip the installation.
    if ! wget "http://www-us.apache.org/dist/zeppelin/zeppelin-0.8.0/zeppelin-0.8.0-bin-all.tgz" -O zeppelin.tar.gz; then
      rm -f zeppelin.tar.gz
      notify_error_and_exit "Zeppelin download failed"
    fi
    mkdir zeppelin
    if ! tar xvzf zeppelin.tar.gz -C zeppelin --strip-components 1 > /tmp/zeppelin_install.log; then
      # Drop the partial tree so the next run retries from scratch.
      rm -rf zeppelin
      notify_error_and_exit "Zeppelin extraction failed"
    fi
  fi

  if [[ -f "zeppelin/bin/zeppelin.sh" ]]; then
    export MASTER="${JOB_MASTER}"
    export ZEPPELIN_PORT="8081"
    # Assigned separately from export so the substitution is not hidden
    # behind export's own exit status.
    SPARK_HOME=$(get_first_present /root/spark /opt/spark ~/spark*/)
    export SPARK_HOME
    export SPARK_SUBMIT_OPTIONS="--jars ${JAR_PATH} --executor-memory ${SPARK_MEM_PARAM}"
    zeppelin/bin/zeppelin.sh
  else
    notify_error_and_exit "Zeppelin installation not found"
  fi
}
#######################################
# Installs Python 3, Jupyter and the Toree kernel, then starts pyspark with
# Jupyter Notebook as its driver front end on port 8888.
# Globals:   MASTER, SPARK_MASTER_HOST, SPARK_MEM_PARAM, DRIVER_HEAP_SIZE (read);
#            SPARK_HOME, HADOOP_HOME, SPARK_CONF_DIR, HADOOP_CONF_DIR,
#            JOB_MASTER, PYSPARK_PYTHON, PYSPARK_DRIVER_PYTHON,
#            PYSPARK_DRIVER_PYTHON_OPTS (exported)
# Returns:   exit status of the pyspark process
#######################################
install_and_run_jupyter() {
  sudo yum -y install python3 python3-pip
  # The PyPI distribution is "scikit-learn"; the old "sklearn" alias is
  # deprecated and its installation errors out, which aborts the whole
  # pip3 install command.
  sudo pip3 install jupyter pandas boto3 matplotlib numpy scikit-learn scipy toree

  SPARK_HOME=$(get_first_present /root/spark /opt/spark ~/spark*/)
  HADOOP_HOME=$(get_first_present /root/hadoop /opt/hadoop ~/hadoop*/)
  export SPARK_HOME HADOOP_HOME
  export SPARK_CONF_DIR="${SPARK_HOME}/conf"
  export HADOOP_CONF_DIR="${HADOOP_HOME}/conf"

  # If no $MASTER, then build a url using $SPARK_MASTER_HOST
  export JOB_MASTER=${MASTER:-spark://${SPARK_MASTER_HOST}:7077}

  PYSPARK_PYTHON=$(command -v python3)
  PYSPARK_DRIVER_PYTHON=$(command -v jupyter)
  export PYSPARK_PYTHON PYSPARK_DRIVER_PYTHON
  export PYSPARK_DRIVER_PYTHON_OPTS="notebook --allow-root --ip=${SPARK_MASTER_HOST} --no-browser --port=8888"

  # Register the Toree kernel using the absolute jupyter path resolved above,
  # passed to sudo explicitly.
  sudo "${PYSPARK_DRIVER_PYTHON}" toree install \
    --spark_home="${SPARK_HOME}" \
    --spark_opts="--master ${JOB_MASTER} --executor-memory ${SPARK_MEM_PARAM} --driver-memory ${DRIVER_HEAP_SIZE}"

  "${SPARK_HOME}/bin/pyspark" \
    --master "${JOB_MASTER}" \
    --executor-memory "${SPARK_MEM_PARAM}" \
    --driver-memory "${DRIVER_HEAP_SIZE}"
}
# Remove the copied jar(s) and the RUNNING marker on every exit path.
trap "on_trap_exit" EXIT

SPARK_HOME=$(get_first_present /root/spark /opt/spark ~/spark*/)
source "${SPARK_HOME}/conf/spark-env.sh"

MAIN_CLASS="ignition.jobs.Runner"

cd "${DIR}" || notify_error_and_exit "Internal script error for job ${JOB_WITH_TAG}"

# Stage a private copy of the assembly jar inside the job control directory.
JAR_PATH_SRC=$(ls -t "${DIR}"/*assembly*.jar | head -1) # most recent jar
JAR_PATH="${JOB_CONTROL_DIR}/Ignition.jar"
# Quoted so a control directory or project path containing whitespace
# still yields exactly one source and one destination operand.
cp "${JAR_PATH_SRC}" "${JAR_PATH}"

# If no $MASTER, then build a url using $SPARK_MASTER_HOST
export JOB_MASTER=${MASTER:-spark://${SPARK_MASTER_HOST}:7077}

if [[ "${USE_YARN}" == "yes" ]]; then
  export YARN_MODE=true
  export JOB_MASTER="yarn-client"
  # The glob part is intentionally left unquoted so the shell expands it to
  # the actual assembly jar path.
  export SPARK_JAR=$(echo "${SPARK_HOME}"/assembly/target/scala-*/spark-assembly-*.jar)
  export SPARK_YARN_APP_JAR=${JAR_PATH}
  export SPARK_WORKER_MEMORY=${SPARK_MEM_PARAM}
fi

if [[ "${JOB_NAME}" == "shell" ]]; then
  "${SPARK_HOME}/bin/spark-shell" \
    --master "${JOB_MASTER}" \
    --jars "${JAR_PATH}" \
    --driver-memory "${DRIVER_HEAP_SIZE}" \
    --driver-java-options "-Djava.io.tmpdir=/media/tmp -verbose:gc -XX:-PrintGCDetails -XX:+PrintGCTimeStamps" \
    --executor-memory "${SPARK_MEM_PARAM}" \
    || notify_error_and_exit "Execution failed for shell"
elif [[ "${JOB_NAME}" == "zeppelin" ]]; then
  install_and_run_zeppelin
elif [[ "${JOB_NAME}" == "jupyter" ]]; then
  install_and_run_jupyter
else
  JOB_OUTPUT="${JOB_CONTROL_DIR}/output.log"
  # Stream the job log to our stdout while spark-submit writes to the file.
  # NOTE(review): this background tail is never explicitly stopped or waited
  # for anywhere in this script — confirm the caller's session tears it down.
  tail -F "${JOB_OUTPUT}" &
  "${SPARK_HOME}/bin/spark-submit" \
    --master "${JOB_MASTER}" \
    --driver-memory "${DRIVER_HEAP_SIZE}" \
    --driver-java-options "-Djava.io.tmpdir=/media/tmp -verbose:gc -XX:-PrintGCDetails -XX:+PrintGCTimeStamps " \
    --class "${MAIN_CLASS}" \
    "${JAR_PATH}" "${JOB_NAME}" \
    --runner-date "${JOB_DATE}" \
    --runner-tag "${JOB_TAG}" \
    --runner-user "${JOB_USER}" \
    --runner-master "${JOB_MASTER}" \
    --runner-executor-memory "${SPARK_MEM_PARAM}" \
    "$@" >& "${JOB_OUTPUT}" \
    || notify_error_and_exit "Execution failed for job ${JOB_WITH_TAG}"
fi

# Reached only when no branch above exited through notify_error_and_exit.
touch "${JOB_CONTROL_DIR}/SUCCESS"