########## Cluster Creation ##########
# Script for helping set up the AWS ParallelCluster
# This is not designed to be run as a script, but rather to be run line-by-line
# in a terminal. Some parts are not re-entrant, either.
##### Basic AWS Setup #####
# On AWS Console (hedged CLI equivalents for some of these steps are sketched after this list):
# - Create new EC2 key pair "hpc-pcluster" and download the .pem file
# - Create new EBS volume (~1 GB/student, can be expanded later with some work)
# - Register an elastic IP with an hpc-pcluster=true tag
# - Register domain in Route 53
# - Set that domain's DNS to point to the elastic IP
# - Add a CSV to S3 for the user keys (first column is username, second column is public key)
# - Create a tarball with host private and public host keys and upload to S3:
# mkdir -p host-keys
# ssh-keygen -q -N "" -t rsa -b 4096 -f host-keys/ssh_host_rsa_key
# ssh-keygen -q -N "" -t ecdsa -f host-keys/ssh_host_ecdsa_key
# ssh-keygen -q -N "" -t ed25519 -f host-keys/ssh_host_ed25519_key
# cd host-keys && tar -czf ../host-keys.tar.gz * && cd ..
# - Run the CloudFormation stack at https://us-east-1.console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/create/review?stackName=pcluster-slurm-db&templateURL=https://us-east-1-aws-parallelcluster.s3.amazonaws.com/templates/1-click/serverless-database.yaml with the values:
# - Stack name: hpc-pcluster-slurm-db
# - Database cluster name: hpc-slurm-accounting-cluster
# - Sizing: 0.5 to 2
# - VPC: one generated by ParallelCluster below (out of order, I know...)
# - CIDR blocks of 10.0.200.0/24 and 10.0.201.0/24
# FUTURE TODO: don't use redundancy for the database since it is just for accounting and can be rebuilt easily (and will be half the cost)
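# CLI equivalents for some of the console steps above (a sketch, not authoritative: the
# volume size, availability zone, and gp3 type are assumptions -- adjust for your class
# size and region; the Route 53 and CloudFormation steps are easiest in the console):
#aws ec2 create-key-pair --key-name hpc-pcluster --query 'KeyMaterial' --output text > hpc-pcluster.pem && chmod 600 hpc-pcluster.pem
#aws ec2 create-volume --availability-zone us-east-1a --size 30 --volume-type gp3
#aws ec2 allocate-address --domain vpc --tag-specifications 'ResourceType=elastic-ip,Tags=[{Key=hpc-pcluster,Value=true}]'
#aws s3 cp user-keys.csv s3://mu-hpc-pcluster/user-keys.csv
#aws s3 cp host-keys.tar.gz s3://mu-hpc-pcluster/host-keys.tar.gz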
# Update these with any changed values
VENV="$PWD/hpc-aws"
export AWS_PROFILE="bushj"
export AWS_DEFAULT_REGION="us-east-1"
AMI_IMAGE_ID="rocky-88"
DB_CF_NAME="hpc-pcluster-slurm-db"
DOMAIN_NAME="mucluster.com"
USER_KEYS_S3="s3://mu-hpc-pcluster/user-keys.csv"
HOST_KEYS_S3="s3://mu-hpc-pcluster/host-keys.tar.gz"
GRAFANA_CONFIG_S3="s3://mu-hpc-pcluster/grafana.tar.gz"
EBS_VOLUME_ID="vol-02c42f64eace590fa"
GRAFANA_SG_NAME="grafana-sg" # this just needs to be unique to this VPC
CONFIG_FILE="pcluster-config.yaml"
# Allow use of spot instances (only needs to be done once for an entire AWS account)
aws iam create-service-linked-role --aws-service-name spot.amazonaws.com
##### Tool Setup #####
# Install AWS ParallelCluster tools
if ! [ -d "$VENV" ]; then
    python3 -m venv "$VENV"
fi
source "$VENV/bin/activate"
python3 -m pip install --upgrade "aws-parallelcluster"
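# Optional quick sanity check that the pcluster CLI installed into the venv:
#pcluster version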
# Install NVM and NodeJS (LTS version)
if ! [ -e "$HOME/.nvm/nvm.sh" ]; then
    curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.38.0/install.sh | bash
    chmod ug+x "$HOME/.nvm/nvm.sh"
fi
# Load nvm into this shell (needed even if it was already installed) and install Node LTS
export NVM_DIR="$HOME/.nvm"
[ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"
nvm install --lts
##### Create Config #####
# Explanation of choices in the following:
# OS: rocky8 - free version of RHEL 8 (like what centos8 should be, exactly what Expanse uses)
#   However, no pre-built images of Rocky 8 are available, so we have to build our own
# Head Node: t3a.large - 2 vCPU, 30% baseline + burstable, 8 GB RAM, 0.0752 $/hr
#   Uses an AMD EPYC 7000 series processor similar to Expanse and Bridges-2
# Compute Node: c5ad.2xlarge - 8 vCPU, 16 GB RAM, 300 GB NVMe SSD, 0.344 $/hr (spot price 0.1634 $/hr)
#   Uses an AMD EPYC 7002 series processor exactly like Expanse and Bridges-2
#   NOTE: This is not EFA-compatible so it will be slow for MPI (but all of the EFA-compatible instances are WAY more expensive: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa.html#efa-instance-types)
# The "2" option at the end makes all machines public (instead of the 1/default option, which makes compute nodes private)
#   We would like the compute fleet to be in a private subnet, but that would cost $150+ for the semester
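# Optional: check the current spot price for c5ad.2xlarge (the prices quoted above drift
# over time; this query is a sketch and just grabs the most recent data point):
#aws ec2 describe-spot-price-history --instance-types c5ad.2xlarge --product-descriptions "Linux/UNIX" --start-time "$(date -u +%Y-%m-%dT%H:%M:%S)" --query 'SpotPriceHistory[0].SpotPrice' --output text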
if ! [ -e "$CONFIG_FILE" ]; then
pcluster configure --config "$CONFIG_FILE"
# Options:
# us-east-1
# hpc-pcluster
# slurm
# rocky8
# t3a.large
# 1
# compute
# 1
# c5ad.2xlarge
# 24
# y
# us-east-1a
# 2
#---------
# Creates VPC but does not actually launch the cluster itself yet
fi
##### Build Rocky 8 image #####
# Adapted from https://ciq.com/blog/how-to-use-aws-parallelcluster-3-8-0-with-rocky-linux-8/
# Create the rocky-88.yaml file as described there (a rough sketch follows), but we needed to:
# - get the AMI ID from https://rockylinux.org/cloud-images/ (correct region, arch, and not using LVM)
# - add the Image.RootVolume.Size parameter since the build was running out of room (set it to 42 GB, default was ~37 GB)
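# Minimal sketch of rocky-88.yaml under those assumptions (the ParentImage AMI ID is a
# placeholder -- use the current Rocky 8 x86_64 us-east-1 AMI -- and the build instance
# type is just a guess):
#   Build:
#     InstanceType: c5.2xlarge
#     ParentImage: ami-xxxxxxxxxxxxxxxxx
#   Image:
#     RootVolume:
#       Size: 42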
pcluster build-image --image-id "$AMI_IMAGE_ID" --image-configuration rocky-88.yaml
# Takes about an hour to build... check progress with:
#pcluster describe-image --image-id "$AMI_IMAGE_ID"
#pcluster list-images --image-status PENDING
# If the build fails, its CloudFormation stack needs to be deleted (pcluster delete-image --image-id "$AMI_IMAGE_ID") before retrying
##### Update Config #####
# yq and jq are needed below to edit the config; install them via Homebrew if missing
if ! which yq >/dev/null 2>&1; then brew install yq; fi
if ! which jq >/dev/null 2>&1; then brew install jq; fi
# Note: AMI_ID is surrounded by quotes (pcluster does not support the --output text option)
AMI_ID="$(pcluster describe-image --image-id "$AMI_IMAGE_ID" --query 'ec2AmiInfo.amiId')"
#AMI_ID="$(pcluster list-images --image-status AVAILABLE --query "images[?imageId=='$AMI_IMAGE_ID'].ec2AmiInfo.amiId")"
yq -i '.Image.CustomAmi = '"$AMI_ID" "$CONFIG_FILE"
# Set IP address
ELASTIC_IPS="$(aws ec2 describe-addresses --filters "Name=tag:hpc-pcluster,Values=true" "Name=domain,Values=vpc" --query "Addresses[?NetworkInterfaceId == null].PublicIp")"
if [ "$ELASTIC_IPS" = "[]" ]; then echo "!!! No elastic IPs available !!!";
elif [ "$(jq length <<<"$ELASTIC_IPS")" -gt 1 ]; then
# FUTURE TODO
echo "!!! Multiple elastic IPs available !!!";
ELASTIC_IP="$(jq -r '.[0]' <<<"$ELASTIC_IPS")"
yq -i '.HeadNode.Networking.ElasticIp = "'"$ELASTIC_IP"'"' "$CONFIG_FILE"
else
ELASTIC_IP="$(jq -r '.[0]' <<<"$ELASTIC_IPS")"
yq -i '.HeadNode.Networking.ElasticIp = "'"$ELASTIC_IP"'"' "$CONFIG_FILE"
fi
# Add persistent EBS volume for /home
yq -i '.SharedStorage += [{
    "MountDir": "/home",
    "Name": "home",
    "StorageType": "Ebs",
    "EbsSettings": { "VolumeId": "'"$EBS_VOLUME_ID"'" }
}]' "$CONFIG_FILE"
# Add initialization scripts, fetched from this repository's main branch (assumes the origin remote points at GitHub)
REPO="$(git remote get-url origin | sed -E -e 's~^(git@[^:]+:|https?://[^/]+/)([[:graph:]]*)\.git~\2~')"
REPO_URL="https://raw.githubusercontent.com/$REPO/main"
HN_SETUP_SCRIPT="$REPO_URL/head-node-setup.sh"
yq -i '.HeadNode.CustomActions.OnNodeStart.Sequence += [{"Script":"'"$HN_SETUP_SCRIPT"'","Args":["'"$DOMAIN_NAME"'","'"$USER_KEYS_S3"'","'"$HOST_KEYS_S3"'"]}]' "$CONFIG_FILE"
# Grant the head node's IAM role (read-only) access to a single s3://bucket/key object
function add_s3_access() {
    S3_BUCKET="$(sed -E -e "s~^s3://([^/]*)/(.*)$~\1~" <<< "$1")"
    S3_KEY="$(sed -E -e "s~^s3://([^/]*)/(.*)$~\2~" <<< "$1")"
    yq -i '.HeadNode.Iam.S3Access += [{"BucketName":"'"$S3_BUCKET"'","KeyName":"'"$S3_KEY"'"}]' "$CONFIG_FILE"
}
add_s3_access "$USER_KEYS_S3"
add_s3_access "$HOST_KEYS_S3"
add_s3_access "$GRAFANA_CONFIG_S3"
CN_SETUP_SCRIPT="$REPO_URL/compute-node-setup.sh"
yq -i '.Scheduling.SlurmQueues[0].CustomActions.OnNodeStart.Sequence += [{"Script":"'"$CN_SETUP_SCRIPT"'"}]' "$CONFIG_FILE"
# Add custom prolog/epilog scripts
HN_CONFIG_SCRIPT="$REPO_URL/head-node-config.sh"
PROLOG="$REPO_URL/50_hpc_cluster_slurm_prolog"
EPILOG="$REPO_URL/50_hpc_cluster_slurm_epilog"
yq -i '.HeadNode.CustomActions.OnNodeConfigured.Sequence += [{"Script":"'"$HN_CONFIG_SCRIPT"'","Args":["'"$PROLOG"'","'"$EPILOG"'"]}]' "$CONFIG_FILE"
# Deep-merge all other configuration changes from pcluster-config-extras.yaml
yq -i '. *d load("pcluster-config-extras.yaml")' "$CONFIG_FILE"
# Integrate Accounting
DB_URI="$(aws cloudformation describe-stacks --stack-name "$DB_CF_NAME" --query "Stacks[0].Outputs[?OutputKey=='DatabaseHost'].OutputValue" --output text)"
DB_PORT="$(aws cloudformation describe-stacks --stack-name "$DB_CF_NAME" --query "Stacks[0].Outputs[?OutputKey=='DatabasePort'].OutputValue" --output text)"
DB_USERNAME="$(aws cloudformation describe-stacks --stack-name "$DB_CF_NAME" --query "Stacks[0].Outputs[?OutputKey=='DatabaseAdminUser'].OutputValue" --output text)"
DB_SECRET_ARN="$(aws cloudformation describe-stacks --stack-name "$DB_CF_NAME" --query "Stacks[0].Outputs[?OutputKey=='DatabaseSecretArn'].OutputValue" --output text)"
DB_SEC_GROUP="$(aws cloudformation describe-stacks --stack-name "$DB_CF_NAME" --query "Stacks[0].Outputs[?OutputKey=='DatabaseClientSecurityGroup'].OutputValue" --output text)"
yq -i '.HeadNode.Networking.AdditionalSecurityGroups += ["'"$DB_SEC_GROUP"'"]' "$CONFIG_FILE"
yq -i '.Scheduling.SlurmSettings.Database.Uri = "'"$DB_URI:$DB_PORT"'"' "$CONFIG_FILE"
yq -i '.Scheduling.SlurmSettings.Database.UserName = "'"$DB_USERNAME"'"' "$CONFIG_FILE"
yq -i '.Scheduling.SlurmSettings.Database.PasswordSecretArn = "'"$DB_SECRET_ARN"'"' "$CONFIG_FILE"
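# Optional: dump all of the stack outputs at once to double-check the output key names used above:
#aws cloudformation describe-stacks --stack-name "$DB_CF_NAME" --query "Stacks[0].Outputs" --output table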
##### Setup Grafana #####
SUBNET_ID="$(yq ".HeadNode.Networking.SubnetId" "$CONFIG_FILE")"
VPC_ID="$(aws ec2 describe-subnets --subnet-ids "$SUBNET_ID" --query "Subnets[0].VpcId" --output text)"
GF_SEC_GROUP="$(aws ec2 create-security-group --group-name "$GRAFANA_SG_NAME" --description "Open HTTP/HTTPS ports" --vpc-id "$VPC_ID" --output text 2>/dev/null)"
if [ -n "$GF_SEC_GROUP" ]; then
    # newly created security group: open HTTP/HTTPS to the world
    aws ec2 authorize-security-group-ingress --group-id "$GF_SEC_GROUP" --protocol tcp --port 443 --cidr 0.0.0.0/0
    aws ec2 authorize-security-group-ingress --group-id "$GF_SEC_GROUP" --protocol tcp --port 80 --cidr 0.0.0.0/0
else
    # the security group already exists - look up its ID
    GF_SEC_GROUP="$(aws ec2 describe-security-groups --filters "Name=group-name,Values=$GRAFANA_SG_NAME" "Name=vpc-id,Values=$VPC_ID" --query "SecurityGroups[0].GroupId" --output text)"
fi
yq -i '.HeadNode.Networking.AdditionalSecurityGroups += ["'"$GF_SEC_GROUP"'"]' "$CONFIG_FILE"
yq -i '.Tags += [{"Key":"Grafana","Value":"true"}]' "$CONFIG_FILE"
##### Create the cluster #####
pcluster create-cluster --cluster-name hpc-cluster --cluster-configuration "$CONFIG_FILE"
echo
echo "Cluster creation started. Visit CloudFormation console to monitor progress."
# Other commands:
# pcluster ssh --region us-east-1 --cluster-name hpc-cluster -i hpc-pcluster.pem
# pcluster describe-cluster --cluster-name hpc-cluster
# pcluster update-compute-fleet --cluster-name hpc-cluster --status STOP_REQUESTED
# pcluster update-cluster --cluster-name hpc-cluster --cluster-configuration "$CONFIG_FILE"
# pcluster update-compute-fleet --cluster-name hpc-cluster --status START_REQUESTED
# pcluster delete-cluster --cluster-name hpc-cluster
# Note: it takes about 5 minutes to boot an instance, so it is good to keep one instance always running
# TODO:
# job numbering after rebuild