#! /bin/bash
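# startrhacm.sh: claims an OpenShift cluster from a ClusterPool (via the
# Lifeguard repo) and deploys Red Hat Advanced Cluster Management (RHACM)
# on it, using a snapshot from the Pipeline repo and the Deploy repo's
# start script. Configuration comes from utils/config.sh or exported variables.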
set -e
OS=$(uname -s | tr '[:upper:]' '[:lower:]')
BASE64="base64 -w 0"
if [ "${OS}" == "darwin" ]; then
BASE64="base64"
fi
# Helper function to format logs
function printlog() {
case ${1} in
title) printf "\n##### "
;;
info) printf "* "
;;
error) printf "^^^^^ "
;;
*) printlog error "Unexpected error in printlog function. Invalid input given: ${1}"
exit 1
;;
esac
printf "${2}\n"
}
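# Example printlog output for each log level:
#   printlog title "Deploying RHACM"  ->  ##### Deploying RHACM
#   printlog info "Using snapshot"    ->  * Using snapshot
#   printlog error "Deploy failed"    ->  ^^^^^ Deploy failed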
# Helper function to check exports
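# Each path variable must point to a local clone:
#   LIFEGUARD_PATH      -> https://github.com/stolostron/lifeguard
#   RHACM_PIPELINE_PATH -> the RHACM Pipeline repo (holds snapshot manifests)
#   RHACM_DEPLOY_PATH   -> https://github.com/stolostron/deploy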
function checkexports() {
if [[ -z "${LIFEGUARD_PATH}" ]]; then
printlog error "LIFEGUARD_PATH not defined. Please set LIFEGUARD_PATH to the local path of the Lifeguard repo."
exit 1
else
if (! ls ${LIFEGUARD_PATH} &>/dev/null); then
printlog error "Error getting to Lifeguard repo. Is LIFEGUARD_PATH set properly? Currently it's set to: ${LIFEGUARD_PATH}"
exit 1
fi
fi
if [[ -z "${RHACM_PIPELINE_PATH}" ]]; then
printlog error "RHACM_PIPELINE_PATH not defined. Please set RHACM_PIPELINE_PATH to the local path of the Pipeline repo."
exit 1
else
if (! ls ${RHACM_PIPELINE_PATH} &>/dev/null); then
printlog error "Error getting to Pipeline repo. Is RHACM_PIPELINE_PATH set properly? Currently it's set to: ${RHACM_PIPELINE_PATH}"
exit 1
fi
fi
if [[ -z "${RHACM_DEPLOY_PATH}" ]]; then
printlog error "RHACM_DEPLOY_PATH not defined. Please set RHACM_DEPLOY_PATH to the local path of the Deploy repo."
exit 1
else
if (! ls ${RHACM_DEPLOY_PATH} &>/dev/null); then
printlog error "Error getting to Deploy repo. Is RHACM_DEPLOY_PATH set properly? Currently it's set to: ${RHACM_DEPLOY_PATH}"
exit 1
fi
fi
}
# Helper function to query Quay for snapshot tags
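# The Quay tag API returns JSON shaped roughly like this (showing only the
# fields used below):
#   { "has_additional": true, "tags": [ { "name": "2.9.0-SNAPSHOT-..." } ] }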
function queryquay() {
QUAY_ORGANIZATION=${1}
printlog info "Searching Quay for tag ${RHACM_SNAPSHOT}"
# Store user-specified snapshot for logging
if [[ -n "${RHACM_SNAPSHOT}" ]]; then
USER_SNAPSHOT="${RHACM_SNAPSHOT}"
RHACM_SNAPSHOT=""
fi
# Iterate over all the pages of the repo
HAS_ADDITIONAL="true"
i=0
while [[ "${HAS_ADDITIONAL}" == "true" ]] && [[ -z "${RHACM_SNAPSHOT}" ]]; do
((i=i+1))
HAS_ADDITIONAL=$(curl -s "https://quay.io/api/v1/repository/${QUAY_ORGANIZATION}/acm-custom-registry/tag/?onlyActiveTags=true&page=${i}" | jq -r '.has_additional')
SNAPSHOT_TAGS=$(curl -s "https://quay.io/api/v1/repository/${QUAY_ORGANIZATION}/acm-custom-registry/tag/?onlyActiveTags=true&page=${i}&specificTag=${USER_SNAPSHOT}" | jq -r '.tags[].name')
if [[ -z "${SNAPSHOT_TAGS}" ]]; then
break
fi
if [[ -n "${USER_SNAPSHOT}" ]]; then
RHACM_SNAPSHOT=$(echo "${SNAPSHOT_TAGS}" | head -n 1)
else
RHACM_SNAPSHOT=$(echo "${SNAPSHOT_TAGS}" | grep -v "^v\|nonesuch\|-$" | sort -r --version-sort | grep -F "${RHACM_VERSION}" | grep -F "${RHACM_BRANCH}." | head -n 1)
fi
done
if [[ -z "${RHACM_SNAPSHOT}" ]]; then
printlog error "Error querying snapshot list--nothing was returned. Please check https://quay.io/api/v1/repository/${QUAY_ORGANIZATION}/acm-custom-registry/tag/, your network connection, and any conflicts in your exports:"
printlog error "Query used: RHACM_SNAPSHOT: '${USER_SNAPSHOT}' RHACM_VERSION: '${RHACM_VERSION}' RHACM_BRANCH '${RHACM_BRANCH}'"
return 1
fi
}
# Load configuration
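# An illustrative utils/config.sh (paths are examples only):
#   export LIFEGUARD_PATH=${HOME}/git/lifeguard
#   export RHACM_PIPELINE_PATH=${HOME}/git/pipeline
#   export RHACM_DEPLOY_PATH=${HOME}/git/deploy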
printlog title "Loading configuration from utils/config.sh"
SCRIPT_DIR=$(dirname "${BASH_SOURCE[0]}")
if ls ${SCRIPT_DIR}/utils/config.sh &>/dev/null; then
if (! ${SCRIPT_DIR}/utils/config.sh); then
printlog error "Error running configuration script. Is the script executable? If not, run: chmod +x ${SCRIPT_DIR}/utils/config.sh"
exit 1
else
source ${SCRIPT_DIR}/utils/config.sh
checkexports
fi
else
printlog info "config.sh script not found--checking exports for LIFEGUARD_PATH, RHACM_PIPELINE_PATH, and RHACM_DEPLOY_PATH"
printlog info "(Location checked for script: ${SCRIPT_DIR}/utils/config.sh)"
checkexports
fi
# Verify we're pointed to the collective cluster
if [[ "${DISABLE_CLUSTER_CHECK}" != "true" ]]; then
CLUSTER=$(oc config get-contexts | awk '/^\052/ {print $3}' | awk '{gsub("^api-",""); gsub("(\\/|-red-chesterfield).*",""); print}')
if [[ "${KUBECONFIG}" == */lifeguard/clusterclaims/*/kubeconfig ]]; then
printlog error "KUBECONFIG is set to an existing claim's configuration file. Please unset before continuing: unset KUBECONFIG"
exit 1
elif [[ "${CLUSTER}" != "collective-aws" ]] || (! oc status &>/dev/null); then
printlog info "The oc CLI is not currently logged in to the collective cluster. Please configure the CLI and try again."
printlog info "KUBECONFIG is currently set: $(test -n "${KUBECONFIG}" && echo "true" || echo "false")"
printlog info "Current cluster: ${CLUSTER}"
printlog info "Link to Collective cluster login command: https://oauth-openshift.apps.collective.aws.red-chesterfield.com/oauth/token/request"
exit 1
fi
else
printlog info "Cluster check has been disabled. Verifying login."
if (! oc status &>/dev/null); then
printlog error "Error verifying cluster login. Please make sure you're logged in to a ClusterPool cluster."
exit 1
fi
fi
# Check to see whether the ClusterPool meets the minimum size
if [[ -n "${CLUSTERPOOL_MIN_SIZE}" ]] && [[ -n "${CLUSTERPOOL_NAME}" ]] && [[ -n "${CLUSTERPOOL_TARGET_NAMESPACE}" ]]; then
# If a ClusterClaim name was specified and that claim already exists, skip the pool-size check since the existing claim will be patched rather than a new cluster claimed
if [[ -z "${CLUSTERCLAIM_NAME}" ]] || ( [[ -n "${CLUSTERCLAIM_NAME}" ]] && (! oc get clusterclaim.hive -n ${CLUSTERPOOL_TARGET_NAMESPACE} ${CLUSTERCLAIM_NAME} &>/dev/null) ); then
printlog title "Checking for pool size for ClusterPool ${CLUSTERPOOL_NAME}"
POOL_SIZE=$(oc get clusterpool.hive -n ${CLUSTERPOOL_TARGET_NAMESPACE} ${CLUSTERPOOL_NAME} -o jsonpath={.spec.size})
if (( POOL_SIZE < CLUSTERPOOL_MIN_SIZE )); then
printlog info "The ClusterPool size ${POOL_SIZE} does not meet the minimum of ${CLUSTERPOOL_MIN_SIZE}. Patching the ClusterPool to increase the size of the pool."
oc scale clusterpool.hive ${CLUSTERPOOL_NAME} -n ${CLUSTERPOOL_TARGET_NAMESPACE} --replicas=${CLUSTERPOOL_MIN_SIZE}
fi
fi
fi
# Claim cluster from ClusterPool
printlog title "Creating ClusterClaim from ClusterPool ${CLUSTERPOOL_NAME}"
CLAIM_DIR=${LIFEGUARD_PATH}/clusterclaims
cd ${CLAIM_DIR}
printlog info "Switching to main branch and updating repo (if this exits, check the state of the local Lifeguard repo)"
git checkout main &>/dev/null
git pull &>/dev/null
# Set lifetime of claim to end of work day
if [[ -n "${CLUSTERCLAIM_END_TIME}" ]]; then
printlog info "Setting CLUSTERCLAIM_LIFETIME to end at hour ${CLUSTERCLAIM_END_TIME} of a 24 hour clock"
if [[ -n "${CLUSTERCLAIM_NAME}" ]] && (oc get clusterclaim.hive "${CLUSTERCLAIM_NAME}" -n ${CLUSTERPOOL_TARGET_NAMESPACE} &>/dev/null); then
printlog error "Found existing claim with name ${CLUSTERCLAIM_NAME}, so its lifetime (which is based on its creation time) will not be recalculated."
export CLUSTERCLAIM_LIFETIME=$(oc get clusterclaim.hive ${CLUSTERCLAIM_NAME} -n ${CLUSTERPOOL_TARGET_NAMESPACE} -o jsonpath='{.spec.lifetime}')
printlog error "Using claim's existing lifetime of ${CLUSTERCLAIM_LIFETIME}. If a different lifetime is desired, please manually edit the claim."
else
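# Lifetime = remaining whole hours until CLUSTERCLAIM_END_TIME plus the
# minutes left in the current hour, e.g. at 14:25 with CLUSTERCLAIM_END_TIME=18
# this yields (18-14-1)h(60-25)m = 3h35m, expiring at 18:00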
export CLUSTERCLAIM_LIFETIME="$((${CLUSTERCLAIM_END_TIME}-$(date "+%-H")-1))h$((60-$(date "+%-M")))m"
fi
fi
./apply.sh
printlog title "Setting KUBECONFIG and checking cluster access"
# Save the current KUBECONFIG in case we need it
PREVIOUS_KUBECONFIG=${KUBECONFIG}
# If we have a ClusterClaim name, use that to get the kubeconfig, otherwise just get the most recently modified (which is most likely the one we need)
if [[ -n "${CLUSTERCLAIM_NAME}" ]]; then
export KUBECONFIG=$(ls ${CLAIM_DIR}/${CLUSTERCLAIM_NAME}/kubeconfig)
else
export KUBECONFIG=$(ls -dt1 ${CLAIM_DIR}/*/kubeconfig | head -n 1)
fi
# Set namespace context in case it wasn't set or we're inside a pod specifying a different namespace in env
oc config set-context --current --namespace=default
# Verify cluster access
ATTEMPTS=0
MAX_ATTEMPTS=15
INTERVAL=20
FAILED="false"
while (! oc status) && FAILED="true" && (( ATTEMPTS != MAX_ATTEMPTS )); do
printlog error "Error logging in to cluster. Trying again in ${INTERVAL}s (Retry $((++ATTEMPTS))/${MAX_ATTEMPTS})"
sleep ${INTERVAL}
FAILED="false"
done
if [[ "${FAILED}" == "true" ]]; then
printlog error "Failed to login to cluster. Exiting."
exit 1
fi
# Get snapshot
printlog title "Getting snapshot for RHACM (defaults to latest version -- override version with RHACM_VERSION)"
RHACM_BRANCH=${RHACM_BRANCH:-$(echo "${RHACM_VERSION}" | grep -o "[[:digit:]]\+\.[[:digit:]]\+" || true)} # Create Pipeline branch from version, if specified
# Get latest downstream snapshot from Quay if DOWNSTREAM is set to "true"
if [[ "${DOWNSTREAM}" == "true" ]]; then
printlog info "Getting downstream snapshot"
queryquay "acm-d"
# If DOWNSTREAM is not "true", get snapshot from pipeline repo (defaults to latest edge version)
else
if [[ -z "${RHACM_SNAPSHOT}" ]]; then
printlog info "Getting upstream snapshot"
cd ${RHACM_PIPELINE_PATH}
git pull &>/dev/null
BRANCH=${RHACM_BRANCH:-$(git remote show origin | grep -o " [0-9]\+\.[0-9]\+-" | sort -uV | tail -1 | grep -o "[0-9]\+\.[0-9]\+")}
VERSION_NUM=${RHACM_VERSION:=""}
PIPELINE_PHASE=${PIPELINE_PHASE:-"dev"}
# Handle older pipeline phases
if [[ "${BRANCH}" == "2."[0-4] ]]; then
case "${PIPELINE_PHASE}" in
dev|nightly)
PIPELINE_PHASE="edge"
;;
preview)
PIPELINE_PHASE="stable"
;;
esac
fi
printlog info "Updating repo and switching to the ${BRANCH}-${PIPELINE_PHASE} branch (if this exits, check the state of the local Pipeline repo)"
git checkout ${BRANCH}-${PIPELINE_PHASE} &>/dev/null
git pull &>/dev/null
if (! ls ${RHACM_PIPELINE_PATH}/snapshots/manifest-* &>/dev/null); then
printlog error "The branch, ${BRANCH}-${PIPELINE_PHASE}, doesn't appear to have any snapshots/manifest-* files to parse a snapshot from."
if [[ -z "${RHACM_BRANCH}" ]]; then
BRANCH=${RHACM_BRANCH:-$(git remote show origin | grep -o " [0-9]\+\.[0-9]\+-" | sort -uV | tail -2 | head -1 | grep -o "[0-9]\+\.[0-9]\+")}
printlog info "RHACM_BRANCH was not set. Using an older branch: ${BRANCH}-${PIPELINE_PHASE}"
git checkout ${BRANCH}-${PIPELINE_PHASE} &>/dev/null
git pull &>/dev/null
else
printlog error "Please double check the Pipeline repo and set RHACM_BRANCH as needed."
exit 1
fi
fi
# Query Pipeline for snapshots--if the latest is not in Quay, try progressively older snapshots
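# Manifest filenames embed a timestamp and version; an illustrative example:
#   snapshots/manifest-2023-10-12-20-51-06-2.9.0.json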
ATTEMPTS=0
MAX_ATTEMPTS=5
FOUND="false"
while [[ "${FOUND}" == "false" ]] && (( ATTEMPTS != MAX_ATTEMPTS )); do
((ATTEMPTS=ATTEMPTS+1))
MANIFEST_TAG=$(ls ${RHACM_PIPELINE_PATH}/snapshots/manifest-* | grep -F "${VERSION_NUM}" | tail -n ${ATTEMPTS} | head -n 1 | grep -o "[[:digit:]]\{4\}\(-[[:digit:]]\{2\}\)\{5\}.*")
SNAPSHOT_TAG=$(echo ${MANIFEST_TAG} | grep -o "[[:digit:]]\{4\}\(-[[:digit:]]\{2\}\)\{5\}")
VERSION_NUM=$(echo ${MANIFEST_TAG} | grep -o "\([[:digit:]]\+\.\)\{2\}[[:digit:]]\+")
if [[ -n "${RHACM_VERSION}" && "${RHACM_VERSION}" != "${VERSION_NUM}" ]]; then
printlog error "There's an unexpected mismatch between the version provided, ${RHACM_VERSION}, and the version found, ${VERSION_NUM}. Please double check the Pipeline repo before continuing."
exit 1
fi
RHACM_SNAPSHOT="${VERSION_NUM}-SNAPSHOT-${SNAPSHOT_TAG}"
# Query Quay for snapshot parsed from Pipeline
if ! queryquay "stolostron"; then
printlog error "The pipeline snapshot was not found in Quay. Trying an older snapshot."
else
FOUND="true"
fi
done
elif ! queryquay "stolostron"; then
# Fail if manually provided snapshot is not present in Quay
printlog error "The provided snapshot ${RHACM_SNAPSHOT} was not found in Quay."
exit 1
fi
fi
printlog info "Using RHACM snapshot: ${RHACM_SNAPSHOT}"
# Deploy RHACM using retrieved snapshot
printlog title "Deploying Red Hat Advanced Cluster Management"
cd ${RHACM_DEPLOY_PATH}
printlog info "Updating repo and switching to the master branch (if this exits, check the state of the local Deploy repo)"
git checkout master &>/dev/null
git pull &>/dev/null
echo "${RHACM_SNAPSHOT}" > ${RHACM_DEPLOY_PATH}/snapshot.ver
if (! ls ${RHACM_DEPLOY_PATH}/prereqs/pull-secret.yaml &>/dev/null) && [[ -z "${QUAY_TOKEN}" ]]; then
printlog error "Error finding pull secret in deploy repo. Please consult https://github.com/stolostron/deploy on how to set it up."
exit 1
fi
# Deploy necessary downstream resources if required
if [[ "${DOWNSTREAM}" == "true" ]] || [[ "${INSTALL_ICSP}" == "true" ]]; then
if [[ -z "${QUAY_TOKEN}" ]]; then
DOWNSTREAM_QUAY_TOKEN=$(grep "\.dockerconfigjson" ${RHACM_DEPLOY_PATH}/prereqs/pull-secret.yaml | sed 's/.*\.dockerconfigjson: //')
else
DOWNSTREAM_QUAY_TOKEN=${QUAY_TOKEN}
fi
DOWNSTREAM_QUAY_TOKEN=$(echo ${DOWNSTREAM_QUAY_TOKEN} | base64 --decode | sed "s/quay\.io/quay\.io:443/g")
OPENSHIFT_PULL_SECRET=$(oc get -n openshift-config secret pull-secret -o jsonpath='{.data.\.dockerconfigjson}' | base64 --decode)
FULL_TOKEN="${DOWNSTREAM_QUAY_TOKEN}${OPENSHIFT_PULL_SECRET}"
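# FULL_TOKEN holds two concatenated JSON docs; "jq -s '.[0] * .[1]'" below
# slurps them into an array and deep-merges the cluster's pull secret over
# the quay.io:443 token, combining their "auths" entries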
if [[ "${DOWNSTREAM}" == "true" ]]; then
printlog info "Setting up for downstream deployment"
export COMPOSITE_BUNDLE=true
export CUSTOM_REGISTRY_REPO="quay.io:443/acm-d"
export QUAY_TOKEN=$(echo ${DOWNSTREAM_QUAY_TOKEN} | ${BASE64})
else
printlog info "Installing ICSP"
fi
printlog info "Updating Openshift pull-secret in namespace openshift-config with a token for quay.io:433"
oc set data secret/pull-secret -n openshift-config --from-literal=.dockerconfigjson="$(jq -s '.[0] * .[1]' <<<${FULL_TOKEN})"
printlog info "Applying downstream resources (including ImageContentSourcePolicy to point to downstream repo)"
oc apply -k ${RHACM_DEPLOY_PATH}/addons/downstream
# Wait for the cluster nodes to update with the ICSP--if not all the nodes are up after this, we'll continue anyway
printlog info "Waiting up to 10 minutes for cluster nodes to update with ImageContentSourcePolicy change"
READY="false"
ATTEMPTS=0
MAX_ATTEMPTS=10
INTERVAL=60
while [[ "${READY}" == "false" ]] && (( ATTEMPTS != MAX_ATTEMPTS )); do
NODES=$(oc get nodes | grep "NotReady\|SchedulingDisabled" || true)
if [[ -n "${NODES}" ]]; then
echo "${NODES}"
printlog error "Waiting another ${INTERVAL}s for node update (Retry $((++ATTEMPTS))/${MAX_ATTEMPTS})"
sleep ${INTERVAL}
else
READY="true"
fi
done
fi
# Attempt the RHACM deploy twice in case of an unexpected failure or timeout
ATTEMPTS=0
MAX_ATTEMPTS=1
INTERVAL=30
FAILED="false"
export TARGET_NAMESPACE=${TARGET_NAMESPACE:-"open-cluster-management"}
while (! ./start.sh --silent) && FAILED="true" && (( ATTEMPTS != MAX_ATTEMPTS )); do
printlog error "RHACM deployment failed. Trying again in ${INTERVAL}s (Retry $((++ATTEMPTS))/${MAX_ATTEMPTS})"
sleep ${INTERVAL}
FAILED="false"
done
if [[ "${FAILED}" == "true" ]]; then
printlog error "RHACM deployment failed. If it appears to be intermittent, re-run the startrhacm script against the same claim to try the RHACM deployment again."
printlog error "Otherwise, either manually uninstall RHACM or delete the claim, and then try again."
exit 1
fi
# Set CLI to point to RHACM namespace
printlog title "Setting oc CLI context to ${TARGET_NAMESPACE} namespace"
oc config set-context --current --namespace=${TARGET_NAMESPACE}
printlog title "Information for claimed RHACM cluster (Note: RHACM may be completing final installation steps):"
printlog info "Set KUBECONFIG:\n export KUBECONFIG=$(echo ${KUBECONFIG})"
printlog info "Lifeguard ClusterClaim directory (containing cluster details and more):\n cd $(echo ${KUBECONFIG} | sed 's/kubeconfig//')"
# Set ClusterPool to target size post-deployment
if [[ -n "${CLUSTERPOOL_POST_DEPLOY_SIZE}" ]]; then
printlog info "Scaling ClusterPool ${CLUSTERPOOL_NAME} to ${CLUSTERPOOL_POST_DEPLOY_SIZE}"
export KUBECONFIG=${PREVIOUS_KUBECONFIG}
oc scale clusterpool.hive ${CLUSTERPOOL_NAME} -n ${CLUSTERPOOL_TARGET_NAMESPACE} --replicas=${CLUSTERPOOL_POST_DEPLOY_SIZE}
fi
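# Example invocation (illustrative values, in addition to the LIFEGUARD_PATH,
# RHACM_PIPELINE_PATH, and RHACM_DEPLOY_PATH exports described above):
#   export CLUSTERPOOL_NAME=my-pool
#   export CLUSTERPOOL_TARGET_NAMESPACE=my-namespace
#   export RHACM_VERSION=2.9.0
#   ./startrhacm.sh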