From 3f8758c2c36fa2a7fb61e9bd7fbc4ad63e60aee3 Mon Sep 17 00:00:00 2001 From: Alex Brett Date: Tue, 9 Jul 2024 16:24:28 +0000 Subject: [PATCH 1/2] CP-49228: Updates to Portable SR Functionality Add a new option `-o` to xe-restore-metadata, which is used to distinguish whether to allow use of legacy backup VDIs, or enforce only use of the new format VDIs with known UUIDs. Also modify xe-restore-metadata such that it no longer stops searching the candidate list if only one VDI is found, but instead identifies all possible backup VDIs. If more than one is found, and you are doing anything other than listing the VDIs, the script will abort. This is to cover the case where a malicious legacy format VDI is present - we will detect it and the expected 'real' backup VDI. Modify xe-backup-metadata to always expect to use the deterministic UUID to identify the VDI to add backups to, do not rely on the `other-config:ctxs-pool-backup` property for identification in any way. This is XSA-459 / CVE-2024-31144 Also fix the following issues introduced with changes in v1.249.37: - Incorrect path to `xe` when calling vm-import - Issues using 'dry run' functionality due to shell quoting changes Signed-off-by: Alex Brett --- scripts/xe-backup-metadata | 32 +------ scripts/xe-restore-metadata | 164 ++++++++++++++++++++++++------------ 2 files changed, 111 insertions(+), 85 deletions(-) diff --git a/scripts/xe-backup-metadata b/scripts/xe-backup-metadata index d25e0ccfa15..1452fbaf708 100755 --- a/scripts/xe-backup-metadata +++ b/scripts/xe-backup-metadata @@ -55,23 +55,6 @@ function uuid5 { python -c "import uuid; print (uuid.uuid5(uuid.UUID('$1'), '$2'))" } -function validate_vdi_uuid { - # we check that vdi has the expected UUID which depends on the UUID of - # the SR. This is a deterministic hash of the SR UUID and the - # namespace UUID $NS. This UUID must match what Xapi's Uuidx module is using. - local NS="e93e0639-2bdb-4a59-8b46-352b3f408c19" - local sr="$1" - local vdi="$2" - local uuid - - uuid=$(uuid5 "$NS" "$sr") - if [ "$vdi" != "$uuid" ]; then - return 1 - else - return 0 - fi -} - function test_sr { sr_uuid_found=$(${XE} sr-list uuid="$1" --minimal) if [ "${sr_uuid_found}" != "$1" ]; then @@ -120,8 +103,8 @@ fi test_sr "${sr_uuid}" sr_name=$(${XE} sr-param-get uuid="${sr_uuid}" param-name=name-label) -# see if a backup VDI already exists on the selected SR -vdi_uuid=$(${XE} vdi-list other-config:ctxs-pool-backup=true sr-uuid="${sr_uuid}" params=uuid --minimal) +# assume use of the new format predictable UUID +vdi_uuid=$(${XE} vdi-list uuid="$(uuid5 "e93e0639-2bdb-4a59-8b46-352b3f408c19" "$sr_uuid")" --minimal) mnt= function cleanup { @@ -160,17 +143,6 @@ function cleanup { fi } -# if we can't validate the UUID of the VDI, prompt the user -if [ -n "${vdi_uuid}" ]; then - if ! validate_vdi_uuid "${sr_uuid}" "${vdi_uuid}" && [ "$yes" -eq 0 ]; then - echo "Backup VDI $vdi_uuid was most likley create by an earlier" - echo "version of this code. Make sure this is a VDI that you" - echo "created as we can't validate it without mounting it." - read -p "Continue? [Y/N]" -n 1 -r; echo - if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi - fi -fi - echo "Using SR: ${sr_name}" if [ -z "${vdi_uuid}" ]; then if [ "${create_vdi}" -gt 0 ]; then diff --git a/scripts/xe-restore-metadata b/scripts/xe-restore-metadata index 8c6299e9c61..1f921f7ee83 100755 --- a/scripts/xe-restore-metadata +++ b/scripts/xe-restore-metadata @@ -35,11 +35,11 @@ default_restore_mode="all" debug="/bin/true" function usage { - echo "Usage: $0 [-h] [-v] [-y] [-n] [-p] [-f] [-x ] [-u ] [-m all|sr]" + echo "Usage: $0 [-h] [-v] [-y] [-n] [-p] [-f] [-o] [-x ] [-u ] [-m all|sr]" echo echo " -h: Display this help message" echo " -x: Specify the VDI UUID to override probing" - echo " -p: Just scan for a metadata VDI and print out its UUID to stdout" + echo " -p: Just scan for metadata VDI(s) and print out UUID(s) to stdout" echo " -u: UUID of the SR you wish to restore from" echo " -n: Perform a dry run of the metadata import commands (default: false)" echo " -l: Just list the available backup dates" @@ -48,6 +48,7 @@ function usage { echo " -v: Verbose output" echo " -y: Assume non-interactive mode and yes to all questions" echo " -f: Forcibly restore VM metadata, dangerous due to its destructive nature, please always do a dry run before using this (default: false)" + echo " -o: Allow use of legacy backup VDIs (this should not be used with SRs with untrusted VDIs)" echo exit 1 } @@ -75,7 +76,9 @@ just_probe=0 chosen_date="" restore_mode=${default_restore_mode} force=0 -while getopts "yhpvx:d:lnu:m:f" opt ; do +legacy=0 +specified_vdi= +while getopts "yhpvx:d:lnu:m:fo" opt ; do case $opt in h) usage ;; u) sr_uuid=${OPTARG} ;; @@ -85,9 +88,10 @@ while getopts "yhpvx:d:lnu:m:f" opt ; do v) debug="" ;; d) chosen_date=${OPTARG} ;; m) restore_mode=${OPTARG} ;; - x) vdis=${OPTARG} ;; + x) specified_vdi=${OPTARG} ;; y) yes=1 ;; f) force=1 ;; + o) legacy=1 ;; *) echo "Invalid option"; usage ;; esac done @@ -118,16 +122,75 @@ sr_name=$(${XE} sr-param-get uuid="${sr_uuid}" param-name=name-label) # probe first for a VDI with known UUID derived from the SR to avoid # scanning for a VDI backup_vdi=$(uuid5 "${NS}" "${sr_uuid}") -if [ -z "${vdis}" ]; then - vdis=$(${XE} vdi-list uuid="${backup_vdi}" sr-uuid="${sr_uuid}" read-only=false --minimal) + +# Only allow a specified VDI that does not match the known UUID if operating in +# legacy mode +if [ -n "${specified_vdi}" ]; then + if [ "${specified_vdi}" != "${backup_vdi}" ] && [ "$legacy" -eq 0 ]; then + echo "The specified VDI UUID is not permitted, if attempting to use a legacy backup VDI please use the -o flag" >&2 + exit 1 + fi + vdis=${specified_vdi} fi -# get a list of all VDIs if an override has not been provided on the cmd line if [ -z "${vdis}" ]; then - vdis=$(${XE} vdi-list params=uuid sr-uuid="${sr_uuid}" read-only=false --minimal) + if [ "$legacy" -eq 0 ]; then + # In non-legacy mode, only use the known backup_vdi UUID + vdis=$(${XE} vdi-list uuid="${backup_vdi}" sr-uuid="${sr_uuid}" read-only=false --minimal) + else + # In legacy mode, scan all VDIs + vdis=$(${XE} vdi-list params=uuid sr-uuid="${sr_uuid}" read-only=false --minimal) + fi fi mnt= +vdi_uuid= +vbd_uuid= +device= +function createvbd { + ${debug} echo -n "Creating VBD: " >&2 + vbd_uuid=$(${XE} vbd-create vm-uuid="${CONTROL_DOMAIN_UUID}" vdi-uuid="${vdi_uuid}" device=autodetect 2>/dev/null) + + if [ $? -ne 0 -o -z "${vbd_uuid}" ]; then + ${debug} echo "error creating VBD for VDI ${vdi_uuid}" >&2 + cleanup + return 1 + fi + + ${debug} echo "${vbd_uuid}" >&2 + + ${debug} echo -n "Plugging VBD: " >&2 + ${XE} vbd-plug uuid="${vbd_uuid}" + device=/dev/$(${XE} vbd-param-get uuid="${vbd_uuid}" param-name=device) + + if [ ! -b "${device}" ]; then + ${debug} echo "${device}: not a block special" >&2 + cleanup + return 1 + fi + + ${debug} echo "${device}" >&2 + return 0 +} + +function mountvbd { + mnt="/var/run/pool-backup-${vdi_uuid}" + mkdir -p "${mnt}" + /sbin/fsck -a "${device}" >/dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "File system integrity error. Please correct manually." >&2 + cleanup + return 1 + fi + mount "${device}" "${mnt}" >/dev/null 2>&1 + if [ $? -ne 0 ]; then + ${debug} echo failed >&2 + cleanup + return 1 + fi + return 0 +} + function cleanup { cd / if [ ! -z "${mnt}" ]; then @@ -165,65 +228,32 @@ function cleanup { if [ -z "${vdis}" ]; then echo "No VDIs found on SR." >&2 + if [ "$legacy" -eq 0 ]; then + echo "If you believe there may be a legacy backup VDI present, you can use the -o flag to search for it (this should not be used with untrusted VDIs)" >&2 + fi exit 0 fi trap cleanup SIGINT ERR +declare -a matched_vdis for vdi_uuid in ${vdis}; do - if [ "${vdi_uuid}" != "${backup_vdi}" ] && [ "$yes" -eq 0 ]; then - echo "Probing VDI ${vdi_uuid}." - echo "This VDI was created with a prior version of this code." - echo "Its validity can't be checked without mounting it first." - read -p "Continue? [Y/N]" -n 1 -r; echo - if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi - fi - - ${debug} echo -n "Creating VBD: " >&2 - vbd_uuid=$(${XE} vbd-create vm-uuid="${CONTROL_DOMAIN_UUID}" vdi-uuid="${vdi_uuid}" device=autodetect 2>/dev/null) - - if [ $? -ne 0 -o -z "${vbd_uuid}" ]; then - ${debug} echo "error creating VBD for VDI ${vdi_uuid}" >&2 - cleanup - continue - fi - - ${debug} echo "${vbd_uuid}" >&2 - - ${debug} echo -n "Plugging VBD: " >&2 - ${XE} vbd-plug uuid="${vbd_uuid}" - device=/dev/$(${XE} vbd-param-get uuid="${vbd_uuid}" param-name=device) - - if [ ! -b "${device}" ]; then - ${debug} echo "${device}: not a block special" >&2 - cleanup + createvbd + if [ $? -ne 0 ]; then continue fi - ${debug} echo "${device}" >&2 - ${debug} echo -n "Probing device: " >&2 mnt= if [ "$(file_exists "${device}" "/.ctxs-metadata-backup")" = y ]; then ${debug} echo "found metadata backup" >&2 - mnt="/var/run/pool-backup-${vdi_uuid}" - mkdir -p "${mnt}" - /sbin/e2fsck -p -f "${device}" >/dev/null 2>&1 + mountvbd if [ $? -ne 0 ]; then - echo "File system integrity error. Please correct manually." >&2 - cleanup continue fi - mount -o ro,nosuid,noexec,nodev "${device}" "${mnt}" >/dev/null 2>&1 - if [ $? -ne 0 ]; then - ${debug} echo failed >&2 - cleanup - else - if [ -e "${mnt}/.ctxs-metadata-backup" ]; then - ${debug} echo "Found backup metadata on VDI: ${vdi_uuid}" >&2 - xe vdi-param-set uuid="${vdi_uuid}" other-config:ctxs-pool-backup=true - break - fi + if [ -e "${mnt}/.ctxs-metadata-backup" ]; then + ${debug} echo "Found backup metadata on VDI: ${vdi_uuid}" >&2 + matched_vdis+=( ${vdi_uuid} ) fi else ${debug} echo "backup metadata not found" >&2 @@ -232,11 +262,35 @@ for vdi_uuid in ${vdis}; do done if [ $just_probe -gt 0 ]; then - echo "${vdi_uuid}" - cleanup + for vdi_uuid in "${matched_vdis[@]}"; do + echo "${vdi_uuid}" + done exit 0 fi +if [ "${#matched_vdis[@]}" -eq 0 ]; then + echo "Metadata backups not found." >&2 + exit 1 +fi + +if [ "${#matched_vdis[@]}" -gt 1 ]; then + echo "Multiple metadata backups found, please use -x to specify the VDI UUID to use" >&2 + exit 1 +fi + +vdi_uuid=${matched_vdis[0]} +xe vdi-param-set uuid="${vdi_uuid}" other-config:ctxs-pool-backup=true +createvbd +if [ $? -ne 0 ]; then + echo "Failure creating VBD for backup VDI ${vdi_uuid}" >&2 + exit 1 +fi +mountvbd +if [ $? -ne 0 ]; then + echo "Failure mounting backup VDI ${vdi_uuid}" >&2 + exit 1 +fi + cd "${mnt}" ${debug} echo "" >&2 @@ -323,8 +377,8 @@ else fi shopt -s nullglob for meta in *.vmmeta; do - echo xe vm-import filename="${meta}" sr-uuid="${sr_uuid}" --metadata --preserve"${force_flag}""${dry_run_flag}" - "@BINDIR@/bin/xe" vm-import filename="${full_dir}/${meta}" sr-uuid="${sr_uuid}" --preserve"${force_flag}""${dry_run_flag}" + echo xe vm-import filename="${meta}" sr-uuid="${sr_uuid}" --metadata --preserve${force_flag}${dry_run_flag} + "@BINDIR@/xe" vm-import filename="${full_dir}/${meta}" sr-uuid="${sr_uuid}" --metadata --preserve${force_flag}${dry_run_flag} if [ $? -gt 0 ]; then error_count=$(( $error_count + 1 )) else From b93e587f4af4177ff6be326c3fa53dcee52ab2d8 Mon Sep 17 00:00:00 2001 From: Alex Brett Date: Thu, 30 May 2024 08:39:59 +0000 Subject: [PATCH 2/2] CA-393578: Fix vbd cleanup in metadata scripts The xe-[backup,restore]-metadata scripts have cleanup logic designed to ensure we do not leave any vbd objects etc behind. This logic calls `vbd-unplug` with a 20s timeout, and then (regardless of the result) allows up to 10s for any device specified in the VBD to disappear - if it does not, it does not trigger a `vbd-destroy`. This logic fails in the case where a VDI is attached to a VM running on the same host, as the `device` field in the new VBD will be populated with the backend device for the running VM. In this case, the `vbd-unplug` fails immediately (as the vbd is not plugged because the original `vbd-plug` attempt fails as the VDI is in use), but then we sit waiting for 10s for the device to disappear (which is obviously does not), and then fail to trigger a `vbd-destroy`, leaving the VBD behind. Fix this by simply removing the wait for the device to disappear and always attempting a `vbd-destroy`, as I am not aware of any situation where this additional 10s wait will give any benefit given current behaviours. Signed-off-by: Alex Brett --- scripts/xe-backup-metadata | 21 ++------------------- scripts/xe-restore-metadata | 23 +++-------------------- 2 files changed, 5 insertions(+), 39 deletions(-) diff --git a/scripts/xe-backup-metadata b/scripts/xe-backup-metadata index 1452fbaf708..a6fb16a45ff 100755 --- a/scripts/xe-backup-metadata +++ b/scripts/xe-backup-metadata @@ -118,25 +118,8 @@ function cleanup { if [ ! -z "${vbd_uuid}" ]; then ${debug} echo -n "Unplugging VBD: " ${XE} vbd-unplug uuid="${vbd_uuid}" timeout=20 - # poll for the device to go away if we know its name - if [ "${device}" != "" ]; then - device_gone=0 - for ((i=0; i<10; i++)); do - ${debug} echo -n "." - if [ ! -b "${device}" ]; then - ${debug} echo " done" - device_gone=1 - break - fi - sleep 1 - done - if [ ${device_gone} -eq 0 ]; then - ${debug} echo " failed" - echo "Please destroy VBD ${vbd_uuid} manually." - else - ${XE} vbd-destroy uuid="${vbd_uuid}" - fi - fi + ${debug} echo -n "Destroying VBD: " + ${XE} vbd-destroy uuid="${vbd_uuid}" fi if [ ${fs_uninitialised} -eq 1 -a -n "${vdi_uuid}" ] ; then ${XE} vdi-destroy uuid="${vdi_uuid}" diff --git a/scripts/xe-restore-metadata b/scripts/xe-restore-metadata index 1f921f7ee83..635d2e8cd95 100755 --- a/scripts/xe-restore-metadata +++ b/scripts/xe-restore-metadata @@ -202,26 +202,9 @@ function cleanup { if [ ! -z "${vbd_uuid}" ]; then ${debug} echo -n "Unplugging VBD: " >&2 ${XE} vbd-unplug uuid="${vbd_uuid}" timeout=20 - # poll for the device to go away if we know its name - if [ "${device}" != "" ]; then - device_gone=0 - for ((i=0; i<10; i++)); do - ${debug} echo -n "." >&2 - if [ ! -b "${device}" ]; then - ${debug} echo " done" >&2 - device_gone=1 - break - fi - sleep 1 - done - if [ ${device_gone} -eq 0 ]; then - ${debug} echo " failed" >&2 - ${debug} echo "Please destroy VBD ${vbd_uuid} manually." >&2 - else - ${XE} vbd-destroy uuid="${vbd_uuid}" - vbd_uuid="" - fi - fi + ${debug} echo -n "Destroying VBD: " >&2 + ${XE} vbd-destroy uuid="${vbd_uuid}" + vbd_uuid="" device="" fi }