From 8eb26b6924f7f3981b7bbe44781aa2fa7de2fa43 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 16 Oct 2024 13:04:06 +0100 Subject: [PATCH 01/30] CP-50475: Remove unnecessary Parallel atoms from the xenopsd queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parallel atoms do quite a bit of unnecessary actions even when they are empty. They are also not needed when running a single task. They also show as spans in the traces. Removing them makes the traces shorter and easier to read. Co-authored-by: Edwin Török Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/lib/xenops_server.ml | 115 ++++++++++------------------- 1 file changed, 41 insertions(+), 74 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 669af5566a..d5e3da2943 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -1550,6 +1550,18 @@ let dequarantine_ops vgpus = fun vgpu -> PCI_dequarantine vgpu.physical_pci_address ) +(* Avoid generating list-based atoms with 1 or no actions in them *) +let collect_into apply = function [] -> [] | [op] -> [op] | lst -> apply lst + +let parallel name ~id = + collect_into (fun ls -> [Parallel (id, Printf.sprintf "%s VM=%s" name id, ls)]) + +let parallel_concat name ~id lst = parallel name ~id (List.concat lst) + +let parallel_map name ~id lst f = parallel name ~id (List.concat_map f lst) + +let map_or_empty f x = Option.value ~default:[] (Option.map f x) + let rec atomics_of_operation = function | VM_start (id, force) -> let vbds_rw, vbds_ro = VBD_DB.vbds id |> vbd_plug_sets in @@ -1574,36 +1586,21 @@ let rec atomics_of_operation = function (vbds_rw @ vbds_ro) (* keeping behaviour of vbd_plug_order: rw vbds must be plugged before ro vbds, see vbd_plug_sets *) - ; List.map + ; List.concat_map (fun (ty, vbds) -> - Parallel - ( id - , Printf.sprintf "VBD.epoch_begin %s vm=%s" ty id - , List.filter_map - (fun vbd -> - Option.map - (fun x -> - VBD_epoch_begin (vbd.Vbd.id, x, vbd.Vbd.persistent) - ) - vbd.Vbd.backend + parallel_map (Printf.sprintf "VBD.epoch_begin %s" ty) ~id vbds + (fun vbd -> + map_or_empty + (fun x -> + [VBD_epoch_begin (vbd.Vbd.id, x, vbd.Vbd.persistent)] ) - vbds - ) + vbd.Vbd.backend + ) ) [("RW", vbds_rw); ("RO", vbds_ro)] - ; [ - (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) - Parallel - ( id - , Printf.sprintf "VBD.plug RW vm=%s" id - , List.map (fun vbd -> VBD_plug vbd.Vbd.id) vbds_rw - ) - ; Parallel - ( id - , Printf.sprintf "VBD.plug RO vm=%s" id - , List.map (fun vbd -> VBD_plug vbd.Vbd.id) vbds_ro - ) - ] + (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) + ; parallel_map "VBD.plug RW" ~id vbds_rw (fun vbd -> [VBD_plug vbd.Vbd.id]) + ; parallel_map "VBD.plug RO" ~id vbds_ro (fun vbd -> [VBD_plug vbd.Vbd.id]) ; List.map (fun vif -> VIF_set_active (vif.Vif.id, true)) vifs ; List.map (fun vif -> VIF_plug vif.Vif.id) vifs ; List.map (fun vgpu -> VGPU_set_active (vgpu.Vgpu.id, true)) vgpus @@ -1623,8 +1620,7 @@ let rec atomics_of_operation = function let pcis = PCI_DB.pcis id in let vusbs = VUSB_DB.vusbs id in [ - Option.value ~default:[] - (Option.map (fun x -> [VM_shutdown_domain (id, PowerOff, x)]) timeout) + map_or_empty (fun x -> [VM_shutdown_domain (id, PowerOff, x)]) timeout (* Before shutting down a VM, we need to unplug its VUSBs. *) ; List.map (fun vusb -> VUSB_unplug vusb.Vusb.id) vusbs ; [ @@ -1633,12 +1629,10 @@ let rec atomics_of_operation = function pause the domain before destroying the device model. *) Best_effort (VM_pause id) ; VM_destroy_device_model id - ; Parallel - ( id - , Printf.sprintf "VBD.unplug vm=%s" id - , List.map (fun vbd -> VBD_unplug (vbd.Vbd.id, true)) vbds - ) ] + ; parallel_map "VBD.unplug" ~id vbds (fun vbd -> + [VBD_unplug (vbd.Vbd.id, true)] + ) ; List.map (fun vif -> VIF_unplug (vif.Vif.id, true)) vifs ; List.map (fun pci -> PCI_unplug pci.Pci.id) pcis ; [VM_destroy id] @@ -1660,19 +1654,9 @@ let rec atomics_of_operation = function List.map (fun vbd -> VBD_set_active (vbd.Vbd.id, true)) (vbds_rw @ vbds_ro) - ; [ - (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) - Parallel - ( id - , Printf.sprintf "VBD.plug RW vm=%s" id - , List.map (fun vbd -> VBD_plug vbd.Vbd.id) vbds_rw - ) - ; Parallel - ( id - , Printf.sprintf "VBD.plug RO vm=%s" id - , List.map (fun vbd -> VBD_plug vbd.Vbd.id) vbds_ro - ) - ] + ; (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) + parallel_map "VBD.plug RW" ~id vbds_rw (fun vbd -> [VBD_plug vbd.Vbd.id]) + ; parallel_map "VBD.plug RO" ~id vbds_ro (fun vbd -> [VBD_plug vbd.Vbd.id]) ; (if restore_vifs then atomics_of_operation (VM_restore_vifs id) else []) ; List.map (fun vgpu -> VGPU_set_active (vgpu.Vgpu.id, true)) vgpus (* Nvidia SRIOV PCI devices have been already been plugged *) @@ -1697,19 +1681,11 @@ let rec atomics_of_operation = function [ [VM_hook_script (id, Xenops_hooks.VM_pre_destroy, reason)] ; atomics_of_operation (VM_shutdown (id, timeout)) - ; [ - Parallel - ( id - , Printf.sprintf "VBD.epoch_end vm=%s" id - , List.filter_map - (fun vbd -> - Option.map - (fun x -> VBD_epoch_end (vbd.Vbd.id, x)) - vbd.Vbd.backend - ) - vbds - ) - ] + ; parallel_map "VBD.epoch_end" ~id vbds (fun vbd -> + map_or_empty + (fun x -> [VBD_epoch_end (vbd.Vbd.id, x)]) + vbd.Vbd.backend + ) ; List.map (fun vbd -> VBD_set_active (vbd.Vbd.id, false)) vbds ; List.map (fun vif -> VIF_set_active (vif.Vif.id, false)) vifs ; List.map (fun vgpu -> VGPU_set_active (vgpu.Vgpu.id, false)) vgpus @@ -1725,23 +1701,14 @@ let rec atomics_of_operation = function Xenops_hooks.reason__clean_reboot in [ - Option.value ~default:[] - (Option.map (fun x -> [VM_shutdown_domain (id, Reboot, x)]) timeout) + map_or_empty (fun x -> [VM_shutdown_domain (id, Reboot, x)]) timeout ; [VM_hook_script (id, Xenops_hooks.VM_pre_destroy, reason)] ; atomics_of_operation (VM_shutdown (id, None)) - ; [ - Parallel - ( id - , Printf.sprintf "VBD.epoch_end vm=%s" id - , List.filter_map - (fun vbd -> - Option.map - (fun x -> VBD_epoch_end (vbd.Vbd.id, x)) - vbd.Vbd.backend - ) - vbds - ) - ] + ; parallel_map "VBD.epoch_end" ~id vbds (fun vbd -> + map_or_empty + (fun x -> [VBD_epoch_end (vbd.Vbd.id, x)]) + vbd.Vbd.backend + ) ; [ VM_hook_script (id, Xenops_hooks.VM_post_destroy, reason) ; VM_hook_script From 0425b0b182d739bd4fce78a36ae0b26338e2a17b Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 15 Oct 2024 12:41:07 +0100 Subject: [PATCH 02/30] CP-50475: parallelize device ops during VM lifecycle ops Operations on different devices should be independent and therefore can be parallelized. This both means parallelizing operations on different device types and on devices for the same type. An atom to serialize action has been introduced because the operation regarding a single device must be kept serialized. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/lib/xenops_server.ml | 167 ++++++++++++++++++----------- ocaml/xenopsd/lib/xenops_task.ml | 4 +- ocaml/xenopsd/lib/xenops_utils.ml | 3 +- 3 files changed, 109 insertions(+), 65 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index d5e3da2943..579ce5d6f0 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -162,6 +162,7 @@ type atomic = | VM_rename of (Vm.id * Vm.id * rename_when) | VM_import_metadata of (Vm.id * Metadata.t) | Parallel of Vm.id * string * atomic list + | Serial of Vm.id * string * atomic list | Best_effort of atomic [@@deriving rpcty] @@ -271,6 +272,9 @@ let rec name_of_atomic = function | Parallel (_, _, atomics) -> Printf.sprintf "Parallel (%s)" (String.concat " | " (List.map name_of_atomic atomics)) + | Serial (_, _, atomics) -> + Printf.sprintf "Serial (%s)" + (String.concat " & " (List.map name_of_atomic atomics)) | Best_effort atomic -> Printf.sprintf "Best_effort (%s)" (name_of_atomic atomic) @@ -1556,8 +1560,13 @@ let collect_into apply = function [] -> [] | [op] -> [op] | lst -> apply lst let parallel name ~id = collect_into (fun ls -> [Parallel (id, Printf.sprintf "%s VM=%s" name id, ls)]) +let serial name ~id = + collect_into (fun ls -> [Serial (id, Printf.sprintf "%s VM=%s" name id, ls)]) + let parallel_concat name ~id lst = parallel name ~id (List.concat lst) +let serial_concat name ~id lst = serial name ~id (List.concat lst) + let parallel_map name ~id lst f = parallel name ~id (List.concat_map f lst) let map_or_empty f x = Option.value ~default:[] (Option.map f x) @@ -1573,6 +1582,23 @@ let rec atomics_of_operation = function List.partition (is_nvidia_sriov vgpus) pcis in let no_sharept = List.exists is_no_sharept vgpus in + let plug_vbds typ vbds = + let pf = Printf.sprintf in + let name_multi = pf "VBDs.activate_epoch_and_plug %s" typ in + let name_one = pf "VBD.activate_epoch_and_plug %s" typ in + parallel_map name_multi ~id vbds (fun vbd -> + serial_concat name_one ~id + [ + [VBD_set_active (vbd.Vbd.id, true)] + ; map_or_empty + (fun x -> + [VBD_epoch_begin (vbd.Vbd.id, x, vbd.Vbd.persistent)] + ) + vbd.Vbd.backend + ; [VBD_plug vbd.Vbd.id] + ] + ) + in [ dequarantine_ops vgpus ; [ @@ -1581,35 +1607,35 @@ let rec atomics_of_operation = function ; VM_create (id, None, None, no_sharept) ; VM_build (id, force) ] - ; List.map - (fun vbd -> VBD_set_active (vbd.Vbd.id, true)) - (vbds_rw @ vbds_ro) - (* keeping behaviour of vbd_plug_order: rw vbds must be plugged before - ro vbds, see vbd_plug_sets *) - ; List.concat_map - (fun (ty, vbds) -> - parallel_map (Printf.sprintf "VBD.epoch_begin %s" ty) ~id vbds - (fun vbd -> - map_or_empty - (fun x -> - [VBD_epoch_begin (vbd.Vbd.id, x, vbd.Vbd.persistent)] - ) - vbd.Vbd.backend - ) - ) - [("RW", vbds_rw); ("RO", vbds_ro)] - (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) - ; parallel_map "VBD.plug RW" ~id vbds_rw (fun vbd -> [VBD_plug vbd.Vbd.id]) - ; parallel_map "VBD.plug RO" ~id vbds_ro (fun vbd -> [VBD_plug vbd.Vbd.id]) - ; List.map (fun vif -> VIF_set_active (vif.Vif.id, true)) vifs - ; List.map (fun vif -> VIF_plug vif.Vif.id) vifs - ; List.map (fun vgpu -> VGPU_set_active (vgpu.Vgpu.id, true)) vgpus - ; List.map (fun pci -> PCI_plug (pci.Pci.id, false)) pcis_sriov + ; parallel_concat "Devices.plug (no qemu)" ~id + [ + (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) + serial_concat "VBDs.acticate_epoch_and_plug RW+RO" ~id + [plug_vbds "RW" vbds_rw; plug_vbds "RO" vbds_ro] + ; List.concat_map + (fun vif -> + serial "VIF.activate_and_plug" ~id + [VIF_set_active (vif.Vif.id, true); VIF_plug vif.Vif.id] + ) + vifs + ; serial_concat "VGPUs.activate & PCI.plug (SRIOV)" ~id + [ + parallel_map "VGPUs.activate" ~id vgpus (fun vgpu -> + [VGPU_set_active (vgpu.Vgpu.id, true)] + ) + ; parallel_map "PCIs.plug (SRIOV)" ~id pcis_sriov (fun pci -> + [PCI_plug (pci.Pci.id, false)] + ) + ] + ] ; [VM_create_device_model (id, false)] (* PCI and USB devices are hot-plugged into HVM guests via QEMU, so the following operations occur after creating the device models *) - ; List.map (fun pci -> PCI_plug (pci.Pci.id, true)) pcis_other - ; List.map (fun vusb -> VUSB_plug vusb.Vusb.id) vusbs + ; parallel_concat "Devices.plug (qemu)" ~id + [ + List.map (fun pci -> PCI_plug (pci.Pci.id, true)) pcis_other + ; List.map (fun vusb -> VUSB_plug vusb.Vusb.id) vusbs + ] (* At this point the domain is considered survivable. *) ; [VM_set_domain_action_request (id, None)] ] @@ -1622,7 +1648,9 @@ let rec atomics_of_operation = function [ map_or_empty (fun x -> [VM_shutdown_domain (id, PowerOff, x)]) timeout (* Before shutting down a VM, we need to unplug its VUSBs. *) - ; List.map (fun vusb -> VUSB_unplug vusb.Vusb.id) vusbs + ; parallel_map "VUSBs.unplug" ~id vusbs (fun vusb -> + [VUSB_unplug vusb.Vusb.id] + ) ; [ (* CA-315450: in a hard shutdown or snapshot revert, timeout=None and VM_shutdown_domain is not called. To avoid any interference, we @@ -1630,42 +1658,50 @@ let rec atomics_of_operation = function Best_effort (VM_pause id) ; VM_destroy_device_model id ] - ; parallel_map "VBD.unplug" ~id vbds (fun vbd -> - [VBD_unplug (vbd.Vbd.id, true)] - ) - ; List.map (fun vif -> VIF_unplug (vif.Vif.id, true)) vifs - ; List.map (fun pci -> PCI_unplug pci.Pci.id) pcis + ; parallel_concat "Devices.unplug" ~id + [ + List.map (fun vbd -> VBD_unplug (vbd.Vbd.id, true)) vbds + ; List.map (fun vif -> VIF_unplug (vif.Vif.id, true)) vifs + ; List.map (fun pci -> PCI_unplug pci.Pci.id) pcis + ] ; [VM_destroy id] ] |> List.concat | VM_restore_vifs id -> let vifs = VIF_DB.vifs id in - [ - List.map (fun vif -> VIF_set_active (vif.Vif.id, true)) vifs - ; List.map (fun vif -> VIF_plug vif.Vif.id) vifs - ] - |> List.concat + parallel_map "VIFs.activate_and_plug" ~id vifs (fun vif -> + serial "VIF.activate_and_plug" ~id + [VIF_set_active (vif.Vif.id, true); VIF_plug vif.Vif.id] + ) | VM_restore_devices (id, restore_vifs) -> let vbds_rw, vbds_ro = VBD_DB.vbds id |> vbd_plug_sets in let vgpus = VGPU_DB.vgpus id in let pcis = PCI_DB.pcis id |> pci_plug_order in let pcis_other = List.filter (is_not_nvidia_sriov vgpus) pcis in + let plug_vbds typ vbds = + let pf = Printf.sprintf in + let name_multi = pf "VBDs.activate_and_plug %s" typ in + let name_one = pf "VBD.activate_and_plug %s" typ in + parallel_map name_multi ~id vbds (fun vbd -> + serial name_one ~id + [VBD_set_active (vbd.Vbd.id, true); VBD_plug vbd.Vbd.id] + ) + in [ - List.map - (fun vbd -> VBD_set_active (vbd.Vbd.id, true)) - (vbds_rw @ vbds_ro) - ; (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) - parallel_map "VBD.plug RW" ~id vbds_rw (fun vbd -> [VBD_plug vbd.Vbd.id]) - ; parallel_map "VBD.plug RO" ~id vbds_ro (fun vbd -> [VBD_plug vbd.Vbd.id]) + (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) + plug_vbds "RW" vbds_rw + ; plug_vbds "RO" vbds_ro ; (if restore_vifs then atomics_of_operation (VM_restore_vifs id) else []) - ; List.map (fun vgpu -> VGPU_set_active (vgpu.Vgpu.id, true)) vgpus - (* Nvidia SRIOV PCI devices have been already been plugged *) - ; [ - VM_create_device_model (id, true) - (* PCI and USB devices are hot-plugged into HVM guests via QEMU, so - the following operations occur after creating the device models *) - ] - ; List.map (fun pci -> PCI_plug (pci.Pci.id, true)) pcis_other + ; (* Nvidia SRIOV PCI devices have been already been plugged *) + parallel_map "VGPUs.activate" ~id vgpus (fun vgpu -> + [VGPU_set_active (vgpu.Vgpu.id, true)] + ) + ; [VM_create_device_model (id, true)] + (* PCI and USB devices are hot-plugged into HVM guests via QEMU, so + the following operations occur after creating the device models *) + ; parallel_map "PCIs.plug" ~id pcis_other (fun pci -> + [PCI_plug (pci.Pci.id, true)] + ) ] |> List.concat | VM_poweroff (id, timeout) -> @@ -1678,17 +1714,24 @@ let rec atomics_of_operation = function else Xenops_hooks.reason__clean_shutdown in - [ - [VM_hook_script (id, Xenops_hooks.VM_pre_destroy, reason)] - ; atomics_of_operation (VM_shutdown (id, timeout)) - ; parallel_map "VBD.epoch_end" ~id vbds (fun vbd -> + let unplug_vbd vbd = + serial_concat "VBD.epoch_and_deactivate" ~id + [ map_or_empty (fun x -> [VBD_epoch_end (vbd.Vbd.id, x)]) vbd.Vbd.backend - ) - ; List.map (fun vbd -> VBD_set_active (vbd.Vbd.id, false)) vbds - ; List.map (fun vif -> VIF_set_active (vif.Vif.id, false)) vifs - ; List.map (fun vgpu -> VGPU_set_active (vgpu.Vgpu.id, false)) vgpus + ; [VBD_set_active (vbd.Vbd.id, false)] + ] + in + [ + [VM_hook_script (id, Xenops_hooks.VM_pre_destroy, reason)] + ; atomics_of_operation (VM_shutdown (id, timeout)) + ; parallel_concat "Devices.deactivate" ~id + [ + List.concat_map unplug_vbd vbds + ; List.map (fun vif -> VIF_set_active (vif.Vif.id, false)) vifs + ; List.map (fun vgpu -> VGPU_set_active (vgpu.Vgpu.id, false)) vgpus + ] ; [VM_hook_script (id, Xenops_hooks.VM_post_destroy, reason)] ] |> List.concat @@ -1825,7 +1868,7 @@ let rec perform_atomic ~progress_callback ?subtask:_ ?result (op : atomic) (Xenops_task.id_of_handle t) (List.length atoms) description in - let with_tracing = parallel_id_with_tracing parallel_id t in + let with_tracing = id_with_tracing parallel_id t in debug "begin_%s" parallel_id ; let task_list = queue_atomics_and_wait ~progress_callback ~max_parallel_atoms:10 @@ -1869,6 +1912,8 @@ let rec perform_atomic ~progress_callback ?subtask:_ ?result (op : atomic) List.iter (fun err -> match err with None -> () | Some e -> raise e) errors + | Serial (_, _, atoms) -> + List.iter (Fun.flip (perform_atomic ~progress_callback) t) atoms | VIF_plug id -> debug "VIF.plug %s" (VIF_DB.string_of_id id) ; B.VIF.plug t (VIF_DB.vm_of id) (VIF_DB.read_exn id) ; @@ -2468,7 +2513,7 @@ and trigger_cleanup_after_failure_atom op t = immediate_operation dbg id (VM_check_state id) | Best_effort op -> trigger_cleanup_after_failure_atom op t - | Parallel (_id, _description, ops) -> + | Parallel (_id, _description, ops) | Serial (_id, _description, ops) -> List.iter (fun op -> trigger_cleanup_after_failure_atom op t) ops | VM_rename (id1, id2, _) -> immediate_operation dbg id1 (VM_check_state id1) ; diff --git a/ocaml/xenopsd/lib/xenops_task.ml b/ocaml/xenopsd/lib/xenops_task.ml index 3fcaffefec..23d88beef1 100644 --- a/ocaml/xenopsd/lib/xenops_task.ml +++ b/ocaml/xenopsd/lib/xenops_task.ml @@ -70,8 +70,8 @@ let is_task task = function | _ -> None -let parallel_id_with_tracing parallel_id t = - Debug_info.make ~log:parallel_id ~tracing:(Xenops_task.tracing t) +let id_with_tracing id t = + Debug_info.make ~log:id ~tracing:(Xenops_task.tracing t) |> Debug_info.to_string let dbg_with_traceparent_of_task t = diff --git a/ocaml/xenopsd/lib/xenops_utils.ml b/ocaml/xenopsd/lib/xenops_utils.ml index d948f9865d..481ad1b610 100644 --- a/ocaml/xenopsd/lib/xenops_utils.ml +++ b/ocaml/xenopsd/lib/xenops_utils.ml @@ -620,8 +620,7 @@ let chunks size lst = [op] :: xs :: xss ) [] lst - |> List.map (fun xs -> List.rev xs) - |> List.rev + |> List.rev_map (fun xs -> List.rev xs) let really_kill pid = try Unixext.kill_and_wait pid From 7ad5f9517294b53a440f15f67abd2bf0802fbc53 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 17 Oct 2024 09:50:48 +0100 Subject: [PATCH 03/30] xapi_stdext_unix/test: Fix intermittent systemd cram test failure Instead of sleeping and hoping for the best, wait for the background job to finish. Signed-off-by: Andrii Sultanov --- .../libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t index 28790e8a32..bfa73c84c6 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t @@ -10,8 +10,8 @@ $ ./test_systemd.exe --server & @systemd.socket READY=1 - $ sleep 1 $ ./test_systemd.exe --notify + $ wait == Use socket files $ export TMPDIR=${TMPDIR:-/tmp} @@ -22,6 +22,7 @@ $ sleep 1 $ test -S "$NOTIFY_SOCKET" $ ./test_systemd.exe --notify + $ wait == Currently not run tests because of insufficient permissions == in cram to be manipulating this file From e83ba5eda7011bfc44a702e70f73f15c9607e4b0 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Fri, 18 Oct 2024 16:22:14 +0100 Subject: [PATCH 04/30] Fix a build warning with GCC 12.3.0 GCC correctly reports "pointer targets differ in signedness". Signed-off-by: Ross Lagerwall --- .../libs/xapi-stdext/lib/xapi-stdext-zerocheck/zerocheck_stub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-zerocheck/zerocheck_stub.c b/ocaml/libs/xapi-stdext/lib/xapi-stdext-zerocheck/zerocheck_stub.c index 776ef85484..4606cf95a4 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-zerocheck/zerocheck_stub.c +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-zerocheck/zerocheck_stub.c @@ -31,7 +31,7 @@ value is_all_zeros(value string, value length) for (i = len / 4; i > 0; i--) if (*p++ != 0) goto notallzero; - s = (unsigned char *) p; + s = (const char *) p; for (i = 0; i < len % 4; i++) if (s[i] != 0) goto notallzero; From e3f92131f8c689378a375d5e01d2a26ef395d339 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Fri, 18 Oct 2024 16:51:23 +0100 Subject: [PATCH 05/30] Remove use of deprecated syslog Standard* type Newer systemd warns that the "syslog" StandardOutput/StandardError type is deprecated and automatically uses the journal instead. Fix this by removing the explicit setting of StandardOutput/StandardError. Instead, the service will use the default values configured in systemd (DefaultStandardOutput/DefaultStandardError) which for a XenServer system will result in the output going to rsyslog. Signed-off-by: Ross Lagerwall --- ocaml/forkexecd/lib/fe_systemctl.ml | 2 -- scripts/varstored-guard.service | 2 -- scripts/xapi-nbd.service | 1 - 3 files changed, 5 deletions(-) diff --git a/ocaml/forkexecd/lib/fe_systemctl.ml b/ocaml/forkexecd/lib/fe_systemctl.ml index cd76bede41..b36ee6674a 100644 --- a/ocaml/forkexecd/lib/fe_systemctl.ml +++ b/ocaml/forkexecd/lib/fe_systemctl.ml @@ -60,8 +60,6 @@ let start_transient ?(env = Array.of_list default_env) ?(properties = []) ) ; ("SyslogIdentifier", syslog_key) ; ("SyslogLevel", "debug") - ; ("StandardOutput", "syslog") - ; ("StandardError", "inherit") ; ("StartLimitInterval", "0") (* no rate-limit, for bootstorms *) ; ("ExecStart", String.concat " " (cmd :: List.map Filename.quote args)) ; ("Type", Type.to_string exec_ty) diff --git a/scripts/varstored-guard.service b/scripts/varstored-guard.service index c9d1b9bd93..d7cb838336 100644 --- a/scripts/varstored-guard.service +++ b/scripts/varstored-guard.service @@ -9,8 +9,6 @@ Wants=message-switch.service syslog.target Type=simple Environment=OCAMLRUNPARAM=b ExecStart=/usr/sbin/varstored-guard -# Needed to ensure exceptions are logged when the program fails: -StandardError=syslog LimitNOFILE=4096 # restart but fail if more than 5 failures in 30s Restart=on-failure diff --git a/scripts/xapi-nbd.service b/scripts/xapi-nbd.service index bca7b551a1..fcbacd7cb3 100644 --- a/scripts/xapi-nbd.service +++ b/scripts/xapi-nbd.service @@ -11,7 +11,6 @@ Environment=OCAMLRUNPARAM=b # and the PathExists in xapi-nbd.path: any change must be made in all three files. ExecStart=/usr/sbin/xapi-nbd --certfile=/etc/xensource/xapi-ssl.pem StandardOutput=null -StandardError=syslog # restart but fail if more than 5 failures in 2s Restart=on-failure StartLimitBurst=5 From 1bdb22a9390e5be69c65b78bdde1d4c42f804725 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 21 Oct 2024 16:16:47 +0100 Subject: [PATCH 06/30] CA-400860: rrdp-netdev - drop xenctrl, use xenstore to get UUIDs from domids instead Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/bin/rrdp-netdev/dune | 2 +- ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml | 27 ++++++++++--------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdp-netdev/dune b/ocaml/xcp-rrdd/bin/rrdp-netdev/dune index c5acc80a8b..55c31d4d9f 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-netdev/dune +++ b/ocaml/xcp-rrdd/bin/rrdp-netdev/dune @@ -3,6 +3,7 @@ (name rrdp_netdev) (libraries astring + ezxenstore.core integers netlink rrdd-plugin @@ -13,7 +14,6 @@ xapi-log xapi-rrd xapi-stdext-std - xenctrl ) ) diff --git a/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml b/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml index 299bb9a97d..c7dab55ac9 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml @@ -13,11 +13,14 @@ *) open Rrdd_plugin +open Ezxenstore_core module D = Debug.Make (struct let name = "xcp-rrdp-netdev" end) module Process = Rrdd_plugin.Process (struct let name = "xcp-rrdd-netdev" end) +let fail = Printf.ksprintf failwith + type iface_stats = { tx_bytes: int64 (** bytes emitted *) ; tx_pkts: int64 (** packets emitted *) @@ -132,18 +135,16 @@ let transform_taps devs = newdevnames let generate_netdev_dss () = - let _, doms, _ = - Xenctrl.with_intf (fun xc -> Xenctrl_lib.domain_snapshot xc) - in - - let uuid_of_domid domains domid = - let _, uuid, _ = - try List.find (fun (_, _, domid') -> domid = domid') domains - with Not_found -> - failwith - (Printf.sprintf "Failed to find uuid corresponding to domid: %d" domid) - in - uuid + let uuid_of_domid domid = + try + Xenstore.with_xs (fun xs -> + let vm = xs.Xenstore.Xs.getdomainpath domid ^ "/vm" in + let vm_dir = xs.Xenstore.Xs.read vm in + xs.Xenstore.Xs.read (vm_dir ^ "/uuid") + ) + with e -> + fail "Failed to find uuid corresponding to domid: %d (%s)" domid + (Printexc.to_string e) in let dbg = "rrdp_netdev" in @@ -198,7 +199,7 @@ let generate_netdev_dss () = let vif_name = Printf.sprintf "vif_%d" d2 in (* Note: rx and tx are the wrong way round because from dom0 we see the vms backwards *) - let uuid = uuid_of_domid doms d1 in + let uuid = uuid_of_domid d1 in ( Rrd.VM uuid , Ds.ds_make ~name:(vif_name ^ "_tx") ~units:"B/s" ~description: From d157db0aecbec78bc7c396aab0158f4de287a68f Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Thu, 17 Oct 2024 06:04:59 +0000 Subject: [PATCH 07/30] CP-51870: Delegate restarting systemd services order to systemd services configuration Systemd services has good support for the services depends and orders in the Unit file, that is the place the restart order should be stated. However, the command `systemd stop foo bar ...` will override the order in the Unit file. As the number of the services grow up, it is really hard to manage the order in the systemd command In order to resolve the issue, `toolstack.target` is created to group and manage the toolstack services. - toolstack.target: `Wants: foo.service` will start foo.service when `systemctl start toolstack.target` - foo.service: `PartOf: toolstack.target` will restart/stop foo.service when `systemctl stop/restart toolstack.target` Note: Above two does not have to match, eg. if we do not want to start a service during `systemctl start toolstack.target`, we can remove it from the first list. - Following xenopsd services are no longer valid, just got removed * xenopsd * xenopsd-xenlight * xenopsd-simulator * xenopsd-libvirt Signed-off-by: Lin Liu --- python3/perfmon/perfmon.service | 1 + scripts/Makefile | 3 +++ scripts/toolstack.target | 27 ++++++++++++++++++++++++++ scripts/varstored-guard.service | 3 ++- scripts/xapi.service | 1 + scripts/xcp-networkd.service | 1 + scripts/xcp-rrdd-cpu.service | 1 + scripts/xcp-rrdd-dcmi.service | 1 + scripts/xcp-rrdd-iostat.service | 1 + scripts/xcp-rrdd-netdev.service | 1 + scripts/xcp-rrdd-squeezed.service | 1 + scripts/xcp-rrdd-xenpm.service | 1 + scripts/xcp-rrdd.service | 1 + scripts/xe-toolstack-restart | 32 ++++++++++++++----------------- 14 files changed, 56 insertions(+), 19 deletions(-) create mode 100644 scripts/toolstack.target diff --git a/python3/perfmon/perfmon.service b/python3/perfmon/perfmon.service index 1afa0cfc23..683039923f 100644 --- a/python3/perfmon/perfmon.service +++ b/python3/perfmon/perfmon.service @@ -2,6 +2,7 @@ Description=Performance monitoring/alarm generation daemon After=xapi.service Wants=xapi.service +PartOf=toolstack.target [Service] EnvironmentFile=-/etc/sysconfig/perfmon diff --git a/scripts/Makefile b/scripts/Makefile index 7583c80d62..503e783854 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -152,3 +152,6 @@ install: $(IDATA) mail-languages/ja-JP.json $(DESTDIR)/etc/xapi.d/mail-languages # uefi mkdir -p $(DESTDIR)/etc/xapi.d/efi-clone + +# toolstack.target to manage toolstack services as a group + $(IDATA) toolstack.target $(DESTDIR)/usr/lib/systemd/system/toolstack.target diff --git a/scripts/toolstack.target b/scripts/toolstack.target new file mode 100644 index 0000000000..c4019a4d23 --- /dev/null +++ b/scripts/toolstack.target @@ -0,0 +1,27 @@ +[Unit] +Description=toolstack Target to manage toolstack service restart +# wants to start following services when run `systemctl start toolstack.target` +# Note: `Wants` is used here instead of `Requires`, `Requires` will stop/restart +# whole toolstack.target on any service stop/restart +Wants=xapi.service +Wants=message-switch.service +Wants=forkexecd.service +Wants=perfmon.service +Wants=v6d.service +Wants=xcp-rrdd-iostat.service +Wants=xcp-rrdd-squeezed.service +Wants=xcp-rrdd-netdev.service +Wants=xcp-rrdd-dcmi.service +Wants=xcp-rrdd-cpu.service +Wants=xcp-rrdd-xenpm.service +Wants=xcp-rrdd-gpumon.service +Wants=xcp-rrdd.service +Wants=xcp-networkd.service +Wants=xenopsd-xc.service +Wants=squeezed.service +Wants=xapi-storage-script.service +Wants=xapi-clusterd.service +Wants=varstored-guard.service + +[Install] +WantedBy=multi-user.target diff --git a/scripts/varstored-guard.service b/scripts/varstored-guard.service index c9d1b9bd93..819b86c4c5 100644 --- a/scripts/varstored-guard.service +++ b/scripts/varstored-guard.service @@ -2,8 +2,9 @@ Description=Varstored XAPI socket deprivileging daemon Documentation=man:varstored-guard(1) After=message-switch.service syslog.target -Before=xapi-domains.service xenopsd.service +Before=xapi-domains.service xenopsd-xc.service Wants=message-switch.service syslog.target +PartOf=toolstack.target [Service] Type=simple diff --git a/scripts/xapi.service b/scripts/xapi.service index a4c825991d..d4cb858c93 100644 --- a/scripts/xapi.service +++ b/scripts/xapi.service @@ -16,6 +16,7 @@ After=xcp-rrdd.service After=xenopsd-xc.service After=xenstored.service After=stunnel@xapi.service +PartOf=toolstack.target Conflicts=shutdown.target diff --git a/scripts/xcp-networkd.service b/scripts/xcp-networkd.service index eb49512cf2..ade36bb8e5 100644 --- a/scripts/xcp-networkd.service +++ b/scripts/xcp-networkd.service @@ -3,6 +3,7 @@ Description=XCP networking daemon Documentation=man:xcp-networkd(1) After=forkexecd.service message-switch.service syslog.target Wants=forkexecd.service message-switch.service syslog.target +PartOf=toolstack.target [Service] Type=notify diff --git a/scripts/xcp-rrdd-cpu.service b/scripts/xcp-rrdd-cpu.service index 310828dda9..b0039ca0a4 100644 --- a/scripts/xcp-rrdd-cpu.service +++ b/scripts/xcp-rrdd-cpu.service @@ -2,6 +2,7 @@ Description=XCP RRD daemon CPU plugin After=xcp-rrdd.service Requires=xcp-rrdd.service +PartOf=toolstack.target [Service] ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-cpu diff --git a/scripts/xcp-rrdd-dcmi.service b/scripts/xcp-rrdd-dcmi.service index 64bab4f25b..2a2f22ec24 100644 --- a/scripts/xcp-rrdd-dcmi.service +++ b/scripts/xcp-rrdd-dcmi.service @@ -2,6 +2,7 @@ Description=XCP RRD daemon IPMI DCMI power plugin After=xcp-rrdd.service Requires=xcp-rrdd.service +PartOf=toolstack.target [Service] ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-dcmi diff --git a/scripts/xcp-rrdd-iostat.service b/scripts/xcp-rrdd-iostat.service index ce72447736..791cfd279a 100644 --- a/scripts/xcp-rrdd-iostat.service +++ b/scripts/xcp-rrdd-iostat.service @@ -2,6 +2,7 @@ Description=XCP RRD daemon iostat plugin After=xcp-rrdd.service Requires=xcp-rrdd.service +PartOf=toolstack.target [Service] ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-iostat diff --git a/scripts/xcp-rrdd-netdev.service b/scripts/xcp-rrdd-netdev.service index b961cc9d15..047b54bdf7 100644 --- a/scripts/xcp-rrdd-netdev.service +++ b/scripts/xcp-rrdd-netdev.service @@ -2,6 +2,7 @@ Description=XCP RRD daemon network plugin After=xcp-rrdd.service Requires=xcp-rrdd.service +PartOf=toolstack.target [Service] ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-netdev diff --git a/scripts/xcp-rrdd-squeezed.service b/scripts/xcp-rrdd-squeezed.service index bb33fca801..673663ba04 100644 --- a/scripts/xcp-rrdd-squeezed.service +++ b/scripts/xcp-rrdd-squeezed.service @@ -2,6 +2,7 @@ Description=XCP RRD daemon squeezed plugin After=xcp-rrdd.service Requires=xcp-rrdd.service +PartOf=toolstack.target [Service] ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-squeezed diff --git a/scripts/xcp-rrdd-xenpm.service b/scripts/xcp-rrdd-xenpm.service index 092bb4d4bb..56345eb1d4 100644 --- a/scripts/xcp-rrdd-xenpm.service +++ b/scripts/xcp-rrdd-xenpm.service @@ -2,6 +2,7 @@ Description=XCP RRD daemon xenpm plugin After=xcp-rrdd.service Requires=xcp-rrdd.service +PartOf=toolstack.target [Service] ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-xenpm diff --git a/scripts/xcp-rrdd.service b/scripts/xcp-rrdd.service index 81e4d78df6..92d1292bef 100644 --- a/scripts/xcp-rrdd.service +++ b/scripts/xcp-rrdd.service @@ -2,6 +2,7 @@ Description=XCP RRD daemon After=forkexecd.service xenstored.service message-switch.service syslog.target Wants=forkexecd.service xenstored.service message-switch.service syslog.target +PartOf=toolstack.target [Service] Type=notify diff --git a/scripts/xe-toolstack-restart b/scripts/xe-toolstack-restart index 25856dc67a..d377ae7acb 100755 --- a/scripts/xe-toolstack-restart +++ b/scripts/xe-toolstack-restart @@ -27,11 +27,6 @@ echo "Executing $FILENAME" POOLCONF=`cat @ETCXENDIR@/pool.conf` if [ $POOLCONF == "master" ]; then MPATHALERT="mpathalert"; else MPATHALERT=""; fi -SERVICES="message-switch perfmon v6d xenopsd xenopsd-xc xenopsd-xenlight - xenopsd-simulator xenopsd-libvirt xcp-rrdd-iostat xcp-rrdd-squeezed - xcp-rrdd-netdev xcp-rrdd-cpu - xcp-rrdd-xenpm xcp-rrdd-gpumon xcp-rrdd xcp-networkd squeezed forkexecd - $MPATHALERT xapi-storage-script xapi-clusterd varstored-guard" tmp_file=$(mktemp --suffix="xe-toolstack-restart") systemctl stop stunnel@xapi > $tmp_file 2>&1 @@ -43,22 +38,23 @@ if [[ $kill_stunnel_exit_code != 0 ]]; then fi rm -f $tmp_file -TO_RESTART="" -for svc in $SERVICES ; do - # restart services only if systemd said they were enabled - systemctl is-enabled $svc >/dev/null 2>&1 +set -e - if [ $? -eq 0 ] ; then - TO_RESTART="$svc $TO_RESTART" - fi -done -systemctl stop xapi -systemctl stop ${TO_RESTART} +systemctl restart $MPATHALERT toolstack.target -set -e +# Check the status of toolstack services +for service in $(systemctl list-dependencies --plain --no-pager toolstack.target); do + # During system bootup, xcp-rrdd-dcmi.service often fail as + # `ipmitool dcmi discover` discover nothing, just ignore it for now + if [ "$service" == "xcp-rrdd-dcmi.service" ]; then + continue + fi -systemctl start ${TO_RESTART} -systemctl start xapi + if ! systemctl is-active --quiet "$service"; then + echo "$service failed to restart, $(systemctl status $service)" + exit 1 + fi +done rm -f $LOCKFILE echo "done." From 2c99f2da9581a44d23badbb2450cc7b3598d56ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 25 Oct 2024 15:52:42 +0100 Subject: [PATCH 08/30] CA-400199: open /dev/urandom on first use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2 recent optimizations have changed the Uuidx module to open /dev/urandom once on startup, instead of every time a value was requested. However 'networkd_db' runs in the installer environment, inside a chroot where /dev/urandom is not available. Open /dev/urandom on first use instead. Simplify the code and use a single implementation for both fast and secure urandom generation: * use a mutex to protect accesses to global urandom state * use an input channel, rather than a Unix file descriptor, this allows us to read many bytes in one go, and then generate multiple random numbers without having to make syscalls that often (syscalls are slow in this case because they require releasing the runtime mutex, which gives another thread the opportunity to run for 50ms). Fixes: a0176da73 ("CP-49135: open /dev/urandom just once") Fixes: a2d9fbe39 ("IH-577 Implement v7 UUID generation") Fixes: 6635a00d6 ("CP-49136: Introduce PRNG for generating non-secret UUIDs") This is slightly slower than before, but still fast enough: ``` │ uuidx creation/Uuidx.make │ 0.0004 mjw/run│ 16.0001 mnw/run│ 105.8801 ns/run│ │ uuidx creation/Uuidx.make_uuid_urnd │ 0.0004 mjw/run│ 16.0001 mnw/run│ 105.1474 ns/run│ ``` Previously this used to take ~88ns, so in fact the difference is barely noticable. Also remove the feature flag: the previous change was feature flagged too, but broke master anyway (I wouldn't have though anything *doesn't* have /dev/urandom available, and didn't feature flag that part, because in general it is not possible to feature flag startup code without races) Signed-off-by: Edwin Török --- ocaml/forkexecd/test/fe_test.ml | 2 +- ocaml/libs/uuid/uuidx.ml | 71 ++++++++++++++------------------- ocaml/libs/uuid/uuidx.mli | 5 --- ocaml/tests/bench/bench_uuid.ml | 2 - ocaml/xapi/xapi_globs.ml | 6 --- quality-gate.sh | 2 +- 6 files changed, 33 insertions(+), 55 deletions(-) diff --git a/ocaml/forkexecd/test/fe_test.ml b/ocaml/forkexecd/test/fe_test.ml index 1c5e46bc1f..870ac59160 100644 --- a/ocaml/forkexecd/test/fe_test.ml +++ b/ocaml/forkexecd/test/fe_test.ml @@ -292,7 +292,7 @@ let slave = function (* Printf.fprintf stderr "%s %d\n" total_fds (List.length present - 1) *) - if total_fds + 1 (* Uuid.dev_urandom *) <> List.length filtered then + if total_fds <> List.length filtered then fail "Expected %d fds; /proc/self/fd has %d: %s" total_fds (List.length filtered) ls diff --git a/ocaml/libs/uuid/uuidx.ml b/ocaml/libs/uuid/uuidx.ml index 65392ef448..7bcb74aae0 100644 --- a/ocaml/libs/uuid/uuidx.ml +++ b/ocaml/libs/uuid/uuidx.ml @@ -116,48 +116,39 @@ let is_uuid str = match of_string str with None -> false | Some _ -> true let dev_urandom = "/dev/urandom" -let dev_urandom_fd = Unix.openfile dev_urandom [Unix.O_RDONLY] 0o640 -(* we can't close this in at_exit, because Crowbar runs at_exit, and - it'll fail because this FD will then be closed -*) - -let read_bytes dev n = - let buf = Bytes.create n in - let read = Unix.read dev buf 0 n in - if read <> n then - raise End_of_file - else - Bytes.to_string buf - -let make_uuid_urnd () = of_bytes (read_bytes dev_urandom_fd 16) |> Option.get - -(* State for random number generation. Random.State.t isn't thread safe, so - only use this via with_non_csprng_state, which takes care of this. -*) -let rstate = Random.State.make_self_init () - -let rstate_m = Mutex.create () - -let with_non_csprng_state = - (* On OCaml 5 we could use Random.State.split instead, - and on OCaml 4 the mutex may not be strictly needed - *) - let finally () = Mutex.unlock rstate_m in - fun f -> - Mutex.lock rstate_m ; - Fun.protect ~finally (f rstate) - -(** Use non-CSPRNG by default, for CSPRNG see {!val:make_uuid_urnd} *) -let make_uuid_fast () = with_non_csprng_state Uuidm.v4_gen - -let make_default = ref make_uuid_urnd - -let make () = !make_default () +let generate = + let mutex = Mutex.create () in + let dev_urandom_ic = ref None in + let finally () = Mutex.unlock mutex in + let with_mutex fn = Mutex.lock mutex ; Fun.protect ~finally fn in + let close_ic () = + with_mutex @@ fun () -> + !dev_urandom_ic |> Option.iter close_in_noerr ; + dev_urandom_ic := None + in + fun n -> + with_mutex @@ fun () -> + let ic = + match !dev_urandom_ic with + | None -> + let ic = open_in_bin dev_urandom in + at_exit close_ic ; + dev_urandom_ic := Some ic ; + ic + | Some ic -> + ic + in + really_input_string ic n + +let make_uuid_urnd () = of_bytes (generate 16) |> Option.get + +let make_uuid_fast = make_uuid_urnd + +let make = make_uuid_urnd let make_v7_uuid_from_parts time_ns rand_b = Uuidm.v7_ns ~time_ns ~rand_b -let rand64 () = - with_non_csprng_state (fun rstate () -> Random.State.bits64 rstate) +let rand64 () = String.get_int64_ne (generate 8) 0 let now_ns = let start = Mtime_clock.counter () in @@ -174,7 +165,7 @@ let make_v7_uuid () = make_v7_uuid_from_parts (now_ns ()) (rand64 ()) type cookie = string let make_cookie () = - read_bytes dev_urandom_fd 64 + generate 64 |> String.to_seq |> Seq.map (fun c -> Printf.sprintf "%1x" (int_of_char c)) |> List.of_seq diff --git a/ocaml/libs/uuid/uuidx.mli b/ocaml/libs/uuid/uuidx.mli index 1e1ebc3251..8561a975cc 100644 --- a/ocaml/libs/uuid/uuidx.mli +++ b/ocaml/libs/uuid/uuidx.mli @@ -194,8 +194,3 @@ module Hash : sig (* UUID Version 5 derived from argument string and namespace UUID *) val string : string -> [< not_secret] t end - -(**/**) - -(* just for feature flag, to be removed *) -val make_default : (unit -> [< not_secret] t) ref diff --git a/ocaml/tests/bench/bench_uuid.ml b/ocaml/tests/bench/bench_uuid.ml index a04ff192d7..f13118e48d 100644 --- a/ocaml/tests/bench/bench_uuid.ml +++ b/ocaml/tests/bench/bench_uuid.ml @@ -1,7 +1,5 @@ open Bechamel -let () = Uuidx.make_default := Uuidx.make_uuid_fast - let benchmarks = Test.make_grouped ~name:"uuidx creation" [ diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 5407faf3bf..9a461a4e7b 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1612,12 +1612,6 @@ let other_options = , (fun () -> string_of_bool !disable_webserver) , "Disable the host webserver" ) - ; ( "use-prng-uuid-gen" - (* eventually this'll be the default, except for Sessions *) - , Arg.Unit (fun () -> Uuidx.make_default := Uuidx.make_uuid_fast) - , (fun () -> !Uuidx.make_default == Uuidx.make_uuid_fast |> string_of_bool) - , "Use PRNG based UUID generator instead of CSPRNG" - ) ] (* The options can be set with the variable xapiflags in /etc/sysconfig/xapi. diff --git a/quality-gate.sh b/quality-gate.sh index c1d122efd7..16a90270b1 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -40,7 +40,7 @@ mli-files () { } structural-equality () { - N=10 + N=9 EQ=$(git grep -r --count ' == ' -- '**/*.ml' ':!ocaml/sdk-gen/**/*.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$EQ" -eq "$N" ]; then echo "OK counted $EQ usages of ' == '" From 760c355e0aa685b58198cd46d3aeebabf00d03ef Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 22 Oct 2024 17:17:06 +0100 Subject: [PATCH 09/30] CP-51938: Generate XML alert for cluster health Signed-off-by: Vincent Liu --- ocaml/xapi/xapi_cluster_helpers.ml | 60 +++++++++++++++++++----------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/ocaml/xapi/xapi_cluster_helpers.ml b/ocaml/xapi/xapi_cluster_helpers.ml index 2582790e92..954b946b0f 100644 --- a/ocaml/xapi/xapi_cluster_helpers.ml +++ b/ocaml/xapi/xapi_cluster_helpers.ml @@ -112,35 +112,45 @@ let corosync3_enabled ~__context = let maybe_generate_alert ~__context ~num_hosts ~hosts_left ~hosts_joined ~quorum = let generate_alert join cluster_host = + let generate_alert_body host num_hosts quorum join = + let num_hosts = string_of_int num_hosts in + let quorum = string_of_int quorum in + let msg = + if join then + "Host has joined the cluster" + else + "Host has left the cluster" + in + String.concat "" + [ + "" + ; msg + ; "" + ; host + ; "" + ; "" + ; num_hosts + ; "" + ; "" + ; quorum + ; "" + ; "" + ] + in let host = Db.Cluster_host.get_host ~__context ~self:cluster_host in let host_uuid = Db.Host.get_uuid ~__context ~self:host in let host_name = Db.Host.get_name_label ~__context ~self:host in let body, name, priority = + let body = generate_alert_body host_name num_hosts quorum join in match join with | true -> - let body = - Printf.sprintf - "Host %s has joined the cluster, there are now %d host(s) in \ - cluster and %d host(s) are required to form a quorum" - host_name num_hosts quorum - in let name, priority = Api_messages.cluster_host_joining in (body, name, priority) | false -> - let body = - Printf.sprintf - "Host %s has left the cluster, there are now %d host(s) in \ - cluster and %d host(s) are required to form a quorum" - host_name num_hosts quorum - in let name, priority = Api_messages.cluster_host_leaving in (body, name, priority) in - Helpers.call_api_functions ~__context (fun rpc session_id -> - ignore - @@ Client.Client.Message.create ~rpc ~session_id ~name ~priority - ~cls:`Host ~obj_uuid:host_uuid ~body - ) + Xapi_alert.add ~msg:(name, priority) ~cls:`Host ~obj_uuid:host_uuid ~body in List.iter (generate_alert false) hosts_left ; List.iter (generate_alert true) hosts_joined ; @@ -150,10 +160,18 @@ let maybe_generate_alert ~__context ~num_hosts ~hosts_left ~hosts_joined ~quorum let pool_uuid = Db.Pool.get_uuid ~__context ~self:pool in let name, priority = Api_messages.cluster_quorum_approaching_lost in let body = - Printf.sprintf - "The cluster is losing quorum: currently %d host(s), need %d host(s) \ - for a quorum" - num_hosts quorum + String.concat "" + [ + "" + ; "Cluster is losing quorum" + ; "" + ; string_of_int num_hosts + ; "" + ; "" + ; string_of_int quorum + ; "" + ; "" + ] in Helpers.call_api_functions ~__context (fun rpc session_id -> ignore From c122bc404c4560a323cd6413de45ea9711a873a1 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Tue, 29 Oct 2024 04:20:18 +0000 Subject: [PATCH 10/30] CP-50546: Remove initscripts family initscripts family are legacy and want to be removed `service iptables save` call /usr/libexec/initscripts/legacy-actions/iptables/save, which call `exec /usr/libexec/iptables/iptables.init save`, to save iptables rules and remove initscripts, we call following directly `/usr/libexec/iptables/iptables.init save` `service` command are also updated to `systemctl` Signed-off-by: Lin Liu --- scripts/plugins/firewall-port | 4 ++-- scripts/xe-syslog-reconfigure | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/plugins/firewall-port b/scripts/plugins/firewall-port index 820a0608d9..b06707dbd2 100644 --- a/scripts/plugins/firewall-port +++ b/scripts/plugins/firewall-port @@ -37,14 +37,14 @@ case "${OP}" in iptables -I INPUT -j "${CHAIN}" fi # asuume chain is used if it exists iptables -I "${CHAIN}" $RULE - service iptables save + /usr/libexec/iptables/iptables.init save fi ;; close) if iptables -C $CHAIN $RULE 2>/dev/null then # close port if it was opened iptables -D $CHAIN $RULE - service iptables save + /usr/libexec/iptables/iptables.init save fi ;; check) diff --git a/scripts/xe-syslog-reconfigure b/scripts/xe-syslog-reconfigure index f9e7d3bd64..cc64a30304 100644 --- a/scripts/xe-syslog-reconfigure +++ b/scripts/xe-syslog-reconfigure @@ -42,4 +42,4 @@ else fi [ -s /etc/syslog.$$ ] && mv -f /etc/syslog.$$ $conf_file -service $service restart +systemctl restart $service From 36f9993f4b9e964fe2d56bc78bf88a9efda896bd Mon Sep 17 00:00:00 2001 From: Colin James Date: Tue, 29 Oct 2024 13:06:49 +0000 Subject: [PATCH 11/30] Remove notion of weird string from sexpr library Removes the remnants of an escaping mechanism once used in xapi. It appears that the format "< --- ocaml/libs/sexpr/sExpr.ml | 35 +++----------------------------- ocaml/libs/sexpr/sExpr.mli | 8 +------- ocaml/libs/sexpr/sExprLexer.mli | 7 ------- ocaml/libs/sexpr/sExprLexer.mll | 7 +------ ocaml/libs/sexpr/sExprParser.mly | 14 ++++--------- 5 files changed, 9 insertions(+), 62 deletions(-) delete mode 100644 ocaml/libs/sexpr/sExprLexer.mli diff --git a/ocaml/libs/sexpr/sExpr.ml b/ocaml/libs/sexpr/sExpr.ml index ec354e373b..488142898c 100644 --- a/ocaml/libs/sexpr/sExpr.ml +++ b/ocaml/libs/sexpr/sExpr.ml @@ -11,11 +11,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. *) -type t = - | Node of t list - | Symbol of string - | String of string - | WeirdString of string * string +type t = Node of t list | Symbol of string | String of string let unescape_buf buf s = let aux esc = function @@ -84,31 +80,13 @@ let string_of sexpr = List.iter (fun i -> Buffer.add_char buf ' ' ; __string_of_rec i) l ) ; Buffer.add_char buf ')' - | Symbol s | String s | WeirdString (_, s) -> + | Symbol s | String s -> Buffer.add_string buf "\'" ; Buffer.add_string buf (escape s) ; Buffer.add_string buf "\'" in __string_of_rec sexpr ; Buffer.contents buf -let weird_of_string x = - let random_chars = "abcdefghijklmnopqrstuvwxyz" in - let randchar () = - String.sub random_chars (Random.int (String.length random_chars)) 1 - in - (* true if the parent string contains child as a substring, starting the - search forward from offset *) - let rec has_substring parent offset child = - String.length parent - offset >= String.length child - && (String.sub parent offset (String.length child) = child - || has_substring parent (offset + 1) child - ) - in - let rec find delim = - if has_substring x 0 delim then find (delim ^ randchar ()) else delim - in - WeirdString (find "xxx", x) - let rec output_fmt ff = function | Node list -> let rec aux ?(first = true) = function @@ -121,12 +99,5 @@ let rec output_fmt ff = function aux ~first t in Format.fprintf ff "@[(" ; aux list ; Format.fprintf ff ")@]" - | Symbol s | String s | WeirdString (_, s) -> + | Symbol s | String s -> Format.fprintf ff "\"%s\"" (escape s) - -(* - | Symbol s -> - Format.fprintf ff "%s" s - | WeirdString(tag, s) -> - Format.fprintf ff "<<%s<%s<%s<" tag s tag -*) diff --git a/ocaml/libs/sexpr/sExpr.mli b/ocaml/libs/sexpr/sExpr.mli index 28c3b8219c..e7ab5c68a1 100644 --- a/ocaml/libs/sexpr/sExpr.mli +++ b/ocaml/libs/sexpr/sExpr.mli @@ -11,16 +11,10 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. *) -type t = - | Node of t list - | Symbol of string - | String of string - | WeirdString of string * string +type t = Node of t list | Symbol of string | String of string val mkstring : string -> t val string_of : t -> string -val weird_of_string : string -> t - val output_fmt : Format.formatter -> t -> unit diff --git a/ocaml/libs/sexpr/sExprLexer.mli b/ocaml/libs/sexpr/sExprLexer.mli deleted file mode 100644 index 8d017ea982..0000000000 --- a/ocaml/libs/sexpr/sExprLexer.mli +++ /dev/null @@ -1,7 +0,0 @@ -val line : int ref - -val __ocaml_lex_tables : Lexing.lex_tables - -val token : Lexing.lexbuf -> SExprParser.token - -val __ocaml_lex_token_rec : Lexing.lexbuf -> int -> SExprParser.token diff --git a/ocaml/libs/sexpr/sExprLexer.mll b/ocaml/libs/sexpr/sExprLexer.mll index 94d72de193..bc674d7710 100644 --- a/ocaml/libs/sexpr/sExprLexer.mll +++ b/ocaml/libs/sexpr/sExprLexer.mll @@ -1,14 +1,9 @@ { open SExprParser - let line = ref 1 } rule token = parse - | [' ' '\t' '\r'] { token lexbuf } - | ';' [^ '\n']* '\n' { incr line; token lexbuf } - | '\n' { incr line; token lexbuf } - | "<<" ([^ '<']+ as tag1) '<' ([^ '<']* as s) '<' ([^ '<']+ as tag2) '<' - { if tag1=tag2 then WEIRD(tag1, s) else invalid_arg "Weird tag" } + | [' ' '\t' '\r' '\n']+ | ';' [^ '\n']* '\n' { token lexbuf } | '"' (([^ '"' '\\'] | ('\\' _))* as s) '"' { STRING s } | '\'' (([^ '\'' '\\'] | ('\\' _))* as s) '\'' { STRING s } | [^ '"' ' ' '\t' '\n' '(' ')']+ as s { SYMBOL s } diff --git a/ocaml/libs/sexpr/sExprParser.mly b/ocaml/libs/sexpr/sExprParser.mly index a18a62bd7e..3dbceb467a 100644 --- a/ocaml/libs/sexpr/sExprParser.mly +++ b/ocaml/libs/sexpr/sExprParser.mly @@ -1,17 +1,11 @@ %token SYMBOL STRING -%token WEIRD %token OPEN CLOSE -%start expr -%type expr +%start expr %% -expr_list: { [] } -| expr expr_list { $1 :: $2 }; - expr: -| OPEN expr_list CLOSE { SExpr.Node $2 } -| SYMBOL { SExpr.Symbol $1 } -| STRING { SExpr.mkstring $1 } -| WEIRD { (fun (tag, s) -> SExpr.WeirdString(tag, s)) $1 }; +| OPEN es = list(expr) CLOSE { SExpr.Node es } +| s = SYMBOL { SExpr.Symbol s } +| s = STRING { SExpr.mkstring s } From 19ad403eb3ea8f76cf2623b7fda7fe0475c55f38 Mon Sep 17 00:00:00 2001 From: Stephen Cheng Date: Wed, 30 Oct 2024 02:24:10 +0000 Subject: [PATCH 12/30] CA-399396: Adjust the jemalloc parameters for memory performance The new version (5.3.0) jemalloc caused a significant increase of memory usage compared to the version 3.6.0. Signed-off-by: Stephen Cheng --- ocaml/xenopsd/scripts/qemu-wrapper | 2 +- scripts/xapi-nbd.service | 2 +- scripts/xcp-networkd.service | 2 +- scripts/xcp-rrdd.service | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ocaml/xenopsd/scripts/qemu-wrapper b/ocaml/xenopsd/scripts/qemu-wrapper index 9d9fc9aef8..93f5c685ea 100644 --- a/ocaml/xenopsd/scripts/qemu-wrapper +++ b/ocaml/xenopsd/scripts/qemu-wrapper @@ -305,7 +305,7 @@ def main(argv): qemu_env["LD_PRELOAD"] = "/usr/lib64/libjemalloc.so.2" else: qemu_env["LD_PRELOAD"] = "/usr/lib64/libjemalloc.so.2:" + qemu_env["LD_PRELOAD"] - qemu_env["MALLOC_CONF"] = "narenas:1,tcache:false" + qemu_env["MALLOC_CONF"] = "background_thread:true,dirty_decay_ms:100,narenas:1,tcache:false" sys.stdout.flush() sys.stderr.flush() diff --git a/scripts/xapi-nbd.service b/scripts/xapi-nbd.service index bca7b551a1..6aabf845fe 100644 --- a/scripts/xapi-nbd.service +++ b/scripts/xapi-nbd.service @@ -5,7 +5,7 @@ Wants=xapi.service message-switch.service syslog.target [Service] Environment="LD_PRELOAD=/usr/lib64/libjemalloc.so.2" -Environment="MALLOC_CONF=narenas:1,tcache:false" +Environment="MALLOC_CONF=background_thread:true,dirty_decay_ms:100,narenas:1,tcache:false" Environment=OCAMLRUNPARAM=b # The --certfile option must match the server-cert-path in xapi.conf # and the PathExists in xapi-nbd.path: any change must be made in all three files. diff --git a/scripts/xcp-networkd.service b/scripts/xcp-networkd.service index eb49512cf2..6f5bebddfd 100644 --- a/scripts/xcp-networkd.service +++ b/scripts/xcp-networkd.service @@ -7,7 +7,7 @@ Wants=forkexecd.service message-switch.service syslog.target [Service] Type=notify Environment="LD_PRELOAD=/usr/lib64/libjemalloc.so.2" -Environment="MALLOC_CONF=narenas:1,tcache:false" +Environment="MALLOC_CONF=background_thread:true,dirty_decay_ms:100,narenas:1,tcache:false" Environment=OCAMLRUNPARAM=b EnvironmentFile=-/etc/sysconfig/xcp-networkd ExecStart=/usr/sbin/xcp-networkd $XCP_NETWORKD_OPTIONS diff --git a/scripts/xcp-rrdd.service b/scripts/xcp-rrdd.service index 81e4d78df6..ea5e42ad90 100644 --- a/scripts/xcp-rrdd.service +++ b/scripts/xcp-rrdd.service @@ -6,7 +6,7 @@ Wants=forkexecd.service xenstored.service message-switch.service syslog.target [Service] Type=notify Environment="LD_PRELOAD=/usr/lib64/libjemalloc.so.2" -Environment="MALLOC_CONF=narenas:1,tcache:false" +Environment="MALLOC_CONF=background_thread:true,dirty_decay_ms:100,narenas:1,tcache:false" Environment=OCAMLRUNPARAM=b EnvironmentFile=-/etc/sysconfig/xcp-rrdd ExecStart=/usr/sbin/xcp-rrdd $XCP_RRDD_OPTIONS From 176c9e34bafdd39e89a78ac140ae38881388258d Mon Sep 17 00:00:00 2001 From: Colin James Date: Tue, 29 Oct 2024 15:01:06 +0000 Subject: [PATCH 13/30] CP-52039: Drop Semaphore from Xapi_stdext_threads Prior to version 4.12, OCaml's standard threads library (systhreads) had no builtin concept of a semaphore, so one was implemented in Xapi_stdext_threads. We replace all usages of this with Semaphore.Counting from the standard library and remove the implementation from Xapi_stdext_threads. Technically, the interface provided by the previous semaphore is more general: it permits arbitrary adjustments to the semaphore's counter, allowing for a "weighted" style of locking. However, this is only used in one place (with a weight value of 1, which is the same decrement/increment value as normal). Signed-off-by: Colin James --- ocaml/libs/http-lib/http_svr.ml | 2 +- ocaml/libs/http-lib/server_io.ml | 6 +- ocaml/libs/http-lib/server_io.mli | 2 +- .../lib/xapi-stdext-threads/semaphore.ml | 57 ------------------- .../lib/xapi-stdext-threads/semaphore.mli | 40 ------------- .../lib/xapi-stdext-threads/threadext.ml | 12 +++- .../lib/xapi-stdext-threads/threadext.mli | 4 ++ ocaml/networkd/lib/network_utils.ml | 7 ++- ocaml/xapi-aux/throttle.ml | 8 ++- ocaml/xapi/xapi_sr.ml | 1 - 10 files changed, 28 insertions(+), 111 deletions(-) delete mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.ml delete mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.mli diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index 3c8ec7facb..54a8b96ba7 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -648,7 +648,7 @@ let start ?header_read_timeout ?header_total_timeout ?max_header_length ; body= handle_connection ~header_read_timeout ~header_total_timeout ~max_header_length x - ; lock= Xapi_stdext_threads.Semaphore.create conn_limit + ; lock= Semaphore.Counting.make conn_limit } in let server = Server_io.server handler socket in diff --git a/ocaml/libs/http-lib/server_io.ml b/ocaml/libs/http-lib/server_io.ml index 09abf253ee..c821a27c02 100644 --- a/ocaml/libs/http-lib/server_io.ml +++ b/ocaml/libs/http-lib/server_io.ml @@ -23,7 +23,7 @@ type handler = { name: string ; (* body should close the provided fd *) body: Unix.sockaddr -> Unix.file_descr -> unit - ; lock: Xapi_stdext_threads.Semaphore.t + ; lock: Semaphore.Counting.t } let handler_by_thread (h : handler) (s : Unix.file_descr) @@ -31,7 +31,7 @@ let handler_by_thread (h : handler) (s : Unix.file_descr) Thread.create (fun () -> Fun.protect - ~finally:(fun () -> Xapi_stdext_threads.Semaphore.release h.lock 1) + ~finally:(fun () -> Semaphore.Counting.release h.lock) (Debug.with_thread_named h.name (fun () -> h.body caller s)) ) () @@ -49,7 +49,7 @@ let establish_server ?(signal_fds = []) forker handler sock = @@ Polly.wait epoll 2 (-1) (fun _ fd _ -> (* If any of the signal_fd is active then bail out *) if List.mem fd signal_fds then raise PleaseClose ; - Xapi_stdext_threads.Semaphore.acquire handler.lock 1 ; + Semaphore.Counting.acquire handler.lock ; let s, caller = Unix.accept ~cloexec:true sock in try ignore (forker handler s caller) with exc -> diff --git a/ocaml/libs/http-lib/server_io.mli b/ocaml/libs/http-lib/server_io.mli index 3aca023474..3c52f53a80 100644 --- a/ocaml/libs/http-lib/server_io.mli +++ b/ocaml/libs/http-lib/server_io.mli @@ -16,7 +16,7 @@ type handler = { name: string (** used for naming the thread *) ; body: Unix.sockaddr -> Unix.file_descr -> unit (** function called in a thread for each connection*) - ; lock: Xapi_stdext_threads.Semaphore.t + ; lock: Semaphore.Counting.t } type server = { diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.ml deleted file mode 100644 index 06621049c9..0000000000 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.ml +++ /dev/null @@ -1,57 +0,0 @@ -(* - * Copyright (C) Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -type t = {mutable n: int; m: Mutex.t; c: Condition.t} - -let create n = - if n <= 0 then - invalid_arg (Printf.sprintf "Semaphore value must be positive, got %d" n) ; - let m = Mutex.create () and c = Condition.create () in - {n; m; c} - -exception Inconsistent_state of string - -let inconsistent_state fmt = - Printf.ksprintf (fun msg -> raise (Inconsistent_state msg)) fmt - -let acquire s k = - if k <= 0 then - invalid_arg - (Printf.sprintf "Semaphore acquisition requires a positive value, got %d" - k - ) ; - Mutex.lock s.m ; - while s.n < k do - Condition.wait s.c s.m - done ; - if not (s.n >= k) then - inconsistent_state "Semaphore value cannot be smaller than %d, got %d" k s.n ; - s.n <- s.n - k ; - Condition.signal s.c ; - Mutex.unlock s.m - -let release s k = - if k <= 0 then - invalid_arg - (Printf.sprintf "Semaphore release requires a positive value, got %d" k) ; - Mutex.lock s.m ; - s.n <- s.n + k ; - Condition.signal s.c ; - Mutex.unlock s.m - -let execute_with_weight s k f = - acquire s k ; - Xapi_stdext_pervasives.Pervasiveext.finally f (fun () -> release s k) - -let execute s f = execute_with_weight s 1 f diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.mli deleted file mode 100644 index 207e612032..0000000000 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.mli +++ /dev/null @@ -1,40 +0,0 @@ -(* - * Copyright (C) Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -type t - -exception Inconsistent_state of string - -val create : int -> t -(** [create n] create a semaphore with initial value [n] (a positive integer). - Raise {!Invalid_argument} if [n] <= 0 *) - -val acquire : t -> int -> unit -(** [acquire k s] block until the semaphore value is >= [k] (a positive integer), - then atomically decrement the semaphore value by [k]. - Raise {!Invalid_argument} if [k] <= 0 *) - -val release : t -> int -> unit -(** [release k s] atomically increment the semaphore value by [k] (a positive - integer). - Raise {!Invalid_argument} if [k] <= 0 *) - -val execute_with_weight : t -> int -> (unit -> 'a) -> 'a -(** [execute_with_weight s k f] {!acquire} the semaphore with [k], - then run [f ()], and finally {!release} the semaphore with the same value [k] - (even in case of failure in the execution of [f]). - Return the value of [f ()] or re-raise the exception if any. *) - -val execute : t -> (unit -> 'a) -> 'a -(** [execute s f] same as [{execute_with_weight} s 1 f] *) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml index 1ca5e916ef..311d985ca6 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml @@ -14,11 +14,20 @@ module M = Mutex +let finally = Xapi_stdext_pervasives.Pervasiveext.finally + module Mutex = struct (** execute the function f with the mutex hold *) let execute lock f = Mutex.lock lock ; - Xapi_stdext_pervasives.Pervasiveext.finally f (fun () -> Mutex.unlock lock) + finally f (fun () -> Mutex.unlock lock) +end + +module Semaphore = struct + let execute s f = + let module Semaphore = Semaphore.Counting in + Semaphore.acquire s ; + finally f (fun () -> Semaphore.release s) end (** Parallel List.iter. Remembers all exceptions and returns an association list mapping input x to an exception. @@ -60,7 +69,6 @@ module Delay = struct exception Pre_signalled let wait (x : t) (seconds : float) = - let finally = Xapi_stdext_pervasives.Pervasiveext.finally in let to_close = ref [] in let close' fd = if List.mem fd !to_close then Unix.close fd ; diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli index 057aedfa70..b5edcff21b 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli @@ -15,6 +15,10 @@ module Mutex : sig val execute : Mutex.t -> (unit -> 'a) -> 'a end +module Semaphore : sig + val execute : Semaphore.Counting.t -> (unit -> 'a) -> 'a +end + val thread_iter_all_exns : ('a -> unit) -> 'a list -> ('a * exn) list val thread_iter : ('a -> unit) -> 'a list -> unit diff --git a/ocaml/networkd/lib/network_utils.ml b/ocaml/networkd/lib/network_utils.ml index 39417cf117..4a473b2957 100644 --- a/ocaml/networkd/lib/network_utils.ml +++ b/ocaml/networkd/lib/network_utils.ml @@ -1197,12 +1197,13 @@ module Ovs = struct val appctl : ?log:bool -> string list -> string end = struct - module Semaphore = Xapi_stdext_threads.Semaphore + module Semaphore = Semaphore.Counting - let s = Semaphore.create 5 + let s = Semaphore.make 5 let vsctl ?log args = - Semaphore.execute s (fun () -> + let execute = Xapi_stdext_threads.Threadext.Semaphore.execute in + execute s (fun () -> call_script ~on_error:error_handler ?log ovs_vsctl ("--timeout=20" :: args) ) diff --git a/ocaml/xapi-aux/throttle.ml b/ocaml/xapi-aux/throttle.ml index 7be2ac9bd4..a9dacf7f16 100644 --- a/ocaml/xapi-aux/throttle.ml +++ b/ocaml/xapi-aux/throttle.ml @@ -17,10 +17,12 @@ module type SIZE = sig end module Make (Size : SIZE) = struct - module Semaphore = Xapi_stdext_threads.Semaphore + module Semaphore = Semaphore.Counting let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute + let execute = Xapi_stdext_threads.Threadext.Semaphore.execute + let semaphore = ref None let m = Mutex.create () @@ -29,11 +31,11 @@ module Make (Size : SIZE) = struct with_lock m @@ fun () -> match !semaphore with | None -> - let result = Semaphore.create (Size.n ()) in + let result = Semaphore.make (Size.n ()) in semaphore := Some result ; result | Some s -> s - let execute f = Semaphore.execute (get_semaphore ()) f + let execute f = execute (get_semaphore ()) f end diff --git a/ocaml/xapi/xapi_sr.ml b/ocaml/xapi/xapi_sr.ml index d572660e72..7a83493b2d 100644 --- a/ocaml/xapi/xapi_sr.ml +++ b/ocaml/xapi/xapi_sr.ml @@ -20,7 +20,6 @@ module Rrdd = Rrd_client.Client let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute module Listext = Xapi_stdext_std.Listext -module Semaphore = Xapi_stdext_threads.Semaphore module Unixext = Xapi_stdext_unix.Unixext let finally = Xapi_stdext_pervasives.Pervasiveext.finally From 395d5ad6ebc75353cab2f35c511d5b7ed0ce7b9f Mon Sep 17 00:00:00 2001 From: Changlei Li Date: Tue, 29 Oct 2024 18:01:40 +0800 Subject: [PATCH 14/30] CA-400560: Fix version segment division error For example "1.2.3a" will be divided to [Int 1; Int 2; Str 3a] and "1.2.3" is divided to [Int 1; Int 2; Int 3]. It leads to "1.2.3" > "1.2.3a" which is incorrect. After fix, "1.2.3a" will be divided to [Int 1; Int 2; Int 3; Str a], then we can get the right compare result. Signed-off-by: Changlei Li --- ocaml/tests/test_rpm.ml | 6 +-- ocaml/xapi/rpm.ml | 95 ++++++++++++++++++++++------------------- 2 files changed, 55 insertions(+), 46 deletions(-) diff --git a/ocaml/tests/test_rpm.ml b/ocaml/tests/test_rpm.ml index da47d9a0ce..5b80215521 100644 --- a/ocaml/tests/test_rpm.ml +++ b/ocaml/tests/test_rpm.ml @@ -163,14 +163,14 @@ module PkgCompareVersionStringsTest = Generic.MakeStateless (struct ; (("1.0", "1.a"), ">") ; (("2.50", "2.5"), ">") ; (("XS3", "xs2"), "<") - ; (("1.2.3", "1.2.3a"), ">") + ; (("1.2.3", "1.2.3a"), "<") ; (("xs4", "xs.4"), "=") ; (("2a", "2.0"), "<") ; (("2a", "2b"), "<") ; (("1.0", "1.xs2"), ">") ; (("1.0_xs", "1.0.xs"), "=") - ; (("1.0x3", "1.0x04"), ">") - ; (("1.0O3", "1.0O04"), ">") + ; (("1.0x3", "1.0x04"), "<") + ; (("1.0O3", "1.0O04"), "<") ] end) diff --git a/ocaml/xapi/rpm.ml b/ocaml/xapi/rpm.ml index dc0838b9ef..18d4fa627b 100644 --- a/ocaml/xapi/rpm.ml +++ b/ocaml/xapi/rpm.ml @@ -52,10 +52,24 @@ module Pkg = struct type order = LT | EQ | GT - type segment_of_version = Int of int | Str of string + type version_segment = Int of int | Str of string let string_of_order = function LT -> "<" | EQ -> "=" | GT -> ">" + let order_of_int = function 0 -> EQ | r when r > 0 -> GT | _ -> LT + + let version_segment_of_string s = + let is_all_number str = + let r = Re.Posix.compile_pat {|^[0-9]+$|} in + Re.execp r str + in + match s with + | _ when is_all_number s -> ( + try Int (int_of_string s) with _ -> Str s + ) + | _ -> + Str s + let error_msg = Printf.sprintf "Failed to parse '%s'" let parse_epoch_version_release epoch_ver_rel = @@ -157,9 +171,40 @@ module Pkg = struct | None, None -> EQ + let compare_version_segment s1 s2 = + match (s1, s2) with + | Int i1, Int i2 -> + Int.compare i1 i2 |> order_of_int + | Str s1, Str s2 -> + String.compare s1 s2 |> order_of_int + | Int _, Str _ -> + GT + | Str _, Int _ -> + LT + + let split_version_string s = + let r = Re.Posix.compile_pat {|([0-9]+|[a-zA-Z]+|~)|} in + let len = String.length s in + let rec aux acc pos = + if pos >= len then + List.rev acc + else + match Re.exec_opt ~pos r s with + | Some groups -> + let matched = Re.Group.get groups 0 in + let next_pos = Re.Group.stop groups 0 in + aux (matched :: acc) next_pos + | None -> + List.rev acc + in + aux [] 0 + + let normalize v = split_version_string v |> List.map version_segment_of_string + let compare_version_strings s1 s2 = (* Compare versions or releases of RPM packages - * I.E. for "libpath-utils-0.2.1-29.el7.x86_64" and "libpath-utils-0.2.1a-30.el7.x86_64", + * I.E. for "libpath-utils-0.2.1-29.el7.x86_64" and + * "libpath-utils-0.2.1a-30.el7.x86_64", * this function compares: * versions between "0.2.1" and "0.2.1a", or * releases between "29.el7" and "30.el7". @@ -180,50 +225,14 @@ module Pkg = struct * "1.0" ">" "1.xs2" * "1.0_xs" "=" "1.0.xs" *) - let normalize v = - let split_letters_and_numbers s = - let r = Re.Posix.compile_pat {|^([^0-9]+)([0-9]+)$|} in - match Re.exec_opt r s with - | Some groups -> - [Re.Group.get groups 1; Re.Group.get groups 2] - | None -> - [s] - in - let number = Re.Posix.compile_pat "^[0-9]+$" in - v - |> Astring.String.cuts ~sep:"." - |> List.concat_map (fun s -> Astring.String.cuts ~sep:"_" s) - |> List.concat_map (fun s -> split_letters_and_numbers s) - |> List.map (fun s -> - if Re.execp number s then - match int_of_string s with i -> Int i | exception _ -> Str s - else - Str s - ) - in let rec compare_segments l1 l2 = match (l1, l2) with | c1 :: t1, c2 :: t2 -> ( - match (c1, c2) with - | Int s1, Int s2 -> - if s1 > s2 then - GT - else if s1 = s2 then - compare_segments t1 t2 - else - LT - | Int _, Str _ -> - GT - | Str _, Int _ -> - LT - | Str s1, Str s2 -> - let r = String.compare s1 s2 in - if r < 0 then - LT - else if r > 0 then - GT - else - compare_segments t1 t2 + match compare_version_segment c1 c2 with + | EQ -> + compare_segments t1 t2 + | r -> + r ) | _ :: _, [] -> GT From 5609e49cad79a892350aff5fba7775cf672c953b Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 30 Oct 2024 15:15:26 +0000 Subject: [PATCH 15/30] Do not include xapi-clusterd.service in toolstack.target This service is not enabled and started by default, but used on-demand whenever it is needed for clustering. The current Wants= option in the toolstack.target is causing xapi-clusterd.service to be started by xe-toolstack-restart even if it is not enabled. The fix is to replace Wants= in toolstack.target with WantedBy= in xapi-clusterd.service, as the latter only installs the dependency when enabling the service. Signed-off-by: Rob Hoes --- scripts/toolstack.target | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/toolstack.target b/scripts/toolstack.target index c4019a4d23..c49701c285 100644 --- a/scripts/toolstack.target +++ b/scripts/toolstack.target @@ -20,7 +20,6 @@ Wants=xcp-networkd.service Wants=xenopsd-xc.service Wants=squeezed.service Wants=xapi-storage-script.service -Wants=xapi-clusterd.service Wants=varstored-guard.service [Install] From c0482fefd643494d7bfa8fd4702352fb17c1a46e Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Wed, 30 Oct 2024 17:28:18 +0000 Subject: [PATCH 16/30] CA-401324: Update pvsproxy socket location A previous commit changed the socket location to /run/pvsproxy but this is problematic because the pvsproxy daemon runs as a deprivileged user and cannot create the socket. Instead, update the path to a location that the daemon has permission to create. Add a fallback to the original path to cope with older pvsproxy daemons. This fallback can be removed in the future. Co-developed-by: Pau Ruiz Safont Signed-off-by: Ross Lagerwall --- ocaml/networkd/bin/network_server.ml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/ocaml/networkd/bin/network_server.ml b/ocaml/networkd/bin/network_server.ml index 289ef66593..b398ca93b8 100644 --- a/ocaml/networkd/bin/network_server.ml +++ b/ocaml/networkd/bin/network_server.ml @@ -1474,10 +1474,21 @@ end module PVS_proxy = struct open S.PVS_proxy - let path = ref "/run/pvsproxy" + let path = ref "" + + let depriv_path = "/run/pvsproxy-state/socket" + + let legacy_path = "/opt/citrix/pvsproxy/socket/pvsproxy" + + let default_path () = + if Sys.file_exists depriv_path then + depriv_path + else + legacy_path let do_call call = - try Jsonrpc_client.with_rpc ~path:!path ~call () + let p = match !path with "" -> default_path () | path -> path in + try Jsonrpc_client.with_rpc ~path:p ~call () with e -> error "Error when calling PVS proxy: %s" (Printexc.to_string e) ; raise (Network_error PVS_proxy_connection_error) From a6187d53b7ce8b57c3304daf7961511c90f6d107 Mon Sep 17 00:00:00 2001 From: Changlei Li Date: Wed, 30 Oct 2024 19:34:09 +0800 Subject: [PATCH 17/30] CA-400560: Support tilde in RPM version/release comparison Tilde `~` used in RPM version stands for pre-release. So version with `~` is earlier than the same version without `~`. For example: 1.2.3~beta < 1.2.3 1.xs8 > 1.xs8~2_1 Signed-off-by: Changlei Li --- ocaml/tests/test_rpm.ml | 22 +++++++++++++++ ocaml/xapi/rpm.ml | 61 ++++++++++++++++++++--------------------- 2 files changed, 52 insertions(+), 31 deletions(-) diff --git a/ocaml/tests/test_rpm.ml b/ocaml/tests/test_rpm.ml index 5b80215521..983d9b7398 100644 --- a/ocaml/tests/test_rpm.ml +++ b/ocaml/tests/test_rpm.ml @@ -130,6 +130,19 @@ module PkgOfFullnameTest = Generic.MakeStateless (struct } ) ) + ; ( Io.Line "libpath-utils-2:0.2.1~rc1-29.xs8~2_1.x86_64" + , Ok + (Some + Pkg. + { + name= "libpath-utils" + ; epoch= Some 2 + ; version= "0.2.1~rc1" + ; release= "29.xs8~2_1" + ; arch= "x86_64" + } + ) + ) ; (Io.Line "libpath-utils-:0.2.1-29.el7.x86_64", Ok None) ; (Io.Line "libpath-utils-2:0.2.1-29.el7x86_64", Ok None) ; (* all RPM packages installed by default *) @@ -171,6 +184,15 @@ module PkgCompareVersionStringsTest = Generic.MakeStateless (struct ; (("1.0_xs", "1.0.xs"), "=") ; (("1.0x3", "1.0x04"), "<") ; (("1.0O3", "1.0O04"), "<") + ; (("1.2.3", "1.2.3~rc1"), ">") + ; (("1.2.3~rc1", "1.2.3~rc2"), "<") + ; (("1.2.3~rc1", "1.2.3~rc1"), "=") + ; (("1.2.3~rc1", "1.2.3~rc1.1"), "<") + ; (("1.2.3~rc1.1", "1.2.3~rc1.2"), "<") + ; (("1.2.3~rc1.1", "1.2.3~rc1_1"), "=") + ; (("1.2.3.xs8", "1.2.3.xs8~2_1"), ">") + ; (("1.2.3.xs8~2_1", "1.2.3.xs8~2_1~beta"), ">") + ; (("1.2.3.xs8~", "1.2.3.xs8"), "<") ] end) diff --git a/ocaml/xapi/rpm.ml b/ocaml/xapi/rpm.ml index 18d4fa627b..c9823170ae 100644 --- a/ocaml/xapi/rpm.ml +++ b/ocaml/xapi/rpm.ml @@ -52,24 +52,12 @@ module Pkg = struct type order = LT | EQ | GT - type version_segment = Int of int | Str of string + type version_segment = Int of int | Str of string | Tilde let string_of_order = function LT -> "<" | EQ -> "=" | GT -> ">" let order_of_int = function 0 -> EQ | r when r > 0 -> GT | _ -> LT - let version_segment_of_string s = - let is_all_number str = - let r = Re.Posix.compile_pat {|^[0-9]+$|} in - Re.execp r str - in - match s with - | _ when is_all_number s -> ( - try Int (int_of_string s) with _ -> Str s - ) - | _ -> - Str s - let error_msg = Printf.sprintf "Failed to parse '%s'" let parse_epoch_version_release epoch_ver_rel = @@ -177,29 +165,30 @@ module Pkg = struct Int.compare i1 i2 |> order_of_int | Str s1, Str s2 -> String.compare s1 s2 |> order_of_int + | Tilde, Tilde -> + EQ | Int _, Str _ -> GT | Str _, Int _ -> LT + | Tilde, _ -> + LT + | _, Tilde -> + GT - let split_version_string s = - let r = Re.Posix.compile_pat {|([0-9]+|[a-zA-Z]+|~)|} in - let len = String.length s in - let rec aux acc pos = - if pos >= len then - List.rev acc - else - match Re.exec_opt ~pos r s with - | Some groups -> - let matched = Re.Group.get groups 0 in - let next_pos = Re.Group.stop groups 0 in - aux (matched :: acc) next_pos - | None -> - List.rev acc - in - aux [] 0 + let split_version_string = + let r = Re.Posix.compile_pat {|[a-zA-Z]+|[0-9]+|~|} in + fun s -> s |> Re.all r |> List.map (fun g -> Re.Group.get g 0) - let normalize v = split_version_string v |> List.map version_segment_of_string + let normalize v = + let version_segment_of_string = function + | "~" -> + Tilde + | s -> ( + try Int (int_of_string s) with _ -> Str s + ) + in + v |> split_version_string |> List.map version_segment_of_string let compare_version_strings s1 s2 = (* Compare versions or releases of RPM packages @@ -218,12 +207,18 @@ module Pkg = struct * "1.0" ">" "1.a" * "2.50" ">" "2.5" * "XS3" "<" "xs2" - * "1.2.3" ">" "1.2.3a" + * "1.2.3" "<" "1.2.3a" * "xs4" "=" "xs.4" * "2a" "<" "2.0" * "2a" "<" "2b" * "1.0" ">" "1.xs2" * "1.0_xs" "=" "1.0.xs" + * "1.xs8" ">" "1.xs8~2_1" + * "1.2.3" ">" "1.2.3~beta" + * Some corner cases that don't follow standard RPM versioning conventions + * with tilde: + * "1.2.3~rc1~beta" "<" "1.2.3~rc1" + * "1.2.3~" "<" "1.2.3" *) let rec compare_segments l1 l2 = match (l1, l2) with @@ -234,6 +229,10 @@ module Pkg = struct | r -> r ) + | Tilde :: _, [] -> + LT + | [], Tilde :: _ -> + GT | _ :: _, [] -> GT | [], _ :: _ -> From e376e96fb53565221bccb97dca50887a08777394 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Mon, 4 Nov 2024 09:36:31 +0000 Subject: [PATCH 18/30] CA-401404: Only check previous active service status `systemctl list-dependencies --plain --no-pager` list uncessary xapi-clusterd service when xapi-clusterd-shutdown is started. Here instead of checking the status of all dependencies, we only check the status of previous enabled dependencies. This also complies with the behavior before toolstack.target Signed-off-by: Lin Liu --- scripts/xe-toolstack-restart | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/xe-toolstack-restart b/scripts/xe-toolstack-restart index d377ae7acb..55e82e8f3d 100755 --- a/scripts/xe-toolstack-restart +++ b/scripts/xe-toolstack-restart @@ -43,7 +43,11 @@ set -e systemctl restart $MPATHALERT toolstack.target # Check the status of toolstack services -for service in $(systemctl list-dependencies --plain --no-pager toolstack.target); do +for service in $(systemctl list-dependencies --plain --no-pager toolstack.target) $MPATHALERT; do + + # Skip check if the service is not enabled + systemctl is-enabled "$service" >/dev/null 2>&1 || continue + # During system bootup, xcp-rrdd-dcmi.service often fail as # `ipmitool dcmi discover` discover nothing, just ignore it for now if [ "$service" == "xcp-rrdd-dcmi.service" ]; then From 0149ee5c2433b8995d8b75f28c2037c9da4cba80 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 29 Oct 2024 15:28:13 +0000 Subject: [PATCH 19/30] CA-401242: avoid long-running, idle connections on VDI.pool_migrate When a VDI.pool_migrate starts at a pool member, a connection between the coordinator and that host remains open for the duration of the migration. This connection is completely idle. If the migration lasts for more than 12 hours, stunnel closes the connection due to inactivity, which cancels the migration. To avoid this use an internal API that uses short-running connection whenever possible to avoid interrupting the migration. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/message_forwarding.ml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 17ff3de026..cb0b82aa7f 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -5501,14 +5501,22 @@ functor in (snapshot, host) in + let op session_id rpc = + let sync_op () = + Client.VDI.pool_migrate ~rpc ~session_id ~vdi ~sr ~options + in + let async_op () = + Client.InternalAsync.VDI.pool_migrate ~rpc ~session_id ~vdi ~sr + ~options + in + Helpers.try_internal_async ~__context API.ref_VDI_of_rpc async_op + sync_op + in VM.reserve_memory_for_vm ~__context ~vm ~host ~snapshot ~host_op:`vm_migrate (fun () -> with_sr_andor_vdi ~__context ~vdi:(vdi, `mirror) ~doc:"VDI.mirror" (fun () -> - do_op_on ~local_fn ~__context ~host (fun session_id rpc -> - Client.VDI.pool_migrate ~rpc ~session_id ~vdi ~sr - ~options - ) + do_op_on ~local_fn ~__context ~host op ) ) ) From 3ca39ecb584662320c8185dc84af7649fe2b6fb3 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 30 Oct 2024 14:31:53 +0000 Subject: [PATCH 20/30] xapi_vdi: replaces nested if-elses with monadic Result This allows to reduce most of the indentation in check_operation_error, which returns searches for a single error and returns it. Signed-off-by: Pau Ruiz Safont --- ocaml/tests/test_vdi_allowed_operations.ml | 79 +-- ocaml/xapi/xapi_vdi.ml | 782 ++++++++++----------- ocaml/xapi/xapi_vdi.mli | 2 +- 3 files changed, 422 insertions(+), 441 deletions(-) diff --git a/ocaml/tests/test_vdi_allowed_operations.ml b/ocaml/tests/test_vdi_allowed_operations.ml index 579cf7331c..877b4fa48e 100644 --- a/ocaml/tests/test_vdi_allowed_operations.ml +++ b/ocaml/tests/test_vdi_allowed_operations.ml @@ -30,9 +30,8 @@ let setup_test ~__context ?sm_fun ?vdi_fun () = (vdi_ref, vdi_record) let check_same_error_code = - let open Alcotest in - let open Alcotest_comparators in - check (option error_code) "Same error code" + Alcotest.(check @@ result unit Alcotest_comparators.error_code) + "Same error code" let run_assert_equal_with_vdi ~__context ?(ha_enabled = false) ?sm_fun ?vdi_fun op exc = @@ -52,7 +51,7 @@ let test_ca98944 () = () ) `update - (Some (Api_errors.vdi_in_use, [])) ; + (Error (Api_errors.vdi_in_use, [])) ; (* Should raise vdi_in_use *) run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi_ref -> @@ -61,7 +60,7 @@ let test_ca98944 () = () ) `update - (Some (Api_errors.vdi_in_use, [])) ; + (Error (Api_errors.vdi_in_use, [])) ; (* Should raise vdi_in_use *) run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi_ref -> @@ -70,7 +69,7 @@ let test_ca98944 () = () ) `update - (Some (Api_errors.vdi_in_use, [])) ; + (Error (Api_errors.vdi_in_use, [])) ; (* Should raise other_operation_in_progress *) run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi_ref -> @@ -79,14 +78,14 @@ let test_ca98944 () = () ) `update - (Some (Api_errors.other_operation_in_progress, [])) ; + (Error (Api_errors.other_operation_in_progress, [])) ; (* Should pass *) run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi_ref -> make_vbd ~vDI:vdi_ref ~__context ~reserved:false ~currently_attached:false ~current_operations:[] () ) - `forget None + `forget (Ok ()) (* VDI.copy should be allowed if all attached VBDs are read-only. *) let test_ca101669 () = @@ -97,15 +96,15 @@ let test_ca101669 () = make_vbd ~__context ~vDI:vdi_ref ~currently_attached:true ~mode:`RW () ) `copy - (Some (Api_errors.vdi_in_use, [])) ; + (Error (Api_errors.vdi_in_use, [])) ; (* Attempting to copy a RO-attached VDI should pass. *) run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi_ref -> make_vbd ~__context ~vDI:vdi_ref ~currently_attached:true ~mode:`RO () ) - `copy None ; + `copy (Ok ()) ; (* Attempting to copy an unattached VDI should pass. *) - run_assert_equal_with_vdi ~__context `copy None ; + run_assert_equal_with_vdi ~__context `copy (Ok ()) ; (* Attempting to copy RW- and RO-attached VDIs should fail with VDI_IN_USE. *) run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi_ref -> @@ -115,7 +114,7 @@ let test_ca101669 () = make_vbd ~__context ~vDI:vdi_ref ~currently_attached:true ~mode:`RO () ) `copy - (Some (Api_errors.vdi_in_use, [])) + (Error (Api_errors.vdi_in_use, [])) let test_ca125187 () = let __context = Test_common.make_test_database () in @@ -128,7 +127,7 @@ let test_ca125187 () = Db.VDI.set_current_operations ~__context ~self:vdi_ref ~value:[("mytask", `copy)] ) - `copy None ; + `copy (Ok ()) ; (* A VBD can be plugged to a VDI which is being copied. This is required as * the VBD is plugged after the VDI is marked with the copy operation. *) let _, _ = @@ -162,7 +161,7 @@ let test_ca126097 () = Db.VDI.set_current_operations ~__context ~self:vdi_ref ~value:[("mytask", `copy)] ) - `clone None ; + `clone (Ok ()) ; (* Attempting to snapshot a VDI being copied should be allowed. *) run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi_ref -> @@ -173,7 +172,7 @@ let test_ca126097 () = ~value:[("mytask", `copy)] ) `snapshot - (Some (Api_errors.operation_not_allowed, [])) + (Error (Api_errors.operation_not_allowed, [])) (** Tests for the checks related to changed block tracking *) let test_cbt = @@ -189,7 +188,7 @@ let test_cbt = Db.SM.remove_from_features ~__context ~self:sm ~key:"VDI_CONFIG_CBT" ) op - (Some (Api_errors.sr_operation_not_supported, [])) + (Error (Api_errors.sr_operation_not_supported, [])) in let test_sm_feature_check = for_vdi_operations all_cbt_operations test_sm_feature_check @@ -202,7 +201,7 @@ let test_cbt = Db.VDI.set_is_a_snapshot ~__context ~self:vdi ~value:true ) op - (Some (Api_errors.operation_not_allowed, [])) + (Error (Api_errors.operation_not_allowed, [])) ) in let test_cbt_enable_disable_vdi_type_check = @@ -213,21 +212,21 @@ let test_cbt = Db.VDI.set_type ~__context ~self:vdi ~value:`metadata ) op - (Some (Api_errors.vdi_incompatible_type, [])) ; + (Error (Api_errors.vdi_incompatible_type, [])) ; run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi -> Db.VDI.set_type ~__context ~self:vdi ~value:`redo_log ) op - (Some (Api_errors.vdi_incompatible_type, [])) ; + (Error (Api_errors.vdi_incompatible_type, [])) ; run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi -> Db.VDI.set_type ~__context ~self:vdi ~value:`user) - op None ; + op (Ok ()) ; run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi -> Db.VDI.set_type ~__context ~self:vdi ~value:`system ) - op None + op (Ok ()) ) in let test_cbt_enable_disable_not_allowed_for_reset_on_boot = @@ -238,7 +237,7 @@ let test_cbt = Db.VDI.set_on_boot ~__context ~self:vdi ~value:`reset ) op - (Some (Api_errors.vdi_on_boot_mode_incompatible_with_operation, [])) + (Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, [])) ) in let test_cbt_enable_disable_can_be_performed_live = @@ -249,7 +248,7 @@ let test_cbt = Test_common.make_vbd ~__context ~vDI:vdi ~currently_attached:true ~mode:`RW () ) - op None + op (Ok ()) ) in let test_cbt_metadata_vdi_type_check = @@ -273,7 +272,7 @@ let test_cbt = Db.VDI.set_type ~__context ~self:vdi ~value:`cbt_metadata ) op - (Some (Api_errors.vdi_incompatible_type, [])) + (Error (Api_errors.vdi_incompatible_type, [])) ) in let test_vdi_cbt_enabled_check = @@ -288,7 +287,7 @@ let test_cbt = Db.VDI.set_cbt_enabled ~__context ~self:vdi ~value:true ) op - (Some (Api_errors.vdi_cbt_enabled, [])) + (Error (Api_errors.vdi_cbt_enabled, [])) ) in let test_vdi_data_destroy () = @@ -308,31 +307,31 @@ let test_cbt = ) (* ensure VDI.data_destroy works before introducing errors *) [ - ((fun vdi -> pass_data_destroy vdi), None) + ((fun vdi -> pass_data_destroy vdi), Ok ()) ; ( (fun vdi -> pass_data_destroy vdi ; Db.VDI.set_is_a_snapshot ~__context ~self:vdi ~value:false ) - , Some (Api_errors.operation_not_allowed, []) + , Error (Api_errors.operation_not_allowed, []) ) ; ( (fun vdi -> pass_data_destroy vdi ; let sr = Db.VDI.get_SR ~__context ~self:vdi in Db.SR.set_is_tools_sr ~__context ~self:sr ~value:true ) - , Some (Api_errors.sr_operation_not_supported, []) + , Error (Api_errors.sr_operation_not_supported, []) ) ; ( (fun vdi -> pass_data_destroy vdi ; Db.VDI.set_cbt_enabled ~__context ~self:vdi ~value:false ) - , Some (Api_errors.vdi_no_cbt_metadata, []) + , Error (Api_errors.vdi_no_cbt_metadata, []) ) ; ( (fun vdi -> pass_data_destroy vdi ; Db.VDI.set_type ~__context ~self:vdi ~value:`cbt_metadata ) - , None + , Ok () ) ; (* VDI.data_destroy should wait a bit for the VDIs to be unplugged and destroyed, instead of failing immediately in check_operation_error, @@ -346,7 +345,7 @@ let test_cbt = in pass_data_destroy vdi ) - , None + , Ok () ) ; ( (fun vdi -> (* Set up the fields corresponding to a VM snapshot *) @@ -359,7 +358,7 @@ let test_cbt = in pass_data_destroy vdi ) - , None + , Ok () ) ; ( (fun vdi -> let vM = Test_common.make_vm ~__context () in @@ -369,7 +368,7 @@ let test_cbt = in pass_data_destroy vdi ) - , None + , Ok () ) ] in @@ -389,7 +388,7 @@ let test_cbt = Db.VDI.set_cbt_enabled ~__context ~self:vDI ~value:true ; Db.VDI.set_is_a_snapshot ~__context ~self:vDI ~value:true ) - , None + , Ok () ) in List.iter @@ -407,17 +406,17 @@ let test_cbt = in () ) - , Some (Api_errors.vdi_in_use, []) + , Error (Api_errors.vdi_in_use, []) ) ; (* positive test checks no errors thrown for cbt_metadata or cbt_enabled VDIs *) ( (fun vDI -> Db.VDI.set_cbt_enabled ~__context ~self:vDI ~value:true ; Db.VDI.set_type ~__context ~self:vDI ~value:`cbt_metadata ) - , None + , Ok () ) ; ( (fun vDI -> Db.VDI.set_cbt_enabled ~__context ~self:vDI ~value:true) - , None + , Ok () ) ; test_cbt_enabled_snapshot_vdi_linked_to_vm_snapshot ~vbd_currently_attached:false @@ -467,14 +466,14 @@ let test_operations_restricted_during_rpu = Db.SM.set_features ~__context ~self:sm ~value:[("VDI_MIRROR", 1L)] ) `mirror - (Some (Api_errors.not_supported_during_upgrade, [])) ; + (Error (Api_errors.not_supported_during_upgrade, [])) ; Db.Pool.remove_from_other_config ~__context ~self:pool ~key:Xapi_globs.rolling_upgrade_in_progress ; run_assert_equal_with_vdi ~__context ~sm_fun:(fun sm -> Db.SM.set_features ~__context ~self:sm ~value:[("VDI_MIRROR", 1L)] ) - `mirror None + `mirror (Ok ()) in let test_update_allowed_operations () = let __context = Mock.make_context_with_new_db "Mock context" in @@ -523,7 +522,7 @@ let test_null_vm = () in (* This shouldn't throw an exception *) - let (_ : _ option) = + let (_ : _ result) = Xapi_vdi.check_operation_error ~__context false vdi_record vdi_ref op in () diff --git a/ocaml/xapi/xapi_vdi.ml b/ocaml/xapi/xapi_vdi.ml index ab8c543a36..a2978de0b7 100644 --- a/ocaml/xapi/xapi_vdi.ml +++ b/ocaml/xapi/xapi_vdi.ml @@ -22,49 +22,49 @@ open D (**************************************************************************************) (* current/allowed operations checking *) +let feature_of_op = + let open Smint in + function + | `forget | `copy | `force_unlock | `blocked -> + None + | `snapshot -> + Some Vdi_snapshot + | `destroy -> + Some Vdi_delete + | `resize -> + Some Vdi_resize + | `update -> + Some Vdi_update + | `resize_online -> + Some Vdi_resize_online + | `generate_config -> + Some Vdi_generate_config + | `clone -> + Some Vdi_clone + | `mirror -> + Some Vdi_mirror + | `enable_cbt | `disable_cbt | `data_destroy | `list_changed_blocks -> + Some Vdi_configure_cbt + | `set_on_boot -> + Some Vdi_reset_on_boot + let check_sm_feature_error (op : API.vdi_operations) sm_features sr = - let required_sm_feature = - Smint.( - match op with - | `forget | `copy | `force_unlock | `blocked -> - None - | `snapshot -> - Some Vdi_snapshot - | `destroy -> - Some Vdi_delete - | `resize -> - Some Vdi_resize - | `update -> - Some Vdi_update - | `resize_online -> - Some Vdi_resize_online - | `generate_config -> - Some Vdi_generate_config - | `clone -> - Some Vdi_clone - | `mirror -> - Some Vdi_mirror - | `enable_cbt | `disable_cbt | `data_destroy | `list_changed_blocks -> - Some Vdi_configure_cbt - | `set_on_boot -> - Some Vdi_reset_on_boot - ) - in - match required_sm_feature with + match feature_of_op op with | None -> - None + Ok () | Some feature -> if Smint.(has_capability feature sm_features) then - None + Ok () else - Some (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) + Error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) -(** Checks to see if an operation is valid in this state. Returns [Some exception] - if not and [None] if everything is ok. If the [vbd_records] parameter is +(** Checks to see if an operation is valid in this state. Returns [Error exception] + if not and [Ok ()] if everything is ok. If the [vbd_records] parameter is specified, it should contain at least all the VBD records from the database that are linked to this VDI. *) let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) ?vbd_records ha_enabled record _ref' op = + let ( let* ) = Result.bind in let _ref = Ref.string_of _ref' in let current_ops = record.Db_actions.vDI_current_operations in let reset_on_boot = record.Db_actions.vDI_on_boot = `reset in @@ -83,14 +83,18 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) 5. HA prevents you from deleting statefiles or metadata volumes 6. During rolling pool upgrade, only operations known by older releases are allowed *) - if - Helpers.rolling_upgrade_in_progress ~__context - && not (List.mem op Xapi_globs.rpu_allowed_vdi_operations) - then - Some (Api_errors.not_supported_during_upgrade, []) - else - (* Don't fail with other_operation_in_progress if VDI mirroring is in progress - * and destroy is called as part of VDI mirroring *) + let* () = + if + Helpers.rolling_upgrade_in_progress ~__context + && not (List.mem op Xapi_globs.rpu_allowed_vdi_operations) + then + Error (Api_errors.not_supported_during_upgrade, []) + else + Ok () + in + let* () = + (* Don't fail with other_operation_in_progress if VDI mirroring is in + progress and destroy is called as part of VDI mirroring *) let is_vdi_mirroring_in_progress = List.exists (fun (_, op) -> op = `mirror) current_ops && op = `destroy in @@ -98,373 +102,351 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) List.exists (fun (_, op) -> op <> `copy) current_ops && not is_vdi_mirroring_in_progress then - Some (Api_errors.other_operation_in_progress, ["VDI"; _ref]) - else (* check to see whether it's a local cd drive *) - let sr = record.Db_actions.vDI_SR in - let sr_type = Db.SR.get_type ~__context ~self:sr in - let is_tools_sr = Db.SR.get_is_tools_sr ~__context ~self:sr in - (* Check to see if any PBDs are attached *) - let open Xapi_database.Db_filter_types in - let pbds_attached = - match pbd_records with - | [] -> - Db.PBD.get_records_where ~__context - ~expr: - (And - ( Eq (Field "SR", Literal (Ref.string_of sr)) - , Eq (Field "currently_attached", Literal "true") - ) - ) - | _ -> - List.filter - (fun (_, pbd_record) -> - pbd_record.API.pBD_SR = sr - && pbd_record.API.pBD_currently_attached - ) - pbd_records - in - if pbds_attached = [] && List.mem op [`resize] then - Some (Api_errors.sr_no_pbds, [Ref.string_of sr]) - else - (* check to see whether VBDs exist which are using this VDI *) - - (* Only a 'live' operation can be performed if there are active (even RO) devices *) - let my_active_vbd_records = - match vbd_records with - | None -> - List.map snd - (Db.VBD.get_internal_records_where ~__context - ~expr: - (And - ( Eq (Field "VDI", Literal _ref) - , Or - ( Eq (Field "currently_attached", Literal "true") - , Eq (Field "reserved", Literal "true") - ) - ) - ) - ) - | Some records -> - List.map snd - (List.filter - (fun (_, vbd_record) -> - vbd_record.Db_actions.vBD_VDI = _ref' - && (vbd_record.Db_actions.vBD_currently_attached - || vbd_record.Db_actions.vBD_reserved - ) - ) - records - ) - in - let my_active_rw_vbd_records = - List.filter - (fun vbd -> vbd.Db_actions.vBD_mode = `RW) - my_active_vbd_records - in - (* VBD operations (plug/unplug) (which should be transient) cause us to serialise *) - let my_has_current_operation_vbd_records = - match vbd_records with - | None -> - List.map snd - (Db.VBD.get_internal_records_where ~__context - ~expr: - (And - ( Eq (Field "VDI", Literal _ref) - , Not (Eq (Field "current_operations", Literal "()")) - ) - ) - ) - | Some records -> - List.map snd - (List.filter - (fun (_, vbd_record) -> - vbd_record.Db_actions.vBD_VDI = _ref' - && vbd_record.Db_actions.vBD_current_operations <> [] - ) - records - ) - in - (* If the VBD is currently_attached then some operations can still be performed ie: - VDI.clone (if the VM is suspended we have to have the 'allow_clone_suspended_vm'' flag) - VDI.snapshot; VDI.resize_online; 'blocked' (CP-831) - VDI.data_destroy: it is not allowed on VDIs linked to a VM, but the - implementation first waits for the VDI's VBDs to be unplugged and - destroyed, and the checks are performed there. - *) - let operation_can_be_performed_live = - match op with - | `snapshot - | `resize_online - | `blocked - | `clone - | `mirror - | `enable_cbt - | `disable_cbt - | `data_destroy -> - true - | _ -> - false - in - let operation_can_be_performed_with_ro_attach = - operation_can_be_performed_live - || match op with `copy -> true | _ -> false - in - (* NB RO vs RW sharing checks are done in xapi_vbd.ml *) - let blocked_by_attach = - let blocked_by_attach = - if operation_can_be_performed_live then - false - else if operation_can_be_performed_with_ro_attach then - my_active_rw_vbd_records <> [] - else - my_active_vbd_records <> [] - in - let allow_attached_vbds = - (* We use Valid_ref_list.list to ignore exceptions due to invalid references that - could propagate to the message forwarding layer, which calls this - function to check for errors - these exceptions would prevent the - actual XenAPI function from being run. Checks called from the - message forwarding layer should not fail with an exception. *) - let true_for_all_active_vbds f = - Valid_ref_list.for_all f my_active_vbd_records - in - match op with - | `list_changed_blocks -> - let vbd_connected_to_vm_snapshot vbd = - let vm = vbd.Db_actions.vBD_VM in - Db.is_valid_ref __context vm - && Db.VM.get_is_a_snapshot ~__context ~self:vm - in - (* We allow list_changed_blocks on VDIs attached to snapshot VMs, - because VM.checkpoint may set the currently_attached fields of the - snapshot's VBDs to true, and this would block list_changed_blocks. *) - true_for_all_active_vbds vbd_connected_to_vm_snapshot - | _ -> - false - in - blocked_by_attach && not allow_attached_vbds - in - if blocked_by_attach then - Some - ( Api_errors.vdi_in_use - , [_ref; Record_util.vdi_operations_to_string op] + Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) + else + Ok () + in + (* check to see whether it's a local cd drive *) + let sr = record.Db_actions.vDI_SR in + let sr_type = Db.SR.get_type ~__context ~self:sr in + let is_tools_sr = Db.SR.get_is_tools_sr ~__context ~self:sr in + (* Check to see if any PBDs are attached *) + let open Xapi_database.Db_filter_types in + let pbds_attached = + match pbd_records with + | [] -> + Db.PBD.get_records_where ~__context + ~expr: + (And + ( Eq (Field "SR", Literal (Ref.string_of sr)) + , Eq (Field "currently_attached", Literal "true") + ) ) - else if - (* data_destroy first waits for all the VBDs to disappear in its - implementation, so it is harmless to allow it when any of the VDI's - VBDs have operations in progress. This ensures that we avoid the retry - mechanism of message forwarding and only use the event loop. *) - my_has_current_operation_vbd_records <> [] && op <> `data_destroy - then - Some (Api_errors.other_operation_in_progress, ["VDI"; _ref]) - else - let sm_features = - Xapi_sr_operations.features_of_sr_internal ~__context ~_type:sr_type - in - let sm_feature_error = check_sm_feature_error op sm_features sr in - if sm_feature_error <> None then - sm_feature_error - else - let allowed_for_cbt_metadata_vdi = - match op with - | `clone - | `copy - | `disable_cbt - | `enable_cbt - | `mirror - | `resize - | `resize_online - | `snapshot - | `set_on_boot -> - false - | `blocked - | `data_destroy - | `destroy - | `list_changed_blocks - | `force_unlock - | `forget - | `generate_config - | `update -> - true - in - if - (not allowed_for_cbt_metadata_vdi) - && record.Db_actions.vDI_type = `cbt_metadata - then - Some - ( Api_errors.vdi_incompatible_type - , [_ref; Record_util.vdi_type_to_string `cbt_metadata] - ) - else - let allowed_when_cbt_enabled = - match op with - | `mirror | `set_on_boot -> - false - | `blocked - | `clone - | `copy - | `data_destroy - | `destroy - | `disable_cbt - | `enable_cbt - | `list_changed_blocks - | `force_unlock - | `forget - | `generate_config - | `resize - | `resize_online - | `snapshot - | `update -> - true - in - if - (not allowed_when_cbt_enabled) - && record.Db_actions.vDI_cbt_enabled - then - Some (Api_errors.vdi_cbt_enabled, [_ref]) - else - let check_destroy () = - if sr_type = "udev" then - Some (Api_errors.vdi_is_a_physical_device, [_ref]) - else if is_tools_sr then - Some - (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) - else if List.mem record.Db_actions.vDI_type [`rrd] then - Some (Api_errors.vdi_has_rrds, [_ref]) - else if - ha_enabled - && List.mem record.Db_actions.vDI_type - [`ha_statefile; `redo_log] - then - Some (Api_errors.ha_is_enabled, []) - else if - List.mem record.Db_actions.vDI_type - [`ha_statefile; `metadata] - && Xapi_pool_helpers.ha_enable_in_progress ~__context - then - Some (Api_errors.ha_enable_in_progress, []) - else if - List.mem record.Db_actions.vDI_type - [`ha_statefile; `metadata] - && Xapi_pool_helpers.ha_disable_in_progress ~__context - then - Some (Api_errors.ha_disable_in_progress, []) - else - None - in - match op with - | `forget -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type - [`ha_statefile; `redo_log] - then - Some (Api_errors.ha_is_enabled, []) - else if List.mem record.Db_actions.vDI_type [`rrd] then - Some (Api_errors.vdi_has_rrds, [_ref]) - else - None - | `destroy -> - check_destroy () - | `data_destroy -> - if not record.Db_actions.vDI_is_a_snapshot then - Some - ( Api_errors.operation_not_allowed - , ["VDI is not a snapshot: " ^ _ref] - ) - else if not record.Db_actions.vDI_cbt_enabled then - Some (Api_errors.vdi_no_cbt_metadata, [_ref]) - else - check_destroy () - | `resize -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type - [`ha_statefile; `redo_log] - then - Some (Api_errors.ha_is_enabled, []) - else - None - | `resize_online -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type - [`ha_statefile; `redo_log] - then - Some (Api_errors.ha_is_enabled, []) - else - None - | `snapshot when record.Db_actions.vDI_sharable -> - Some (Api_errors.vdi_is_sharable, [_ref]) - | (`snapshot | `copy) when reset_on_boot -> - Some - ( Api_errors.vdi_on_boot_mode_incompatible_with_operation - , [] + | _ -> + List.filter + (fun (_, pbd_record) -> + pbd_record.API.pBD_SR = sr && pbd_record.API.pBD_currently_attached + ) + pbd_records + in + let* () = + if pbds_attached = [] && List.mem op [`resize] then + Error (Api_errors.sr_no_pbds, [Ref.string_of sr]) + else + Ok () + in + + (* check to see whether VBDs exist which are using this VDI *) + + (* Only a 'live' operation can be performed if there are active (even RO) devices *) + let my_active_vbd_records = + match vbd_records with + | None -> + List.map snd + (Db.VBD.get_internal_records_where ~__context + ~expr: + (And + ( Eq (Field "VDI", Literal _ref) + , Or + ( Eq (Field "currently_attached", Literal "true") + , Eq (Field "reserved", Literal "true") ) - | `snapshot -> - if List.exists (fun (_, op) -> op = `copy) current_ops then - Some - ( Api_errors.operation_not_allowed - , ["Snapshot operation not allowed during copy."] - ) - else - None - | `copy -> - if - List.mem record.Db_actions.vDI_type - [`ha_statefile; `redo_log] - then - Some - ( Api_errors.operation_not_allowed - , [ - "VDI containing HA statefile or redo log cannot be \ - copied (check the VDI's allowed operations)." - ] - ) - else - None - | `enable_cbt | `disable_cbt -> - if record.Db_actions.vDI_is_a_snapshot then - Some - ( Api_errors.operation_not_allowed - , ["VDI is a snapshot: " ^ _ref] - ) - else if - not (List.mem record.Db_actions.vDI_type [`user; `system]) - then - Some - ( Api_errors.vdi_incompatible_type - , [ - _ref - ; Record_util.vdi_type_to_string - record.Db_actions.vDI_type - ] - ) - else if reset_on_boot then - Some - ( Api_errors.vdi_on_boot_mode_incompatible_with_operation - , [] - ) - else - None - | `mirror - | `clone - | `generate_config - | `force_unlock - | `set_on_boot - | `list_changed_blocks - | `blocked - | `update -> - None + ) + ) + ) + | Some records -> + List.map snd + (List.filter + (fun (_, vbd_record) -> + vbd_record.Db_actions.vBD_VDI = _ref' + && (vbd_record.Db_actions.vBD_currently_attached + || vbd_record.Db_actions.vBD_reserved + ) + ) + records + ) + in + let my_active_rw_vbd_records = + List.filter (fun vbd -> vbd.Db_actions.vBD_mode = `RW) my_active_vbd_records + in + (* VBD operations (plug/unplug) (which should be transient) cause us to serialise *) + let my_has_current_operation_vbd_records = + match vbd_records with + | None -> + List.map snd + (Db.VBD.get_internal_records_where ~__context + ~expr: + (And + ( Eq (Field "VDI", Literal _ref) + , Not (Eq (Field "current_operations", Literal "()")) + ) + ) + ) + | Some records -> + List.map snd + (List.filter + (fun (_, vbd_record) -> + vbd_record.Db_actions.vBD_VDI = _ref' + && vbd_record.Db_actions.vBD_current_operations <> [] + ) + records + ) + in + (* If the VBD is currently_attached then some operations can still be + performed ie: VDI.clone (if the VM is suspended we have to have the + 'allow_clone_suspended_vm' flag); VDI.snapshot; VDI.resize_online; + 'blocked' (CP-831); VDI.data_destroy: it is not allowed on VDIs linked + to a VM, but the implementation first waits for the VDI's VBDs to be + unplugged and destroyed, and the checks are performed there. + *) + let operation_can_be_performed_live = + match op with + | `snapshot + | `resize_online + | `blocked + | `clone + | `mirror + | `enable_cbt + | `disable_cbt + | `data_destroy -> + true + | _ -> + false + in + let operation_can_be_performed_with_ro_attach = + operation_can_be_performed_live + || match op with `copy -> true | _ -> false + in + (* NB RO vs RW sharing checks are done in xapi_vbd.ml *) + let blocked_by_attach = + let blocked_by_attach = + if operation_can_be_performed_live then + false + else if operation_can_be_performed_with_ro_attach then + my_active_rw_vbd_records <> [] + else + my_active_vbd_records <> [] + in + let allow_attached_vbds = + (* We use Valid_ref_list.list to ignore exceptions due to invalid + references that could propagate to the message forwarding layer, which + calls this function to check for errors - these exceptions would + prevent the actual XenAPI function from being run. Checks called from + the message forwarding layer should not fail with an exception. *) + let true_for_all_active_vbds f = + Valid_ref_list.for_all f my_active_vbd_records + in + match op with + | `list_changed_blocks -> + let vbd_connected_to_vm_snapshot vbd = + let vm = vbd.Db_actions.vBD_VM in + Db.is_valid_ref __context vm + && Db.VM.get_is_a_snapshot ~__context ~self:vm + in + (* We allow list_changed_blocks on VDIs attached to snapshot VMs, + because VM.checkpoint may set the currently_attached fields of the + snapshot's VBDs to true, and this would block list_changed_blocks. *) + true_for_all_active_vbds vbd_connected_to_vm_snapshot + | _ -> + false + in + blocked_by_attach && not allow_attached_vbds + in + let* () = + if blocked_by_attach then + Error + (Api_errors.vdi_in_use, [_ref; Record_util.vdi_operations_to_string op]) + else if + (* data_destroy first waits for all the VBDs to disappear in its + implementation, so it is harmless to allow it when any of the VDI's + VBDs have operations in progress. This ensures that we avoid the retry + mechanism of message forwarding and only use the event loop. *) + my_has_current_operation_vbd_records <> [] && op <> `data_destroy + then + Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) + else + Ok () + in + let sm_features = + Xapi_sr_operations.features_of_sr_internal ~__context ~_type:sr_type + in + let* () = check_sm_feature_error op sm_features sr in + let allowed_for_cbt_metadata_vdi = + match op with + | `clone + | `copy + | `disable_cbt + | `enable_cbt + | `mirror + | `resize + | `resize_online + | `snapshot + | `set_on_boot -> + false + | `blocked + | `data_destroy + | `destroy + | `list_changed_blocks + | `force_unlock + | `forget + | `generate_config + | `update -> + true + in + let* () = + if + (not allowed_for_cbt_metadata_vdi) + && record.Db_actions.vDI_type = `cbt_metadata + then + Error + ( Api_errors.vdi_incompatible_type + , [_ref; Record_util.vdi_type_to_string `cbt_metadata] + ) + else + Ok () + in + let allowed_when_cbt_enabled = + match op with + | `mirror | `set_on_boot -> + false + | `blocked + | `clone + | `copy + | `data_destroy + | `destroy + | `disable_cbt + | `enable_cbt + | `list_changed_blocks + | `force_unlock + | `forget + | `generate_config + | `resize + | `resize_online + | `snapshot + | `update -> + true + in + let* () = + if (not allowed_when_cbt_enabled) && record.Db_actions.vDI_cbt_enabled then + Error (Api_errors.vdi_cbt_enabled, [_ref]) + else + Ok () + in + let check_destroy () = + if sr_type = "udev" then + Error (Api_errors.vdi_is_a_physical_device, [_ref]) + else if is_tools_sr then + Error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) + else if List.mem record.Db_actions.vDI_type [`rrd] then + Error (Api_errors.vdi_has_rrds, [_ref]) + else if + ha_enabled + && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + then + Error (Api_errors.ha_is_enabled, []) + else if + List.mem record.Db_actions.vDI_type [`ha_statefile; `metadata] + && Xapi_pool_helpers.ha_enable_in_progress ~__context + then + Error (Api_errors.ha_enable_in_progress, []) + else if + List.mem record.Db_actions.vDI_type [`ha_statefile; `metadata] + && Xapi_pool_helpers.ha_disable_in_progress ~__context + then + Error (Api_errors.ha_disable_in_progress, []) + else + Ok () + in + match op with + | `forget -> + if + ha_enabled + && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + then + Error (Api_errors.ha_is_enabled, []) + else if List.mem record.Db_actions.vDI_type [`rrd] then + Error (Api_errors.vdi_has_rrds, [_ref]) + else + Ok () + | `destroy -> + check_destroy () + | `data_destroy -> + if not record.Db_actions.vDI_is_a_snapshot then + Error + (Api_errors.operation_not_allowed, ["VDI is not a snapshot: " ^ _ref]) + else if not record.Db_actions.vDI_cbt_enabled then + Error (Api_errors.vdi_no_cbt_metadata, [_ref]) + else + check_destroy () + | `resize -> + if + ha_enabled + && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + then + Error (Api_errors.ha_is_enabled, []) + else + Ok () + | `resize_online -> + if + ha_enabled + && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + then + Error (Api_errors.ha_is_enabled, []) + else + Ok () + | `snapshot when record.Db_actions.vDI_sharable -> + Error (Api_errors.vdi_is_sharable, [_ref]) + | (`snapshot | `copy) when reset_on_boot -> + Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, []) + | `snapshot -> + if List.exists (fun (_, op) -> op = `copy) current_ops then + Error + ( Api_errors.operation_not_allowed + , ["Snapshot operation not allowed during copy."] + ) + else + Ok () + | `copy -> + if List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] then + Error + ( Api_errors.operation_not_allowed + , [ + "VDI containing HA statefile or redo log cannot be copied (check \ + the VDI's allowed operations)." + ] + ) + else + Ok () + | `enable_cbt | `disable_cbt -> + if record.Db_actions.vDI_is_a_snapshot then + Error (Api_errors.operation_not_allowed, ["VDI is a snapshot: " ^ _ref]) + else if not (List.mem record.Db_actions.vDI_type [`user; `system]) then + Error + ( Api_errors.vdi_incompatible_type + , [_ref; Record_util.vdi_type_to_string record.Db_actions.vDI_type] + ) + else if reset_on_boot then + Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, []) + else + Ok () + | `mirror + | `clone + | `generate_config + | `force_unlock + | `set_on_boot + | `list_changed_blocks + | `blocked + | `update -> + Ok () let assert_operation_valid ~__context ~self ~(op : API.vdi_operations) = let pool = Helpers.get_pool ~__context in let ha_enabled = Db.Pool.get_ha_enabled ~__context ~self:pool in let all = Db.VDI.get_record_internal ~__context ~self in match check_operation_error ~__context ha_enabled all self op with - | None -> + | Ok () -> () - | Some (a, b) -> + | Error (a, b) -> raise (Api_errors.Server_error (a, b)) let update_allowed_operations_internal ~__context ~self ~sr_records ~pbd_records @@ -501,7 +483,7 @@ let update_allowed_operations_internal ~__context ~self ~sr_records ~pbd_records check_operation_error ~__context ~sr_records ~pbd_records ?vbd_records ha_enabled all self x with - | None -> + | Ok () -> [x] | _ -> [] diff --git a/ocaml/xapi/xapi_vdi.mli b/ocaml/xapi/xapi_vdi.mli index 0731a5f608..45569a12fd 100644 --- a/ocaml/xapi/xapi_vdi.mli +++ b/ocaml/xapi/xapi_vdi.mli @@ -28,7 +28,7 @@ val check_operation_error : -> Db_actions.vDI_t -> API.ref_VDI -> API.vdi_operations - -> (string * string list) option + -> (unit, string * string list) Result.t (** Checks to see if an operation is valid in this state. Returns Some exception if not and None if everything is ok. *) From e40b3fc95601abd99d16e2c405c1878467f446f7 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 31 Oct 2024 14:12:58 +0000 Subject: [PATCH 21/30] datamodel: Add all VDI operations to the SR operations variant There's no seeming reason these were missing, and they need to be added to be able to map the VDI operations to SR ones for better error messages Signed-off-by: Pau Ruiz Safont --- ocaml/idl/datamodel.ml | 7 +++++++ ocaml/idl/datamodel_common.ml | 2 +- ocaml/idl/schematest.ml | 2 +- ocaml/tests/record_util/old_record_util.ml | 15 +++++++++++++++ ocaml/xapi-cli-server/record_util.ml | 14 ++++++++++++++ 5 files changed, 38 insertions(+), 2 deletions(-) diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index 5fb25cd26a..e21369be25 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -4181,6 +4181,13 @@ module SR = struct , "Exporting a bitmap that shows the changed blocks between two VDIs" ) ; ("vdi_set_on_boot", "Setting the on_boot field of the VDI") + ; ("vdi_blocked", "Blocking other operations for a VDI") + ; ("vdi_copy", "Copying the VDI") + ; ("vdi_force_unlock", "Forcefully unlocking the VDI") + ; ("vdi_forget", "Forgetting about the VDI") + ; ("vdi_generate_config", "Generating the configuration of the VDI") + ; ("vdi_resize_online", "Resizing the VDI online") + ; ("vdi_update", "Refreshing the fields on the VDI") ; ("pbd_create", "Creating a PBD for this SR") ; ("pbd_destroy", "Destroying one of this SR's PBDs") ] diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml index 3fb163cc96..a5fb8bd381 100644 --- a/ocaml/idl/datamodel_common.ml +++ b/ocaml/idl/datamodel_common.ml @@ -10,7 +10,7 @@ open Datamodel_roles to leave a gap for potential hotfixes needing to increment the schema version.*) let schema_major_vsn = 5 -let schema_minor_vsn = 783 +let schema_minor_vsn = 784 (* Historical schema versions just in case this is useful later *) let rio_schema_major_vsn = 5 diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index 016a90960f..595289dfd2 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "8fcd8892ec0c7d130b0da44c5fd3990b" +let last_known_schema_hash = "b427bac09aca4eabc9407738a9155326" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/tests/record_util/old_record_util.ml b/ocaml/tests/record_util/old_record_util.ml index c854f27f5a..855a2b74b7 100644 --- a/ocaml/tests/record_util/old_record_util.ml +++ b/ocaml/tests/record_util/old_record_util.ml @@ -341,6 +341,21 @@ let sr_operation_to_string : API.storage_operations -> string = function "PBD.create" | `pbd_destroy -> "PBD.destroy" + (* The following ones were added after the file got introduced *) + | `vdi_blocked -> + "VDI.blocked" + | `vdi_copy -> + "VDI.copy" + | `vdi_force_unlock -> + "VDI.force_unlock" + | `vdi_forget -> + "VDI.forget" + | `vdi_generate_config -> + "VDI.generate_config" + | `vdi_resize_online -> + "VDI.resize_online" + | `vdi_update -> + "VDI.update" let vbd_operation_to_string = function | `attach -> diff --git a/ocaml/xapi-cli-server/record_util.ml b/ocaml/xapi-cli-server/record_util.ml index a7a4dd2ec7..d28b6b5f76 100644 --- a/ocaml/xapi-cli-server/record_util.ml +++ b/ocaml/xapi-cli-server/record_util.ml @@ -160,6 +160,20 @@ let sr_operation_to_string : API.storage_operations -> string = function "VDI.data_destroy" | `vdi_list_changed_blocks -> "VDI.list_changed_blocks" + | `vdi_blocked -> + "VDI.blocked" + | `vdi_copy -> + "VDI.copy" + | `vdi_force_unlock -> + "VDI.force_unlock" + | `vdi_forget -> + "VDI.forget" + | `vdi_generate_config -> + "VDI.generate_config" + | `vdi_resize_online -> + "VDI.resize_online" + | `vdi_update -> + "VDI.update" | `pbd_create -> "PBD.create" | `pbd_destroy -> From 8b7cfd6f90e80588cb09f9db073695e1fe90f7c6 Mon Sep 17 00:00:00 2001 From: Changlei Li Date: Wed, 6 Nov 2024 16:41:11 +0800 Subject: [PATCH 22/30] CA-401498: Fix test_systemd occasional timeout CI check "Run OCaml tests" on github failed occasionally. The cause is test_systemd timeout. Add sleep 1 between server start and client to fix it. Signed-off-by: Changlei Li --- ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t index bfa73c84c6..e3b19dbaff 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t @@ -10,6 +10,7 @@ $ ./test_systemd.exe --server & @systemd.socket READY=1 + $ sleep 1 $ ./test_systemd.exe --notify $ wait From 6a666827dc6caae0cffb645f8a8511be8e04ffee Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 5 Nov 2024 17:06:10 +0000 Subject: [PATCH 23/30] CA-399629: make daily-license-check aware of never The license field is not always a valid date, sometimes it contains a special value to signify there's no expiry date. Signed-off-by: Pau Ruiz Safont --- ocaml/license/daily_license_check.ml | 37 +++++++++++-------- .../tests/alerts/test_daily_license_check.ml | 1 + 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/ocaml/license/daily_license_check.ml b/ocaml/license/daily_license_check.ml index 3b6edecbb3..9a376d0e59 100644 --- a/ocaml/license/daily_license_check.ml +++ b/ocaml/license/daily_license_check.ml @@ -9,28 +9,35 @@ let seconds_per_30_days = 30. *. seconds_per_day let days_to_expiry now expiry = (expiry /. seconds_per_day) -. (now /. seconds_per_day) +let get_expiry_date license = + List.assoc_opt "expiry" license + |> Fun.flip Option.bind (fun e -> if e = "never" then None else Some e) + |> Option.map Xapi_stdext_date.Date.of_iso8601 + |> Option.map Xapi_stdext_date.Date.to_unix_time + let get_hosts all_license_params threshold = List.fold_left (fun acc (name_label, license_params) -> - let expiry = List.assoc "expiry" license_params in - let expiry = Xapi_stdext_date.Date.(to_unix_time (of_iso8601 expiry)) in - if expiry < threshold then - name_label :: acc - else - acc + match get_expiry_date license_params with + | Some expiry when expiry < threshold -> + name_label :: acc + | _ -> + acc ) [] all_license_params let check_license now pool_license_state all_license_params = - let expiry = List.assoc "expiry" pool_license_state in - let expiry = Xapi_stdext_date.Date.(to_unix_time (of_iso8601 expiry)) in - let days = days_to_expiry now expiry in - if days <= 0. then - Expired (get_hosts all_license_params now) - else if days <= 30. then - Expiring (get_hosts all_license_params (now +. seconds_per_30_days)) - else - Good + match get_expiry_date pool_license_state with + | Some expiry -> + let days = days_to_expiry now expiry in + if days <= 0. then + Expired (get_hosts all_license_params now) + else if days <= 30. then + Expiring (get_hosts all_license_params (now +. seconds_per_30_days)) + else + Good + | None -> + Good let get_info_from_db rpc session_id = let pool = List.hd (XenAPI.Pool.get_all ~rpc ~session_id) in diff --git a/ocaml/tests/alerts/test_daily_license_check.ml b/ocaml/tests/alerts/test_daily_license_check.ml index 067d93288c..025ad19ef8 100644 --- a/ocaml/tests/alerts/test_daily_license_check.ml +++ b/ocaml/tests/alerts/test_daily_license_check.ml @@ -47,6 +47,7 @@ let expiry_samples = [ (([("expiry", "20170101T00:00:00Z")], []), Good) ; (([("expiry", "20160701T04:01:00Z")], []), Good) + ; (([("expiry", "never")], []), Good) ; (([("expiry", "20160701T04:00:00Z")], []), Expiring []) ; (([("expiry", "20160616T00:00:00Z")], []), Expiring []) ; (([("expiry", "20160601T04:00:01Z")], []), Expiring []) From b4f90280404abbc28e9920b1b40ab0c880aee8b3 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 30 Sep 2024 17:48:40 +0100 Subject: [PATCH 24/30] license_check: clean up interface Internalize the concept of expiration dates, including "never", and use Date to manage all the dates, instead of using unix time Signed-off-by: Pau Ruiz Safont --- ocaml/tests/test_pool_license.ml | 11 +---------- ocaml/xapi/license_check.ml | 27 +++++++++++++++++---------- ocaml/xapi/license_check.mli | 5 +++-- ocaml/xapi/xapi_pool.ml | 11 +---------- 4 files changed, 22 insertions(+), 32 deletions(-) diff --git a/ocaml/tests/test_pool_license.ml b/ocaml/tests/test_pool_license.ml index aad9a145c1..4e0f528e19 100644 --- a/ocaml/tests/test_pool_license.ml +++ b/ocaml/tests/test_pool_license.ml @@ -198,16 +198,7 @@ module PoolLicenseState = Generic.MakeStateful (struct Xapi_pool_license.get_lowest_edition_with_expiry ~__context ~hosts ~edition_to_int in - let pool_expiry = - match expiry with - | None -> - "never" - | Some date -> - if date = Date.of_unix_time License_check.never then - "never" - else - Date.to_rfc3339 date - in + let pool_expiry = License_check.serialize_expiry expiry in (pool_edition, pool_expiry) (* Tuples of (host_license_state list, expected pool license state) *) diff --git a/ocaml/xapi/license_check.ml b/ocaml/xapi/license_check.ml index e6df516f35..1d2a4f65ed 100644 --- a/ocaml/xapi/license_check.ml +++ b/ocaml/xapi/license_check.ml @@ -13,27 +13,34 @@ *) module L = Debug.Make (struct let name = "license" end) -let never, _ = - let start_of_epoch = Unix.gmtime 0. in - Unix.mktime {start_of_epoch with Unix.tm_year= 130} +module Date = Xapi_stdext_date.Date + +let never = Ptime.of_year 2030 |> Option.get |> Date.of_ptime + +let serialize_expiry = function + | None -> + "never" + | Some date when Date.equal date never -> + "never" + | Some date -> + Date.to_rfc3339 date let get_expiry_date ~__context ~host = let license = Db.Host.get_license_params ~__context ~self:host in - if List.mem_assoc "expiry" license then - Some (Xapi_stdext_date.Date.of_iso8601 (List.assoc "expiry" license)) - else - None + List.assoc_opt "expiry" license + |> Fun.flip Option.bind (fun e -> if e = "never" then None else Some e) + |> Option.map Xapi_stdext_date.Date.of_iso8601 let check_expiry ~__context ~host = let expired = match get_expiry_date ~__context ~host with | None -> false (* No expiry date means no expiry :) *) - | Some date -> - Unix.time () > Xapi_stdext_date.Date.to_unix_time date + | Some expiry -> + Xapi_stdext_date.Date.(is_later ~than:expiry (now ())) in if expired then - raise (Api_errors.Server_error (Api_errors.license_expired, [])) + raise Api_errors.(Server_error (license_expired, [])) let vm ~__context _vm = (* Here we check that the license is still valid - this should be the only place where this happens *) diff --git a/ocaml/xapi/license_check.mli b/ocaml/xapi/license_check.mli index 610faaf9e0..10a5ca6aca 100644 --- a/ocaml/xapi/license_check.mli +++ b/ocaml/xapi/license_check.mli @@ -16,8 +16,9 @@ * @group Licensing *) -val never : float -(** The expiry date that is considered to be "never". *) +val serialize_expiry : Xapi_stdext_date.Date.t option -> string +(** Get the string corresponding with the expiry that can be stored in xapi's + DB *) val get_expiry_date : __context:Context.t -> host:API.ref_host -> Xapi_stdext_date.Date.t option diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 044507bc9c..acb22cdcfc 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -3179,16 +3179,7 @@ let get_license_state ~__context ~self:_ = Xapi_pool_license.get_lowest_edition_with_expiry ~__context ~hosts ~edition_to_int in - let pool_expiry = - match expiry with - | None -> - "never" - | Some date -> - if date = Date.of_unix_time License_check.never then - "never" - else - Date.to_rfc3339 date - in + let pool_expiry = License_check.serialize_expiry expiry in [("edition", pool_edition); ("expiry", pool_expiry)] let apply_edition ~__context ~self:_ ~edition = From 365af695d8e3882cba2bc338472bc943ac14597c Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 30 Sep 2024 17:50:17 +0100 Subject: [PATCH 25/30] license_check: update the concept of "never" This now matches the concept of xenserver's licensing daemon, which changed it in the last year. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/license_check.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi/license_check.ml b/ocaml/xapi/license_check.ml index 1d2a4f65ed..f5cb38225d 100644 --- a/ocaml/xapi/license_check.ml +++ b/ocaml/xapi/license_check.ml @@ -15,7 +15,7 @@ module L = Debug.Make (struct let name = "license" end) module Date = Xapi_stdext_date.Date -let never = Ptime.of_year 2030 |> Option.get |> Date.of_ptime +let never = Ptime.of_year 2100 |> Option.get |> Date.of_ptime let serialize_expiry = function | None -> From 7db5e8b9d1bb9d6db22a5c18cc5d6a1b8f68ec4d Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 5 Nov 2024 16:59:06 +0000 Subject: [PATCH 26/30] daily_license_check: Do not use floats for handling time Instead use the Date, Ptime and Ptime.Span Signed-off-by: Pau Ruiz Safont --- ocaml/license/daily_license_check.ml | 46 +++++++++++-------- ocaml/license/daily_license_check_main.ml | 2 +- ocaml/license/dune | 1 + .../tests/alerts/test_daily_license_check.ml | 7 ++- 4 files changed, 31 insertions(+), 25 deletions(-) diff --git a/ocaml/license/daily_license_check.ml b/ocaml/license/daily_license_check.ml index 9a376d0e59..9a84a415de 100644 --- a/ocaml/license/daily_license_check.ml +++ b/ocaml/license/daily_license_check.ml @@ -1,39 +1,45 @@ module XenAPI = Client.Client +module Date = Xapi_stdext_date.Date type result = Good | Expiring of string list | Expired of string list -let seconds_per_day = 3600. *. 24. +let a_month_after date = + let days_30 = Ptime.Span.unsafe_of_d_ps (30, 0L) in + Date.to_ptime date + |> (fun d -> Ptime.add_span d days_30) + |> Option.fold ~none:date ~some:Date.of_ptime -let seconds_per_30_days = 30. *. seconds_per_day +let days_to_expiry ~expiry now = + Ptime.diff (Date.to_ptime expiry) (Date.to_ptime now) |> Ptime.Span.to_d_ps + |> fun (days, picosec) -> + let with_fraction = if days < 0 then Fun.id else fun d -> d + 1 in + if picosec = 0L then days else with_fraction days -let days_to_expiry now expiry = - (expiry /. seconds_per_day) -. (now /. seconds_per_day) - -let get_expiry_date license = - List.assoc_opt "expiry" license +let get_expiry_date pool_license = + List.assoc_opt "expiry" pool_license |> Fun.flip Option.bind (fun e -> if e = "never" then None else Some e) |> Option.map Xapi_stdext_date.Date.of_iso8601 - |> Option.map Xapi_stdext_date.Date.to_unix_time let get_hosts all_license_params threshold = - List.fold_left - (fun acc (name_label, license_params) -> - match get_expiry_date license_params with - | Some expiry when expiry < threshold -> - name_label :: acc - | _ -> - acc + List.filter_map + (fun (name_label, license_params) -> + let ( let* ) = Option.bind in + let* expiry = get_expiry_date license_params in + if Date.is_earlier expiry ~than:threshold then + Some name_label + else + None ) - [] all_license_params + all_license_params let check_license now pool_license_state all_license_params = match get_expiry_date pool_license_state with | Some expiry -> - let days = days_to_expiry now expiry in - if days <= 0. then + let days = days_to_expiry ~expiry now in + if days <= 0 then Expired (get_hosts all_license_params now) - else if days <= 30. then - Expiring (get_hosts all_license_params (now +. seconds_per_30_days)) + else if days <= 30 then + Expiring (get_hosts all_license_params (a_month_after now)) else Good | None -> diff --git a/ocaml/license/daily_license_check_main.ml b/ocaml/license/daily_license_check_main.ml index 8a2202e2a5..58ba7258e1 100644 --- a/ocaml/license/daily_license_check_main.ml +++ b/ocaml/license/daily_license_check_main.ml @@ -14,7 +14,7 @@ let _ = in Xapi_stdext_pervasives.Pervasiveext.finally (fun () -> - let now = Unix.time () in + let now = Xapi_stdext_date.Date.now () in let pool, pool_license_state, all_license_params = Daily_license_check.get_info_from_db rpc session_id in diff --git a/ocaml/license/dune b/ocaml/license/dune index f37d069598..942f41733f 100644 --- a/ocaml/license/dune +++ b/ocaml/license/dune @@ -4,6 +4,7 @@ (modules daily_license_check) (libraries http_lib + ptime xapi-consts xapi-client xapi-types diff --git a/ocaml/tests/alerts/test_daily_license_check.ml b/ocaml/tests/alerts/test_daily_license_check.ml index 025ad19ef8..47a6fb763a 100644 --- a/ocaml/tests/alerts/test_daily_license_check.ml +++ b/ocaml/tests/alerts/test_daily_license_check.ml @@ -36,8 +36,7 @@ let expiry = in Alcotest.testable pp_expiry equals -let check_time = - Xapi_stdext_date.Date.(to_unix_time (of_iso8601 "20160601T04:00:00Z")) +let check_time = Xapi_stdext_date.Date.(of_iso8601 "20160601T04:00:00Z") let test_expiry ((pool_license_state, all_license_params), expected) () = let result = check_license check_time pool_license_state all_license_params in @@ -59,7 +58,7 @@ let expiry_samples = ; ("host1", [("expiry", "20160615T00:00:00Z")]) ] ) - , Expiring ["host1"; "host0"] + , Expiring ["host0"; "host1"] ) ; ( ( [("expiry", "20160615T00:00:00Z")] , [ @@ -75,7 +74,7 @@ let expiry_samples = ; ("host1", [("expiry", "20150601T00:00:00Z")]) ] ) - , Expired ["host1"; "host0"] + , Expired ["host0"; "host1"] ) ; ( ( [("expiry", "20160101T00:00:00Z")] , [ From 77e3a3e820c19ee5722d0c09f9972f9f762c296f Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 21 Oct 2024 19:08:14 +0100 Subject: [PATCH 27/30] CA-400060: Introduce new field for sm class `host_pending_features` represents the features that are available on some of the hosts in the pool, but not all of them. Note the way this field is initialised in the `SM.create` code means that it will only contain new features that appear during upgrade. This means a feature that is added into `SM.features` during creation will stay there even if it is not available on all hosts. But we should not end up in this situation in the first place. Also change the meaning of `Sm.features` to be pool-wide. Signed-off-by: Vincent Liu --- ocaml/idl/datamodel.ml | 10 ++++++++++ ocaml/idl/datamodel_common.ml | 2 +- ocaml/idl/datamodel_lifecycle.ml | 2 ++ ocaml/idl/schematest.ml | 2 +- ocaml/sdk-gen/csharp/gen_csharp_binding.ml | 4 ++++ ocaml/tests/common/test_common.ml | 9 +++++---- 6 files changed, 23 insertions(+), 6 deletions(-) diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index e21369be25..83d5d1740c 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -5001,11 +5001,21 @@ module SM = struct , "capabilities of the SM plugin, with capability version \ numbers" ) + ; ( Changed + , "24.37.0" + , "features are now pool-wide, instead of what is available on \ + the coordinator sm" + ) ] ~ty:(Map (String, Int)) "features" "capabilities of the SM plugin, with capability version numbers" ~default_value:(Some (VMap [])) + ; field ~in_oss_since:None ~qualifier:DynamicRO ~lifecycle:[] + ~ty:(Map (Ref _host, Set String)) + ~internal_only:true "host_pending_features" + "SM features that are waiting to be declared per host." + ~default_value:(Some (VMap [])) ; field ~lifecycle:[(Published, rel_miami, "additional configuration")] ~default_value:(Some (VMap [])) diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml index a5fb8bd381..80c5076fef 100644 --- a/ocaml/idl/datamodel_common.ml +++ b/ocaml/idl/datamodel_common.ml @@ -10,7 +10,7 @@ open Datamodel_roles to leave a gap for potential hotfixes needing to increment the schema version.*) let schema_major_vsn = 5 -let schema_minor_vsn = 784 +let schema_minor_vsn = 785 (* Historical schema versions just in case this is useful later *) let rio_schema_major_vsn = 5 diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 60e46afb03..fb728685a5 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -51,6 +51,8 @@ let prototyped_of_field = function Some "22.26.0" | "VTPM", "persistence_backend" -> Some "22.26.0" + | "SM", "host_pending_features" -> + Some "24.36.0-next" | "host", "last_update_hash" -> Some "24.10.0" | "host", "pending_guidances_full" -> diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index 595289dfd2..2c4a87453b 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "b427bac09aca4eabc9407738a9155326" +let last_known_schema_hash = "18df8c33434e3df1982e11ec55d1f3f8" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/sdk-gen/csharp/gen_csharp_binding.ml b/ocaml/sdk-gen/csharp/gen_csharp_binding.ml index bbf3360c89..c9112b680e 100644 --- a/ocaml/sdk-gen/csharp/gen_csharp_binding.ml +++ b/ocaml/sdk-gen/csharp/gen_csharp_binding.ml @@ -1184,6 +1184,10 @@ and json_serialization_attr fr = (exposed_class_name v) | Map (String, String) -> sprintf "\n [JsonConverter(typeof(StringStringMapConverter))]" + | Map (Ref u, Set String) -> + sprintf + "\n [JsonConverer(typeof(XenRefStringSetMapConverter<%s>))]" + (exposed_class_name u) | Map (Ref _, _) | Map (_, Ref _) -> failwith (sprintf "Need converter for %s" fr.field_name) | _ -> diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index 7908eb4e3f..297a68398c 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -342,12 +342,13 @@ let default_sm_features = let make_sm ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ()) ?(_type = "sm") ?(name_label = "") ?(name_description = "") ?(vendor = "") ?(copyright = "") ?(version = "") ?(required_api_version = "") - ?(capabilities = []) ?(features = default_sm_features) ?(configuration = []) - ?(other_config = []) ?(driver_filename = "/dev/null") - ?(required_cluster_stack = []) () = + ?(capabilities = []) ?(features = default_sm_features) + ?(host_pending_features = []) ?(configuration = []) ?(other_config = []) + ?(driver_filename = "/dev/null") ?(required_cluster_stack = []) () = Db.SM.create ~__context ~ref ~uuid ~_type ~name_label ~name_description ~vendor ~copyright ~version ~required_api_version ~capabilities ~features - ~configuration ~other_config ~driver_filename ~required_cluster_stack ; + ~host_pending_features ~configuration ~other_config ~driver_filename + ~required_cluster_stack ; ref let make_sr ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ()) From 7f14bfcc2ee6ed583330263ae47dc87a7ba665ea Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 21 Oct 2024 19:10:44 +0100 Subject: [PATCH 28/30] CA-400060: Sm feature intersection NEW Sm features that are found during an upgrde will now only be available when they are available on all of the hosts. Add logic to keep track of features that are only availabe on some of the hosts in the pool, and declare them in `Sm.feature` only when all of the hosts have declared this. Also move `Storage_access.on_xapi_start` to `dbsync_slave` as this needs to be run on all hosts for each sm to get a chance to say what features they have. Signed-off-by: Vincent Liu --- ocaml/tests/test_sm_features.ml | 42 +++++++++++++++++++ ocaml/xapi/dbsync_master.ml | 1 - ocaml/xapi/dbsync_slave.ml | 3 ++ ocaml/xapi/smint.ml | 17 ++++++++ ocaml/xapi/xapi_globs.ml | 2 + ocaml/xapi/xapi_sm.ml | 71 +++++++++++++++++++++++++++++++-- 6 files changed, 131 insertions(+), 5 deletions(-) diff --git a/ocaml/tests/test_sm_features.ml b/ocaml/tests/test_sm_features.ml index a78de4a54a..091d58d4f6 100644 --- a/ocaml/tests/test_sm_features.ml +++ b/ocaml/tests/test_sm_features.ml @@ -160,6 +160,21 @@ let test_sequences = } ] +let test_intersection_sequences = + ( { + raw= ["VDI_MIRROR"] + ; smapiv1_features= [(Vdi_mirror, 1L)] + ; smapiv2_features= ["VDI_MIRROR/1"] + ; sm= {capabilities= ["VDI_MIRROR"]; features= [("VDI_MIRROR", 1L)]} + } + , { + raw= ["VDI_MIRROR"] + ; smapiv1_features= [(Vdi_mirror, 2L)] + ; smapiv2_features= ["VDI_MIRROR/2"] + ; sm= {capabilities= ["VDI_MIRROR"]; features= [("VDI_MIRROR", 1L)]} + } + ) + module ParseSMAPIv1Features = Generic.MakeStateless (struct module Io = struct type input_t = string list @@ -249,6 +264,32 @@ module CreateSMObject = Generic.MakeStateful (struct ) end) +module CompatSMFeatures = Generic.MakeStateless (struct + module Io = struct + type input_t = (string * string) list + + type output_t = string list + + let string_of_input_t = Test_printers.(list (fun (x, y) -> x ^ "," ^ y)) + + let string_of_output_t = Test_printers.(list Fun.id) + end + + let transform l = + List.split l |> fun (x, y) -> + (Smint.parse_string_int64_features x, Smint.parse_string_int64_features y) + |> fun (x, y) -> Smint.compat_features x y |> List.map Smint.unparse_feature + + let tests = + let r1, r2 = test_intersection_sequences in + `QuickAndAutoDocumented + [ + ( List.combine r1.smapiv2_features r2.smapiv2_features + , r1.smapiv2_features + ) + ] +end) + let tests = List.map (fun (s, t) -> (Format.sprintf "sm_features_%s" s, t)) @@ -256,4 +297,5 @@ let tests = ("parse_smapiv1_features", ParseSMAPIv1Features.tests) ; ("create_smapiv2_features", CreateSMAPIv2Features.tests) ; ("create_sm_object", CreateSMObject.tests) + ; ("compat_sm_features", CompatSMFeatures.tests) ] diff --git a/ocaml/xapi/dbsync_master.ml b/ocaml/xapi/dbsync_master.ml index aad7434dc0..cac05f37e8 100644 --- a/ocaml/xapi/dbsync_master.ml +++ b/ocaml/xapi/dbsync_master.ml @@ -373,7 +373,6 @@ let update_env __context = in the db for cancelling *) Cancel_tasks.cancel_tasks_on_host ~__context ~host_opt:None ; (* Update the SM plugin table *) - Storage_access.on_xapi_start ~__context ; if !Xapi_globs.create_tools_sr then create_tools_sr_noexn __context ; ensure_vm_metrics_records_exist_noexn __context ; diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 3b90a3a05c..942d308107 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -362,6 +362,9 @@ let update_env __context sync_keys = switched_sync Xapi_globs.sync_refresh_localhost_info (fun () -> refresh_localhost_info ~__context info ) ; + switched_sync Xapi_globs.sync_sm_records (fun () -> + Storage_access.on_xapi_start ~__context + ) ; switched_sync Xapi_globs.sync_local_vdi_activations (fun () -> Storage_access.refresh_local_vdi_activations ~__context ) ; diff --git a/ocaml/xapi/smint.ml b/ocaml/xapi/smint.ml index 25019a1829..8797e0d7cf 100644 --- a/ocaml/xapi/smint.ml +++ b/ocaml/xapi/smint.ml @@ -110,6 +110,8 @@ let capability_of_feature : feature -> capability = fst let known_features = List.map fst string_to_capability_table +let unparse_feature (f, v) = f ^ "/" ^ Int64.to_string v + let parse_string_int64_features features = let scan feature = match String.split_on_char '/' feature with @@ -134,6 +136,21 @@ let parse_string_int64_features features = |> List.filter_map scan |> List.sort_uniq (fun (x, _) (y, _) -> compare x y) +(** [compat_features features1 features2] finds the compatible features in the input +features lists. We assume features backwards compatible, i.e. if there are FOO/1 and + FOO/2 are present, then we assume they can both do FOO/1*) +let compat_features features1 features2 = + let features2 = List.to_seq features2 |> Hashtbl.of_seq in + List.filter_map + (fun (f1, v1) -> + match Hashtbl.find_opt features2 f1 with + | Some v2 -> + Some (f1, Int64.min v1 v2) + | None -> + None + ) + features1 + let parse_capability_int64_features strings = List.map (function c, v -> (List.assoc c string_to_capability_table, v)) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 9a461a4e7b..efdcabfbdb 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -340,6 +340,8 @@ let sync_switch_off = "nosync" (* dbsync_slave *) let sync_local_vdi_activations = "sync_local_vdi_activations" +let sync_sm_records = "sync_sm_records" + let sync_create_localhost = "sync_create_localhost" let sync_set_cache_sr = "sync_set_cache_sr" diff --git a/ocaml/xapi/xapi_sm.ml b/ocaml/xapi/xapi_sm.ml index ba3d7c8242..9badc179c0 100644 --- a/ocaml/xapi/xapi_sm.ml +++ b/ocaml/xapi/xapi_sm.ml @@ -18,6 +18,8 @@ (* The SMAPIv1 plugins are a static set in the filesystem. The SMAPIv2 plugins are a dynamic set hosted in driver domains. *) +module Listext = Xapi_stdext_std.Listext + let finally = Xapi_stdext_pervasives.Pervasiveext.finally (* We treat versions as '.'-separated integer lists under the usual @@ -36,7 +38,7 @@ let create_from_query_result ~__context q = if String.lowercase_ascii q.driver <> "storage_access" then ( let features = Smint.parse_string_int64_features q.features in let capabilities = List.map fst features in - info "Registering SM plugin %s (version %s)" + info "%s Registering SM plugin %s (version %s)" __FUNCTION__ (String.lowercase_ascii q.driver) q.version ; Db.SM.create ~__context ~ref:r ~uuid:u @@ -44,19 +46,80 @@ let create_from_query_result ~__context q = ~name_label:q.name ~name_description:q.description ~vendor:q.vendor ~copyright:q.copyright ~version:q.version ~required_api_version:q.required_api_version ~capabilities ~features - ~configuration:q.configuration ~other_config:[] + ~host_pending_features:[] ~configuration:q.configuration ~other_config:[] ~driver_filename:(Sm_exec.cmd_name q.driver) ~required_cluster_stack:q.required_cluster_stack ) +let find_pending_features existing_features features = + Listext.List.set_difference features existing_features + +(** [addto_pending_hosts_features ~__context self new_features] will add [new_features] +to pending features of host [self]. It then returns a list of currently pending features *) +let addto_pending_hosts_features ~__context self new_features = + let host = Helpers.get_localhost ~__context in + let new_features = + List.map (fun (f, v) -> Smint.unparse_feature (f, v)) new_features + in + let curr_pending_features = + Db.SM.get_host_pending_features ~__context ~self + |> List.remove_assoc host + |> List.cons (host, new_features) + in + Db.SM.set_host_pending_features ~__context ~self ~value:curr_pending_features ; + List.iter + (fun (h, f) -> + debug "%s: current pending features for host %s, sm %s, features %s" + __FUNCTION__ (Ref.string_of h) (Ref.string_of self) (String.concat "," f) + ) + curr_pending_features ; + List.map + (fun (h, f) -> (h, Smint.parse_string_int64_features f)) + curr_pending_features + +let valid_hosts_pending_features ~__context pending_features = + if List.length pending_features <> List.length (Db.Host.get_all ~__context) + then ( + debug "%s: Not enough hosts have registered their sm features" __FUNCTION__ ; + [] + ) else + List.map snd pending_features |> fun l -> + List.fold_left Smint.compat_features + (* The list in theory cannot be empty due to the if condition check, but do + this just in case *) + (List.nth_opt l 0 |> Option.fold ~none:[] ~some:Fun.id) + (List.tl l) + +let remove_valid_features_from_pending ~__context ~self valid_features = + let valid_features = List.map Smint.unparse_feature valid_features in + let new_pending_feature = + Db.SM.get_host_pending_features ~__context ~self + |> List.map (fun (h, pending_features) -> + (h, Listext.List.set_difference pending_features valid_features) + ) + in + Db.SM.set_host_pending_features ~__context ~self ~value:new_pending_feature + let update_from_query_result ~__context (self, r) q_result = let open Storage_interface in let _type = String.lowercase_ascii q_result.driver in if _type <> "storage_access" then ( let driver_filename = Sm_exec.cmd_name q_result.driver in - let features = Smint.parse_string_int64_features q_result.features in + let existing_features = Db.SM.get_features ~__context ~self in + let new_features = + Smint.parse_string_int64_features q_result.features + |> find_pending_features existing_features + |> addto_pending_hosts_features ~__context self + |> valid_hosts_pending_features ~__context + in + remove_valid_features_from_pending ~__context ~self new_features ; + let features = existing_features @ new_features in + List.iter + (fun (f, v) -> debug "%s: declaring new features %s:%Ld" __FUNCTION__ f v) + new_features ; + let capabilities = List.map fst features in - info "Registering SM plugin %s (version %s)" + info "%s Registering SM plugin %s (version %s)" __FUNCTION__ (String.lowercase_ascii q_result.driver) q_result.version ; if r.API.sM_type <> _type then From 2ec0ac68689ad24c38cf789bd094f4db058bda19 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 21 Oct 2024 19:14:43 +0100 Subject: [PATCH 29/30] CA-400060: Reject pool join if sm features mismatch Implement a new `assert_sm_features_compatiable` in pre_join_checks so that if the joining host does not have compatible sm features, it will be denied entry into the pool. Signed-off-by: Vincent Liu --- ocaml/idl/datamodel_errors.ml | 7 +++++ ocaml/xapi-consts/api_errors.ml | 3 ++ ocaml/xapi/xapi_pool.ml | 49 ++++++++++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index aead3e0abc..80b36218f2 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -890,6 +890,13 @@ let _ = "The host joining the pool has different CA certificates from the pool \ coordinator while using the same name, uninstall them and try again." () ; + error Api_errors.pool_joining_sm_features_incompatible + ["pool_sm_ref"; "candidate_sm_ref"] + ~doc: + "The host joining the pool has an incompatible set of sm features from \ + the pool coordinator. Make sure the sm are of the same versions and try \ + again." + () ; (* External directory service *) error Api_errors.subject_cannot_be_resolved [] diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index ebafbdaa11..53e9e06176 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -754,6 +754,9 @@ let pool_joining_host_tls_verification_mismatch = let pool_joining_host_ca_certificates_conflict = add_error "POOL_JOINING_HOST_CA_CERTIFICATES_CONFLICT" +let pool_joining_sm_features_incompatible = + add_error "POOL_JOINING_SM_FEATURES_INCOMPATIBLE" + (*workload balancing*) let wlb_not_initialized = add_error "WLB_NOT_INITIALIZED" diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index acb22cdcfc..eb716ce766 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -839,6 +839,52 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = ) ) in + let assert_sm_features_compatible () = + (* We consider the case where the existing pool has FOO/m, and the candidate having FOO/n, + where n >= m, to be compatible. Not vice versa. *) + let features_compatible coor_features candidate_features = + (* The pool features must not be reduced or downgraded, although it is fine + the other way around. *) + Smint.compat_features coor_features candidate_features = coor_features + in + + let master_sms = Client.SM.get_all ~rpc ~session_id in + List.iter + (fun sm -> + let master_sm_type = Client.SM.get_type ~rpc ~session_id ~self:sm in + let candidate_sm_ref, candidate_sm_rec = + match + Db.SM.get_records_where ~__context + ~expr:(Eq (Field "type", Literal master_sm_type)) + with + | [(sm_ref, sm_rec)] -> + (sm_ref, sm_rec) + | _ -> + raise + Api_errors.( + Server_error + ( pool_joining_sm_features_incompatible + , [Ref.string_of sm; ""] + ) + ) + in + + let coor_sm_features = + Client.SM.get_features ~rpc ~session_id ~self:sm + in + let candidate_sm_features = candidate_sm_rec.API.sM_features in + if not (features_compatible coor_sm_features candidate_sm_features) then + raise + Api_errors.( + Server_error + ( pool_joining_sm_features_incompatible + , [Ref.string_of sm; Ref.string_of candidate_sm_ref] + ) + ) + ) + master_sms + in + (* call pre-join asserts *) assert_pool_size_unrestricted () ; assert_management_interface_exists () ; @@ -872,7 +918,8 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = assert_tls_verification_matches () ; assert_ca_certificates_compatible () ; assert_not_in_updating_on_me () ; - assert_no_hosts_in_updating () + assert_no_hosts_in_updating () ; + assert_sm_features_compatible () let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : API.ref_host = From 3e2e970af2f1c501b5a8ebb76252df2e2babf3f9 Mon Sep 17 00:00:00 2001 From: Colin James Date: Thu, 7 Nov 2024 13:58:39 +0000 Subject: [PATCH 30/30] Document Rbac module Introduces an interface file for the Rbac module within xapi in order to document the intent of each of its functions. Signed-off-by: Colin James --- ocaml/xapi/rbac.ml | 5 --- ocaml/xapi/rbac.mli | 104 ++++++++++++++++++++++++++++++++++++++++++++ quality-gate.sh | 2 +- 3 files changed, 105 insertions(+), 6 deletions(-) create mode 100644 ocaml/xapi/rbac.mli diff --git a/ocaml/xapi/rbac.ml b/ocaml/xapi/rbac.ml index feefcf4143..2b311a7e56 100644 --- a/ocaml/xapi/rbac.ml +++ b/ocaml/xapi/rbac.ml @@ -243,11 +243,6 @@ let assert_permission_name ~__context ~permission = let assert_permission ~__context ~permission = assert_permission_name ~__context ~permission:permission.role_name_label -(* this is necessary to break dependency cycle between rbac and taskhelper *) -let init_task_helper_rbac_has_permission_fn = - if !TaskHelper.rbac_assert_permission_fn = None then - TaskHelper.rbac_assert_permission_fn := Some assert_permission - let has_permission_name ~__context ~permission = let session_id = get_session_of_context ~__context ~permission in is_access_allowed ~__context ~session_id ~permission diff --git a/ocaml/xapi/rbac.mli b/ocaml/xapi/rbac.mli new file mode 100644 index 0000000000..6905379a31 --- /dev/null +++ b/ocaml/xapi/rbac.mli @@ -0,0 +1,104 @@ +(* + * Copyright (c) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val is_access_allowed : + __context:Context.t + -> session_id:[`session] Ref.t + -> permission:string + -> bool +(** Determines whether the session associated with the provided + context has the specified permission. The permission set is cached + (on the coordinator only) to benefit successive queries for the + same session. *) + +val check : + ?extra_dmsg:string + -> ?extra_msg:string + -> ?args:(string * Rpc.t) list + -> ?keys:string list + -> __context:Context.t + -> fn:(unit -> 'a) + -> [`session] Ref.t + -> string + -> 'a +(** [check] executes a function associated with an action if the + session associated with the provided context is authorised to + perform the action. + + The [?extra_dmsg] and [?extra_msg] parameters allow for extra + information in debugging and error messages. + + The [?keys] parameter specifies which fields of a (string -> _) + map are RBAC-protected. It is primarily associated with + auto-generated methods such as add_to_other_config. However, if + [?keys] is non-empty, then [?args] must also be consulted as the + related methods that require this protection specify their key + name as a parameter. Otherwise, [?args] is mostly used to log + calls within the RBAC audit log. *) + +val check_with_new_task : + ?extra_dmsg:string + -> ?extra_msg:string + -> ?task_desc:string + -> ?args:(string * Rpc.t) list + -> fn:(unit -> 'a) + -> [`session] Ref.t + -> string + -> 'a +(** Defined in terms of [check] but using a context associated with a + freshly-created task. *) + +val assert_permission_name : __context:Context.t -> permission:string -> unit +(** Performs a dry run of the [check] function with a no-op action + guarded by the provided permission (as a name). *) + +val assert_permission : + __context:Context.t -> permission:Db_actions.role_t -> unit +(** Performs a dry run of the [check] function with a no-op action + guarded by the provided permission (as a database role). *) + +val has_permission : __context:Context.t -> permission:Db_actions.role_t -> bool +(** [has_permission ctx p] determines if the session associated with + the context [ctx] is authorised to perform a specific action. + + [p] is of the type defined by the generated [Db_actions] module, + as [Xapi_role] simulates a database for the checking of static + role sets (as emitted in [Rbac_static]) and only appeals to the + xapi DB for additional roles. *) + +val is_rbac_enabled_for_http_action : string -> bool +(** [is_rbac_enabled_for_http_action route] determines whether RBAC + checks should be applied to the provided HTTP [route]. + + Some routes are precluded from RBAC checks because they are + assumed to only be used by code paths where RBAC has already been + checked or will be checked internally (e.g. /post_cli). *) + +val permission_of_action : + ?args:(string * Rpc.t) list -> keys:string list -> string -> string +(** Constructs the name of a permission associated with using an + RBAC-protected key with a specified action. + + For example, if [keys] specifies "folder" as a protected key name + for the action SR.remove_from_other_config, the permission name + associated with that is "SR.remove_from_other_config/key:folder" + - which is consistent with the format that [Rbac_static] contains. *) + +val nofn : unit -> unit +(** Named function that does nothing, e.g. (fun _ -> ()). + Used as a dummy action for RBAC checking. *) + +val destroy_session_permissions_tbl : session_id:[`session] Ref.t -> unit +(** Removes any cached permission set for the given session. This is + called when xapi destroys the DB entry for a session. *) diff --git a/quality-gate.sh b/quality-gate.sh index 16a90270b1..db8444b53e 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -25,7 +25,7 @@ verify-cert () { } mli-files () { - N=499 + N=498 # do not count ml files from the tests in ocaml/{tests/perftest/quicktest} MLIS=$(git ls-files -- '**/*.mli' | grep -vE "ocaml/tests|ocaml/perftest|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) MLS=$(git ls-files -- '**/*.ml' | grep -vE "ocaml/tests|ocaml/perftest|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;)