Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CA-380580: cross-pool migration - no CPU checks for halted VMs, move CPU check to the target host #6175

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 27 additions & 11 deletions ocaml/xapi/cpuid_helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ let threads_per_core = Map_check.(field "threads_per_core" int)

let vendor = Map_check.(field "vendor" string)

let get_flags_for_vm ~__context vm cpu_info =
let get_flags_for_vm ~__context domain_type cpu_info =
let features_field =
match Helpers.domain_type ~__context ~self:vm with
match domain_type with
| `hvm | `pv_in_pvh | `pvh ->
features_hvm
| `pv ->
Expand Down Expand Up @@ -79,36 +79,51 @@ let next_boot_cpu_features ~__context ~vm =
Map_check.getf features_field_boot pool_cpu_info
|> Xenops_interface.CPU_policy.to_string

let get_host_cpu_info ~__context ~vm:_ ~host ?remote () =
let get_host_cpu_info ~__context ~host ?remote () =
match remote with
| None ->
Db.Host.get_cpu_info ~__context ~self:host
| Some (rpc, session_id) ->
Client.Client.Host.get_cpu_info ~rpc ~session_id ~self:host

let get_host_compatibility_info ~__context ~vm ~host ?remote () =
get_host_cpu_info ~__context ~vm ~host ?remote ()
|> get_flags_for_vm ~__context vm
let get_host_compatibility_info ~__context ~domain_type ~host ?remote () =
get_host_cpu_info ~__context ~host ?remote ()
|> get_flags_for_vm ~__context domain_type

(* Compare the CPU on which the given VM was last booted to the CPU of the given host. *)
let assert_vm_is_compatible ~__context ~vm ~host ?remote () =
let assert_vm_is_compatible ~__context ~vm ~host =
let vm_ref, vm_rec, domain_type =
match vm with
| `db self ->
( self
, Db.VM.get_record ~__context ~self
, Helpers.domain_type ~__context ~self
)
| `import (vm_rec, dt) ->
(* Ref.null, because the VM to be imported does not yet have a ref *)
(Ref.null, vm_rec, Helpers.check_domain_type dt)
in
let fail msg =
raise
(Api_errors.Server_error
( Api_errors.vm_incompatible_with_this_host
, [Ref.string_of vm; Ref.string_of host; msg]
, [Ref.string_of vm_ref; Ref.string_of host; msg]
)
)
in
if Db.VM.get_power_state ~__context ~self:vm <> `Halted then
if vm_rec.API.vM_power_state <> `Halted then (
let host_uuid = Db.Host.get_uuid ~__context ~self:host in
debug "Checking CPU compatibility of %s VM %s with host %s"
(Record_util.domain_type_to_string domain_type)
vm_rec.API.vM_uuid host_uuid ;
let open Xapi_xenops_queue in
let module Xenopsd = (val make_client (default_xenopsd ()) : XENOPS) in
let dbg = Context.string_of_task __context in
try
let host_cpu_vendor, host_cpu_features =
get_host_compatibility_info ~__context ~vm ~host ?remote ()
get_host_compatibility_info ~__context ~domain_type ~host ()
in
let vm_cpu_info = Db.VM.get_last_boot_CPU_flags ~__context ~self:vm in
let vm_cpu_info = vm_rec.API.vM_last_boot_CPU_flags in
if List.mem_assoc cpu_info_vendor_key vm_cpu_info then (
(* Check the VM was last booted on a CPU with the same vendor as this host's CPU. *)
let vm_cpu_vendor = List.assoc cpu_info_vendor_key vm_cpu_info in
Expand Down Expand Up @@ -141,3 +156,4 @@ let assert_vm_is_compatible ~__context ~vm ~host ?remote () =
fail
"Host does not have new leveling feature keys - not comparing VM's \
flags"
)
8 changes: 4 additions & 4 deletions ocaml/xapi/cpuid_helpers.mli
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ val next_boot_cpu_features : __context:Context.t -> vm:[`VM] API.Ref.t -> string

val assert_vm_is_compatible :
__context:Context.t
-> vm:[`VM] API.Ref.t
-> vm:[`db of [`VM] API.Ref.t | `import of API.vM_t * API.domain_type]
-> host:[`host] API.Ref.t
-> ?remote:(Rpc.call -> Rpc.response Client.Id.t) * [< `session] Ref.t
-> unit
-> unit
(** Checks whether the CPU vendor and features used by the VM are compatible
with the given host. The VM can be one that is currently in the DB, or a record
coming from a metadata import as used for cross-pool migration. *)

val vendor : string Map_check.field

Expand All @@ -42,7 +43,6 @@ val features_hvm_host : [`host] Xenops_interface.CPU_policy.t Map_check.field

val get_host_cpu_info :
__context:Context.t
-> vm:[`VM] API.Ref.t
-> host:[`host] API.Ref.t
-> ?remote:(Rpc.call -> Rpc.response Client.Id.t) * [< `session] Ref.t
-> unit
Expand Down
19 changes: 17 additions & 2 deletions ocaml/xapi/export.ml
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,8 @@ let rec update_table ~__context ~include_snapshots ~preserve_power_state
&& Db.is_valid_ref __context vdi
then
add_vdi vdi ;
(* Add also the guest metrics *)
(* Add also the metrics and guest metrics *)
add vm.API.vM_metrics ;
add vm.API.vM_guest_metrics ;
(* Add the hosts links *)
add vm.API.vM_resident_on ;
Expand Down Expand Up @@ -264,7 +265,7 @@ let make_vm ?(with_snapshot_metadata = false) ~preserve_power_state table
; API.vM_resident_on= lookup table (Ref.string_of vm.API.vM_resident_on)
; API.vM_affinity= lookup table (Ref.string_of vm.API.vM_affinity)
; API.vM_consoles= []
; API.vM_metrics= Ref.null
; API.vM_metrics= lookup table (Ref.string_of vm.API.vM_metrics)
; API.vM_guest_metrics= lookup table (Ref.string_of vm.API.vM_guest_metrics)
; API.vM_protection_policy= Ref.null
; API.vM_bios_strings= vm.API.vM_bios_strings
Expand All @@ -277,6 +278,15 @@ let make_vm ?(with_snapshot_metadata = false) ~preserve_power_state table
; snapshot= API.rpc_of_vM_t vm
}

(** Convert a VM metrics reference to an obj *)
let make_vmm table __context self =
let vmm = Db.VM_metrics.get_record ~__context ~self in
{
cls= Datamodel_common._vm_metrics
; id= Ref.string_of (lookup table (Ref.string_of self))
; snapshot= API.rpc_of_vM_metrics_t vmm
}

(** Convert a guest-metrics reference to an obj *)
let make_gm table __context self =
let gm = Db.VM_guest_metrics.get_record ~__context ~self in
Expand Down Expand Up @@ -506,6 +516,10 @@ let make_all ~with_snapshot_metadata ~preserve_power_state table __context =
(make_vm ~with_snapshot_metadata ~preserve_power_state table __context)
(filter table (Db.VM.get_all ~__context))
in
let vmms =
List.map (make_vmm table __context)
(filter table (Db.VM_metrics.get_all ~__context))
in
let gms =
List.map (make_gm table __context)
(filter table (Db.VM_guest_metrics.get_all ~__context))
Expand Down Expand Up @@ -566,6 +580,7 @@ let make_all ~with_snapshot_metadata ~preserve_power_state table __context =
[
hosts
; vms
; vmms
; gms
; vbds
; vifs
Expand Down
86 changes: 81 additions & 5 deletions ocaml/xapi/import.ml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ type metadata_options = {
* - If the migration is for real, we will expect the VM export code on the source host to have mapped the VDI locations onto their
* mirrored counterparts which are present on this host. *)
live: bool
; check_cpu: bool
; (* An optional src VDI -> destination VDI rewrite list *)
vdi_map: (string * string) list
}
Expand All @@ -75,6 +76,13 @@ type config = {
let is_live config =
match config.import_type with Metadata_import {live; _} -> live | _ -> false

let needs_cpu_check config =
match config.import_type with
| Metadata_import {check_cpu; _} ->
check_cpu
| _ ->
false

(** List of (datamodel classname * Reference in export * Reference in database) *)
type table = (string * string * string) list

Expand Down Expand Up @@ -253,8 +261,8 @@ let assert_can_restore_backup ~__context rpc session_id (x : header) =
import_vms

let assert_can_live_import __context vm_record =
let host = Helpers.get_localhost ~__context in
let assert_memory_available () =
let host = Helpers.get_localhost ~__context in
let host_mem_available =
Memory_check.host_compute_free_memory_with_maximum_compression ~__context
~host None
Expand Down Expand Up @@ -416,7 +424,7 @@ module VM : HandlerTools = struct
| Skip
| Clean_import of API.vM_t

let precheck __context config _rpc _session_id _state x =
let precheck __context config _rpc _session_id state x =
let vm_record = get_vm_record x.snapshot in
let is_default_template =
vm_record.API.vM_is_default_template
Expand Down Expand Up @@ -511,6 +519,28 @@ module VM : HandlerTools = struct
| Replace (_, vm_record) | Clean_import vm_record ->
if is_live config then
assert_can_live_import __context vm_record ;
( if needs_cpu_check config then
let vmm_record =
find_in_export
(Ref.string_of vm_record.API.vM_metrics)
state.export
|> API.vM_metrics_t_of_rpc
in
let host = Helpers.get_localhost ~__context in
(* Don't check CPUID on 'unspecified' snapshots *)
match
( vm_record.API.vM_is_a_snapshot
, vmm_record.API.vM_metrics_current_domain_type
)
with
| true, `unspecified ->
(* A snapshot which was taken from a shutdown VM. *)
()
| _, dt ->
Cpuid_helpers.assert_vm_is_compatible ~__context
~vm:(`import (vm_record, dt))
~host
) ;
import_action
| _ ->
import_action
Expand Down Expand Up @@ -730,6 +760,15 @@ module VM : HandlerTools = struct
then
Db.VM.set_suspend_SR ~__context ~self:vm ~value:Ref.null ;
Db.VM.set_parent ~__context ~self:vm ~value:vm_record.API.vM_parent ;
( try
let vmm = lookup vm_record.API.vM_metrics state.table in
(* We have VM_metrics in the imported metadata, so use it, and destroy
the record created by VM.create_from_record above. *)
let replaced_vmm = Db.VM.get_metrics ~__context ~self:vm in
Db.VM.set_metrics ~__context ~self:vm ~value:vmm ;
Db.VM_metrics.destroy ~__context ~self:replaced_vmm
with _ -> ()
) ;
( try
let gm = lookup vm_record.API.vM_guest_metrics state.table in
Db.VM.set_guest_metrics ~__context ~self:vm ~value:gm
Expand Down Expand Up @@ -805,6 +844,40 @@ module GuestMetrics : HandlerTools = struct
state.table <- (x.cls, x.id, Ref.string_of gm) :: state.table
end

(** Create the VM metrics *)
module Metrics : HandlerTools = struct
type precheck_t = OK

let precheck __context _config _rpc _session_id _state _x = OK

let handle_dry_run __context _config _rpc _session_id state x _precheck_result
=
let dummy_gm = Ref.make () in
state.table <- (x.cls, x.id, Ref.string_of dummy_gm) :: state.table

let handle __context _config _rpc _session_id state x _precheck_result =
let vmm_record = API.vM_metrics_t_of_rpc x.snapshot in
let vmm = Ref.make () in
Db.VM_metrics.create ~__context ~ref:vmm
~uuid:(Uuidx.to_string (Uuidx.make ()))
~memory_actual:vmm_record.API.vM_metrics_memory_actual
~vCPUs_number:vmm_record.API.vM_metrics_VCPUs_number
~vCPUs_utilisation:vmm_record.API.vM_metrics_VCPUs_utilisation
~vCPUs_CPU:vmm_record.API.vM_metrics_VCPUs_CPU
~vCPUs_params:vmm_record.API.vM_metrics_VCPUs_params
~vCPUs_flags:vmm_record.API.vM_metrics_VCPUs_flags
~state:vmm_record.API.vM_metrics_state
~start_time:vmm_record.API.vM_metrics_start_time
~install_time:vmm_record.API.vM_metrics_install_time
~last_updated:vmm_record.API.vM_metrics_last_updated
~other_config:vmm_record.API.vM_metrics_other_config
~hvm:vmm_record.API.vM_metrics_hvm
~nested_virt:vmm_record.API.vM_metrics_nested_virt
~nomigrate:vmm_record.API.vM_metrics_nomigrate
~current_domain_type:vmm_record.API.vM_metrics_current_domain_type ;
state.table <- (x.cls, x.id, Ref.string_of vmm) :: state.table
end

(** If we're restoring VM metadata only then lookup the SR by uuid. If we can't find
the SR then we will still try to match VDIs later (except CDROMs) *)
module SR : HandlerTools = struct
Expand Down Expand Up @@ -1910,6 +1983,7 @@ module HostHandler = MakeHandler (Host)
module SRHandler = MakeHandler (SR)
module VDIHandler = MakeHandler (VDI)
module GuestMetricsHandler = MakeHandler (GuestMetrics)
module MetricsHandler = MakeHandler (Metrics)
module VMHandler = MakeHandler (VM)
module NetworkHandler = MakeHandler (Net)
module GPUGroupHandler = MakeHandler (GPUGroup)
Expand All @@ -1928,6 +2002,7 @@ let handlers =
; (Datamodel_common._sr, SRHandler.handle)
; (Datamodel_common._vdi, VDIHandler.handle)
; (Datamodel_common._vm_guest_metrics, GuestMetricsHandler.handle)
; (Datamodel_common._vm_metrics, MetricsHandler.handle)
; (Datamodel_common._vm, VMHandler.handle)
; (Datamodel_common._network, NetworkHandler.handle)
; (Datamodel_common._gpu_group, GPUGroupHandler.handle)
Expand Down Expand Up @@ -2265,13 +2340,14 @@ let metadata_handler (req : Request.t) s _ =
let force = find_query_flag req.Request.query "force" in
let dry_run = find_query_flag req.Request.query "dry_run" in
let live = find_query_flag req.Request.query "live" in
let check_cpu = find_query_flag req.Request.query "check_cpu" in
let vdi_map = read_map_params "vdi" req.Request.query in
info
"VM.import_metadata: force = %b; full_restore = %b dry_run = %b; \
live = %b; vdi_map = [ %s ]"
force full_restore dry_run live
live = %b; check_cpu = %b; vdi_map = [ %s ]"
force full_restore dry_run live check_cpu
(String.concat "; " (List.map (fun (a, b) -> a ^ "=" ^ b) vdi_map)) ;
let metadata_options = {dry_run; live; vdi_map} in
let metadata_options = {dry_run; live; vdi_map; check_cpu} in
let config =
{import_type= Metadata_import metadata_options; full_restore; force}
in
Expand Down
4 changes: 3 additions & 1 deletion ocaml/xapi/importexport.ml
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ type vm_export_import = {
; dry_run: bool
; live: bool
; send_snapshots: bool
; check_cpu: bool
}

(* Copy VM metadata to a remote pool *)
Expand All @@ -269,11 +270,12 @@ let remote_metadata_export_import ~__context ~rpc ~session_id ~remote_address
match which with
| `All ->
[]
| `Only {live; dry_run; send_snapshots; _} ->
| `Only {live; dry_run; send_snapshots; check_cpu; _} ->
[
Printf.sprintf "live=%b" live
; Printf.sprintf "dry_run=%b" dry_run
; Printf.sprintf "export_snapshots=%b" send_snapshots
; Printf.sprintf "check_cpu=%b" check_cpu
]
in
let params = Printf.sprintf "restore=%b" restore :: params in
Expand Down
2 changes: 1 addition & 1 deletion ocaml/xapi/message_forwarding.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2473,7 +2473,7 @@ functor
try bool_of_string (List.assoc "force" options) with _ -> false
in
if not force then
Cpuid_helpers.assert_vm_is_compatible ~__context ~vm ~host () ;
Cpuid_helpers.assert_vm_is_compatible ~__context ~vm:(`db vm) ~host ;
let source_host = Db.VM.get_resident_on ~__context ~self:vm in
with_vm_operation ~__context ~self:vm ~doc:"VM.pool_migrate"
~op:`pool_migrate ~strict:(not force) (fun () ->
Expand Down
1 change: 1 addition & 0 deletions ocaml/xapi/xapi_dr.ml
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ let recover_vms ~__context ~vms ~session_to ~force =
{
Import.dry_run= false
; Import.live= false
; check_cpu= false
; vdi_map= [] (* we expect the VDI metadata to be present *)
}
in
Expand Down
2 changes: 1 addition & 1 deletion ocaml/xapi/xapi_vm.ml
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,7 @@ let resume ~__context ~vm ~start_paused ~force =
) ;
let host = Helpers.get_localhost ~__context in
if not force then
Cpuid_helpers.assert_vm_is_compatible ~__context ~vm ~host () ;
Cpuid_helpers.assert_vm_is_compatible ~__context ~vm:(`db vm) ~host ;
(* Update CPU feature set, which will be passed to xenopsd *)
Xapi_xenops.resume ~__context ~self:vm ~start_paused ~force

Expand Down
4 changes: 2 additions & 2 deletions ocaml/xapi/xapi_vm_helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,7 @@ let assert_matches_control_domain_affinity ~__context ~self ~host =
let assert_enough_pcpus ~__context ~self ~host ?remote () =
let vcpus = Db.VM.get_VCPUs_max ~__context ~self in
let pcpus =
Cpuid_helpers.get_host_cpu_info ~__context ~vm:self ~host ?remote ()
Cpuid_helpers.get_host_cpu_info ~__context ~host ?remote ()
|> Map_check.getf Cpuid_helpers.cpu_count
|> Int64.of_int
in
Expand Down Expand Up @@ -699,7 +699,7 @@ let assert_can_boot_here ~__context ~self ~host ~snapshot ~do_cpuid_check
assert_hardware_platform_support ~__context ~vm:self
~host:(Helpers.LocalObject host) ;
if do_cpuid_check then
Cpuid_helpers.assert_vm_is_compatible ~__context ~vm:self ~host () ;
Cpuid_helpers.assert_vm_is_compatible ~__context ~vm:(`db self) ~host ;
if do_sr_check then
assert_can_see_SRs ~__context ~self ~host ;
assert_can_see_networks ~__context ~self ~host ;
Expand Down
Loading
Loading