From c64798513519132019b28fb50ca6c7f07a445587 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 30 Sep 2024 14:51:55 +0100 Subject: [PATCH] CP-51683: Make Cluster_health non-exp feature Remove all the gating on cluster_health enabled as an experimental feature now that it is enabled by default. Signed-off-by: Vincent Liu --- ocaml/xapi/xapi_cluster.ml | 23 +++++++--------- ocaml/xapi/xapi_cluster_helpers.ml | 43 +++++++++++++----------------- ocaml/xapi/xapi_cluster_host.ml | 34 ++++++++++------------- ocaml/xapi/xapi_clustering.ml | 28 +++++++++---------- 4 files changed, 54 insertions(+), 74 deletions(-) diff --git a/ocaml/xapi/xapi_cluster.ml b/ocaml/xapi/xapi_cluster.ml index 355bf175527..498a0ea4111 100644 --- a/ocaml/xapi/xapi_cluster.ml +++ b/ocaml/xapi/xapi_cluster.ml @@ -65,15 +65,12 @@ let create ~__context ~pIF ~cluster_stack ~pool_auto_join ~token_timeout let hostuuid = Inventory.lookup Inventory._installation_uuid in let hostname = Db.Host.get_hostname ~__context ~self:host in let member = - if Xapi_cluster_helpers.cluster_health_enabled ~__context then - Extended - { - ip= Ipaddr.of_string_exn (ipstr_of_address ip_addr) - ; hostuuid - ; hostname - } - else - IPv4 (ipstr_of_address ip_addr) + Extended + { + ip= Ipaddr.of_string_exn (ipstr_of_address ip_addr) + ; hostuuid + ; hostname + } in let token_timeout_ms = Int64.of_float (token_timeout *. 1000.0) in let token_timeout_coefficient_ms = @@ -298,8 +295,6 @@ let pool_resync ~__context ~self:_ = find or create a matching cluster_host which is also enabled *) let cstack_sync ~__context ~self = - if Xapi_cluster_helpers.cluster_health_enabled ~__context then ( - debug "%s: sync db data with cluster stack" __FUNCTION__ ; - Watcher.on_corosync_update ~__context ~cluster:self - ["Updates due to cluster api calls"] - ) + debug "%s: sync db data with cluster stack" __FUNCTION__ ; + Watcher.on_corosync_update ~__context ~cluster:self + ["Updates due to cluster api calls"] diff --git a/ocaml/xapi/xapi_cluster_helpers.ml b/ocaml/xapi/xapi_cluster_helpers.ml index b46389f8a86..2582790e929 100644 --- a/ocaml/xapi/xapi_cluster_helpers.ml +++ b/ocaml/xapi/xapi_cluster_helpers.ml @@ -104,11 +104,6 @@ let with_cluster_operation ~__context ~(self : [`Cluster] API.Ref.t) ~doc ~op with _ -> () ) -let cluster_health_enabled ~__context = - let pool = Helpers.get_pool ~__context in - let restrictions = Db.Pool.get_restrictions ~__context ~self:pool in - List.assoc_opt "restrict_cluster_health" restrictions = Some "false" - let corosync3_enabled ~__context = let pool = Helpers.get_pool ~__context in let restrictions = Db.Pool.get_restrictions ~__context ~self:pool in @@ -147,23 +142,21 @@ let maybe_generate_alert ~__context ~num_hosts ~hosts_left ~hosts_joined ~quorum ~cls:`Host ~obj_uuid:host_uuid ~body ) in - if cluster_health_enabled ~__context then ( - List.iter (generate_alert false) hosts_left ; - List.iter (generate_alert true) hosts_joined ; - (* only generate this alert when the number of hosts is decreasing *) - if hosts_left <> [] && num_hosts <= quorum then - let pool = Helpers.get_pool ~__context in - let pool_uuid = Db.Pool.get_uuid ~__context ~self:pool in - let name, priority = Api_messages.cluster_quorum_approaching_lost in - let body = - Printf.sprintf - "The cluster is losing quorum: currently %d host(s), need %d host(s) \ - for a quorum" - num_hosts quorum - in - Helpers.call_api_functions ~__context (fun rpc session_id -> - ignore - @@ Client.Client.Message.create ~rpc ~session_id ~name ~priority - ~cls:`Pool ~obj_uuid:pool_uuid ~body - ) - ) + List.iter (generate_alert false) hosts_left ; + List.iter (generate_alert true) hosts_joined ; + (* only generate this alert when the number of hosts is decreasing *) + if hosts_left <> [] && num_hosts <= quorum then + let pool = Helpers.get_pool ~__context in + let pool_uuid = Db.Pool.get_uuid ~__context ~self:pool in + let name, priority = Api_messages.cluster_quorum_approaching_lost in + let body = + Printf.sprintf + "The cluster is losing quorum: currently %d host(s), need %d host(s) \ + for a quorum" + num_hosts quorum + in + Helpers.call_api_functions ~__context (fun rpc session_id -> + ignore + @@ Client.Client.Message.create ~rpc ~session_id ~name ~priority + ~cls:`Pool ~obj_uuid:pool_uuid ~body + ) diff --git a/ocaml/xapi/xapi_cluster_host.ml b/ocaml/xapi/xapi_cluster_host.ml index 9644ca8cd78..e022f75c706 100644 --- a/ocaml/xapi/xapi_cluster_host.ml +++ b/ocaml/xapi/xapi_cluster_host.ml @@ -126,15 +126,12 @@ let join_internal ~__context ~self = let host = Db.Cluster_host.get_host ~__context ~self in let hostname = Db.Host.get_hostname ~__context ~self:host in let member = - if Xapi_cluster_helpers.cluster_health_enabled ~__context then - Extended - { - ip= Ipaddr.of_string_exn (ipstr_of_address ip_addr) - ; hostuuid - ; hostname - } - else - IPv4 (ipstr_of_address ip_addr) + Extended + { + ip= Ipaddr.of_string_exn (ipstr_of_address ip_addr) + ; hostuuid + ; hostname + } in let ip_list = List.filter_map @@ -341,17 +338,14 @@ let enable ~__context ~self = let hostuuid = Inventory.lookup Inventory._installation_uuid in let hostname = Db.Host.get_hostname ~__context ~self:host in let member = - if Xapi_cluster_helpers.cluster_health_enabled ~__context then - Cluster_interface.( - Extended - { - ip= Ipaddr.of_string_exn (ipstr_of_address ip_addr) - ; hostuuid - ; hostname - } - ) - else - Cluster_interface.(IPv4 (ipstr_of_address ip_addr)) + Cluster_interface.( + Extended + { + ip= Ipaddr.of_string_exn (ipstr_of_address ip_addr) + ; hostuuid + ; hostname + } + ) in let cluster_ref = Db.Cluster_host.get_cluster ~__context ~self in let cluster_stack = diff --git a/ocaml/xapi/xapi_clustering.ml b/ocaml/xapi/xapi_clustering.ml index ec6efe81d00..d2b61be2f55 100644 --- a/ocaml/xapi/xapi_clustering.ml +++ b/ocaml/xapi/xapi_clustering.ml @@ -675,21 +675,19 @@ module Watcher = struct let is_master = Helpers.is_pool_master ~__context ~host in let daemon_enabled = Daemon.is_enabled () in if is_master && daemon_enabled then ( - if Xapi_cluster_helpers.cluster_health_enabled ~__context then - if Atomic.compare_and_set cluster_change_watcher false true then ( - debug "%s: create watcher for corosync-notifyd on coordinator" - __FUNCTION__ ; - Atomic.set finish_watch false ; - let _ : Thread.t = - Thread.create (fun () -> watch_cluster_change ~__context ~host) () - in - () - ) else - (* someone else must have gone into the if branch above and created the thread - before us, leave it to them *) - debug - "%s: not create watcher for corosync-notifyd as it already exists" - __FUNCTION__ ; + if Atomic.compare_and_set cluster_change_watcher false true then ( + debug "%s: create watcher for corosync-notifyd on coordinator" + __FUNCTION__ ; + Atomic.set finish_watch false ; + let _ : Thread.t = + Thread.create (fun () -> watch_cluster_change ~__context ~host) () + in + () + ) else + (* someone else must have gone into the if branch above and created the thread + before us, leave it to them *) + debug "%s: not create watcher for corosync-notifyd as it already exists" + __FUNCTION__ ; if Xapi_cluster_helpers.corosync3_enabled ~__context then if Atomic.compare_and_set cluster_stack_watcher false true then (