Skip to content

Commit

Permalink
CP-51683: Make Cluster_health non-exp feature (#6023)
Browse files: browse the repository at this point in the history
Remove all gating on whether cluster_health is enabled as an experimental
feature, now that it is enabled by default.
  • Loading branch information
Vincent-lau authored Oct 21, 2024
2 parents 45d934e + c647985 commit daa9938
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 74 deletions.
23 changes: 9 additions & 14 deletions ocaml/xapi/xapi_cluster.ml
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,12 @@ let create ~__context ~pIF ~cluster_stack ~pool_auto_join ~token_timeout
let hostuuid = Inventory.lookup Inventory._installation_uuid in
let hostname = Db.Host.get_hostname ~__context ~self:host in
let member =
if Xapi_cluster_helpers.cluster_health_enabled ~__context then
Extended
{
ip= Ipaddr.of_string_exn (ipstr_of_address ip_addr)
; hostuuid
; hostname
}
else
IPv4 (ipstr_of_address ip_addr)
Extended
{
ip= Ipaddr.of_string_exn (ipstr_of_address ip_addr)
; hostuuid
; hostname
}
in
let token_timeout_ms = Int64.of_float (token_timeout *. 1000.0) in
let token_timeout_coefficient_ms =
Expand Down Expand Up @@ -298,8 +295,6 @@ let pool_resync ~__context ~self:_ =
find or create a matching cluster_host which is also enabled *)

let cstack_sync ~__context ~self =
if Xapi_cluster_helpers.cluster_health_enabled ~__context then (
debug "%s: sync db data with cluster stack" __FUNCTION__ ;
Watcher.on_corosync_update ~__context ~cluster:self
["Updates due to cluster api calls"]
)
debug "%s: sync db data with cluster stack" __FUNCTION__ ;
Watcher.on_corosync_update ~__context ~cluster:self
["Updates due to cluster api calls"]
43 changes: 18 additions & 25 deletions ocaml/xapi/xapi_cluster_helpers.ml
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,6 @@ let with_cluster_operation ~__context ~(self : [`Cluster] API.Ref.t) ~doc ~op
with _ -> ()
)

(** [cluster_health_enabled ~__context] is [true] exactly when the pool's
    restrictions list binds the key ["restrict_cluster_health"] to the
    string ["false"]; a missing key or any other value yields [false]. *)
let cluster_health_enabled ~__context =
  let self = Helpers.get_pool ~__context in
  match
    Db.Pool.get_restrictions ~__context ~self
    |> List.assoc_opt "restrict_cluster_health"
  with
  | Some "false" ->
      true
  | Some _ | None ->
      false

let corosync3_enabled ~__context =
let pool = Helpers.get_pool ~__context in
let restrictions = Db.Pool.get_restrictions ~__context ~self:pool in
Expand Down Expand Up @@ -147,23 +142,21 @@ let maybe_generate_alert ~__context ~num_hosts ~hosts_left ~hosts_joined ~quorum
~cls:`Host ~obj_uuid:host_uuid ~body
)
in
if cluster_health_enabled ~__context then (
List.iter (generate_alert false) hosts_left ;
List.iter (generate_alert true) hosts_joined ;
(* only generate this alert when the number of hosts is decreasing *)
if hosts_left <> [] && num_hosts <= quorum then
let pool = Helpers.get_pool ~__context in
let pool_uuid = Db.Pool.get_uuid ~__context ~self:pool in
let name, priority = Api_messages.cluster_quorum_approaching_lost in
let body =
Printf.sprintf
"The cluster is losing quorum: currently %d host(s), need %d host(s) \
for a quorum"
num_hosts quorum
in
Helpers.call_api_functions ~__context (fun rpc session_id ->
ignore
@@ Client.Client.Message.create ~rpc ~session_id ~name ~priority
~cls:`Pool ~obj_uuid:pool_uuid ~body
)
)
List.iter (generate_alert false) hosts_left ;
List.iter (generate_alert true) hosts_joined ;
(* only generate this alert when the number of hosts is decreasing *)
if hosts_left <> [] && num_hosts <= quorum then
let pool = Helpers.get_pool ~__context in
let pool_uuid = Db.Pool.get_uuid ~__context ~self:pool in
let name, priority = Api_messages.cluster_quorum_approaching_lost in
let body =
Printf.sprintf
"The cluster is losing quorum: currently %d host(s), need %d host(s) \
for a quorum"
num_hosts quorum
in
Helpers.call_api_functions ~__context (fun rpc session_id ->
ignore
@@ Client.Client.Message.create ~rpc ~session_id ~name ~priority
~cls:`Pool ~obj_uuid:pool_uuid ~body
)
34 changes: 14 additions & 20 deletions ocaml/xapi/xapi_cluster_host.ml
Original file line number Diff line number Diff line change
Expand Up @@ -126,15 +126,12 @@ let join_internal ~__context ~self =
let host = Db.Cluster_host.get_host ~__context ~self in
let hostname = Db.Host.get_hostname ~__context ~self:host in
let member =
if Xapi_cluster_helpers.cluster_health_enabled ~__context then
Extended
{
ip= Ipaddr.of_string_exn (ipstr_of_address ip_addr)
; hostuuid
; hostname
}
else
IPv4 (ipstr_of_address ip_addr)
Extended
{
ip= Ipaddr.of_string_exn (ipstr_of_address ip_addr)
; hostuuid
; hostname
}
in
let ip_list =
List.filter_map
Expand Down Expand Up @@ -341,17 +338,14 @@ let enable ~__context ~self =
let hostuuid = Inventory.lookup Inventory._installation_uuid in
let hostname = Db.Host.get_hostname ~__context ~self:host in
let member =
if Xapi_cluster_helpers.cluster_health_enabled ~__context then
Cluster_interface.(
Extended
{
ip= Ipaddr.of_string_exn (ipstr_of_address ip_addr)
; hostuuid
; hostname
}
)
else
Cluster_interface.(IPv4 (ipstr_of_address ip_addr))
Cluster_interface.(
Extended
{
ip= Ipaddr.of_string_exn (ipstr_of_address ip_addr)
; hostuuid
; hostname
}
)
in
let cluster_ref = Db.Cluster_host.get_cluster ~__context ~self in
let cluster_stack =
Expand Down
28 changes: 13 additions & 15 deletions ocaml/xapi/xapi_clustering.ml
Original file line number Diff line number Diff line change
Expand Up @@ -675,21 +675,19 @@ module Watcher = struct
let is_master = Helpers.is_pool_master ~__context ~host in
let daemon_enabled = Daemon.is_enabled () in
if is_master && daemon_enabled then (
if Xapi_cluster_helpers.cluster_health_enabled ~__context then
if Atomic.compare_and_set cluster_change_watcher false true then (
debug "%s: create watcher for corosync-notifyd on coordinator"
__FUNCTION__ ;
Atomic.set finish_watch false ;
let _ : Thread.t =
Thread.create (fun () -> watch_cluster_change ~__context ~host) ()
in
()
) else
(* someone else must have gone into the if branch above and created the thread
before us, leave it to them *)
debug
"%s: not create watcher for corosync-notifyd as it already exists"
__FUNCTION__ ;
if Atomic.compare_and_set cluster_change_watcher false true then (
debug "%s: create watcher for corosync-notifyd on coordinator"
__FUNCTION__ ;
Atomic.set finish_watch false ;
let _ : Thread.t =
Thread.create (fun () -> watch_cluster_change ~__context ~host) ()
in
()
) else
(* someone else must have gone into the if branch above and created the thread
before us, leave it to them *)
debug "%s: not create watcher for corosync-notifyd as it already exists"
__FUNCTION__ ;

if Xapi_cluster_helpers.corosync3_enabled ~__context then
if Atomic.compare_and_set cluster_stack_watcher false true then (
Expand Down

0 comments on commit daa9938

Please sign in to comment.