Skip to content

Commit

Permalink
CA-386552 XSI-1534 Failed to disable pool HA after missing HA statefile
Browse files Browse the repository at this point in the history
Backport 5a639b1

The issue arises because:

* xapi_ha.ml, function disable_internal
* When a static VDI is removed from an SR (manually by deleting it from
  an NFS server, for example)
* As the SR is re-scanned, it is also removed from the xapi database
* But the re-scan does not remove it from the list of static VDIs in
  /etc/xensource/static-vdis
* When disable_internal is called, it obtains the list of static VDIs
  (their UUIDs)
* and looks them up in the database - which fails
* Static VDI references are also listed in Pool.ha_statefiles and can
  be stale.

This patch hardens the code path to avoid failure when the statefile VDI
has been removed manually.

Signed-off-by: Christian Lindig <[email protected]>
  • Loading branch information
Christian Lindig authored and lindig committed Aug 19, 2024
1 parent 5d1d9ef commit 8744458
Showing 1 changed file with 20 additions and 9 deletions.
29 changes: 20 additions & 9 deletions ocaml/xapi/xapi_ha.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,7 @@ let emergency_ha_disable __context soft =
)

let ha_release_resources __context localhost =
let __FUNCTION__ = "ha_release_resources" in
Monitor.stop () ;

(* Why aren't we calling Xha_statefile.detach_existing_statefiles?
Expand All @@ -1168,14 +1169,17 @@ let ha_release_resources __context localhost =
let statefile_vdis =
Db.Pool.get_ha_statefiles ~__context ~self:(Helpers.get_pool ~__context)
and deactivate_and_detach_vdi vdi_str =
let uuid = Db.VDI.get_uuid ~__context ~self:(Ref.of_string vdi_str) in
Helpers.log_exn_continue
(Printf.sprintf "detaching statefile VDI uuid: %s" uuid)
(fun () ->
Static_vdis.permanent_vdi_deactivate_by_uuid ~__context ~uuid ;
Static_vdis.permanent_vdi_detach_by_uuid ~__context ~uuid
)
()
match Db.VDI.get_uuid ~__context ~self:(Ref.of_string vdi_str) with
| uuid ->
Helpers.log_exn_continue
(Printf.sprintf "detaching statefile VDI uuid: %s" uuid)
(fun () ->
Static_vdis.permanent_vdi_deactivate_by_uuid ~__context ~uuid ;
Static_vdis.permanent_vdi_detach_by_uuid ~__context ~uuid
)
()
| exception _e ->
warn "%s: VDI %s not found in database" __FUNCTION__ vdi_str
in
List.iter deactivate_and_detach_vdi statefile_vdis ;
(* Deactivate and detach any metadata VDIs *)
Expand Down Expand Up @@ -1516,9 +1520,16 @@ let abort_new_master ~__context ~address =
let disable_internal __context =
debug "Disabling HA on the Pool" ;
let pool = Helpers.get_pool ~__context in
(* Avoid stale static VDIs *)
Static_vdis.gc () ;
(* Find the HA metadata and statefile VDIs for later *)
let statefile_vdis =
List.map Ref.of_string (Db.Pool.get_ha_statefiles ~__context ~self:pool)
let is_valid ref =
if Db.is_valid_ref __context ref then Some ref else None
in
Db.Pool.get_ha_statefiles ~__context ~self:pool
|> List.map Ref.of_string
|> List.filter_map is_valid
in
let metadata_vdis =
List.map
Expand Down

0 comments on commit 8744458

Please sign in to comment.