Skip to content

Commit

Permalink
CA-388210: SMAPIv3 debugging: log PID
Browse files Browse the repository at this point in the history
Log PID on successful and failed operations, and log full cmdline for newly spawned processes.

This can be used to debug stuck scripts, so that we know which invocation is the one that is stuck.

Signed-off-by: Edwin Török <[email protected]>
  • Loading branch information
edwintorok committed Nov 18, 2024
1 parent b241d2b commit 44ff426
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 17 deletions.
4 changes: 3 additions & 1 deletion ocaml/xapi-storage-script/lib.ml
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ module Process = struct

type t = {
exit_status: (unit, exit_or_signal) Result.t
; pid: int
; stdout: string
; stderr: string
}
Expand Down Expand Up @@ -176,6 +177,7 @@ module Process = struct
let run ~env ~prog ~args ~input =
let ( let@ ) f x = f x in
let@ p = with_process ~env ~prog ~args in
let pid = p#pid in
let sender = send p#stdin input in
let receiver_out = receive p#stdout in
let receiver_err = receive p#stderr in
Expand All @@ -185,7 +187,7 @@ module Process = struct
Lwt.both sender receiver >>= fun ((), (stdout, stderr)) ->
p#status >>= fun status ->
let exit_status = Output.exit_or_signal_of_unix status in
Lwt.return {Output.exit_status; stdout; stderr}
Lwt.return {Output.exit_status; pid; stdout; stderr}
)
(function
| Lwt.Canceled as exn ->
Expand Down
3 changes: 2 additions & 1 deletion ocaml/xapi-storage-script/lib.mli
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ module Process : sig

type t = {
exit_status: (unit, exit_or_signal) result
; pid: int
; stdout: string
; stderr: string
}
Expand All @@ -78,7 +79,7 @@ module Process : sig
-> Output.t Lwt.t
(** Runs a cli program, writes [input] into its stdin, then closing the fd,
and finally waits for the program to finish and returns the exit status,
its stdout and stderr. *)
the pid, and its stdout and stderr. *)
end

module DirWatcher : sig
Expand Down
28 changes: 17 additions & 11 deletions ocaml/xapi-storage-script/main.ml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ let domain_of ~dp ~vm' =
| "0" ->
(* SM tries to use this in filesystem paths, so cannot have /,
and systemd might be a bit unhappy with - *)
"u0-" ^ dp |> String.map ~f:(function '/' | '-' -> '_' | c -> c)
"u0-" ^ dp |> String.map (function '/' | '-' -> '_' | c -> c)
| _ ->
vm

Expand Down Expand Up @@ -477,6 +477,8 @@ let fork_exec_rpc :
)
>>>= fun input ->
let input = compat_in input |> Jsonrpc.to_string in
debug (fun m -> m "Running %s" @@ Filename.quote_command script_name args)
>>= fun () ->
Process.run ~env ~prog:script_name ~args ~input >>= fun output ->
let fail_because ~cause description =
fail
Expand All @@ -500,12 +502,13 @@ let fork_exec_rpc :
with
| Error _ ->
error (fun m ->
m "%s failed and printed bad error json: %s" script_name
output.Process.Output.stdout
m "%s[%d] failed and printed bad error json: %s" script_name
output.pid output.Process.Output.stdout
)
>>= fun () ->
error (fun m ->
m "%s failed, stderr: %s" script_name output.Process.Output.stderr
m "%s[%d] failed, stderr: %s" script_name output.pid
output.Process.Output.stderr
)
>>= fun () ->
fail_because "non-zero exit and bad json on stdout"
Expand All @@ -516,12 +519,12 @@ let fork_exec_rpc :
with
| Error _ ->
error (fun m ->
m "%s failed and printed bad error json: %s" script_name
output.Process.Output.stdout
m "%s[%d] failed and printed bad error json: %s" script_name
output.pid output.Process.Output.stdout
)
>>= fun () ->
error (fun m ->
m "%s failed, stderr: %s" script_name
m "%s[%d] failed, stderr: %s" script_name output.pid
output.Process.Output.stderr
)
>>= fun () ->
Expand All @@ -532,7 +535,9 @@ let fork_exec_rpc :
)
)
| Error (Signal signal) ->
error (fun m -> m "%s caught a signal and failed" script_name)
error (fun m ->
m "%s[%d] caught a signal and failed" script_name output.pid
)
>>= fun () -> fail_because "signalled" ~cause:(Signal.to_string signal)
| Ok () -> (
(* Parse the json on stdout. We get back a JSON-RPC
Expand All @@ -544,8 +549,8 @@ let fork_exec_rpc :
with
| Error _ ->
error (fun m ->
m "%s succeeded but printed bad json: %s" script_name
output.Process.Output.stdout
m "%s[%d] succeeded but printed bad json: %s" script_name
output.pid output.Process.Output.stdout
)
>>= fun () ->
fail
Expand All @@ -554,7 +559,8 @@ let fork_exec_rpc :
)
| Ok response ->
info (fun m ->
m "%s succeeded: %s" script_name output.Process.Output.stdout
m "%s[%d] succeeded: %s" script_name output.pid
output.Process.Output.stdout
)
>>= fun () ->
let response = compat_out response in
Expand Down
25 changes: 21 additions & 4 deletions ocaml/xapi-storage-script/test_lib.ml
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,20 @@ let test_run_status =
let module P = Process in
let test () =
let* output = P.run ~prog:"true" ~args:[] ~input:"" ~env:[] in
let expected = P.Output.{exit_status= Ok (); stdout= ""; stderr= ""} in
let expected =
P.Output.{exit_status= Ok (); pid= output.pid; stdout= ""; stderr= ""}
in
Alcotest.(check output_c) "Exit status is correct" expected output ;

let* output = P.run ~prog:"false" ~args:[] ~input:"" ~env:[] in
let expected =
P.Output.{exit_status= Error (Exit_non_zero 1); stdout= ""; stderr= ""}
P.Output.
{
exit_status= Error (Exit_non_zero 1)
; pid= output.pid
; stdout= ""
; stderr= ""
}
in
Alcotest.(check output_c) "Exit status is correct" expected output ;

Expand All @@ -121,15 +129,24 @@ let test_run_output =
let test () =
let content = "@@@@@@" in
let* output = P.run ~prog:"cat" ~args:["-"] ~input:content ~env:[] in
let expected = P.Output.{exit_status= Ok (); stdout= content; stderr= ""} in
let expected =
P.Output.
{exit_status= Ok (); pid= output.pid; stdout= content; stderr= ""}
in
Alcotest.(check output_c) "Stdout is correct" expected output ;

let* output = P.run ~prog:"cat" ~args:[content] ~input:content ~env:[] in
let stderr =
Printf.sprintf "cat: %s: No such file or directory\n" content
in
let expected =
P.Output.{exit_status= Error (Exit_non_zero 1); stdout= ""; stderr}
P.Output.
{
exit_status= Error (Exit_non_zero 1)
; pid= output.pid
; stdout= ""
; stderr
}
in
Alcotest.(check output_c) "Stderr is correct" expected output ;
Lwt.return ()
Expand Down

0 comments on commit 44ff426

Please sign in to comment.