From 27018c13fe781373ebf1c7bb31af256083e6642b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 20 Aug 2024 17:49:24 +0100 Subject: [PATCH 1/5] CP-49141: add OCaml timeslice setter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Uses Gc.Memprof to run a hook function periodically. This checks whether we are holding any locks, and if not and sufficient time has elapsed since the last, then we yield. POSIX timers wouldn't work here, because they deliver signals, and there are too many places in XAPI that don't handle EINTR properly. Signed-off-by: Edwin Török --- ocaml/libs/timeslice/dune | 5 ++++ ocaml/libs/timeslice/timeslice.ml | 48 ++++++++++++++++++++++++++++++ ocaml/libs/timeslice/timeslice.mli | 30 +++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 ocaml/libs/timeslice/dune create mode 100644 ocaml/libs/timeslice/timeslice.ml create mode 100644 ocaml/libs/timeslice/timeslice.mli diff --git a/ocaml/libs/timeslice/dune b/ocaml/libs/timeslice/dune new file mode 100644 index 00000000000..aa525bb84e2 --- /dev/null +++ b/ocaml/libs/timeslice/dune @@ -0,0 +1,5 @@ +(library + (name xapi_timeslice) + (package xapi) + (libraries threads.posix mtime mtime.clock.os) +) diff --git a/ocaml/libs/timeslice/timeslice.ml b/ocaml/libs/timeslice/timeslice.ml new file mode 100644 index 00000000000..45e53574585 --- /dev/null +++ b/ocaml/libs/timeslice/timeslice.ml @@ -0,0 +1,48 @@ +(* avoid allocating an extra option every time *) +let invalid_holder = -1 + +let last_lock_holder = Atomic.make invalid_holder + +let me () = Thread.self () |> Thread.id + +let lock_acquired () = + (* these need to be very low overhead, so just keep track of the last lock holder, + i.e. track only one high-priority lock at a time + *) + Atomic.set last_lock_holder (me ()) + +let lock_released () = Atomic.set last_lock_holder invalid_holder + +let[@inline always] am_i_holding_locks () = + let last = Atomic.get last_lock_holder in + last <> invalid_holder && last = me () + +let yield_interval = Atomic.make Mtime.Span.zero + +(* TODO: use bechamel.monotonic-clock instead, which has lower overhead, + but not in the right place in xs-opam yet +*) +let last_yield = Atomic.make (Mtime_clock.counter ()) + +let periodic_hook (_ : Gc.Memprof.allocation) = + ( if not (am_i_holding_locks ()) then + let elapsed = Mtime_clock.count (Atomic.get last_yield) in + if Mtime.Span.compare elapsed (Atomic.get yield_interval) > 0 then ( + let now = Mtime_clock.counter () in + Atomic.set last_yield now ; Thread.yield () + ) + ) ; + None + +let periodic = + Gc.Memprof. + {null_tracker with alloc_minor= periodic_hook; alloc_major= periodic_hook} + +let set ?(sampling_rate = 1e-4) interval = + Atomic.set yield_interval + (Mtime.Span.of_float_ns @@ (interval *. 1e9) |> Option.get) ; + Gc.Memprof.start ~sampling_rate ~callstack_size:0 periodic + +let clear () = + Gc.Memprof.stop () ; + Atomic.set yield_interval Mtime.Span.zero diff --git a/ocaml/libs/timeslice/timeslice.mli b/ocaml/libs/timeslice/timeslice.mli new file mode 100644 index 00000000000..0b8f0bf1c6c --- /dev/null +++ b/ocaml/libs/timeslice/timeslice.mli @@ -0,0 +1,30 @@ +val set : ?sampling_rate:float -> float -> unit +(** [set ?sampling_rate interval] calls [Thread.yield ()] at most [interval] seconds. + + The implementation of [Thread.yield] guarantees since OCaml 4.09 that we'll switch to a different OCaml thread, + if one exists that is not blocked (i.e. it doesn't rely on [sched_yield] which may run the same thread again, + but uses pthread mutexes and condition variables to ensure the current thread isn't immediately runnable). + + The setting is global for the entire process, and currently uses [Gc.Memprof] to ensure that a hook function is called periodically, + although it depends on the allocation rate of the program whether it gets called at all. + + Another alternative would be to use {!val:Unix.set_itimer}, but XAPI doesn't cope with [EINTR] in a lot of places, + and POSIX interval timers rely on signals to notify of elapsed time. + + We could also have a dedicated thread that sleeps for a certain amount of time, but if it is an OCaml thread, + we'd have no guarantees it'd get scheduled often enough (and it couldn't interrupt other threads anyway, + by the time you'd be running the handler you already gave up running something else). + + It may be desirable to avoid yielding if we are currently holding a lock, see {!val:lock_acquired}, and {!val:lock_released} + to notify this module when that happens. +*) + +val clear : unit -> unit +(** [clear ()] undoes the changes made by [set]. + This is useful for testing multiple timeslices in the same program. *) + +val lock_acquired : unit -> unit +(** [lock_acquired ()] notifies about lock acquisition. *) + +val lock_released : unit -> unit +(** [lock_acquired ()] notifies about lock release. *) From 4d1b7d25f165bcfda4faba92d3480da00492b57e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 20 Aug 2024 18:05:19 +0100 Subject: [PATCH 2/5] CP-52709: add timeslice configuration to all services MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And apply on startup. Signed-off-by: Edwin Török --- ocaml/libs/timeslice/dune | 2 +- ocaml/xapi-idl/lib/dune | 1 + ocaml/xapi-idl/lib/xcp_service.ml | 25 +++++++++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/ocaml/libs/timeslice/dune b/ocaml/libs/timeslice/dune index aa525bb84e2..5e946a66b21 100644 --- a/ocaml/libs/timeslice/dune +++ b/ocaml/libs/timeslice/dune @@ -1,5 +1,5 @@ (library (name xapi_timeslice) - (package xapi) + (package xapi-idl) (libraries threads.posix mtime mtime.clock.os) ) diff --git a/ocaml/xapi-idl/lib/dune b/ocaml/xapi-idl/lib/dune index fed65ab1257..8f0d7ca27de 100644 --- a/ocaml/xapi-idl/lib/dune +++ b/ocaml/xapi-idl/lib/dune @@ -26,6 +26,7 @@ unix uri uuidm + xapi_timeslice xapi-backtrace xapi-consts xapi-log diff --git a/ocaml/xapi-idl/lib/xcp_service.ml b/ocaml/xapi-idl/lib/xcp_service.ml index 01c65bc49fb..b9ec0313c0c 100644 --- a/ocaml/xapi-idl/lib/xcp_service.ml +++ b/ocaml/xapi-idl/lib/xcp_service.ml @@ -163,6 +163,25 @@ let setify = in loop [] +(** How long to let an OCaml thread run, before + switching to another thread. + This needs to be as small as possible to reduce latency. + + Too small values reduce performance due to context switching overheads + + 4ms = 1/HZ in Dom0 seems like a good default, + a better value will be written by a boot time service. + *) +let timeslice = ref 0.05 + +let apply_timeslice () = + let interval = !timeslice in + D.debug "Setting timeslice to %.3fs" interval ; + if interval >= 0.05 then + D.debug "Timeslice same as or larger than default: not setting" + else + Xapi_timeslice.Timeslice.set interval + let common_options = [ ( "use-switch" @@ -236,6 +255,11 @@ let common_options = , (fun () -> !config_dir) , "Location of directory containing configuration file fragments" ) + ; ( "timeslice" + , Arg.Set_float timeslice + , (fun () -> Printf.sprintf "%.3f" !timeslice) + , "timeslice in seconds" + ) ] let loglevel () = !log_level @@ -454,6 +478,7 @@ let configure_common ~options ~resources arg_parse_fn = failwith (String.concat "\n" lines) ) resources ; + apply_timeslice () ; Sys.set_signal Sys.sigpipe Sys.Signal_ignore let configure ?(argv = Sys.argv) ?(options = []) ?(resources = []) () = From 764cbf1777df048b5e7e1bad7ff13698a2042d80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 20 Aug 2024 18:32:30 +0100 Subject: [PATCH 3/5] CP-52709: add simple measurement code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/libs/timeslice/simple_measure.ml | 47 +++++++++++++++++++++++++ ocaml/libs/timeslice/simple_measure.mli | 33 +++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 ocaml/libs/timeslice/simple_measure.ml create mode 100644 ocaml/libs/timeslice/simple_measure.mli diff --git a/ocaml/libs/timeslice/simple_measure.ml b/ocaml/libs/timeslice/simple_measure.ml new file mode 100644 index 00000000000..9ca069b0e54 --- /dev/null +++ b/ocaml/libs/timeslice/simple_measure.ml @@ -0,0 +1,47 @@ +(** 95% confidence interval, and median value *) +type t = {low: float; median: float; high: float} + +let span_to_s s = Mtime.Span.to_float_ns s *. 1e-9 + +let ci95 measurements = + let n = Array.length measurements in + Array.sort Float.compare measurements ; + let median = measurements.(n / 2) in + (* "Performance Evaluation of Computer and Communication Systems", Table A. 1 *) + let n = float n in + let d = 0.98 *. sqrt n in + let lo = (n /. 2.) -. d |> Float.to_int + and hi = (n /. 2.) +. 1. +. d |> Float.ceil |> Float.to_int in + {low= measurements.(lo - 1); median; high= measurements.(hi - 1)} + +let measure ?(n = 1001) ?(inner = 10) f = + if n <= 70 then (* some of the formulas below are not valid for smaller [n] *) + invalid_arg (Printf.sprintf "n must be at least 70: %d" n) ; + (* warmup *) + Sys.opaque_identity (f ()) ; + + let measure_inner _ = + let m = Mtime_clock.counter () in + for _ = 1 to inner do + (* opaque_identity prevents the call from being optimized away *) + Sys.opaque_identity (f ()) + done ; + let elapsed = Mtime_clock.count m in + span_to_s elapsed /. float inner + in + let measurements = Array.init n measure_inner in + ci95 measurements + +let measure_min ?(n = 1001) f arg = + (* warmup *) + Sys.opaque_identity (f arg) ; + let measure_one _ = + let m = Mtime_clock.counter () in + Sys.opaque_identity (f arg) ; + let elapsed = Mtime_clock.count m in + span_to_s elapsed + in + Seq.ints 0 + |> Seq.take n + |> Seq.map measure_one + |> Seq.fold_left Float.min Float.max_float diff --git a/ocaml/libs/timeslice/simple_measure.mli b/ocaml/libs/timeslice/simple_measure.mli new file mode 100644 index 00000000000..ec6d782e91a --- /dev/null +++ b/ocaml/libs/timeslice/simple_measure.mli @@ -0,0 +1,33 @@ +(** Measure the speed of an operation in a very simple and robust way. + More detailed measurements can be dune using [Bechamel]. +*) + +(** 95% confidence interval, and median value *) +type t = {low: float; median: float; high: float} + +val measure : ?n:int -> ?inner:int -> (unit -> unit) -> t +(** [measure ?n ?inner f] measures [n] times the duration of [inner] iterations of [f ()]. + + Returns the median of the inner measurements, and a 95% confidence interval. + The median is used, because it makes no assumptions about the distribution of the samples, + i.e. it doesn't require a normal (Gaussian) distribution. + + The inner measurements use a simple average, because we only know the duration of [inner] iterations, + not the duration of each individual call to [f ()]. + The purpose of the [inner] iterations is to reduce measurement overhead. + + @param n iteration count for the outer loop, must be more than [70]. + @param n iteration count for the inner loop + @param f function to measure + + @raises Invalid_argument if [n<70] + *) + +val measure_min : ?n:int -> ('a -> unit) -> 'a -> float +(** [measure_min ?n:int f arg] is the minimum amount of time that [f arg] takes. + + This should be used when we try to measure the maximum speed of some operation (e.g. cached memory accesses), + while ignoring latencies/hickups introduced by other processes on the system. + + It shouldn't be used for measuring the overhead of an operation, because the hickups may be part of that overhead. + *) From b30e7685bf9cd293eb932597cdfa8a6160c55318 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Tue, 20 Aug 2024 18:52:12 +0100 Subject: [PATCH 4/5] CP-52709: recommended measurement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/libs/timeslice/dune | 2 +- ocaml/libs/timeslice/recommended.ml | 34 ++++++++++++++++++++++++++++ ocaml/libs/timeslice/recommended.mli | 8 +++++++ 3 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 ocaml/libs/timeslice/recommended.ml create mode 100644 ocaml/libs/timeslice/recommended.mli diff --git a/ocaml/libs/timeslice/dune b/ocaml/libs/timeslice/dune index 5e946a66b21..94eff6b3a39 100644 --- a/ocaml/libs/timeslice/dune +++ b/ocaml/libs/timeslice/dune @@ -1,5 +1,5 @@ (library (name xapi_timeslice) (package xapi-idl) - (libraries threads.posix mtime mtime.clock.os) + (libraries threads.posix mtime mtime.clock.os xapi-log) ) diff --git a/ocaml/libs/timeslice/recommended.ml b/ocaml/libs/timeslice/recommended.ml new file mode 100644 index 00000000000..489889e0f56 --- /dev/null +++ b/ocaml/libs/timeslice/recommended.ml @@ -0,0 +1,34 @@ +module D = Debug.Make (struct let name = "timeslice_recommended" end) + +let yield_stop = Atomic.make false + +let yield_worker () = + while not (Atomic.get yield_stop) do + Thread.yield () + done + +let yield_overhead () = + (* Thread.yield only has an effect if another thread exists, + so create one that yields back immediately *) + D.debug "Measuring Thread.yield overhead" ; + Atomic.set yield_stop false ; + let t = Thread.create yield_worker () in + let measured = Simple_measure.measure Thread.yield in + D.debug "Thread.yield overhead: %.6fs <= %.6fs <= %.6fs" measured.low + measured.median measured.high ; + D.debug "Waiting for worker thread to stop" ; + Atomic.set yield_stop true ; + Thread.join t ; + measured.median + +let measure ?(max_overhead_percentage = 1.0) () = + let overhead = yield_overhead () in + let interval = overhead /. (max_overhead_percentage /. 100.) in + D.debug "Recommended timeslice interval = %.4fs" interval ; + (* Avoid too high or too low intervals: + do not go below 1ms (our HZ is 250, and max is 1000, the kernel would round up anyway) + do not go above 50ms (the current default in OCaml 4.14) + *) + let interval = interval |> Float.max 0.001 |> Float.min 0.050 in + D.debug "Final recommeded timeslice interval = %.4fs" interval ; + interval diff --git a/ocaml/libs/timeslice/recommended.mli b/ocaml/libs/timeslice/recommended.mli new file mode 100644 index 00000000000..ee9e83130ef --- /dev/null +++ b/ocaml/libs/timeslice/recommended.mli @@ -0,0 +1,8 @@ +val measure : ?max_overhead_percentage:float -> unit -> float +(** [measure ?max_overhead_percentage ()] returns the recommended timeslice for the current system. + + The returned value should be used in a call to {!val:Timeslice.set}. + + @param max_overhead_percentage default 1% + @returns [interval] such that [overhead / interval <= max_overhead_percentage / 100] + *) From 572559c6728c5679397f962565ab47b27e402795 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 22 Aug 2024 17:42:42 +0100 Subject: [PATCH 5/5] CP-52709: Enable timeslice setting during unit tests by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edwin Török --- ocaml/tests/common/dune | 1 + ocaml/tests/common/suite_init.ml | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ocaml/tests/common/dune b/ocaml/tests/common/dune index 29acca3d2cb..a8ab57a4a23 100644 --- a/ocaml/tests/common/dune +++ b/ocaml/tests/common/dune @@ -28,6 +28,7 @@ xapi-stdext-date xapi-stdext-threads.scheduler xapi-stdext-unix + xapi_timeslice ) ) diff --git a/ocaml/tests/common/suite_init.ml b/ocaml/tests/common/suite_init.ml index e63deae17b5..adb9c501e88 100644 --- a/ocaml/tests/common/suite_init.ml +++ b/ocaml/tests/common/suite_init.ml @@ -11,4 +11,6 @@ let harness_init () = Filename.concat Test_common.working_area "xapi-inventory" ; Xcp_client.use_switch := false ; Pool_role.set_pool_role_for_test () ; - Message_forwarding.register_callback_fns () + Message_forwarding.register_callback_fns () ; + (* for unit tests use a fixed value *) + Xapi_timeslice.Timeslice.set 0.004