diff --git a/ocaml/libs/timeslice/dune b/ocaml/libs/timeslice/dune new file mode 100644 index 0000000000..94eff6b3a3 --- /dev/null +++ b/ocaml/libs/timeslice/dune @@ -0,0 +1,5 @@ +(library + (name xapi_timeslice) + (package xapi-idl) + (libraries threads.posix mtime mtime.clock.os xapi-log) +) diff --git a/ocaml/libs/timeslice/recommended.ml b/ocaml/libs/timeslice/recommended.ml new file mode 100644 index 0000000000..489889e0f5 --- /dev/null +++ b/ocaml/libs/timeslice/recommended.ml @@ -0,0 +1,34 @@ +module D = Debug.Make (struct let name = "timeslice_recommended" end) + +let yield_stop = Atomic.make false + +let yield_worker () = + while not (Atomic.get yield_stop) do + Thread.yield () + done + +let yield_overhead () = + (* Thread.yield only has an effect if another thread exists, + so create one that yields back immediately *) + D.debug "Measuring Thread.yield overhead" ; + Atomic.set yield_stop false ; + let t = Thread.create yield_worker () in + let measured = Simple_measure.measure Thread.yield in + D.debug "Thread.yield overhead: %.6fs <= %.6fs <= %.6fs" measured.low + measured.median measured.high ; + D.debug "Waiting for worker thread to stop" ; + Atomic.set yield_stop true ; + Thread.join t ; + measured.median + +let measure ?(max_overhead_percentage = 1.0) () = + let overhead = yield_overhead () in + let interval = overhead /. (max_overhead_percentage /. 100.) in + D.debug "Recommended timeslice interval = %.4fs" interval ; + (* Avoid too high or too low intervals: + do not go below 1ms (our HZ is 250, and max is 1000, the kernel would round up anyway) + do not go above 50ms (the current default in OCaml 4.14) + *) + let interval = interval |> Float.max 0.001 |> Float.min 0.050 in + D.debug "Final recommeded timeslice interval = %.4fs" interval ; + interval diff --git a/ocaml/libs/timeslice/recommended.mli b/ocaml/libs/timeslice/recommended.mli new file mode 100644 index 0000000000..ee9e83130e --- /dev/null +++ b/ocaml/libs/timeslice/recommended.mli @@ -0,0 +1,8 @@ +val measure : ?max_overhead_percentage:float -> unit -> float +(** [measure ?max_overhead_percentage ()] returns the recommended timeslice for the current system. + + The returned value should be used in a call to {!val:Timeslice.set}. + + @param max_overhead_percentage default 1% + @returns [interval] such that [overhead / interval <= max_overhead_percentage / 100] + *) diff --git a/ocaml/libs/timeslice/simple_measure.ml b/ocaml/libs/timeslice/simple_measure.ml new file mode 100644 index 0000000000..9ca069b0e5 --- /dev/null +++ b/ocaml/libs/timeslice/simple_measure.ml @@ -0,0 +1,47 @@ +(** 95% confidence interval, and median value *) +type t = {low: float; median: float; high: float} + +let span_to_s s = Mtime.Span.to_float_ns s *. 1e-9 + +let ci95 measurements = + let n = Array.length measurements in + Array.sort Float.compare measurements ; + let median = measurements.(n / 2) in + (* "Performance Evaluation of Computer and Communication Systems", Table A. 1 *) + let n = float n in + let d = 0.98 *. sqrt n in + let lo = (n /. 2.) -. d |> Float.to_int + and hi = (n /. 2.) +. 1. +. d |> Float.ceil |> Float.to_int in + {low= measurements.(lo - 1); median; high= measurements.(hi - 1)} + +let measure ?(n = 1001) ?(inner = 10) f = + if n <= 70 then (* some of the formulas below are not valid for smaller [n] *) + invalid_arg (Printf.sprintf "n must be at least 70: %d" n) ; + (* warmup *) + Sys.opaque_identity (f ()) ; + + let measure_inner _ = + let m = Mtime_clock.counter () in + for _ = 1 to inner do + (* opaque_identity prevents the call from being optimized away *) + Sys.opaque_identity (f ()) + done ; + let elapsed = Mtime_clock.count m in + span_to_s elapsed /. float inner + in + let measurements = Array.init n measure_inner in + ci95 measurements + +let measure_min ?(n = 1001) f arg = + (* warmup *) + Sys.opaque_identity (f arg) ; + let measure_one _ = + let m = Mtime_clock.counter () in + Sys.opaque_identity (f arg) ; + let elapsed = Mtime_clock.count m in + span_to_s elapsed + in + Seq.ints 0 + |> Seq.take n + |> Seq.map measure_one + |> Seq.fold_left Float.min Float.max_float diff --git a/ocaml/libs/timeslice/simple_measure.mli b/ocaml/libs/timeslice/simple_measure.mli new file mode 100644 index 0000000000..ec6d782e91 --- /dev/null +++ b/ocaml/libs/timeslice/simple_measure.mli @@ -0,0 +1,33 @@ +(** Measure the speed of an operation in a very simple and robust way. + More detailed measurements can be dune using [Bechamel]. +*) + +(** 95% confidence interval, and median value *) +type t = {low: float; median: float; high: float} + +val measure : ?n:int -> ?inner:int -> (unit -> unit) -> t +(** [measure ?n ?inner f] measures [n] times the duration of [inner] iterations of [f ()]. + + Returns the median of the inner measurements, and a 95% confidence interval. + The median is used, because it makes no assumptions about the distribution of the samples, + i.e. it doesn't require a normal (Gaussian) distribution. + + The inner measurements use a simple average, because we only know the duration of [inner] iterations, + not the duration of each individual call to [f ()]. + The purpose of the [inner] iterations is to reduce measurement overhead. + + @param n iteration count for the outer loop, must be more than [70]. + @param n iteration count for the inner loop + @param f function to measure + + @raises Invalid_argument if [n<70] + *) + +val measure_min : ?n:int -> ('a -> unit) -> 'a -> float +(** [measure_min ?n:int f arg] is the minimum amount of time that [f arg] takes. + + This should be used when we try to measure the maximum speed of some operation (e.g. cached memory accesses), + while ignoring latencies/hickups introduced by other processes on the system. + + It shouldn't be used for measuring the overhead of an operation, because the hickups may be part of that overhead. + *) diff --git a/ocaml/libs/timeslice/timeslice.ml b/ocaml/libs/timeslice/timeslice.ml new file mode 100644 index 0000000000..45e5357458 --- /dev/null +++ b/ocaml/libs/timeslice/timeslice.ml @@ -0,0 +1,48 @@ +(* avoid allocating an extra option every time *) +let invalid_holder = -1 + +let last_lock_holder = Atomic.make invalid_holder + +let me () = Thread.self () |> Thread.id + +let lock_acquired () = + (* these need to be very low overhead, so just keep track of the last lock holder, + i.e. track only one high-priority lock at a time + *) + Atomic.set last_lock_holder (me ()) + +let lock_released () = Atomic.set last_lock_holder invalid_holder + +let[@inline always] am_i_holding_locks () = + let last = Atomic.get last_lock_holder in + last <> invalid_holder && last = me () + +let yield_interval = Atomic.make Mtime.Span.zero + +(* TODO: use bechamel.monotonic-clock instead, which has lower overhead, + but not in the right place in xs-opam yet +*) +let last_yield = Atomic.make (Mtime_clock.counter ()) + +let periodic_hook (_ : Gc.Memprof.allocation) = + ( if not (am_i_holding_locks ()) then + let elapsed = Mtime_clock.count (Atomic.get last_yield) in + if Mtime.Span.compare elapsed (Atomic.get yield_interval) > 0 then ( + let now = Mtime_clock.counter () in + Atomic.set last_yield now ; Thread.yield () + ) + ) ; + None + +let periodic = + Gc.Memprof. + {null_tracker with alloc_minor= periodic_hook; alloc_major= periodic_hook} + +let set ?(sampling_rate = 1e-4) interval = + Atomic.set yield_interval + (Mtime.Span.of_float_ns @@ (interval *. 1e9) |> Option.get) ; + Gc.Memprof.start ~sampling_rate ~callstack_size:0 periodic + +let clear () = + Gc.Memprof.stop () ; + Atomic.set yield_interval Mtime.Span.zero diff --git a/ocaml/libs/timeslice/timeslice.mli b/ocaml/libs/timeslice/timeslice.mli new file mode 100644 index 0000000000..0b8f0bf1c6 --- /dev/null +++ b/ocaml/libs/timeslice/timeslice.mli @@ -0,0 +1,30 @@ +val set : ?sampling_rate:float -> float -> unit +(** [set ?sampling_rate interval] calls [Thread.yield ()] at most [interval] seconds. + + The implementation of [Thread.yield] guarantees since OCaml 4.09 that we'll switch to a different OCaml thread, + if one exists that is not blocked (i.e. it doesn't rely on [sched_yield] which may run the same thread again, + but uses pthread mutexes and condition variables to ensure the current thread isn't immediately runnable). + + The setting is global for the entire process, and currently uses [Gc.Memprof] to ensure that a hook function is called periodically, + although it depends on the allocation rate of the program whether it gets called at all. + + Another alternative would be to use {!val:Unix.set_itimer}, but XAPI doesn't cope with [EINTR] in a lot of places, + and POSIX interval timers rely on signals to notify of elapsed time. + + We could also have a dedicated thread that sleeps for a certain amount of time, but if it is an OCaml thread, + we'd have no guarantees it'd get scheduled often enough (and it couldn't interrupt other threads anyway, + by the time you'd be running the handler you already gave up running something else). + + It may be desirable to avoid yielding if we are currently holding a lock, see {!val:lock_acquired}, and {!val:lock_released} + to notify this module when that happens. +*) + +val clear : unit -> unit +(** [clear ()] undoes the changes made by [set]. + This is useful for testing multiple timeslices in the same program. *) + +val lock_acquired : unit -> unit +(** [lock_acquired ()] notifies about lock acquisition. *) + +val lock_released : unit -> unit +(** [lock_acquired ()] notifies about lock release. *) diff --git a/ocaml/tests/common/dune b/ocaml/tests/common/dune index 29acca3d2c..a8ab57a4a2 100644 --- a/ocaml/tests/common/dune +++ b/ocaml/tests/common/dune @@ -28,6 +28,7 @@ xapi-stdext-date xapi-stdext-threads.scheduler xapi-stdext-unix + xapi_timeslice ) ) diff --git a/ocaml/tests/common/suite_init.ml b/ocaml/tests/common/suite_init.ml index e63deae17b..adb9c501e8 100644 --- a/ocaml/tests/common/suite_init.ml +++ b/ocaml/tests/common/suite_init.ml @@ -11,4 +11,6 @@ let harness_init () = Filename.concat Test_common.working_area "xapi-inventory" ; Xcp_client.use_switch := false ; Pool_role.set_pool_role_for_test () ; - Message_forwarding.register_callback_fns () + Message_forwarding.register_callback_fns () ; + (* for unit tests use a fixed value *) + Xapi_timeslice.Timeslice.set 0.004 diff --git a/ocaml/xapi-idl/lib/dune b/ocaml/xapi-idl/lib/dune index fed65ab125..8f0d7ca27d 100644 --- a/ocaml/xapi-idl/lib/dune +++ b/ocaml/xapi-idl/lib/dune @@ -26,6 +26,7 @@ unix uri uuidm + xapi_timeslice xapi-backtrace xapi-consts xapi-log diff --git a/ocaml/xapi-idl/lib/xcp_service.ml b/ocaml/xapi-idl/lib/xcp_service.ml index 01c65bc49f..b9ec0313c0 100644 --- a/ocaml/xapi-idl/lib/xcp_service.ml +++ b/ocaml/xapi-idl/lib/xcp_service.ml @@ -163,6 +163,25 @@ let setify = in loop [] +(** How long to let an OCaml thread run, before + switching to another thread. + This needs to be as small as possible to reduce latency. + + Too small values reduce performance due to context switching overheads + + 4ms = 1/HZ in Dom0 seems like a good default, + a better value will be written by a boot time service. + *) +let timeslice = ref 0.05 + +let apply_timeslice () = + let interval = !timeslice in + D.debug "Setting timeslice to %.3fs" interval ; + if interval >= 0.05 then + D.debug "Timeslice same as or larger than default: not setting" + else + Xapi_timeslice.Timeslice.set interval + let common_options = [ ( "use-switch" @@ -236,6 +255,11 @@ let common_options = , (fun () -> !config_dir) , "Location of directory containing configuration file fragments" ) + ; ( "timeslice" + , Arg.Set_float timeslice + , (fun () -> Printf.sprintf "%.3f" !timeslice) + , "timeslice in seconds" + ) ] let loglevel () = !log_level @@ -454,6 +478,7 @@ let configure_common ~options ~resources arg_parse_fn = failwith (String.concat "\n" lines) ) resources ; + apply_timeslice () ; Sys.set_signal Sys.sigpipe Sys.Signal_ignore let configure ?(argv = Sys.argv) ?(options = []) ?(resources = []) () =