diff --git a/ocaml/hwtune/dune b/ocaml/hwtune/dune new file mode 100644 index 0000000000..37139c593c --- /dev/null +++ b/ocaml/hwtune/dune @@ -0,0 +1,5 @@ +(executable + (public_name xshwtune) + (libraries xapi_timeslice unix xapi-log) + (package xapi) +) diff --git a/ocaml/hwtune/xshwtune.ml b/ocaml/hwtune/xshwtune.ml new file mode 100644 index 0000000000..a46237442e --- /dev/null +++ b/ocaml/hwtune/xshwtune.ml @@ -0,0 +1,44 @@ +(* Tunes parameters for the hardware and kernel that we are currently running on. *) + +module D = Debug.Make (struct let name = "xshwtune" end) + +let () = + Debug.log_to_stdout () ; + let recommended = Xapi_timeslice.Recommended.measure () in + D.debug "Recommended OCaml timeslice: %.6fs" recommended ; + + let itimer_min = + Sys.set_signal Sys.sigvtalrm Sys.Signal_ignore ; + let _ = + Unix.setitimer Unix.ITIMER_VIRTUAL + Unix.{it_value= 1e-6; it_interval= 1e-6} + in + let actual = Unix.getitimer Unix.ITIMER_VIRTUAL in + actual.Unix.it_value + in + + D.debug "POSIX itimer granularity: %.6fs" itimer_min ; + let recommended = Float.max itimer_min recommended in + + (* just in case something goes very wrong, ensure it is not too small or big *) + let recommended = recommended |> Float.max 0.001 |> Float.min 0.050 in + D.debug "Adjusted timeslice: %.6fs" recommended ; + + (* Use consistent rounding in debug messages and conf file, + by converting to string in a single place. + *) + let recommended = Printf.sprintf "%.3f" recommended in + + D.info "OCaml timeslice: %s" recommended ; + + let write_conf path = + Out_channel.with_open_text path @@ fun ch -> + Printf.fprintf ch "timeslice=%s" recommended + in + + Array.iteri + (fun i arg -> + if i > 0 then + write_conf arg + ) + Sys.argv diff --git a/ocaml/libs/timeslice/dune b/ocaml/libs/timeslice/dune index aa525bb84e..94eff6b3a3 100644 --- a/ocaml/libs/timeslice/dune +++ b/ocaml/libs/timeslice/dune @@ -1,5 +1,5 @@ (library (name xapi_timeslice) - (package xapi) - (libraries threads.posix mtime mtime.clock.os) + (package xapi-idl) + (libraries threads.posix mtime mtime.clock.os xapi-log) ) diff --git a/ocaml/libs/timeslice/recommended.ml b/ocaml/libs/timeslice/recommended.ml new file mode 100644 index 0000000000..489889e0f5 --- /dev/null +++ b/ocaml/libs/timeslice/recommended.ml @@ -0,0 +1,34 @@ +module D = Debug.Make (struct let name = "timeslice_recommended" end) + +let yield_stop = Atomic.make false + +let yield_worker () = + while not (Atomic.get yield_stop) do + Thread.yield () + done + +let yield_overhead () = + (* Thread.yield only has an effect if another thread exists, + so create one that yields back immediately *) + D.debug "Measuring Thread.yield overhead" ; + Atomic.set yield_stop false ; + let t = Thread.create yield_worker () in + let measured = Simple_measure.measure Thread.yield in + D.debug "Thread.yield overhead: %.6fs <= %.6fs <= %.6fs" measured.low + measured.median measured.high ; + D.debug "Waiting for worker thread to stop" ; + Atomic.set yield_stop true ; + Thread.join t ; + measured.median + +let measure ?(max_overhead_percentage = 1.0) () = + let overhead = yield_overhead () in + let interval = overhead /. (max_overhead_percentage /. 100.) in + D.debug "Recommended timeslice interval = %.4fs" interval ; + (* Avoid too high or too low intervals: + do not go below 1ms (our HZ is 250, and max is 1000, the kernel would round up anyway) + do not go above 50ms (the current default in OCaml 4.14) + *) + let interval = interval |> Float.max 0.001 |> Float.min 0.050 in + D.debug "Final recommeded timeslice interval = %.4fs" interval ; + interval diff --git a/ocaml/libs/timeslice/recommended.mli b/ocaml/libs/timeslice/recommended.mli new file mode 100644 index 0000000000..ee9e83130e --- /dev/null +++ b/ocaml/libs/timeslice/recommended.mli @@ -0,0 +1,8 @@ +val measure : ?max_overhead_percentage:float -> unit -> float +(** [measure ?max_overhead_percentage ()] returns the recommended timeslice for the current system. + + The returned value should be used in a call to {!val:Timeslice.set}. + + @param max_overhead_percentage default 1% + @returns [interval] such that [overhead / interval <= max_overhead_percentage / 100] + *) diff --git a/ocaml/libs/timeslice/simple_measure.ml b/ocaml/libs/timeslice/simple_measure.ml new file mode 100644 index 0000000000..9ca069b0e5 --- /dev/null +++ b/ocaml/libs/timeslice/simple_measure.ml @@ -0,0 +1,47 @@ +(** 95% confidence interval, and median value *) +type t = {low: float; median: float; high: float} + +let span_to_s s = Mtime.Span.to_float_ns s *. 1e-9 + +let ci95 measurements = + let n = Array.length measurements in + Array.sort Float.compare measurements ; + let median = measurements.(n / 2) in + (* "Performance Evaluation of Computer and Communication Systems", Table A. 1 *) + let n = float n in + let d = 0.98 *. sqrt n in + let lo = (n /. 2.) -. d |> Float.to_int + and hi = (n /. 2.) +. 1. +. d |> Float.ceil |> Float.to_int in + {low= measurements.(lo - 1); median; high= measurements.(hi - 1)} + +let measure ?(n = 1001) ?(inner = 10) f = + if n <= 70 then (* some of the formulas below are not valid for smaller [n] *) + invalid_arg (Printf.sprintf "n must be at least 70: %d" n) ; + (* warmup *) + Sys.opaque_identity (f ()) ; + + let measure_inner _ = + let m = Mtime_clock.counter () in + for _ = 1 to inner do + (* opaque_identity prevents the call from being optimized away *) + Sys.opaque_identity (f ()) + done ; + let elapsed = Mtime_clock.count m in + span_to_s elapsed /. float inner + in + let measurements = Array.init n measure_inner in + ci95 measurements + +let measure_min ?(n = 1001) f arg = + (* warmup *) + Sys.opaque_identity (f arg) ; + let measure_one _ = + let m = Mtime_clock.counter () in + Sys.opaque_identity (f arg) ; + let elapsed = Mtime_clock.count m in + span_to_s elapsed + in + Seq.ints 0 + |> Seq.take n + |> Seq.map measure_one + |> Seq.fold_left Float.min Float.max_float diff --git a/ocaml/libs/timeslice/simple_measure.mli b/ocaml/libs/timeslice/simple_measure.mli new file mode 100644 index 0000000000..ec6d782e91 --- /dev/null +++ b/ocaml/libs/timeslice/simple_measure.mli @@ -0,0 +1,33 @@ +(** Measure the speed of an operation in a very simple and robust way. + More detailed measurements can be dune using [Bechamel]. +*) + +(** 95% confidence interval, and median value *) +type t = {low: float; median: float; high: float} + +val measure : ?n:int -> ?inner:int -> (unit -> unit) -> t +(** [measure ?n ?inner f] measures [n] times the duration of [inner] iterations of [f ()]. + + Returns the median of the inner measurements, and a 95% confidence interval. + The median is used, because it makes no assumptions about the distribution of the samples, + i.e. it doesn't require a normal (Gaussian) distribution. + + The inner measurements use a simple average, because we only know the duration of [inner] iterations, + not the duration of each individual call to [f ()]. + The purpose of the [inner] iterations is to reduce measurement overhead. + + @param n iteration count for the outer loop, must be more than [70]. + @param n iteration count for the inner loop + @param f function to measure + + @raises Invalid_argument if [n<70] + *) + +val measure_min : ?n:int -> ('a -> unit) -> 'a -> float +(** [measure_min ?n:int f arg] is the minimum amount of time that [f arg] takes. + + This should be used when we try to measure the maximum speed of some operation (e.g. cached memory accesses), + while ignoring latencies/hickups introduced by other processes on the system. + + It shouldn't be used for measuring the overhead of an operation, because the hickups may be part of that overhead. + *) diff --git a/ocaml/tests/common/dune b/ocaml/tests/common/dune index c578f5f978..d76558ef39 100644 --- a/ocaml/tests/common/dune +++ b/ocaml/tests/common/dune @@ -27,6 +27,7 @@ xapi-types xapi-stdext-date xapi-stdext-unix + xapi_timeslice ) ) diff --git a/ocaml/tests/common/suite_init.ml b/ocaml/tests/common/suite_init.ml index e63deae17b..adb9c501e8 100644 --- a/ocaml/tests/common/suite_init.ml +++ b/ocaml/tests/common/suite_init.ml @@ -11,4 +11,6 @@ let harness_init () = Filename.concat Test_common.working_area "xapi-inventory" ; Xcp_client.use_switch := false ; Pool_role.set_pool_role_for_test () ; - Message_forwarding.register_callback_fns () + Message_forwarding.register_callback_fns () ; + (* for unit tests use a fixed value *) + Xapi_timeslice.Timeslice.set 0.004 diff --git a/ocaml/xapi-idl/lib/dune b/ocaml/xapi-idl/lib/dune index fed65ab125..8f0d7ca27d 100644 --- a/ocaml/xapi-idl/lib/dune +++ b/ocaml/xapi-idl/lib/dune @@ -26,6 +26,7 @@ unix uri uuidm + xapi_timeslice xapi-backtrace xapi-consts xapi-log diff --git a/ocaml/xapi-idl/lib/xcp_service.ml b/ocaml/xapi-idl/lib/xcp_service.ml index 01c65bc49f..32201a5a03 100644 --- a/ocaml/xapi-idl/lib/xcp_service.ml +++ b/ocaml/xapi-idl/lib/xcp_service.ml @@ -163,6 +163,26 @@ let setify = in loop [] +(** How long to let an OCaml thread run, before + switching to another thread. + This needs to be as small as possible to reduce latency. + + Too small values reduce performance due to context switching overheads + + 4ms = 1/HZ in Dom0 seems like a good default, + a better value will be written by a boot time service. + *) +let timeslice = ref 0.05 + +let apply_timeslice () = + Debug.log_to_stdout () ; + let interval = !timeslice in + D.debug "Setting timeslice to %.3fs" interval ; + if interval >= 0.05 then + D.debug "Timeslice same as or larger than default: not setting" + else + Xapi_timeslice.Timeslice.set interval + let common_options = [ ( "use-switch" @@ -236,6 +256,11 @@ let common_options = , (fun () -> !config_dir) , "Location of directory containing configuration file fragments" ) + ; ( "timeslice" + , Arg.Set_float timeslice + , (fun () -> Printf.sprintf "%.3f" !timeslice) + , "timeslice in seconds" + ) ] let loglevel () = !log_level @@ -454,6 +479,7 @@ let configure_common ~options ~resources arg_parse_fn = failwith (String.concat "\n" lines) ) resources ; + apply_timeslice () ; Sys.set_signal Sys.sigpipe Sys.Signal_ignore let configure ?(argv = Sys.argv) ?(options = []) ?(resources = []) () = diff --git a/scripts/Makefile b/scripts/Makefile index 503e783854..6498049600 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -58,6 +58,7 @@ install: $(IDATA) xapi-domains.service $(DESTDIR)/usr/lib/systemd/system/xapi-domains.service $(IDATA) generate-iscsi-iqn.service $(DESTDIR)/usr/lib/systemd/system/generate-iscsi-iqn.service $(IDATA) xapi.service $(DESTDIR)/usr/lib/systemd/system/xapi.service + $(IDATA) xshwtune.service $(DESTDIR)/usr/lib/systemd/system/xshwtune.service $(IDATA) attach-static-vdis.service $(DESTDIR)/usr/lib/systemd/system/attach-static-vdis.service $(IDATA) save-boot-info.service $(DESTDIR)/usr/lib/systemd/system/save-boot-info.service $(IDATA) mpathalert.service $(DESTDIR)/usr/lib/systemd/system/mpathalert.service diff --git a/scripts/xshwtune.service b/scripts/xshwtune.service new file mode 100644 index 0000000000..c6ad638726 --- /dev/null +++ b/scripts/xshwtune.service @@ -0,0 +1,15 @@ +[Unit] +Description=Tune system for current hardware and kernel +Before=xapi.service xenopsd-xc.service +ConditionPathExists=/etc/xenserver/features.d/ocamltimeslice +ConditionPathExists=!@ETCDIR@/xenopsd.conf.d/xshwtune.conf + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecCondition=/bin/bash -c '[ "$($