From 0fbd6c700844b21fccf65fbdcbe75ff909e3584c Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 24 Sep 2024 14:05:11 +0100 Subject: [PATCH 1/6] IH-615: rrdd - Factor out Xenctrl functionality into a separate library Several metrics collectors still rely on a similar function in xcp_rrdd, but plugins will use this factored-out version. Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/lib/plugin/dune | 16 ++++++ ocaml/xcp-rrdd/lib/plugin/xenctrl_lib.ml | 59 +++++++++++++++++++++++ ocaml/xcp-rrdd/lib/plugin/xenctrl_lib.mli | 18 +++++++ 3 files changed, 93 insertions(+) create mode 100644 ocaml/xcp-rrdd/lib/plugin/xenctrl_lib.ml create mode 100644 ocaml/xcp-rrdd/lib/plugin/xenctrl_lib.mli diff --git a/ocaml/xcp-rrdd/lib/plugin/dune b/ocaml/xcp-rrdd/lib/plugin/dune index 12710f3305e..b2370504780 100644 --- a/ocaml/xcp-rrdd/lib/plugin/dune +++ b/ocaml/xcp-rrdd/lib/plugin/dune @@ -22,6 +22,22 @@ ) ) +(library + (name rrdd_plugin_xenctrl) + (public_name rrdd-plugin.xenctrl) + (flags (:standard -bin-annot)) + (wrapped false) + (modules xenctrl_lib) + (libraries + astring + xenctrl + ezxenstore.core + uuid + xapi-log + threads.posix + ) +) + (library (name rrdd_plugin_local) (public_name rrdd-plugin.local) diff --git a/ocaml/xcp-rrdd/lib/plugin/xenctrl_lib.ml b/ocaml/xcp-rrdd/lib/plugin/xenctrl_lib.ml new file mode 100644 index 00000000000..a486567d78c --- /dev/null +++ b/ocaml/xcp-rrdd/lib/plugin/xenctrl_lib.ml @@ -0,0 +1,59 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + *) + +module D = Debug.Make (struct let name = "xcp-rrdp-xenctrl-lib" end) + +let uuid_blacklist = ["00000000-0000-0000"; "deadbeef-dead-beef"] (* 18-character uuid prefixes; a domain whose uuid starts with one of them is left out of the snapshot *) + +module IntSet = Set.Make (Int) + +let domain_snapshot xc = (* returns (timestamp, domains, paused_uuids); each element of domains is (domaininfo, uuid, domid) *) + let metadata_of_domain dom = + let ( let* ) = Option.bind in + let* uuid_raw = Uuidx.of_int_array dom.Xenctrl.handle in (* a None here makes metadata_of_domain return None, so the filter_map below drops the domain *) + let uuid = Uuidx.to_string uuid_raw in + let domid = dom.Xenctrl.domid in + let start = String.sub uuid 0 18 in (* same length as the uuid_blacklist entries it is compared against *) + (* Actively hide migrating VM uuids, these are temporary and xenops writes + the original and the final uuid to xenstore *) + let uuid_from_key key = + let path = Printf.sprintf "/vm/%s/%s" uuid key in + try Ezxenstore_core.Xenstore.(with_xs (fun xs -> xs.read path)) + with Xs_protocol.Enoent _hint -> + D.info "Couldn't read path %s; falling back to actual uuid" path ; + uuid + in + let stable_uuid = Option.fold ~none:uuid ~some:uuid_from_key in (* None: keep the domain's own uuid; Some key: use the value read from /vm/UUID/KEY *) + if List.mem start uuid_blacklist then + None + else + let key = + if Astring.String.is_suffix ~affix:"000000000000" uuid then + Some "origin-uuid" + else if Astring.String.is_suffix ~affix:"000000000001" uuid then + Some "final-uuid" + else + None + in + Some (dom, stable_uuid key, domid) + in + let domains = + Xenctrl.domain_getinfolist xc 0 |> List.filter_map metadata_of_domain + in + let timestamp = Unix.gettimeofday () in (* taken after the domain list has been collected *) + let domain_paused (d, uuid, _) = + if d.Xenctrl.paused then Some uuid else None + in + let paused_uuids = List.filter_map domain_paused domains in (* uuids here are the possibly substituted ones stored in domains *) + (timestamp, domains, paused_uuids) diff --git a/ocaml/xcp-rrdd/lib/plugin/xenctrl_lib.mli b/ocaml/xcp-rrdd/lib/plugin/xenctrl_lib.mli new file mode 100644 index 00000000000..558158b438c --- /dev/null +++ b/ocaml/xcp-rrdd/lib/plugin/xenctrl_lib.mli @@ -0,0 +1,18 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as 
published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(* Provides a list of running, non-migrating, and paused VMs *) +val domain_snapshot : + Xenctrl.handle + -> float * (Xenctrl.domaininfo * string * int) list * string list From 772229489e8593b165371743b6a93400d3beb5ef Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 24 Sep 2024 13:04:14 +0100 Subject: [PATCH 2/6] IH-615: Move netdev_dss into a separate RRDD plugin It still currently reads from a file written to by networkd and deserializes the stats. Signed-off-by: Andrii Sultanov --- Makefile | 4 +- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 132 --------------- ocaml/xcp-rrdd/bin/rrdp-netdev/dune | 22 +++ ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml | 157 ++++++++++++++++++ .../xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.mli | 0 .../bin/rrdp-scripts/sysconfig-rrdd-plugins | 2 +- scripts/xe-toolstack-restart | 1 + 7 files changed, 183 insertions(+), 135 deletions(-) create mode 100644 ocaml/xcp-rrdd/bin/rrdp-netdev/dune create mode 100644 ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml create mode 100644 ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.mli diff --git a/Makefile b/Makefile index 186b6c3e92f..53d01a4b063 100644 --- a/Makefile +++ b/Makefile @@ -109,7 +109,7 @@ quality-gate: install-scripts: $(MAKE) -C scripts install - + install-python3: $(MAKE) -C python3 install @@ -164,7 +164,7 @@ install-dune1: dune install $(DUNE_IU_PACKAGES1) DUNE_IU_PACKAGES2=-j $(JOBS) --destdir=$(DESTDIR) --prefix=$(OPTDIR) --libdir=$(LIBDIR) --mandir=$(MANDIR) --libexecdir=$(OPTDIR)/libexec --datadir=$(DOCDIR) xapi xe - + install-dune2: dune install $(DUNE_IU_PACKAGES2) diff --git 
a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index dbfbd8cb73b..69d55a217d3 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -415,137 +415,6 @@ let dss_hostload xc domains = ) ] -(*****************************************************) -(* network related code *) -(*****************************************************) - -let dss_netdev doms = - let uuid_of_domid domains domid = - let _, uuid, _ = - try List.find (fun (_, _, domid') -> domid = domid') domains - with Not_found -> - failwith - (Printf.sprintf "Failed to find uuid corresponding to domid: %d" domid) - in - uuid - in - let open Network_stats in - let stats = Network_stats.read_stats () in - let dss, sum_rx, sum_tx = - List.fold_left - (fun (dss, sum_rx, sum_tx) (dev, stat) -> - if not Astring.String.(is_prefix ~affix:"vif" dev) then - let pif_name = "pif_" ^ dev in - ( ( Rrd.Host - , Ds.ds_make ~name:(pif_name ^ "_rx") - ~description: - ("Bytes per second received on physical interface " ^ dev) - ~units:"B/s" ~value:(Rrd.VT_Int64 stat.rx_bytes) ~ty:Rrd.Derive - ~min:0.0 ~default:true () - ) - :: ( Rrd.Host - , Ds.ds_make ~name:(pif_name ^ "_tx") - ~description: - ("Bytes per second sent on physical interface " ^ dev) - ~units:"B/s" ~value:(Rrd.VT_Int64 stat.tx_bytes) - ~ty:Rrd.Derive ~min:0.0 ~default:true () - ) - :: ( Rrd.Host - , Ds.ds_make ~name:(pif_name ^ "_rx_errors") - ~description: - ("Receive errors per second on physical interface " ^ dev) - ~units:"err/s" ~value:(Rrd.VT_Int64 stat.rx_errors) - ~ty:Rrd.Derive ~min:0.0 ~default:false () - ) - :: ( Rrd.Host - , Ds.ds_make ~name:(pif_name ^ "_tx_errors") - ~description: - ("Transmit errors per second on physical interface " ^ dev) - ~units:"err/s" ~value:(Rrd.VT_Int64 stat.tx_errors) - ~ty:Rrd.Derive ~min:0.0 ~default:false () - ) - :: dss - , Int64.add stat.rx_bytes sum_rx - , Int64.add stat.tx_bytes sum_tx - ) - else - ( ( try - let d1, d2 = - Scanf.sscanf dev 
"vif%d.%d" (fun d1 d2 -> (d1, d2)) - in - let vif_name = Printf.sprintf "vif_%d" d2 in - (* Note: rx and tx are the wrong way round because from dom0 we - see the vms backwards *) - let uuid = uuid_of_domid doms d1 in - ( Rrd.VM uuid - , Ds.ds_make ~name:(vif_name ^ "_tx") ~units:"B/s" - ~description: - ("Bytes per second transmitted on virtual interface \ - number '" - ^ string_of_int d2 - ^ "'" - ) - ~value:(Rrd.VT_Int64 stat.rx_bytes) ~ty:Rrd.Derive ~min:0.0 - ~default:true () - ) - :: ( Rrd.VM uuid - , Ds.ds_make ~name:(vif_name ^ "_rx") ~units:"B/s" - ~description: - ("Bytes per second received on virtual interface \ - number '" - ^ string_of_int d2 - ^ "'" - ) - ~value:(Rrd.VT_Int64 stat.tx_bytes) ~ty:Rrd.Derive - ~min:0.0 ~default:true () - ) - :: ( Rrd.VM uuid - , Ds.ds_make ~name:(vif_name ^ "_rx_errors") ~units:"err/s" - ~description: - ("Receive errors per second on virtual interface \ - number '" - ^ string_of_int d2 - ^ "'" - ) - ~value:(Rrd.VT_Int64 stat.tx_errors) ~ty:Rrd.Derive - ~min:0.0 ~default:false () - ) - :: ( Rrd.VM uuid - , Ds.ds_make ~name:(vif_name ^ "_tx_errors") ~units:"err/s" - ~description: - ("Transmit errors per second on virtual interface \ - number '" - ^ string_of_int d2 - ^ "'" - ) - ~value:(Rrd.VT_Int64 stat.rx_errors) ~ty:Rrd.Derive - ~min:0.0 ~default:false () - ) - :: dss - with _ -> dss - ) - , sum_rx - , sum_tx - ) - ) - ([], 0L, 0L) stats - in - [ - ( Rrd.Host - , Ds.ds_make ~name:"pif_aggr_rx" - ~description:"Bytes per second received on all physical interfaces" - ~units:"B/s" ~value:(Rrd.VT_Int64 sum_rx) ~ty:Rrd.Derive ~min:0.0 - ~default:true () - ) - ; ( Rrd.Host - , Ds.ds_make ~name:"pif_aggr_tx" - ~description:"Bytes per second sent on all physical interfaces" - ~units:"B/s" ~value:(Rrd.VT_Int64 sum_tx) ~ty:Rrd.Derive ~min:0.0 - ~default:true () - ) - ] - @ dss - (*****************************************************) (* memory stats *) (*****************************************************) @@ -834,7 +703,6 @@ let 
dom0_stat_generators = ; ("vcpus", fun xc _ domains -> dss_vcpus xc domains) ; ("loadavg", fun _ _ _ -> dss_loadavg ()) ; ("hostload", fun xc _ domains -> dss_hostload xc domains) - ; ("netdev", fun _ _ domains -> dss_netdev domains) ; ("cache", fun _ timestamp _ -> dss_cache timestamp) ] diff --git a/ocaml/xcp-rrdd/bin/rrdp-netdev/dune b/ocaml/xcp-rrdd/bin/rrdp-netdev/dune new file mode 100644 index 00000000000..7c538027368 --- /dev/null +++ b/ocaml/xcp-rrdd/bin/rrdp-netdev/dune @@ -0,0 +1,22 @@ +(executable + (modes exe) + (name rrdp_netdev) + (libraries + astring + rrdd-plugin + rrdd-plugin.xenctrl + rrdd_plugins_libs + xapi-idl + xapi-idl.network + xapi-idl.rrd + xapi-log + xapi-rrd + xenctrl + ) +) + +(install + (package xapi) + (files (rrdp_netdev.exe as xcp-rrdd-plugins/xcp-rrdd-netdev)) + (section libexec_root) +) diff --git a/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml b/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml new file mode 100644 index 00000000000..55be1e88a0b --- /dev/null +++ b/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml @@ -0,0 +1,157 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +open Rrdd_plugin + +module D = Debug.Make (struct let name = "xcp-rrdp-netdev" end) + +module Process = Rrdd_plugin.Process (struct let name = "xcp-rrdd-netdev" end) + +let generate_netdev_dss doms () = + let uuid_of_domid domains domid = + let _, uuid, _ = + try List.find (fun (_, _, domid') -> domid = domid') domains + with Not_found -> + failwith + (Printf.sprintf "Failed to find uuid corresponding to domid: %d" domid) + in + uuid + in + let open Network_stats in + let stats = Network_stats.read_stats () in + let dss, sum_rx, sum_tx = + List.fold_left + (fun (dss, sum_rx, sum_tx) (dev, stat) -> + if not Astring.String.(is_prefix ~affix:"vif" dev) then + let pif_name = "pif_" ^ dev in + ( ( Rrd.Host + , Ds.ds_make ~name:(pif_name ^ "_rx") + ~description: + ("Bytes per second received on physical interface " ^ dev) + ~units:"B/s" ~value:(Rrd.VT_Int64 stat.rx_bytes) ~ty:Rrd.Derive + ~min:0.0 ~default:true () + ) + :: ( Rrd.Host + , Ds.ds_make ~name:(pif_name ^ "_tx") + ~description: + ("Bytes per second sent on physical interface " ^ dev) + ~units:"B/s" ~value:(Rrd.VT_Int64 stat.tx_bytes) + ~ty:Rrd.Derive ~min:0.0 ~default:true () + ) + :: ( Rrd.Host + , Ds.ds_make ~name:(pif_name ^ "_rx_errors") + ~description: + ("Receive errors per second on physical interface " ^ dev) + ~units:"err/s" ~value:(Rrd.VT_Int64 stat.rx_errors) + ~ty:Rrd.Derive ~min:0.0 ~default:false () + ) + :: ( Rrd.Host + , Ds.ds_make ~name:(pif_name ^ "_tx_errors") + ~description: + ("Transmit errors per second on physical interface " ^ dev) + ~units:"err/s" ~value:(Rrd.VT_Int64 stat.tx_errors) + ~ty:Rrd.Derive ~min:0.0 ~default:false () + ) + :: dss + , Int64.add stat.rx_bytes sum_rx + , Int64.add stat.tx_bytes sum_tx + ) + else + ( ( try + let d1, d2 = + Scanf.sscanf dev "vif%d.%d" (fun d1 d2 -> (d1, d2)) + in + let vif_name = Printf.sprintf "vif_%d" d2 in + (* Note: rx and tx are the wrong way round because from dom0 we + see the vms backwards *) + let uuid = uuid_of_domid doms d1 in + 
( Rrd.VM uuid + , Ds.ds_make ~name:(vif_name ^ "_tx") ~units:"B/s" + ~description: + ("Bytes per second transmitted on virtual interface \ + number '" + ^ string_of_int d2 + ^ "'" + ) + ~value:(Rrd.VT_Int64 stat.rx_bytes) ~ty:Rrd.Derive ~min:0.0 + ~default:true () + ) + :: ( Rrd.VM uuid + , Ds.ds_make ~name:(vif_name ^ "_rx") ~units:"B/s" + ~description: + ("Bytes per second received on virtual interface \ + number '" + ^ string_of_int d2 + ^ "'" + ) + ~value:(Rrd.VT_Int64 stat.tx_bytes) ~ty:Rrd.Derive + ~min:0.0 ~default:true () + ) + :: ( Rrd.VM uuid + , Ds.ds_make ~name:(vif_name ^ "_rx_errors") ~units:"err/s" + ~description: + ("Receive errors per second on virtual interface \ + number '" + ^ string_of_int d2 + ^ "'" + ) + ~value:(Rrd.VT_Int64 stat.tx_errors) ~ty:Rrd.Derive + ~min:0.0 ~default:false () + ) + :: ( Rrd.VM uuid + , Ds.ds_make ~name:(vif_name ^ "_tx_errors") ~units:"err/s" + ~description: + ("Transmit errors per second on virtual interface \ + number '" + ^ string_of_int d2 + ^ "'" + ) + ~value:(Rrd.VT_Int64 stat.rx_errors) ~ty:Rrd.Derive + ~min:0.0 ~default:false () + ) + :: dss + with _ -> dss + ) + , sum_rx + , sum_tx + ) + ) + ([], 0L, 0L) stats + in + [ + ( Rrd.Host + , Ds.ds_make ~name:"pif_aggr_rx" + ~description:"Bytes per second received on all physical interfaces" + ~units:"B/s" ~value:(Rrd.VT_Int64 sum_rx) ~ty:Rrd.Derive ~min:0.0 + ~default:true () + ) + ; ( Rrd.Host + , Ds.ds_make ~name:"pif_aggr_tx" + ~description:"Bytes per second sent on all physical interfaces" + ~units:"B/s" ~value:(Rrd.VT_Int64 sum_tx) ~ty:Rrd.Derive ~min:0.0 + ~default:true () + ) + ] + @ dss + +(* Entry point: initialises the plugin process and runs the reporting loop with [dss_f] as the data-source generator *) +let _ = + Xenctrl.with_intf (fun xc -> + Process.initialise () ; + (* Share one page per virtual NIC - documentation specifies max is 512 *) + let shared_page_count = 512 in + (* Take a fresh domain snapshot on every collection cycle. A list captured once at start-up never contains VMs started later, and generate_netdev_dss silently drops (with _ -> dss) every VIF whose domid is not in the list. *) + let dss_f () = + let _, domains, _ = Xenctrl_lib.domain_snapshot xc in + generate_netdev_dss 
domains () + in + Process.main_loop ~neg_shift:0.5 + ~target:(Reporter.Local shared_page_count) ~protocol:Rrd_interface.V2 + ~dss_f + ) diff --git a/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.mli b/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.mli new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ocaml/xcp-rrdd/bin/rrdp-scripts/sysconfig-rrdd-plugins b/ocaml/xcp-rrdd/bin/rrdp-scripts/sysconfig-rrdd-plugins index ced7c537254..97846e704e4 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-scripts/sysconfig-rrdd-plugins +++ b/ocaml/xcp-rrdd/bin/rrdp-scripts/sysconfig-rrdd-plugins @@ -1 +1 @@ -PLUGINS="xcp-rrdd-iostat xcp-rrdd-squeezed xcp-rrdd-xenpm xcp-rrdd-dcmi" +PLUGINS="xcp-rrdd-iostat xcp-rrdd-squeezed xcp-rrdd-xenpm xcp-rrdd-dcmi xcp-rrdd-netdev" diff --git a/scripts/xe-toolstack-restart b/scripts/xe-toolstack-restart index 32ee88609c5..1ceeeddbe5b 100755 --- a/scripts/xe-toolstack-restart +++ b/scripts/xe-toolstack-restart @@ -29,6 +29,7 @@ POOLCONF=`cat @ETCXENDIR@/pool.conf` if [ $POOLCONF == "master" ]; then MPATHALERT="mpathalert"; else MPATHALERT=""; fi SERVICES="message-switch perfmon v6d xenopsd xenopsd-xc xenopsd-xenlight xenopsd-simulator xenopsd-libvirt xcp-rrdd-iostat xcp-rrdd-squeezed + xcp-rrdd-netdev xcp-rrdd-xenpm xcp-rrdd-gpumon xcp-rrdd xcp-networkd squeezed forkexecd $MPATHALERT xapi-storage-script xapi-clusterd varstored-guard" From b3ea09222b7d203575588294f97abaa017587847 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 24 Sep 2024 14:26:59 +0100 Subject: [PATCH 3/6] IH-615: Move CPU-related data-source collection into a separate RRDD plugin Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/bin/rrdd/rrdd_common.ml | 4 - ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 219 ---------------- ocaml/xcp-rrdd/bin/rrdp-cpu/dune | 21 ++ ocaml/xcp-rrdd/bin/rrdp-cpu/rrdd_common.ml | 18 ++ ocaml/xcp-rrdd/bin/rrdp-cpu/rrdd_common.mli | 15 ++ ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml | 246 ++++++++++++++++++ ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.mli | 0 .../bin/rrdp-scripts/sysconfig-rrdd-plugins | 2 +- quality-gate.sh | 2 +- scripts/xe-toolstack-restart | 4 +- 10 files changed, 
304 insertions(+), 227 deletions(-) delete mode 100644 ocaml/xcp-rrdd/bin/rrdd/rrdd_common.ml create mode 100644 ocaml/xcp-rrdd/bin/rrdp-cpu/dune create mode 100644 ocaml/xcp-rrdd/bin/rrdp-cpu/rrdd_common.ml create mode 100644 ocaml/xcp-rrdd/bin/rrdp-cpu/rrdd_common.mli create mode 100644 ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml create mode 100644 ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.mli diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_common.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_common.ml deleted file mode 100644 index dd86dbcf1dd..00000000000 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_common.ml +++ /dev/null @@ -1,4 +0,0 @@ -let loadavg () = - let split_colon line = Astring.String.fields ~empty:false line in - let all = Xapi_stdext_unix.Unixext.string_of_file "/proc/loadavg" in - try float_of_string (List.hd (split_colon all)) with _ -> -1. diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index 69d55a217d3..d7fec8abbe0 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -200,221 +200,6 @@ end module Watcher = Watch.WatchXenstore (Meminfo) -(*****************************************************) -(* cpu related code *) -(*****************************************************) - -let xen_flag_complement = Int64.(shift_left 1L 63 |> lognot) - -(* This function is used for getting vcpu stats of the VMs present on this host. *) -let dss_vcpus xc doms = - List.fold_left - (fun dss (dom, uuid, domid) -> - let maxcpus = dom.Xenctrl.max_vcpu_id + 1 in - let rec cpus i dss = - if i >= maxcpus then - dss - else - let vcpuinfo = Xenctrl.domain_get_vcpuinfo xc domid i in - (* Workaround for Xen leaking the flag XEN_RUNSTATE_UPDATE; using a - mask of its complement ~(1 << 63) *) - let cpu_time = - Int64.( - to_float @@ logand vcpuinfo.Xenctrl.cputime xen_flag_complement - ) - in - (* Convert from nanoseconds to seconds *) - let cpu_time = cpu_time /. 
1.0e9 in - let cputime_rrd = - ( Rrd.VM uuid - , Ds.ds_make ~name:(Printf.sprintf "cpu%d" i) ~units:"(fraction)" - ~description:(Printf.sprintf "CPU%d usage" i) - ~value:(Rrd.VT_Float cpu_time) ~ty:Rrd.Derive ~default:true - ~min:0.0 ~max:1.0 () - ) - in - cpus (i + 1) (cputime_rrd :: dss) - in - (* Runstate info is per-domain rather than per-vcpu *) - let dss = - let dom_cpu_time = - Int64.(to_float @@ logand dom.Xenctrl.cpu_time xen_flag_complement) - in - let dom_cpu_time = - dom_cpu_time /. (1.0e9 *. float_of_int dom.Xenctrl.nr_online_vcpus) - in - try - let ri = Xenctrl.domain_get_runstate_info xc domid in - ( Rrd.VM uuid - , Ds.ds_make ~name:"runstate_fullrun" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time0 /. 1.0e9)) - ~description:"Fraction of time that all VCPUs are running" - ~ty:Rrd.Derive ~default:false ~min:0.0 () - ) - :: ( Rrd.VM uuid - , Ds.ds_make ~name:"runstate_full_contention" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time1 /. 1.0e9)) - ~description: - "Fraction of time that all VCPUs are runnable (i.e., \ - waiting for CPU)" - ~ty:Rrd.Derive ~default:false ~min:0.0 () - ) - :: ( Rrd.VM uuid - , Ds.ds_make ~name:"runstate_concurrency_hazard" - ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time2 /. 1.0e9)) - ~description: - "Fraction of time that some VCPUs are running and some are \ - runnable" - ~ty:Rrd.Derive ~default:false ~min:0.0 () - ) - :: ( Rrd.VM uuid - , Ds.ds_make ~name:"runstate_blocked" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time3 /. 1.0e9)) - ~description: - "Fraction of time that all VCPUs are blocked or offline" - ~ty:Rrd.Derive ~default:false ~min:0.0 () - ) - :: ( Rrd.VM uuid - , Ds.ds_make ~name:"runstate_partial_run" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time4 /. 
1.0e9)) - ~description: - "Fraction of time that some VCPUs are running, and some are \ - blocked" - ~ty:Rrd.Derive ~default:false ~min:0.0 () - ) - :: ( Rrd.VM uuid - , Ds.ds_make ~name:"runstate_partial_contention" - ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time5 /. 1.0e9)) - ~description: - "Fraction of time that some VCPUs are runnable and some are \ - blocked" - ~ty:Rrd.Derive ~default:false ~min:0.0 () - ) - :: ( Rrd.VM uuid - , Ds.ds_make - ~name:(Printf.sprintf "cpu_usage") - ~units:"(fraction)" - ~description:(Printf.sprintf "Domain CPU usage") - ~value:(Rrd.VT_Float dom_cpu_time) ~ty:Rrd.Derive ~default:true - ~min:0.0 ~max:1.0 () - ) - :: dss - with _ -> dss - in - try cpus 0 dss with _ -> dss - ) - [] doms - -let physcpus = ref [||] - -let dss_pcpus xc = - let len = Array.length !physcpus in - let newinfos = - if len = 0 then ( - let physinfo = Xenctrl.physinfo xc in - let pcpus = physinfo.Xenctrl.nr_cpus in - physcpus := if pcpus > 0 then Array.make pcpus 0L else [||] ; - Xenctrl.pcpu_info xc pcpus - ) else - Xenctrl.pcpu_info xc len - in - let dss, len_newinfos = - Array.fold_left - (fun (acc, i) v -> - ( ( Rrd.Host - , Ds.ds_make ~name:(Printf.sprintf "cpu%d" i) ~units:"(fraction)" - ~description:("Physical cpu usage for cpu " ^ string_of_int i) - ~value:(Rrd.VT_Float (Int64.to_float v /. 1.0e9)) - ~min:0.0 ~max:1.0 ~ty:Rrd.Derive ~default:true - ~transform:(fun x -> 1.0 -. x) - () - ) - :: acc - , i + 1 - ) - ) - ([], 0) newinfos - in - let sum_array = Array.fold_left (fun acc v -> Int64.add acc v) 0L newinfos in - let avg_array = Int64.to_float sum_array /. float_of_int len_newinfos in - let avgcpu_ds = - ( Rrd.Host - , Ds.ds_make ~name:"cpu_avg" ~units:"(fraction)" - ~description:"Average physical cpu usage" - ~value:(Rrd.VT_Float (avg_array /. 1.0e9)) - ~min:0.0 ~max:1.0 ~ty:Rrd.Derive ~default:true - ~transform:(fun x -> 1.0 -. 
x) - () - ) - in - avgcpu_ds :: dss - -let dss_loadavg () = - [ - ( Rrd.Host - , Ds.ds_make ~name:"loadavg" ~units:"(fraction)" - ~description:"Domain0 loadavg" - ~value:(Rrd.VT_Float (Rrdd_common.loadavg ())) - ~ty:Rrd.Gauge ~default:true () - ) - ] - -let count_power_state_running_domains domains = - List.fold_left - (fun count (dom, _, _) -> - if not dom.Xenctrl.paused then count + 1 else count - ) - 0 domains - -let dss_hostload xc domains = - let physinfo = Xenctrl.physinfo xc in - let pcpus = physinfo.Xenctrl.nr_cpus in - let rec sum acc n f = - match n with n when n >= 0 -> sum (acc + f n) (n - 1) f | _ -> acc - in - let load = - List.fold_left - (fun acc (dom, _, domid) -> - sum 0 dom.Xenctrl.max_vcpu_id (fun id -> - let vcpuinfo = Xenctrl.domain_get_vcpuinfo xc domid id in - if vcpuinfo.Xenctrl.online && not vcpuinfo.Xenctrl.blocked then - 1 - else - 0 - ) - + acc - ) - 0 domains - in - let running_domains = count_power_state_running_domains domains in - - let load_per_cpu = float_of_int load /. float_of_int pcpus in - [ - ( Rrd.Host - , Ds.ds_make ~name:"hostload" ~units:"(fraction)" - ~description: - ("Host load per physical cpu, where load refers to " - ^ "the number of vCPU(s) in running or runnable status." 
- ) - ~value:(Rrd.VT_Float load_per_cpu) ~min:0.0 ~ty:Rrd.Gauge ~default:true - () - ) - ; ( Rrd.Host - , Ds.ds_make ~name:"running_vcpus" ~units:"count" - ~description:"The total number of running vCPUs per host" - ~value:(Rrd.VT_Int64 (Int64.of_int load)) - ~min:0.0 ~ty:Rrd.Gauge ~default:true () - ) - ; ( Rrd.Host - , Ds.ds_make ~name:"running_domains" ~units:"count" - ~description:"The total number of running domains per host" - ~value:(Rrd.VT_Int64 (Int64.of_int running_domains)) - ~min:0.0 ~ty:Rrd.Gauge ~default:true () - ) - ] - (*****************************************************) (* memory stats *) (*****************************************************) @@ -699,10 +484,6 @@ let dom0_stat_generators = ("ha", fun _ _ _ -> Rrdd_ha_stats.all ()) ; ("mem_host", fun xc _ _ -> dss_mem_host xc) ; ("mem_vms", fun _ _ domains -> dss_mem_vms domains) - ; ("pcpus", fun xc _ _ -> dss_pcpus xc) - ; ("vcpus", fun xc _ domains -> dss_vcpus xc domains) - ; ("loadavg", fun _ _ _ -> dss_loadavg ()) - ; ("hostload", fun xc _ domains -> dss_hostload xc domains) ; ("cache", fun _ timestamp _ -> dss_cache timestamp) ] diff --git a/ocaml/xcp-rrdd/bin/rrdp-cpu/dune b/ocaml/xcp-rrdd/bin/rrdp-cpu/dune new file mode 100644 index 00000000000..b654417bf0a --- /dev/null +++ b/ocaml/xcp-rrdd/bin/rrdp-cpu/dune @@ -0,0 +1,21 @@ +(executable + (modes exe) + (name rrdp_cpu) + (libraries + astring + rrdd-plugin + rrdd-plugin.xenctrl + rrdd_plugins_libs + xapi-idl.rrd + xapi-log + xapi-rrd + xapi-stdext-unix + xenctrl + ) +) + +(install + (package xapi) + (files (rrdp_cpu.exe as xcp-rrdd-plugins/xcp-rrdd-cpu)) + (section libexec_root) +) diff --git a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdd_common.ml b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdd_common.ml new file mode 100644 index 00000000000..ec60aadc043 --- /dev/null +++ b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdd_common.ml @@ -0,0 +1,18 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it 
under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(* Returns the first whitespace-separated field of /proc/loadavg as a float, or -1. when the file cannot be read or that field cannot be parsed *) +let loadavg () = + let first_field text = List.hd (Astring.String.fields ~empty:false text) in + (* Read inside the [try]: an unreadable file must yield the same -1. sentinel as an unparsable one instead of raising into the caller *) + try + Xapi_stdext_unix.Unixext.string_of_file "/proc/loadavg" + |> first_field + |> float_of_string + with _ -> -1. diff --git a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdd_common.mli b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdd_common.mli new file mode 100644 index 00000000000..dc460df1be7 --- /dev/null +++ b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdd_common.mli @@ -0,0 +1,15 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +val loadavg : unit -> float diff --git a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml new file mode 100644 index 00000000000..8faf484f2b0 --- /dev/null +++ b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml @@ -0,0 +1,246 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +open Rrdd_plugin + +module D = Debug.Make (struct let name = "xcp-rrdp-cpu" end) + +module Process = Rrdd_plugin.Process (struct let name = "xcp-rrdd-cpu" end) + +let xen_flag_complement = Int64.(shift_left 1L 63 |> lognot) + +(* This function is used for getting vcpu stats of the VMs present on this host. *) +let dss_vcpus xc doms = + List.fold_left + (fun dss (dom, uuid, domid) -> + let maxcpus = dom.Xenctrl.max_vcpu_id + 1 in + let rec cpus i dss = + if i >= maxcpus then + dss + else + let vcpuinfo = Xenctrl.domain_get_vcpuinfo xc domid i in + (* Workaround for Xen leaking the flag XEN_RUNSTATE_UPDATE; using a + mask of its complement ~(1 << 63) *) + let cpu_time = + Int64.( + to_float @@ logand vcpuinfo.Xenctrl.cputime xen_flag_complement + ) + in + (* Convert from nanoseconds to seconds *) + let cpu_time = cpu_time /. 
1.0e9 in + let cputime_rrd = + ( Rrd.VM uuid + , Ds.ds_make ~name:(Printf.sprintf "cpu%d" i) ~units:"(fraction)" + ~description:(Printf.sprintf "CPU%d usage" i) + ~value:(Rrd.VT_Float cpu_time) ~ty:Rrd.Derive ~default:true + ~min:0.0 ~max:1.0 () + ) + in + cpus (i + 1) (cputime_rrd :: dss) + in + (* Runstate info is per-domain rather than per-vcpu *) + let dss = + let dom_cpu_time = + Int64.(to_float @@ logand dom.Xenctrl.cpu_time xen_flag_complement) + in + let dom_cpu_time = + dom_cpu_time /. (1.0e9 *. float_of_int dom.Xenctrl.nr_online_vcpus) + in + try + let ri = Xenctrl.domain_get_runstate_info xc domid in + ( Rrd.VM uuid + , Ds.ds_make ~name:"runstate_fullrun" ~units:"(fraction)" + ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time0 /. 1.0e9)) + ~description:"Fraction of time that all VCPUs are running" + ~ty:Rrd.Derive ~default:false ~min:0.0 () + ) + :: ( Rrd.VM uuid + , Ds.ds_make ~name:"runstate_full_contention" ~units:"(fraction)" + ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time1 /. 1.0e9)) + ~description: + "Fraction of time that all VCPUs are runnable (i.e., \ + waiting for CPU)" + ~ty:Rrd.Derive ~default:false ~min:0.0 () + ) + :: ( Rrd.VM uuid + , Ds.ds_make ~name:"runstate_concurrency_hazard" + ~units:"(fraction)" + ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time2 /. 1.0e9)) + ~description: + "Fraction of time that some VCPUs are running and some are \ + runnable" + ~ty:Rrd.Derive ~default:false ~min:0.0 () + ) + :: ( Rrd.VM uuid + , Ds.ds_make ~name:"runstate_blocked" ~units:"(fraction)" + ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time3 /. 1.0e9)) + ~description: + "Fraction of time that all VCPUs are blocked or offline" + ~ty:Rrd.Derive ~default:false ~min:0.0 () + ) + :: ( Rrd.VM uuid + , Ds.ds_make ~name:"runstate_partial_run" ~units:"(fraction)" + ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time4 /. 
1.0e9)) + ~description: + "Fraction of time that some VCPUs are running, and some are \ + blocked" + ~ty:Rrd.Derive ~default:false ~min:0.0 () + ) + :: ( Rrd.VM uuid + , Ds.ds_make ~name:"runstate_partial_contention" + ~units:"(fraction)" + ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time5 /. 1.0e9)) + ~description: + "Fraction of time that some VCPUs are runnable and some are \ + blocked" + ~ty:Rrd.Derive ~default:false ~min:0.0 () + ) + :: ( Rrd.VM uuid + , Ds.ds_make + ~name:(Printf.sprintf "cpu_usage") + ~units:"(fraction)" + ~description:(Printf.sprintf "Domain CPU usage") + ~value:(Rrd.VT_Float dom_cpu_time) ~ty:Rrd.Derive ~default:true + ~min:0.0 ~max:1.0 () + ) + :: dss + with _ -> dss + in + try cpus 0 dss with _ -> dss + ) + [] doms + +let physcpus = ref [||] + +let dss_pcpus xc = + let len = Array.length !physcpus in + let newinfos = + if len = 0 then ( + let physinfo = Xenctrl.physinfo xc in + let pcpus = physinfo.Xenctrl.nr_cpus in + physcpus := if pcpus > 0 then Array.make pcpus 0L else [||] ; + Xenctrl.pcpu_info xc pcpus + ) else + Xenctrl.pcpu_info xc len + in + let dss, len_newinfos = + Array.fold_left + (fun (acc, i) v -> + ( ( Rrd.Host + , Ds.ds_make ~name:(Printf.sprintf "cpu%d" i) ~units:"(fraction)" + ~description:("Physical cpu usage for cpu " ^ string_of_int i) + ~value:(Rrd.VT_Float (Int64.to_float v /. 1.0e9)) + ~min:0.0 ~max:1.0 ~ty:Rrd.Derive ~default:true + ~transform:(fun x -> 1.0 -. x) + () + ) + :: acc + , i + 1 + ) + ) + ([], 0) newinfos + in + let sum_array = Array.fold_left (fun acc v -> Int64.add acc v) 0L newinfos in + let avg_array = Int64.to_float sum_array /. float_of_int len_newinfos in + let avgcpu_ds = + ( Rrd.Host + , Ds.ds_make ~name:"cpu_avg" ~units:"(fraction)" + ~description:"Average physical cpu usage" + ~value:(Rrd.VT_Float (avg_array /. 1.0e9)) + ~min:0.0 ~max:1.0 ~ty:Rrd.Derive ~default:true + ~transform:(fun x -> 1.0 -. 
x) + () + ) + in + avgcpu_ds :: dss + +let dss_loadavg () = + [ + ( Rrd.Host + , Ds.ds_make ~name:"loadavg" ~units:"(fraction)" + ~description:"Domain0 loadavg" + ~value:(Rrd.VT_Float (Rrdd_common.loadavg ())) + ~ty:Rrd.Gauge ~default:true () + ) + ] + +let count_power_state_running_domains domains = + List.fold_left + (fun count (dom, _, _) -> + if not dom.Xenctrl.paused then count + 1 else count + ) + 0 domains + +let dss_hostload xc domains = + let physinfo = Xenctrl.physinfo xc in + let pcpus = physinfo.Xenctrl.nr_cpus in + let rec sum acc n f = + match n with n when n >= 0 -> sum (acc + f n) (n - 1) f | _ -> acc + in + let load = + List.fold_left + (fun acc (dom, _, domid) -> + sum 0 dom.Xenctrl.max_vcpu_id (fun id -> + let vcpuinfo = Xenctrl.domain_get_vcpuinfo xc domid id in + if vcpuinfo.Xenctrl.online && not vcpuinfo.Xenctrl.blocked then + 1 + else + 0 + ) + + acc + ) + 0 domains + in + let running_domains = count_power_state_running_domains domains in + + let load_per_cpu = float_of_int load /. float_of_int pcpus in + [ + ( Rrd.Host + , Ds.ds_make ~name:"hostload" ~units:"(fraction)" + ~description: + ("Host load per physical cpu, where load refers to " + ^ "the number of vCPU(s) in running or runnable status." 
+ ) + ~value:(Rrd.VT_Float load_per_cpu) ~min:0.0 ~ty:Rrd.Gauge ~default:true + () + ) + ; ( Rrd.Host + , Ds.ds_make ~name:"running_vcpus" ~units:"count" + ~description:"The total number of running vCPUs per host" + ~value:(Rrd.VT_Int64 (Int64.of_int load)) + ~min:0.0 ~ty:Rrd.Gauge ~default:true () + ) + ; ( Rrd.Host + , Ds.ds_make ~name:"running_domains" ~units:"count" + ~description:"The total number of running domains per host" + ~value:(Rrd.VT_Int64 (Int64.of_int running_domains)) + ~min:0.0 ~ty:Rrd.Gauge ~default:true () + ) + ] + +let generate_cpu_ds_list xc domains () = + dss_pcpus xc @ dss_vcpus xc domains @ dss_loadavg () @ dss_hostload xc domains + +let _ = + Xenctrl.with_intf (fun xc -> + let _, domains, _ = Xenctrl_lib.domain_snapshot xc in + Process.initialise () ; + (* Share one page per PCPU and dom each *) + let physinfo = Xenctrl.physinfo xc in + let shared_page_count = physinfo.Xenctrl.nr_cpus + List.length domains in + + Process.main_loop ~neg_shift:0.5 + ~target:(Reporter.Local shared_page_count) ~protocol:Rrd_interface.V2 + ~dss_f:(generate_cpu_ds_list xc domains) + ) diff --git a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.mli b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.mli new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ocaml/xcp-rrdd/bin/rrdp-scripts/sysconfig-rrdd-plugins b/ocaml/xcp-rrdd/bin/rrdp-scripts/sysconfig-rrdd-plugins index 97846e704e4..e0650a06dcd 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-scripts/sysconfig-rrdd-plugins +++ b/ocaml/xcp-rrdd/bin/rrdp-scripts/sysconfig-rrdd-plugins @@ -1 +1 @@ -PLUGINS="xcp-rrdd-iostat xcp-rrdd-squeezed xcp-rrdd-xenpm xcp-rrdd-dcmi xcp-rrdd-netdev" +PLUGINS="xcp-rrdd-iostat xcp-rrdd-squeezed xcp-rrdd-xenpm xcp-rrdd-dcmi xcp-rrdd-netdev xcp-rrdd-cpu" diff --git a/quality-gate.sh b/quality-gate.sh index 47e97fa37e2..11a6dee143a 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -25,7 +25,7 @@ verify-cert () { } mli-files () { - N=509 + N=508 # do not count ml files from the tests in 
ocaml/{tests/perftest/quicktest} MLIS=$(git ls-files -- '**/*.mli' | grep -vE "ocaml/tests|ocaml/perftest|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) MLS=$(git ls-files -- '**/*.ml' | grep -vE "ocaml/tests|ocaml/perftest|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) diff --git a/scripts/xe-toolstack-restart b/scripts/xe-toolstack-restart index 1ceeeddbe5b..25856dc67ad 100755 --- a/scripts/xe-toolstack-restart +++ b/scripts/xe-toolstack-restart @@ -18,7 +18,7 @@ LOCKFILE='/dev/shm/xe_toolstack_restart.lock' ( flock -x -n 200 -if [ "$?" != 0 ]; then +if [ "$?" != 0 ]; then echo "Exiting: cannot lock $LOCKFILE. Is an instance of $0 running already?" exit 1 fi @@ -29,7 +29,7 @@ POOLCONF=`cat @ETCXENDIR@/pool.conf` if [ $POOLCONF == "master" ]; then MPATHALERT="mpathalert"; else MPATHALERT=""; fi SERVICES="message-switch perfmon v6d xenopsd xenopsd-xc xenopsd-xenlight xenopsd-simulator xenopsd-libvirt xcp-rrdd-iostat xcp-rrdd-squeezed - xcp-rrdd-netdev + xcp-rrdd-netdev xcp-rrdd-cpu xcp-rrdd-xenpm xcp-rrdd-gpumon xcp-rrdd xcp-networkd squeezed forkexecd $MPATHALERT xapi-storage-script xapi-clusterd varstored-guard" From aa996553bc6cf4dfe7a85afaaaa4106d481f9e0c Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 25 Sep 2024 10:57:10 +0100 Subject: [PATCH 4/6] rrdp-iostat: Use a shared Xenctrl get_doms_stat function Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/bin/rrdp-iostat/dune | 3 +- ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml | 49 +++---------------- 2 files changed, 8 insertions(+), 44 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdp-iostat/dune b/ocaml/xcp-rrdd/bin/rrdp-iostat/dune index 4721f71aed1..3880709282a 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-iostat/dune +++ b/ocaml/xcp-rrdd/bin/rrdp-iostat/dune @@ -4,12 +4,13 @@ (libraries astring cstruct - + ezxenstore.core inotify mtime mtime.clock.os rrdd-plugin + rrdd-plugin.xenctrl 
rrdd_plugins_libs str stringext diff --git a/ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml b/ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml index b8c60edec7e..1502a07f9fa 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml @@ -22,41 +22,6 @@ module Process = Process (struct let name = "xcp-rrdd-iostat" end) open Process open Ezxenstore_core.Xenstore -let with_xc_and_xs f = Xenctrl.with_intf (fun xc -> with_xs (fun xs -> f xc xs)) - -(* Return a list of (domid, uuid) pairs for domUs running on this host *) -let get_running_domUs xc xs = - let metadata_of_domain di = - let open Xenctrl in - let domid = di.domid in - let ( let* ) = Option.bind in - let* uuid_raw = Uuidx.of_int_array di.handle in - let uuid = Uuidx.to_string uuid_raw in - - (* Actively hide migrating VM uuids, these are temporary and xenops - writes the original and the final uuid to xenstore *) - let uuid_from_key key = - let path = Printf.sprintf "/vm/%s/%s" uuid key in - try xs.read path - with Xs_protocol.Enoent _hint -> - D.info "Couldn't read path %s; falling back to actual uuid" path ; - uuid - in - let stable_uuid = Option.fold ~none:uuid ~some:uuid_from_key in - - let key = - if Astring.String.is_suffix ~affix:"000000000000" uuid then - Some "origin-uuid" - else if Astring.String.is_suffix ~affix:"000000000001" uuid then - Some "final-uuid" - else - None - in - Some (domid, stable_uuid key) - in - (* Do not list dom0 *) - Xenctrl.domain_getinfolist xc 1 |> List.filter_map metadata_of_domain - (* A mapping of VDIs to the VMs they are plugged to, in which position, and the device-id *) let vdi_to_vm_map : (string * (string * string * int)) list ref = ref [] @@ -71,11 +36,11 @@ let update_vdi_to_vm_map () = ["/local/domain/0/backend/vbd"; "/local/domain/0/backend/vbd3"] in try - let domUs = with_xc_and_xs get_running_domUs in + let _, domUs, _ = Xenctrl.with_intf Xenctrl_lib.domain_snapshot in D.debug "Running domUs: [%s]" (String.concat 
"; " (List.map - (fun (domid, uuid) -> + (fun (_, uuid, domid) -> Printf.sprintf "%d (%s)" domid (String.sub uuid 0 8) ) domUs @@ -83,7 +48,7 @@ let update_vdi_to_vm_map () = ) ; with_xs (fun xs -> List.map - (fun (domid, vm) -> + (fun (_, vm, domid) -> (* Get VBDs for this domain *) let enoents = ref 0 in let vbds = @@ -981,18 +946,16 @@ let gen_metrics () = in (* relations between dom-id, vm-uuid, device pos, dev-id, etc *) - let domUs = with_xc_and_xs get_running_domUs in + let _, domUs, _ = Xenctrl.with_intf Xenctrl_lib.domain_snapshot in let vdi_to_vm = get_vdi_to_vm_map () in let get_stats_blktap3_by_vdi vdi = if List.mem_assoc vdi vdi_to_vm then let vm_uuid, _pos, devid = List.assoc vdi vdi_to_vm in - match - List.filter (fun (_domid', vm_uuid') -> vm_uuid' = vm_uuid) domUs - with + match List.filter (fun (_, vm_uuid', _) -> vm_uuid' = vm_uuid) domUs with | [] -> (None, None) - | (domid, _vm_uuid) :: _ -> + | (_, _, domid) :: _ -> let find_blktap3 blktap3_assoc_list = let key = (domid, devid) in if List.mem_assoc key blktap3_assoc_list then From 6bf1d7407c4167e234b8802a19221b973b73ebd0 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 19 Sep 2024 07:59:56 +0100 Subject: [PATCH 5/6] Update docs about RRDD plugins Signed-off-by: Andrii Sultanov --- doc/content/toolstack/high-level/daemons.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/content/toolstack/high-level/daemons.md b/doc/content/toolstack/high-level/daemons.md index 103798bb0d5..bb1d7607fff 100644 --- a/doc/content/toolstack/high-level/daemons.md +++ b/doc/content/toolstack/high-level/daemons.md @@ -20,6 +20,9 @@ xcp-rrdd - xcp-rrdd-iostat - xcp-rrdd-squeezed - xcp-rrdd-xenpm + - xcp-rrdd-dcmi + - xcp-rrdd-netdev + - xcp-rrdd-cpu xcp-networkd : a host network manager which takes care of configuring interfaces, bridges From aa631b9e9b8976e6b41bb567fc9736e82fb9ef47 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 25 Sep 2024 13:34:38 +0100 Subject: [PATCH 6/6] rrdd: Add 
.service and config files Signed-off-by: Andrii Sultanov --- scripts/Makefile | 12 ++++++++++++ scripts/xcp-rrdd-conf | 6 ++++++ scripts/xcp-rrdd-cpu.service | 15 +++++++++++++++ scripts/xcp-rrdd-dcmi.service | 15 +++++++++++++++ scripts/xcp-rrdd-iostat.service | 15 +++++++++++++++ scripts/xcp-rrdd-netdev.service | 15 +++++++++++++++ scripts/xcp-rrdd-squeezed.service | 15 +++++++++++++++ scripts/xcp-rrdd-sysconfig | 3 +++ scripts/xcp-rrdd-tmp | 1 + scripts/xcp-rrdd-xenpm.service | 15 +++++++++++++++ scripts/xcp-rrdd.service | 21 +++++++++++++++++++++ 11 files changed, 133 insertions(+) create mode 100644 scripts/xcp-rrdd-conf create mode 100644 scripts/xcp-rrdd-cpu.service create mode 100644 scripts/xcp-rrdd-dcmi.service create mode 100644 scripts/xcp-rrdd-iostat.service create mode 100644 scripts/xcp-rrdd-netdev.service create mode 100644 scripts/xcp-rrdd-squeezed.service create mode 100644 scripts/xcp-rrdd-sysconfig create mode 100644 scripts/xcp-rrdd-tmp create mode 100644 scripts/xcp-rrdd-xenpm.service create mode 100644 scripts/xcp-rrdd.service diff --git a/scripts/Makefile b/scripts/Makefile index 4c04da3943c..7583c80d624 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -74,11 +74,23 @@ install: $(IDATA) xapi-nbd.path $(DESTDIR)/usr/lib/systemd/system/xapi-nbd.path $(IDATA) 10-stunnel-increase-number-of-file-descriptors.conf $(DESTDIR)/etc/systemd/system/stunnel@xapi.service.d/10-stunnel-increase-number-of-file-descriptors.conf $(IDATA) 11-stunnel-gencert.conf $(DESTDIR)/etc/systemd/system/stunnel@xapi.service.d/11-stunnel-gencert.conf + $(IDATA) xcp-rrdd.service $(DESTDIR)/usr/lib/systemd/system/xcp-rrdd.service + $(IDATA) xcp-rrdd-xenpm.service $(DESTDIR)/usr/lib/systemd/system/xcp-rrdd-xenpm.service + $(IDATA) xcp-rrdd-iostat.service $(DESTDIR)/usr/lib/systemd/system/xcp-rrdd-iostat.service + $(IDATA) xcp-rrdd-squeezed.service $(DESTDIR)/usr/lib/systemd/system/xcp-rrdd-squeezed.service + $(IDATA) xcp-rrdd-squeezed.service 
$(DESTDIR)/usr/lib/systemd/system/xcp-rrdd-squeezed.service + $(IDATA) xcp-rrdd-dcmi.service $(DESTDIR)/usr/lib/systemd/system/xcp-rrdd-dcmi.service + $(IDATA) xcp-rrdd-cpu.service $(DESTDIR)/usr/lib/systemd/system/xcp-rrdd-cpu.service + $(IDATA) xcp-rrdd-netdev.service $(DESTDIR)/usr/lib/systemd/system/xcp-rrdd-netdev.service mkdir -p $(DESTDIR)$(ETCXENDIR)/master.d $(IPROG) on-master-start $(DESTDIR)$(ETCXENDIR)/master.d/01-example $(IPROG) mpathalert-daemon $(DESTDIR)$(ETCXENDIR)/master.d/03-mpathalert-daemon mkdir -p $(DESTDIR)/etc/sysconfig $(IPROG) sysconfig-xapi $(DESTDIR)/etc/sysconfig/xapi + $(IPROG) xcp-rrdd-sysconfig $(DESTDIR)/etc/sysconfig/xcp-rrdd + $(IPROG) xcp-rrdd-conf $(DESTDIR)/etc/xcp-rrdd.conf + mkdir -p $(DESTDIR)/usr/lib/tmpfiles.d + $(IPROG) xcp-rrdd-tmp $(DESTDIR)/usr/lib/tmpfiles.d/xcp-rrdd.conf $(IPROG) nbd-firewall-config.sh $(DESTDIR)$(LIBEXECDIR) $(IPROG) update-ca-bundle.sh $(DESTDIR)$(OPTDIR)/bin mkdir -p $(DESTDIR)$(OPTDIR)/debug diff --git a/scripts/xcp-rrdd-conf b/scripts/xcp-rrdd-conf new file mode 100644 index 00000000000..5014b73d66e --- /dev/null +++ b/scripts/xcp-rrdd-conf @@ -0,0 +1,6 @@ +# The xcp-rrdd config file + +inventory = /etc/xensource-inventory + +disable-logging-for = http +loglevel = info diff --git a/scripts/xcp-rrdd-cpu.service b/scripts/xcp-rrdd-cpu.service new file mode 100644 index 00000000000..310828dda94 --- /dev/null +++ b/scripts/xcp-rrdd-cpu.service @@ -0,0 +1,15 @@ +[Unit] +Description=XCP RRD daemon CPU plugin +After=xcp-rrdd.service +Requires=xcp-rrdd.service + +[Service] +ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-cpu +StandardError=null +# restart but fail if more than 5 failures in 30s +Restart=on-failure +StartLimitBurst=5 +StartLimitInterval=30s + +[Install] +WantedBy=multi-user.target diff --git a/scripts/xcp-rrdd-dcmi.service b/scripts/xcp-rrdd-dcmi.service new file mode 100644 index 00000000000..64bab4f25b3 --- /dev/null +++ b/scripts/xcp-rrdd-dcmi.service @@ -0,0 +1,15 @@ 
+[Unit] +Description=XCP RRD daemon IPMI DCMI power plugin +After=xcp-rrdd.service +Requires=xcp-rrdd.service + +[Service] +ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-dcmi +StandardError=null +# restart but fail if more than 5 failures in 30s +Restart=on-failure +StartLimitBurst=5 +StartLimitInterval=30s + +[Install] +WantedBy=multi-user.target diff --git a/scripts/xcp-rrdd-iostat.service b/scripts/xcp-rrdd-iostat.service new file mode 100644 index 00000000000..ce724477367 --- /dev/null +++ b/scripts/xcp-rrdd-iostat.service @@ -0,0 +1,15 @@ +[Unit] +Description=XCP RRD daemon iostat plugin +After=xcp-rrdd.service +Requires=xcp-rrdd.service + +[Service] +ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-iostat +StandardError=null +# restart but fail if more than 5 failures in 30s +Restart=on-failure +StartLimitBurst=5 +StartLimitInterval=30s + +[Install] +WantedBy=multi-user.target diff --git a/scripts/xcp-rrdd-netdev.service b/scripts/xcp-rrdd-netdev.service new file mode 100644 index 00000000000..b961cc9d15c --- /dev/null +++ b/scripts/xcp-rrdd-netdev.service @@ -0,0 +1,15 @@ +[Unit] +Description=XCP RRD daemon network plugin +After=xcp-rrdd.service +Requires=xcp-rrdd.service + +[Service] +ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-netdev +StandardError=null +# restart but fail if more than 5 failures in 30s +Restart=on-failure +StartLimitBurst=5 +StartLimitInterval=30s + +[Install] +WantedBy=multi-user.target diff --git a/scripts/xcp-rrdd-squeezed.service b/scripts/xcp-rrdd-squeezed.service new file mode 100644 index 00000000000..bb33fca801c --- /dev/null +++ b/scripts/xcp-rrdd-squeezed.service @@ -0,0 +1,15 @@ +[Unit] +Description=XCP RRD daemon squeezed plugin +After=xcp-rrdd.service +Requires=xcp-rrdd.service + +[Service] +ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-squeezed +StandardError=null +# restart but fail if more than 5 failures in 30s +Restart=on-failure +StartLimitBurst=5 
+StartLimitInterval=30s + +[Install] +WantedBy=multi-user.target diff --git a/scripts/xcp-rrdd-sysconfig b/scripts/xcp-rrdd-sysconfig new file mode 100644 index 00000000000..b0c159f9016 --- /dev/null +++ b/scripts/xcp-rrdd-sysconfig @@ -0,0 +1,3 @@ +# Additional options for the XCP RRD daemon. +# XCP_RRDD_OPTIONS= : any extra command-line startup arguments for xcp-rrdd +XCP_RRDD_OPTIONS= diff --git a/scripts/xcp-rrdd-tmp b/scripts/xcp-rrdd-tmp new file mode 100644 index 00000000000..b829da2fe3c --- /dev/null +++ b/scripts/xcp-rrdd-tmp @@ -0,0 +1 @@ +d /dev/shm/metrics 0775 root rrdmetrics - diff --git a/scripts/xcp-rrdd-xenpm.service b/scripts/xcp-rrdd-xenpm.service new file mode 100644 index 00000000000..092bb4d4bb9 --- /dev/null +++ b/scripts/xcp-rrdd-xenpm.service @@ -0,0 +1,15 @@ +[Unit] +Description=XCP RRD daemon xenpm plugin +After=xcp-rrdd.service +Requires=xcp-rrdd.service + +[Service] +ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-xenpm +StandardError=null +# restart but fail if more than 5 failures in 30s +Restart=on-failure +StartLimitBurst=5 +StartLimitInterval=30s + +[Install] +WantedBy=multi-user.target diff --git a/scripts/xcp-rrdd.service b/scripts/xcp-rrdd.service new file mode 100644 index 00000000000..81e4d78df68 --- /dev/null +++ b/scripts/xcp-rrdd.service @@ -0,0 +1,21 @@ +[Unit] +Description=XCP RRD daemon +After=forkexecd.service xenstored.service message-switch.service syslog.target +Wants=forkexecd.service xenstored.service message-switch.service syslog.target + +[Service] +Type=notify +Environment="LD_PRELOAD=/usr/lib64/libjemalloc.so.2" +Environment="MALLOC_CONF=narenas:1,tcache:false" +Environment=OCAMLRUNPARAM=b +EnvironmentFile=-/etc/sysconfig/xcp-rrdd +ExecStart=/usr/sbin/xcp-rrdd $XCP_RRDD_OPTIONS +SuccessExitStatus=0 +# StandardError=null +# restart but fail if more than 5 failures in 30s +Restart=on-failure +StartLimitBurst=5 +StartLimitInterval=30s + +[Install] +WantedBy=multi-user.target