From 30501ada3cab4f464f4889948d7247035b5fb48b Mon Sep 17 00:00:00 2001 From: Danilo Del Busso Date: Mon, 30 Sep 2024 14:13:09 +0100 Subject: [PATCH 001/121] CP-51694: Add testing of C# date converter Signed-off-by: Danilo Del Busso --- .github/workflows/generate-and-build-sdks.yml | 7 + .../autogen/XenServerTest/DateTimeTests.cs | 139 ++++++++++++++++++ .../XenServerTest/XenServerTest.csproj | 27 ++++ .../sdk-gen/csharp/autogen/src/Converters.cs | 15 +- 4 files changed, 183 insertions(+), 5 deletions(-) create mode 100644 ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs create mode 100644 ocaml/sdk-gen/csharp/autogen/XenServerTest/XenServerTest.csproj diff --git a/.github/workflows/generate-and-build-sdks.yml b/.github/workflows/generate-and-build-sdks.yml index a439c969b50..8187c391508 100644 --- a/.github/workflows/generate-and-build-sdks.yml +++ b/.github/workflows/generate-and-build-sdks.yml @@ -138,6 +138,13 @@ jobs: name: SDK_Source_CSharp path: source/ + - name: Test C# SDK + shell: pwsh + run: | + dotnet test source/XenServerTest ` + --disable-build-servers ` + --verbosity=normal + - name: Build C# SDK shell: pwsh run: | diff --git a/ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs b/ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs new file mode 100644 index 00000000000..0bda9474eb0 --- /dev/null +++ b/ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs @@ -0,0 +1,139 @@ +/* + * Copyright (c) Cloud Software Group, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1) Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * 2) Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +using System.Reflection; +using Newtonsoft.Json; +using XenAPI; +using Console = System.Console; + +namespace XenServerTest; + +internal class DateTimeObject +{ + [JsonConverter(typeof(XenDateTimeConverter))] + public DateTime Date { get; set; } +} + +[TestClass] +public class DateTimeTests +{ + private readonly JsonSerializerSettings _settings = new() + { + Converters = new List { new XenDateTimeConverter() } + }; + + [TestMethod] + [DynamicData(nameof(GetTestData), DynamicDataSourceType.Method, + DynamicDataDisplayName = nameof(GetCustomDynamicDataDisplayName))] + public void TestXenDateTimeConverter(string dateString, DateTime expectedDateTime) + { + try + { + var jsonDateString = "{ \"Date\" : \"" + dateString + "\" }"; + var actualDateTime = JsonConvert.DeserializeObject(jsonDateString, _settings); + + Assert.IsNotNull(actualDateTime, $"Failed to convert '{dateString}'"); + Assert.IsTrue(expectedDateTime.Equals(actualDateTime.Date), + $"Conversion of '{dateString}' resulted in an incorrect DateTime value"); + } + catch (Exception ex) + { + // Log the error or mark this specific data entry as failed + Console.WriteLine($@"Error processing dateString '{dateString}': {ex.Message}"); + Assert.Fail($"An error occurred while processing '{dateString}'"); + } + } + + public static string GetCustomDynamicDataDisplayName(MethodInfo methodInfo, object[] data) + { + return $"{methodInfo.Name}: '{data[0] as string}'"; + } + + public static IEnumerable GetTestData() + { + // no dashes, no colons + yield return new object[] { "20220101T123045", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Unspecified) }; + yield return new object[] { "20220101T123045Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc) }; + yield return new object[] { "20220101T123045+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + yield return new object[] { "20220101T123045+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + yield return new object[] { 
"20220101T123045+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + + yield return new object[] + { "20220101T123045.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Unspecified) }; + yield return new object[] + { "20220101T123045.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc) }; + yield return new object[] + { "20220101T123045.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + yield return new object[] + { "20220101T123045.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + yield return new object[] + { "20220101T123045.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + + // no dashes, with colons + yield return new object[] + { "20220101T12:30:45", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Unspecified) }; + yield return new object[] { "20220101T12:30:45Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc) }; + yield return new object[] { "20220101T12:30:45+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + yield return new object[] { "20220101T12:30:45+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + yield return new object[] + { "20220101T12:30:45+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + + yield return new object[] + { "20220101T12:30:45.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Unspecified) }; + yield return new object[] + { "20220101T12:30:45.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc) }; + yield return new object[] + { "20220101T12:30:45.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + yield return new object[] + { "20220101T12:30:45.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + yield return new object[] + { "20220101T12:30:45.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + + // dashes and colons + yield return new object[] + { 
"2022-01-01T12:30:45", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Unspecified) }; + yield return new object[] { "2022-01-01T12:30:45Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc) }; + yield return new object[] { "2022-01-01T12:30:45+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + yield return new object[] + { "2022-01-01T12:30:45+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + yield return new object[] + { "2022-01-01T12:30:45+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + + yield return new object[] + { "2022-01-01T12:30:45.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Unspecified) }; + yield return new object[] + { "2022-01-01T12:30:45.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc) }; + yield return new object[] + { "2022-01-01T12:30:45.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + yield return new object[] + { "2022-01-01T12:30:45.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + yield return new object[] + { "2022-01-01T12:30:45.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + } +} diff --git a/ocaml/sdk-gen/csharp/autogen/XenServerTest/XenServerTest.csproj b/ocaml/sdk-gen/csharp/autogen/XenServerTest/XenServerTest.csproj new file mode 100644 index 00000000000..8300b4b7edb --- /dev/null +++ b/ocaml/sdk-gen/csharp/autogen/XenServerTest/XenServerTest.csproj @@ -0,0 +1,27 @@ + + + + net6.0 + enable + enable + + false + true + + + + + + + + + + + + + + + + + + diff --git a/ocaml/sdk-gen/csharp/autogen/src/Converters.cs b/ocaml/sdk-gen/csharp/autogen/src/Converters.cs index 32b02d987a6..6f828fdc0a6 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/Converters.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/Converters.cs @@ -31,10 +31,12 @@ using System.Collections.Generic; using System.Globalization; using System.Linq; +using System.Runtime.CompilerServices; using Newtonsoft.Json; 
using Newtonsoft.Json.Converters; using Newtonsoft.Json.Linq; +[assembly: InternalsVisibleTo("XenServerTest")] namespace XenAPI { @@ -437,12 +439,16 @@ internal class XenDateTimeConverter : IsoDateTimeConverter public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer) { - string str = JToken.Load(reader).ToString(); + // JsonReader may have already parsed the date for us + if (reader.ValueType != null && reader.ValueType == typeof(DateTime)) + { + return reader.Value; + } - DateTime result; + var str = JToken.Load(reader).ToString(); if (DateTime.TryParseExact(str, DateFormatsUtc, CultureInfo.InvariantCulture, - DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out result)) + DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out var result)) return result; if (DateTime.TryParseExact(str, DateFormatsLocal, CultureInfo.InvariantCulture, @@ -454,9 +460,8 @@ public override object ReadJson(JsonReader reader, Type objectType, object exist public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer) { - if (value is DateTime) + if (value is DateTime dateTime) { - var dateTime = (DateTime)value; dateTime = dateTime.ToUniversalTime(); var text = dateTime.ToString(DateFormatsUtc[0], CultureInfo.InvariantCulture); writer.WriteValue(text); From dc1ef200a16e9d2a0597705157a5201a1221fad6 Mon Sep 17 00:00:00 2001 From: Danilo Del Busso Date: Mon, 30 Sep 2024 14:51:59 +0100 Subject: [PATCH 002/121] CP-51694: Add testing of Java date deserializer Signed-off-by: Danilo Del Busso --- ocaml/sdk-gen/java/autogen/xen-api/pom.xml | 12 ++ .../test/java/CustomDateDeserializerTest.java | 123 ++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 ocaml/sdk-gen/java/autogen/xen-api/src/test/java/CustomDateDeserializerTest.java diff --git a/ocaml/sdk-gen/java/autogen/xen-api/pom.xml b/ocaml/sdk-gen/java/autogen/xen-api/pom.xml index 
66e1b633db2..c3a6cabdfda 100644 --- a/ocaml/sdk-gen/java/autogen/xen-api/pom.xml +++ b/ocaml/sdk-gen/java/autogen/xen-api/pom.xml @@ -62,6 +62,13 @@ httpclient5 5.3 + + + org.junit.jupiter + junit-jupiter + 5.11.1 + test + @@ -119,6 +126,11 @@ + + org.apache.maven.plugins + maven-surefire-plugin + 3.5.0 + diff --git a/ocaml/sdk-gen/java/autogen/xen-api/src/test/java/CustomDateDeserializerTest.java b/ocaml/sdk-gen/java/autogen/xen-api/src/test/java/CustomDateDeserializerTest.java new file mode 100644 index 00000000000..f125e1d1174 --- /dev/null +++ b/ocaml/sdk-gen/java/autogen/xen-api/src/test/java/CustomDateDeserializerTest.java @@ -0,0 +1,123 @@ +/* + * Copyright (c) Cloud Software Group, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1) Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2) Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.module.SimpleModule; +import com.xensource.xenapi.CustomDateDeserializer; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.text.SimpleDateFormat; +import java.util.*; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class CustomDateDeserializerTest { + + private static Stream provideDateStringsAndExpectedDates() { + Hashtable dates = new Hashtable<>(); + + // no dashes, no colons + dates.put("20220101T123045", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T123045Z", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T123045+03", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T123045+0300", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T123045+03:00", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + + dates.put("20220101T123045.123", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T123045.123Z", createDate(2022, 
Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T123045.123+03", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T123045.123+0300", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T123045.123+03:00", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + + // no dashes, with colons + dates.put("20220101T12:30:45", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T12:30:45Z", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T12:30:45+03", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T12:30:45+0300", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T12:30:45+03:00", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + + dates.put("20220101T12:30:45.123", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T12:30:45.123Z", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T12:30:45.123+03", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T12:30:45.123+0300", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T12:30:45.123+03:00", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + + // dashes and colons + dates.put("2022-01-01T12:30:45", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("UTC"))); + dates.put("2022-01-01T12:30:45Z", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("UTC"))); 
+ dates.put("2022-01-01T12:30:45+03", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + dates.put("2022-01-01T12:30:45+0300", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + dates.put("2022-01-01T12:30:45+03:00", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + + dates.put("2022-01-01T12:30:45.123", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("UTC"))); + dates.put("2022-01-01T12:30:45.123Z", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("UTC"))); + dates.put("2022-01-01T12:30:45.123+03", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + dates.put("2022-01-01T12:30:45.123+0300", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + dates.put("2022-01-01T12:30:45.123+03:00", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + + + return dates.entrySet().stream() + .map(entry -> Arguments.of(entry.getKey(), entry.getValue())); + } + + private static Date createDate(int year, int month, int day, int hour, int minute, int seconds, int milliseconds, TimeZone timeZone) { + Calendar calendar = new GregorianCalendar(timeZone); + calendar.set(year, month, day, hour, minute, seconds); + calendar.set(Calendar.MILLISECOND, milliseconds); + return calendar.getTime(); + } + + private static ObjectMapper createObjectMapperWithCustomDeserializer() { + ObjectMapper mapper = new ObjectMapper(); + SimpleModule module = new SimpleModule(); + module.addDeserializer(Date.class, new CustomDateDeserializer()); + mapper.registerModule(module); + return mapper; + } + + @ParameterizedTest + @MethodSource("provideDateStringsAndExpectedDates") + public void shouldParseDateStringsCorrectlyWithCustomDeserializer(String dateString, Date expectedDate) throws Exception { + ObjectMapper mapper = 
createObjectMapperWithCustomDeserializer(); + + Date parsedDate = mapper.readValue("\"" + dateString + "\"", Date.class); + + SimpleDateFormat outputFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS Z"); + String parsedDateString = outputFormat.format(parsedDate); + String expectedDateString = outputFormat.format(expectedDate); + + assertEquals(expectedDate, parsedDate, + () -> "Failed to parse datetime value: " + dateString + + ". Parsed date: " + parsedDateString + + ", expected: " + expectedDateString); + } +} From 8eb26b6924f7f3981b7bbe44781aa2fa7de2fa43 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 16 Oct 2024 13:04:06 +0100 Subject: [PATCH 003/121] CP-50475: Remove unnecessary Parallel atoms from the xenopsd queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parallel atoms do quite a bit of unnecessary actions even when they are empty. They are also not needed when running a single task. They also show as spans in the traces. Removing them makes the traces shorter and easier to read. 
Co-authored-by: Edwin Török Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/lib/xenops_server.ml | 115 ++++++++++------------------- 1 file changed, 41 insertions(+), 74 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 669af5566a1..d5e3da29433 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -1550,6 +1550,18 @@ let dequarantine_ops vgpus = fun vgpu -> PCI_dequarantine vgpu.physical_pci_address ) +(* Avoid generating list-based atoms with 1 or no actions in them *) +let collect_into apply = function [] -> [] | [op] -> [op] | lst -> apply lst + +let parallel name ~id = + collect_into (fun ls -> [Parallel (id, Printf.sprintf "%s VM=%s" name id, ls)]) + +let parallel_concat name ~id lst = parallel name ~id (List.concat lst) + +let parallel_map name ~id lst f = parallel name ~id (List.concat_map f lst) + +let map_or_empty f x = Option.value ~default:[] (Option.map f x) + let rec atomics_of_operation = function | VM_start (id, force) -> let vbds_rw, vbds_ro = VBD_DB.vbds id |> vbd_plug_sets in @@ -1574,36 +1586,21 @@ let rec atomics_of_operation = function (vbds_rw @ vbds_ro) (* keeping behaviour of vbd_plug_order: rw vbds must be plugged before ro vbds, see vbd_plug_sets *) - ; List.map + ; List.concat_map (fun (ty, vbds) -> - Parallel - ( id - , Printf.sprintf "VBD.epoch_begin %s vm=%s" ty id - , List.filter_map - (fun vbd -> - Option.map - (fun x -> - VBD_epoch_begin (vbd.Vbd.id, x, vbd.Vbd.persistent) - ) - vbd.Vbd.backend + parallel_map (Printf.sprintf "VBD.epoch_begin %s" ty) ~id vbds + (fun vbd -> + map_or_empty + (fun x -> + [VBD_epoch_begin (vbd.Vbd.id, x, vbd.Vbd.persistent)] ) - vbds - ) + vbd.Vbd.backend + ) ) [("RW", vbds_rw); ("RO", vbds_ro)] - ; [ - (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) - Parallel - ( id - , Printf.sprintf "VBD.plug RW vm=%s" id - , List.map (fun vbd -> VBD_plug vbd.Vbd.id) vbds_rw - ) - ; Parallel - ( id - , 
Printf.sprintf "VBD.plug RO vm=%s" id - , List.map (fun vbd -> VBD_plug vbd.Vbd.id) vbds_ro - ) - ] + (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) + ; parallel_map "VBD.plug RW" ~id vbds_rw (fun vbd -> [VBD_plug vbd.Vbd.id]) + ; parallel_map "VBD.plug RO" ~id vbds_ro (fun vbd -> [VBD_plug vbd.Vbd.id]) ; List.map (fun vif -> VIF_set_active (vif.Vif.id, true)) vifs ; List.map (fun vif -> VIF_plug vif.Vif.id) vifs ; List.map (fun vgpu -> VGPU_set_active (vgpu.Vgpu.id, true)) vgpus @@ -1623,8 +1620,7 @@ let rec atomics_of_operation = function let pcis = PCI_DB.pcis id in let vusbs = VUSB_DB.vusbs id in [ - Option.value ~default:[] - (Option.map (fun x -> [VM_shutdown_domain (id, PowerOff, x)]) timeout) + map_or_empty (fun x -> [VM_shutdown_domain (id, PowerOff, x)]) timeout (* Before shutting down a VM, we need to unplug its VUSBs. *) ; List.map (fun vusb -> VUSB_unplug vusb.Vusb.id) vusbs ; [ @@ -1633,12 +1629,10 @@ let rec atomics_of_operation = function pause the domain before destroying the device model. 
*) Best_effort (VM_pause id) ; VM_destroy_device_model id - ; Parallel - ( id - , Printf.sprintf "VBD.unplug vm=%s" id - , List.map (fun vbd -> VBD_unplug (vbd.Vbd.id, true)) vbds - ) ] + ; parallel_map "VBD.unplug" ~id vbds (fun vbd -> + [VBD_unplug (vbd.Vbd.id, true)] + ) ; List.map (fun vif -> VIF_unplug (vif.Vif.id, true)) vifs ; List.map (fun pci -> PCI_unplug pci.Pci.id) pcis ; [VM_destroy id] @@ -1660,19 +1654,9 @@ let rec atomics_of_operation = function List.map (fun vbd -> VBD_set_active (vbd.Vbd.id, true)) (vbds_rw @ vbds_ro) - ; [ - (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) - Parallel - ( id - , Printf.sprintf "VBD.plug RW vm=%s" id - , List.map (fun vbd -> VBD_plug vbd.Vbd.id) vbds_rw - ) - ; Parallel - ( id - , Printf.sprintf "VBD.plug RO vm=%s" id - , List.map (fun vbd -> VBD_plug vbd.Vbd.id) vbds_ro - ) - ] + ; (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) + parallel_map "VBD.plug RW" ~id vbds_rw (fun vbd -> [VBD_plug vbd.Vbd.id]) + ; parallel_map "VBD.plug RO" ~id vbds_ro (fun vbd -> [VBD_plug vbd.Vbd.id]) ; (if restore_vifs then atomics_of_operation (VM_restore_vifs id) else []) ; List.map (fun vgpu -> VGPU_set_active (vgpu.Vgpu.id, true)) vgpus (* Nvidia SRIOV PCI devices have been already been plugged *) @@ -1697,19 +1681,11 @@ let rec atomics_of_operation = function [ [VM_hook_script (id, Xenops_hooks.VM_pre_destroy, reason)] ; atomics_of_operation (VM_shutdown (id, timeout)) - ; [ - Parallel - ( id - , Printf.sprintf "VBD.epoch_end vm=%s" id - , List.filter_map - (fun vbd -> - Option.map - (fun x -> VBD_epoch_end (vbd.Vbd.id, x)) - vbd.Vbd.backend - ) - vbds - ) - ] + ; parallel_map "VBD.epoch_end" ~id vbds (fun vbd -> + map_or_empty + (fun x -> [VBD_epoch_end (vbd.Vbd.id, x)]) + vbd.Vbd.backend + ) ; List.map (fun vbd -> VBD_set_active (vbd.Vbd.id, false)) vbds ; List.map (fun vif -> VIF_set_active (vif.Vif.id, false)) vifs ; List.map (fun vgpu -> VGPU_set_active (vgpu.Vgpu.id, false)) vgpus @@ -1725,23 
+1701,14 @@ let rec atomics_of_operation = function Xenops_hooks.reason__clean_reboot in [ - Option.value ~default:[] - (Option.map (fun x -> [VM_shutdown_domain (id, Reboot, x)]) timeout) + map_or_empty (fun x -> [VM_shutdown_domain (id, Reboot, x)]) timeout ; [VM_hook_script (id, Xenops_hooks.VM_pre_destroy, reason)] ; atomics_of_operation (VM_shutdown (id, None)) - ; [ - Parallel - ( id - , Printf.sprintf "VBD.epoch_end vm=%s" id - , List.filter_map - (fun vbd -> - Option.map - (fun x -> VBD_epoch_end (vbd.Vbd.id, x)) - vbd.Vbd.backend - ) - vbds - ) - ] + ; parallel_map "VBD.epoch_end" ~id vbds (fun vbd -> + map_or_empty + (fun x -> [VBD_epoch_end (vbd.Vbd.id, x)]) + vbd.Vbd.backend + ) ; [ VM_hook_script (id, Xenops_hooks.VM_post_destroy, reason) ; VM_hook_script From 0425b0b182d739bd4fce78a36ae0b26338e2a17b Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 15 Oct 2024 12:41:07 +0100 Subject: [PATCH 004/121] CP-50475: parallelize device ops during VM lifecycle ops Operations on different devices should be independent and therefore can be parallelized. This both means parallelizing operations on different device types and on devices for the same type. An atom to serialize action has been introduced because the operation regarding a single device must be kept serialized. 
Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/lib/xenops_server.ml | 167 ++++++++++++++++++----------- ocaml/xenopsd/lib/xenops_task.ml | 4 +- ocaml/xenopsd/lib/xenops_utils.ml | 3 +- 3 files changed, 109 insertions(+), 65 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index d5e3da29433..579ce5d6f05 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -162,6 +162,7 @@ type atomic = | VM_rename of (Vm.id * Vm.id * rename_when) | VM_import_metadata of (Vm.id * Metadata.t) | Parallel of Vm.id * string * atomic list + | Serial of Vm.id * string * atomic list | Best_effort of atomic [@@deriving rpcty] @@ -271,6 +272,9 @@ let rec name_of_atomic = function | Parallel (_, _, atomics) -> Printf.sprintf "Parallel (%s)" (String.concat " | " (List.map name_of_atomic atomics)) + | Serial (_, _, atomics) -> + Printf.sprintf "Serial (%s)" + (String.concat " & " (List.map name_of_atomic atomics)) | Best_effort atomic -> Printf.sprintf "Best_effort (%s)" (name_of_atomic atomic) @@ -1556,8 +1560,13 @@ let collect_into apply = function [] -> [] | [op] -> [op] | lst -> apply lst let parallel name ~id = collect_into (fun ls -> [Parallel (id, Printf.sprintf "%s VM=%s" name id, ls)]) +let serial name ~id = + collect_into (fun ls -> [Serial (id, Printf.sprintf "%s VM=%s" name id, ls)]) + let parallel_concat name ~id lst = parallel name ~id (List.concat lst) +let serial_concat name ~id lst = serial name ~id (List.concat lst) + let parallel_map name ~id lst f = parallel name ~id (List.concat_map f lst) let map_or_empty f x = Option.value ~default:[] (Option.map f x) @@ -1573,6 +1582,23 @@ let rec atomics_of_operation = function List.partition (is_nvidia_sriov vgpus) pcis in let no_sharept = List.exists is_no_sharept vgpus in + let plug_vbds typ vbds = + let pf = Printf.sprintf in + let name_multi = pf "VBDs.activate_epoch_and_plug %s" typ in + let name_one = pf "VBD.activate_epoch_and_plug %s" typ 
in + parallel_map name_multi ~id vbds (fun vbd -> + serial_concat name_one ~id + [ + [VBD_set_active (vbd.Vbd.id, true)] + ; map_or_empty + (fun x -> + [VBD_epoch_begin (vbd.Vbd.id, x, vbd.Vbd.persistent)] + ) + vbd.Vbd.backend + ; [VBD_plug vbd.Vbd.id] + ] + ) + in [ dequarantine_ops vgpus ; [ @@ -1581,35 +1607,35 @@ let rec atomics_of_operation = function ; VM_create (id, None, None, no_sharept) ; VM_build (id, force) ] - ; List.map - (fun vbd -> VBD_set_active (vbd.Vbd.id, true)) - (vbds_rw @ vbds_ro) - (* keeping behaviour of vbd_plug_order: rw vbds must be plugged before - ro vbds, see vbd_plug_sets *) - ; List.concat_map - (fun (ty, vbds) -> - parallel_map (Printf.sprintf "VBD.epoch_begin %s" ty) ~id vbds - (fun vbd -> - map_or_empty - (fun x -> - [VBD_epoch_begin (vbd.Vbd.id, x, vbd.Vbd.persistent)] - ) - vbd.Vbd.backend - ) - ) - [("RW", vbds_rw); ("RO", vbds_ro)] - (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) - ; parallel_map "VBD.plug RW" ~id vbds_rw (fun vbd -> [VBD_plug vbd.Vbd.id]) - ; parallel_map "VBD.plug RO" ~id vbds_ro (fun vbd -> [VBD_plug vbd.Vbd.id]) - ; List.map (fun vif -> VIF_set_active (vif.Vif.id, true)) vifs - ; List.map (fun vif -> VIF_plug vif.Vif.id) vifs - ; List.map (fun vgpu -> VGPU_set_active (vgpu.Vgpu.id, true)) vgpus - ; List.map (fun pci -> PCI_plug (pci.Pci.id, false)) pcis_sriov + ; parallel_concat "Devices.plug (no qemu)" ~id + [ + (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) + serial_concat "VBDs.acticate_epoch_and_plug RW+RO" ~id + [plug_vbds "RW" vbds_rw; plug_vbds "RO" vbds_ro] + ; List.concat_map + (fun vif -> + serial "VIF.activate_and_plug" ~id + [VIF_set_active (vif.Vif.id, true); VIF_plug vif.Vif.id] + ) + vifs + ; serial_concat "VGPUs.activate & PCI.plug (SRIOV)" ~id + [ + parallel_map "VGPUs.activate" ~id vgpus (fun vgpu -> + [VGPU_set_active (vgpu.Vgpu.id, true)] + ) + ; parallel_map "PCIs.plug (SRIOV)" ~id pcis_sriov (fun pci -> + [PCI_plug (pci.Pci.id, false)] + ) + ] + ] ; 
[VM_create_device_model (id, false)] (* PCI and USB devices are hot-plugged into HVM guests via QEMU, so the following operations occur after creating the device models *) - ; List.map (fun pci -> PCI_plug (pci.Pci.id, true)) pcis_other - ; List.map (fun vusb -> VUSB_plug vusb.Vusb.id) vusbs + ; parallel_concat "Devices.plug (qemu)" ~id + [ + List.map (fun pci -> PCI_plug (pci.Pci.id, true)) pcis_other + ; List.map (fun vusb -> VUSB_plug vusb.Vusb.id) vusbs + ] (* At this point the domain is considered survivable. *) ; [VM_set_domain_action_request (id, None)] ] @@ -1622,7 +1648,9 @@ let rec atomics_of_operation = function [ map_or_empty (fun x -> [VM_shutdown_domain (id, PowerOff, x)]) timeout (* Before shutting down a VM, we need to unplug its VUSBs. *) - ; List.map (fun vusb -> VUSB_unplug vusb.Vusb.id) vusbs + ; parallel_map "VUSBs.unplug" ~id vusbs (fun vusb -> + [VUSB_unplug vusb.Vusb.id] + ) ; [ (* CA-315450: in a hard shutdown or snapshot revert, timeout=None and VM_shutdown_domain is not called. 
To avoid any interference, we @@ -1630,42 +1658,50 @@ let rec atomics_of_operation = function Best_effort (VM_pause id) ; VM_destroy_device_model id ] - ; parallel_map "VBD.unplug" ~id vbds (fun vbd -> - [VBD_unplug (vbd.Vbd.id, true)] - ) - ; List.map (fun vif -> VIF_unplug (vif.Vif.id, true)) vifs - ; List.map (fun pci -> PCI_unplug pci.Pci.id) pcis + ; parallel_concat "Devices.unplug" ~id + [ + List.map (fun vbd -> VBD_unplug (vbd.Vbd.id, true)) vbds + ; List.map (fun vif -> VIF_unplug (vif.Vif.id, true)) vifs + ; List.map (fun pci -> PCI_unplug pci.Pci.id) pcis + ] ; [VM_destroy id] ] |> List.concat | VM_restore_vifs id -> let vifs = VIF_DB.vifs id in - [ - List.map (fun vif -> VIF_set_active (vif.Vif.id, true)) vifs - ; List.map (fun vif -> VIF_plug vif.Vif.id) vifs - ] - |> List.concat + parallel_map "VIFs.activate_and_plug" ~id vifs (fun vif -> + serial "VIF.activate_and_plug" ~id + [VIF_set_active (vif.Vif.id, true); VIF_plug vif.Vif.id] + ) | VM_restore_devices (id, restore_vifs) -> let vbds_rw, vbds_ro = VBD_DB.vbds id |> vbd_plug_sets in let vgpus = VGPU_DB.vgpus id in let pcis = PCI_DB.pcis id |> pci_plug_order in let pcis_other = List.filter (is_not_nvidia_sriov vgpus) pcis in + let plug_vbds typ vbds = + let pf = Printf.sprintf in + let name_multi = pf "VBDs.activate_and_plug %s" typ in + let name_one = pf "VBD.activate_and_plug %s" typ in + parallel_map name_multi ~id vbds (fun vbd -> + serial name_one ~id + [VBD_set_active (vbd.Vbd.id, true); VBD_plug vbd.Vbd.id] + ) + in [ - List.map - (fun vbd -> VBD_set_active (vbd.Vbd.id, true)) - (vbds_rw @ vbds_ro) - ; (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) - parallel_map "VBD.plug RW" ~id vbds_rw (fun vbd -> [VBD_plug vbd.Vbd.id]) - ; parallel_map "VBD.plug RO" ~id vbds_ro (fun vbd -> [VBD_plug vbd.Vbd.id]) + (* rw vbds must be plugged before ro vbds, see vbd_plug_sets *) + plug_vbds "RW" vbds_rw + ; plug_vbds "RO" vbds_ro ; (if restore_vifs then atomics_of_operation (VM_restore_vifs 
id) else []) - ; List.map (fun vgpu -> VGPU_set_active (vgpu.Vgpu.id, true)) vgpus - (* Nvidia SRIOV PCI devices have been already been plugged *) - ; [ - VM_create_device_model (id, true) - (* PCI and USB devices are hot-plugged into HVM guests via QEMU, so - the following operations occur after creating the device models *) - ] - ; List.map (fun pci -> PCI_plug (pci.Pci.id, true)) pcis_other + ; (* Nvidia SRIOV PCI devices have been already been plugged *) + parallel_map "VGPUs.activate" ~id vgpus (fun vgpu -> + [VGPU_set_active (vgpu.Vgpu.id, true)] + ) + ; [VM_create_device_model (id, true)] + (* PCI and USB devices are hot-plugged into HVM guests via QEMU, so + the following operations occur after creating the device models *) + ; parallel_map "PCIs.plug" ~id pcis_other (fun pci -> + [PCI_plug (pci.Pci.id, true)] + ) ] |> List.concat | VM_poweroff (id, timeout) -> @@ -1678,17 +1714,24 @@ let rec atomics_of_operation = function else Xenops_hooks.reason__clean_shutdown in - [ - [VM_hook_script (id, Xenops_hooks.VM_pre_destroy, reason)] - ; atomics_of_operation (VM_shutdown (id, timeout)) - ; parallel_map "VBD.epoch_end" ~id vbds (fun vbd -> + let unplug_vbd vbd = + serial_concat "VBD.epoch_and_deactivate" ~id + [ map_or_empty (fun x -> [VBD_epoch_end (vbd.Vbd.id, x)]) vbd.Vbd.backend - ) - ; List.map (fun vbd -> VBD_set_active (vbd.Vbd.id, false)) vbds - ; List.map (fun vif -> VIF_set_active (vif.Vif.id, false)) vifs - ; List.map (fun vgpu -> VGPU_set_active (vgpu.Vgpu.id, false)) vgpus + ; [VBD_set_active (vbd.Vbd.id, false)] + ] + in + [ + [VM_hook_script (id, Xenops_hooks.VM_pre_destroy, reason)] + ; atomics_of_operation (VM_shutdown (id, timeout)) + ; parallel_concat "Devices.deactivate" ~id + [ + List.concat_map unplug_vbd vbds + ; List.map (fun vif -> VIF_set_active (vif.Vif.id, false)) vifs + ; List.map (fun vgpu -> VGPU_set_active (vgpu.Vgpu.id, false)) vgpus + ] ; [VM_hook_script (id, Xenops_hooks.VM_post_destroy, reason)] ] |> List.concat @@ -1825,7 
+1868,7 @@ let rec perform_atomic ~progress_callback ?subtask:_ ?result (op : atomic) (Xenops_task.id_of_handle t) (List.length atoms) description in - let with_tracing = parallel_id_with_tracing parallel_id t in + let with_tracing = id_with_tracing parallel_id t in debug "begin_%s" parallel_id ; let task_list = queue_atomics_and_wait ~progress_callback ~max_parallel_atoms:10 @@ -1869,6 +1912,8 @@ let rec perform_atomic ~progress_callback ?subtask:_ ?result (op : atomic) List.iter (fun err -> match err with None -> () | Some e -> raise e) errors + | Serial (_, _, atoms) -> + List.iter (Fun.flip (perform_atomic ~progress_callback) t) atoms | VIF_plug id -> debug "VIF.plug %s" (VIF_DB.string_of_id id) ; B.VIF.plug t (VIF_DB.vm_of id) (VIF_DB.read_exn id) ; @@ -2468,7 +2513,7 @@ and trigger_cleanup_after_failure_atom op t = immediate_operation dbg id (VM_check_state id) | Best_effort op -> trigger_cleanup_after_failure_atom op t - | Parallel (_id, _description, ops) -> + | Parallel (_id, _description, ops) | Serial (_id, _description, ops) -> List.iter (fun op -> trigger_cleanup_after_failure_atom op t) ops | VM_rename (id1, id2, _) -> immediate_operation dbg id1 (VM_check_state id1) ; diff --git a/ocaml/xenopsd/lib/xenops_task.ml b/ocaml/xenopsd/lib/xenops_task.ml index 3fcaffefec0..23d88beef18 100644 --- a/ocaml/xenopsd/lib/xenops_task.ml +++ b/ocaml/xenopsd/lib/xenops_task.ml @@ -70,8 +70,8 @@ let is_task task = function | _ -> None -let parallel_id_with_tracing parallel_id t = - Debug_info.make ~log:parallel_id ~tracing:(Xenops_task.tracing t) +let id_with_tracing id t = + Debug_info.make ~log:id ~tracing:(Xenops_task.tracing t) |> Debug_info.to_string let dbg_with_traceparent_of_task t = diff --git a/ocaml/xenopsd/lib/xenops_utils.ml b/ocaml/xenopsd/lib/xenops_utils.ml index d948f9865d9..481ad1b6101 100644 --- a/ocaml/xenopsd/lib/xenops_utils.ml +++ b/ocaml/xenopsd/lib/xenops_utils.ml @@ -620,8 +620,7 @@ let chunks size lst = [op] :: xs :: xss ) [] lst - |> 
List.map (fun xs -> List.rev xs) - |> List.rev + |> List.rev_map (fun xs -> List.rev xs) let really_kill pid = try Unixext.kill_and_wait pid From 7ad5f9517294b53a440f15f67abd2bf0802fbc53 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 17 Oct 2024 09:50:48 +0100 Subject: [PATCH 005/121] xapi_stdext_unix/test: Fix intermittent systemd cram test failure Instead of sleeping and hoping for the best, wait for the background job to finish. Signed-off-by: Andrii Sultanov --- .../libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t index 28790e8a32d..bfa73c84c63 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t @@ -10,8 +10,8 @@ $ ./test_systemd.exe --server & @systemd.socket READY=1 - $ sleep 1 $ ./test_systemd.exe --notify + $ wait == Use socket files $ export TMPDIR=${TMPDIR:-/tmp} @@ -22,6 +22,7 @@ $ sleep 1 $ test -S "$NOTIFY_SOCKET" $ ./test_systemd.exe --notify + $ wait == Currently not run tests because of insufficient permissions == in cram to be manipulating this file From e83ba5eda7011bfc44a702e70f73f15c9607e4b0 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Fri, 18 Oct 2024 16:22:14 +0100 Subject: [PATCH 006/121] Fix a build warning with GCC 12.3.0 GCC correctly reports "pointer targets differ in signedness". 
Signed-off-by: Ross Lagerwall --- .../libs/xapi-stdext/lib/xapi-stdext-zerocheck/zerocheck_stub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-zerocheck/zerocheck_stub.c b/ocaml/libs/xapi-stdext/lib/xapi-stdext-zerocheck/zerocheck_stub.c index 776ef854849..4606cf95a4e 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-zerocheck/zerocheck_stub.c +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-zerocheck/zerocheck_stub.c @@ -31,7 +31,7 @@ value is_all_zeros(value string, value length) for (i = len / 4; i > 0; i--) if (*p++ != 0) goto notallzero; - s = (unsigned char *) p; + s = (const char *) p; for (i = 0; i < len % 4; i++) if (s[i] != 0) goto notallzero; From e3f92131f8c689378a375d5e01d2a26ef395d339 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Fri, 18 Oct 2024 16:51:23 +0100 Subject: [PATCH 007/121] Remove use of deprecated syslog Standard* type Newer systemd warns that the "syslog" StandardOutput/StandardError type is deprecated and automatically uses the journal instead. Fix this by removing the explicit setting of StandardOutput/StandardError. Instead, the service will use the default values configured in systemd (DefaultStandardOutput/DefaultStandardError) which for a XenServer system will result in the output going to rsyslog. 
Signed-off-by: Ross Lagerwall --- ocaml/forkexecd/lib/fe_systemctl.ml | 2 -- scripts/varstored-guard.service | 2 -- scripts/xapi-nbd.service | 1 - 3 files changed, 5 deletions(-) diff --git a/ocaml/forkexecd/lib/fe_systemctl.ml b/ocaml/forkexecd/lib/fe_systemctl.ml index cd76bede41a..b36ee6674ae 100644 --- a/ocaml/forkexecd/lib/fe_systemctl.ml +++ b/ocaml/forkexecd/lib/fe_systemctl.ml @@ -60,8 +60,6 @@ let start_transient ?(env = Array.of_list default_env) ?(properties = []) ) ; ("SyslogIdentifier", syslog_key) ; ("SyslogLevel", "debug") - ; ("StandardOutput", "syslog") - ; ("StandardError", "inherit") ; ("StartLimitInterval", "0") (* no rate-limit, for bootstorms *) ; ("ExecStart", String.concat " " (cmd :: List.map Filename.quote args)) ; ("Type", Type.to_string exec_ty) diff --git a/scripts/varstored-guard.service b/scripts/varstored-guard.service index c9d1b9bd939..d7cb838336f 100644 --- a/scripts/varstored-guard.service +++ b/scripts/varstored-guard.service @@ -9,8 +9,6 @@ Wants=message-switch.service syslog.target Type=simple Environment=OCAMLRUNPARAM=b ExecStart=/usr/sbin/varstored-guard -# Needed to ensure exceptions are logged when the program fails: -StandardError=syslog LimitNOFILE=4096 # restart but fail if more than 5 failures in 30s Restart=on-failure diff --git a/scripts/xapi-nbd.service b/scripts/xapi-nbd.service index bca7b551a14..fcbacd7cb37 100644 --- a/scripts/xapi-nbd.service +++ b/scripts/xapi-nbd.service @@ -11,7 +11,6 @@ Environment=OCAMLRUNPARAM=b # and the PathExists in xapi-nbd.path: any change must be made in all three files. 
ExecStart=/usr/sbin/xapi-nbd --certfile=/etc/xensource/xapi-ssl.pem StandardOutput=null -StandardError=syslog # restart but fail if more than 5 failures in 2s Restart=on-failure StartLimitBurst=5 From 1bdb22a9390e5be69c65b78bdde1d4c42f804725 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 21 Oct 2024 16:16:47 +0100 Subject: [PATCH 008/121] CA-400860: rrdp-netdev - drop xenctrl, use xenstore to get UUIDs from domids instead Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/bin/rrdp-netdev/dune | 2 +- ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml | 27 ++++++++++--------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdp-netdev/dune b/ocaml/xcp-rrdd/bin/rrdp-netdev/dune index c5acc80a8be..55c31d4d9f7 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-netdev/dune +++ b/ocaml/xcp-rrdd/bin/rrdp-netdev/dune @@ -3,6 +3,7 @@ (name rrdp_netdev) (libraries astring + ezxenstore.core integers netlink rrdd-plugin @@ -13,7 +14,6 @@ xapi-log xapi-rrd xapi-stdext-std - xenctrl ) ) diff --git a/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml b/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml index 299bb9a97df..c7dab55ac94 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml @@ -13,11 +13,14 @@ *) open Rrdd_plugin +open Ezxenstore_core module D = Debug.Make (struct let name = "xcp-rrdp-netdev" end) module Process = Rrdd_plugin.Process (struct let name = "xcp-rrdd-netdev" end) +let fail = Printf.ksprintf failwith + type iface_stats = { tx_bytes: int64 (** bytes emitted *) ; tx_pkts: int64 (** packets emitted *) @@ -132,18 +135,16 @@ let transform_taps devs = newdevnames let generate_netdev_dss () = - let _, doms, _ = - Xenctrl.with_intf (fun xc -> Xenctrl_lib.domain_snapshot xc) - in - - let uuid_of_domid domains domid = - let _, uuid, _ = - try List.find (fun (_, _, domid') -> domid = domid') domains - with Not_found -> - failwith - (Printf.sprintf "Failed to find uuid corresponding to domid: %d" 
domid) - in - uuid + let uuid_of_domid domid = + try + Xenstore.with_xs (fun xs -> + let vm = xs.Xenstore.Xs.getdomainpath domid ^ "/vm" in + let vm_dir = xs.Xenstore.Xs.read vm in + xs.Xenstore.Xs.read (vm_dir ^ "/uuid") + ) + with e -> + fail "Failed to find uuid corresponding to domid: %d (%s)" domid + (Printexc.to_string e) in let dbg = "rrdp_netdev" in @@ -198,7 +199,7 @@ let generate_netdev_dss () = let vif_name = Printf.sprintf "vif_%d" d2 in (* Note: rx and tx are the wrong way round because from dom0 we see the vms backwards *) - let uuid = uuid_of_domid doms d1 in + let uuid = uuid_of_domid d1 in ( Rrd.VM uuid , Ds.ds_make ~name:(vif_name ^ "_tx") ~units:"B/s" ~description: From d157db0aecbec78bc7c396aab0158f4de287a68f Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Thu, 17 Oct 2024 06:04:59 +0000 Subject: [PATCH 009/121] CP-51870: Delegate restarting systemd services order to systemd services configuration Systemd services has good support for the services depends and orders in the Unit file, that is the place the restart order should be stated. However, the command `systemd stop foo bar ...` will override the order in the Unit file. As the number of the services grow up, it is really hard to manage the order in the systemd command In order to resolve the issue, `toolstack.target` is created to group and manage the toolstack services. - toolstack.target: `Wants: foo.service` will start foo.service when `systemctl start toolstack.target` - foo.service: `PartOf: toolstack.target` will restart/stop foo.service when `systemctl stop/restart toolstack.target` Note: Above two does not have to match, eg. if we do not want to start a service during `systemctl start toolstack.target`, we can remove it from the first list. 
- Following xenopsd services are no longer valid, just got removed * xenopsd * xenopsd-xenlight * xenopsd-simulator * xenopsd-libvirt Signed-off-by: Lin Liu --- python3/perfmon/perfmon.service | 1 + scripts/Makefile | 3 +++ scripts/toolstack.target | 27 ++++++++++++++++++++++++++ scripts/varstored-guard.service | 3 ++- scripts/xapi.service | 1 + scripts/xcp-networkd.service | 1 + scripts/xcp-rrdd-cpu.service | 1 + scripts/xcp-rrdd-dcmi.service | 1 + scripts/xcp-rrdd-iostat.service | 1 + scripts/xcp-rrdd-netdev.service | 1 + scripts/xcp-rrdd-squeezed.service | 1 + scripts/xcp-rrdd-xenpm.service | 1 + scripts/xcp-rrdd.service | 1 + scripts/xe-toolstack-restart | 32 ++++++++++++++----------------- 14 files changed, 56 insertions(+), 19 deletions(-) create mode 100644 scripts/toolstack.target diff --git a/python3/perfmon/perfmon.service b/python3/perfmon/perfmon.service index 1afa0cfc237..683039923fb 100644 --- a/python3/perfmon/perfmon.service +++ b/python3/perfmon/perfmon.service @@ -2,6 +2,7 @@ Description=Performance monitoring/alarm generation daemon After=xapi.service Wants=xapi.service +PartOf=toolstack.target [Service] EnvironmentFile=-/etc/sysconfig/perfmon diff --git a/scripts/Makefile b/scripts/Makefile index 7583c80d624..503e7838546 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -152,3 +152,6 @@ install: $(IDATA) mail-languages/ja-JP.json $(DESTDIR)/etc/xapi.d/mail-languages # uefi mkdir -p $(DESTDIR)/etc/xapi.d/efi-clone + +# toolstack.target to manage toolstack services as a group + $(IDATA) toolstack.target $(DESTDIR)/usr/lib/systemd/system/toolstack.target diff --git a/scripts/toolstack.target b/scripts/toolstack.target new file mode 100644 index 00000000000..c4019a4d232 --- /dev/null +++ b/scripts/toolstack.target @@ -0,0 +1,27 @@ +[Unit] +Description=toolstack Target to manage toolstack service restart +# wants to start following services when run `systemctl start toolstack.target` +# Note: `Wants` is used here instead of `Requires`, 
`Requires` will stop/restart +# whole toolstack.target on any service stop/restart +Wants=xapi.service +Wants=message-switch.service +Wants=forkexecd.service +Wants=perfmon.service +Wants=v6d.service +Wants=xcp-rrdd-iostat.service +Wants=xcp-rrdd-squeezed.service +Wants=xcp-rrdd-netdev.service +Wants=xcp-rrdd-dcmi.service +Wants=xcp-rrdd-cpu.service +Wants=xcp-rrdd-xenpm.service +Wants=xcp-rrdd-gpumon.service +Wants=xcp-rrdd.service +Wants=xcp-networkd.service +Wants=xenopsd-xc.service +Wants=squeezed.service +Wants=xapi-storage-script.service +Wants=xapi-clusterd.service +Wants=varstored-guard.service + +[Install] +WantedBy=multi-user.target diff --git a/scripts/varstored-guard.service b/scripts/varstored-guard.service index c9d1b9bd939..819b86c4c58 100644 --- a/scripts/varstored-guard.service +++ b/scripts/varstored-guard.service @@ -2,8 +2,9 @@ Description=Varstored XAPI socket deprivileging daemon Documentation=man:varstored-guard(1) After=message-switch.service syslog.target -Before=xapi-domains.service xenopsd.service +Before=xapi-domains.service xenopsd-xc.service Wants=message-switch.service syslog.target +PartOf=toolstack.target [Service] Type=simple diff --git a/scripts/xapi.service b/scripts/xapi.service index a4c825991dd..d4cb858c93b 100644 --- a/scripts/xapi.service +++ b/scripts/xapi.service @@ -16,6 +16,7 @@ After=xcp-rrdd.service After=xenopsd-xc.service After=xenstored.service After=stunnel@xapi.service +PartOf=toolstack.target Conflicts=shutdown.target diff --git a/scripts/xcp-networkd.service b/scripts/xcp-networkd.service index eb49512cf24..ade36bb8e5b 100644 --- a/scripts/xcp-networkd.service +++ b/scripts/xcp-networkd.service @@ -3,6 +3,7 @@ Description=XCP networking daemon Documentation=man:xcp-networkd(1) After=forkexecd.service message-switch.service syslog.target Wants=forkexecd.service message-switch.service syslog.target +PartOf=toolstack.target [Service] Type=notify diff --git a/scripts/xcp-rrdd-cpu.service 
b/scripts/xcp-rrdd-cpu.service index 310828dda94..b0039ca0a44 100644 --- a/scripts/xcp-rrdd-cpu.service +++ b/scripts/xcp-rrdd-cpu.service @@ -2,6 +2,7 @@ Description=XCP RRD daemon CPU plugin After=xcp-rrdd.service Requires=xcp-rrdd.service +PartOf=toolstack.target [Service] ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-cpu diff --git a/scripts/xcp-rrdd-dcmi.service b/scripts/xcp-rrdd-dcmi.service index 64bab4f25b3..2a2f22ec249 100644 --- a/scripts/xcp-rrdd-dcmi.service +++ b/scripts/xcp-rrdd-dcmi.service @@ -2,6 +2,7 @@ Description=XCP RRD daemon IPMI DCMI power plugin After=xcp-rrdd.service Requires=xcp-rrdd.service +PartOf=toolstack.target [Service] ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-dcmi diff --git a/scripts/xcp-rrdd-iostat.service b/scripts/xcp-rrdd-iostat.service index ce724477367..791cfd279ae 100644 --- a/scripts/xcp-rrdd-iostat.service +++ b/scripts/xcp-rrdd-iostat.service @@ -2,6 +2,7 @@ Description=XCP RRD daemon iostat plugin After=xcp-rrdd.service Requires=xcp-rrdd.service +PartOf=toolstack.target [Service] ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-iostat diff --git a/scripts/xcp-rrdd-netdev.service b/scripts/xcp-rrdd-netdev.service index b961cc9d15c..047b54bdf7b 100644 --- a/scripts/xcp-rrdd-netdev.service +++ b/scripts/xcp-rrdd-netdev.service @@ -2,6 +2,7 @@ Description=XCP RRD daemon network plugin After=xcp-rrdd.service Requires=xcp-rrdd.service +PartOf=toolstack.target [Service] ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-netdev diff --git a/scripts/xcp-rrdd-squeezed.service b/scripts/xcp-rrdd-squeezed.service index bb33fca801c..673663ba04e 100644 --- a/scripts/xcp-rrdd-squeezed.service +++ b/scripts/xcp-rrdd-squeezed.service @@ -2,6 +2,7 @@ Description=XCP RRD daemon squeezed plugin After=xcp-rrdd.service Requires=xcp-rrdd.service +PartOf=toolstack.target [Service] ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-squeezed diff --git a/scripts/xcp-rrdd-xenpm.service 
b/scripts/xcp-rrdd-xenpm.service index 092bb4d4bb9..56345eb1d4a 100644 --- a/scripts/xcp-rrdd-xenpm.service +++ b/scripts/xcp-rrdd-xenpm.service @@ -2,6 +2,7 @@ Description=XCP RRD daemon xenpm plugin After=xcp-rrdd.service Requires=xcp-rrdd.service +PartOf=toolstack.target [Service] ExecStart=/opt/xensource/libexec/xcp-rrdd-plugins/xcp-rrdd-xenpm diff --git a/scripts/xcp-rrdd.service b/scripts/xcp-rrdd.service index 81e4d78df68..92d1292bef1 100644 --- a/scripts/xcp-rrdd.service +++ b/scripts/xcp-rrdd.service @@ -2,6 +2,7 @@ Description=XCP RRD daemon After=forkexecd.service xenstored.service message-switch.service syslog.target Wants=forkexecd.service xenstored.service message-switch.service syslog.target +PartOf=toolstack.target [Service] Type=notify diff --git a/scripts/xe-toolstack-restart b/scripts/xe-toolstack-restart index 25856dc67ad..d377ae7acbd 100755 --- a/scripts/xe-toolstack-restart +++ b/scripts/xe-toolstack-restart @@ -27,11 +27,6 @@ echo "Executing $FILENAME" POOLCONF=`cat @ETCXENDIR@/pool.conf` if [ $POOLCONF == "master" ]; then MPATHALERT="mpathalert"; else MPATHALERT=""; fi -SERVICES="message-switch perfmon v6d xenopsd xenopsd-xc xenopsd-xenlight - xenopsd-simulator xenopsd-libvirt xcp-rrdd-iostat xcp-rrdd-squeezed - xcp-rrdd-netdev xcp-rrdd-cpu - xcp-rrdd-xenpm xcp-rrdd-gpumon xcp-rrdd xcp-networkd squeezed forkexecd - $MPATHALERT xapi-storage-script xapi-clusterd varstored-guard" tmp_file=$(mktemp --suffix="xe-toolstack-restart") systemctl stop stunnel@xapi > $tmp_file 2>&1 @@ -43,22 +38,23 @@ if [[ $kill_stunnel_exit_code != 0 ]]; then fi rm -f $tmp_file -TO_RESTART="" -for svc in $SERVICES ; do - # restart services only if systemd said they were enabled - systemctl is-enabled $svc >/dev/null 2>&1 +set -e - if [ $? 
-eq 0 ] ; then - TO_RESTART="$svc $TO_RESTART" - fi -done -systemctl stop xapi -systemctl stop ${TO_RESTART} +systemctl restart $MPATHALERT toolstack.target -set -e +# Check the status of toolstack services +for service in $(systemctl list-dependencies --plain --no-pager toolstack.target); do + # During system bootup, xcp-rrdd-dcmi.service often fail as + # `ipmitool dcmi discover` discover nothing, just ignore it for now + if [ "$service" == "xcp-rrdd-dcmi.service" ]; then + continue + fi -systemctl start ${TO_RESTART} -systemctl start xapi + if ! systemctl is-active --quiet "$service"; then + echo "$service failed to restart, $(systemctl status $service)" + exit 1 + fi +done rm -f $LOCKFILE echo "done." From f0b632236e5589a59e02def75531e51d5c8e32fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 26 Apr 2024 15:14:27 +0100 Subject: [PATCH 010/121] rrdd: avoid constructing intermediate lists, use Seq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables extending the type without causing performance issues, and should reduce the work for the garbage collector. 
Signed-off-by: Edwin Török --- ocaml/libs/xapi-rrd/lib/rrd.ml | 2 +- ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 14 ++++---------- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 15 ++++++++------- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli | 2 +- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 8 ++++++-- ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml | 2 +- 6 files changed, 21 insertions(+), 22 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 0b67cc9efc5..ade27f677fd 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -459,7 +459,7 @@ let ds_update rrd timestamp values transforms new_domid = (** Update the rrd with named values rather than just an ordered array *) let ds_update_named rrd timestamp ~new_domid valuesandtransforms = let valuesandtransforms = - valuesandtransforms |> List.to_seq |> StringMap.of_seq + valuesandtransforms |> StringMap.of_seq in let get_value_and_transform {ds_name; _} = Option.value ~default:(VT_Unknown, Identity) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index 34a44e92dfe..89ccd03f943 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -97,7 +97,7 @@ let update_rrds timestamp dss uuid_domids paused_vms = in OwnerMap.update owner merge all in - let dss = List.fold_left consolidate OwnerMap.empty dss in + let dss = Seq.fold_left consolidate OwnerMap.empty dss in (* the first parameter and ds.ds_name are equivalent *) let to_named_updates (_, ds) = @@ -123,9 +123,7 @@ let update_rrds timestamp dss uuid_domids paused_vms = unreliable" by_how_much ; let process_vm vm_uuid dss = - let named_updates = - StringMap.to_seq dss |> Seq.map to_named_updates |> List.of_seq - in + let named_updates = StringMap.to_seq dss |> Seq.map to_named_updates in let dss = StringMap.to_seq dss |> Seq.map snd |> List.of_seq in match StringMap.find_opt vm_uuid uuid_domids with @@ -155,9 +153,7 @@ let 
update_rrds timestamp dss uuid_domids paused_vms = __FUNCTION__ vm_uuid in let process_sr sr_uuid dss = - let named_updates = - StringMap.to_seq dss |> Seq.map to_named_updates |> List.of_seq - in + let named_updates = StringMap.to_seq dss |> Seq.map to_named_updates in let dss = StringMap.to_seq dss |> Seq.map snd |> List.of_seq in try (* First, potentially update the rrd with any new default dss *) @@ -175,9 +171,7 @@ let update_rrds timestamp dss uuid_domids paused_vms = with _ -> log_backtrace () in let process_host dss = - let named_updates = - StringMap.to_seq dss |> Seq.map to_named_updates |> List.of_seq - in + let named_updates = StringMap.to_seq dss |> Seq.map to_named_updates in let dss = StringMap.to_seq dss |> Seq.map snd |> List.of_seq in match !host_rrd with diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index 9662af66611..bd625bdae30 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -764,22 +764,23 @@ module Plugin = struct ) (* Read, parse, and combine metrics from all registered plugins. *) - let read_stats () : (Rrd.ds_owner * Ds.ds) list = + let read_stats () : (Rrd.ds_owner * Ds.ds) Seq.t = let plugins = with_lock registered_m (fun _ -> List.of_seq (Hashtbl.to_seq registered) ) in - let process_plugin acc (uid, plugin) = + let process_plugin (uid, plugin) = try let payload = get_payload ~uid plugin in - List.rev_append payload.Rrd_protocol.datasources acc - with _ -> acc + List.to_seq payload.Rrd_protocol.datasources + with _ -> Seq.empty in List.iter decr_skip_count plugins ; plugins - |> List.filter (Fun.negate skip) - |> List.fold_left process_plugin [] + |> List.to_seq + |> Seq.filter (Fun.negate skip) + |> Seq.flat_map process_plugin end module Local = Make (struct @@ -805,7 +806,7 @@ module Plugin = struct let deregister = Local.deregister (* Read, parse, and combine metrics from all registered plugins. 
*) - let read_stats () : (Rrd.ds_owner * Ds.ds) list = Local.read_stats () + let read_stats () : (Rrd.ds_owner * Ds.ds) Seq.t = Local.read_stats () end module HA = struct diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli index 8fbe6f41992..2911895a4a3 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli @@ -69,7 +69,7 @@ module Plugin : sig val next_reading : string -> float - val read_stats : unit -> (Rrd.ds_owner * Ds.ds) list + val read_stats : unit -> (Rrd.ds_owner * Ds.ds) Seq.t module Local : sig val register : diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index 48da4c60ae7..f48a7a62171 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -508,9 +508,13 @@ let do_monitor_write xc writers = let timestamp, domains, my_paused_vms = domain_snapshot xc in let tagged_dom0_stats = generate_all_dom0_stats xc timestamp domains in write_dom0_stats writers (Int64.of_float timestamp) tagged_dom0_stats ; - let dom0_stats = List.concat_map snd tagged_dom0_stats in + let dom0_stats = + tagged_dom0_stats + |> List.to_seq + |> Seq.flat_map (fun l -> l |> snd |> List.to_seq) + in let plugins_stats = Rrdd_server.Plugin.read_stats () in - let stats = List.rev_append plugins_stats dom0_stats in + let stats = Seq.append plugins_stats dom0_stats in Rrdd_stats.print_snapshot () ; let uuid_domids = List.map (fun (_, u, i) -> (u, i)) domains in Rrdd_monitor.update_rrds timestamp stats uuid_domids my_paused_vms ; diff --git a/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml b/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml index 8fe6a1c551c..27d64870fe4 100644 --- a/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml @@ -53,7 +53,7 @@ let update_rrds_test ~dss ~uuid_domids ~paused_vms ~expected_vm_rrds ~expected_sr_rrds ~expected_host_dss = let test () = 
reset_rrdd_shared_state () ; - Rrdd_monitor.update_rrds 12345.0 dss uuid_domids paused_vms ; + Rrdd_monitor.update_rrds 12345.0 (List.to_seq dss) uuid_domids paused_vms ; check_datasources "VM" (Some Rrdd_shared.vm_rrds) expected_vm_rrds ; check_datasources "SR" (Some Rrdd_shared.sr_rrds) expected_sr_rrds ; check_datasources "Host" (host_rrds !Rrdd_shared.host_rrd) expected_host_dss From 8b2b2d249c9f8b8a399b8c067fbcd13a6a037668 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 11 Oct 2024 13:41:51 +0100 Subject: [PATCH 011/121] CA-391651 - rrd: Remove deprecated member of rra struct Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index ade27f677fd..7189b48753f 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -161,8 +161,6 @@ type rra = { ; rra_data: Fring.t array (** Stored data, one ring per datasource *) ; rra_cdps: cdp_prep array (** scratch area for consolidated datapoint preparation *) - ; mutable rra_updatehook: (rrd -> int -> unit) option - (** Hook that gets called when an update happens *) } (** The container for the DSs and RRAs. 
Also specifies the period between pdps *) @@ -185,7 +183,6 @@ let copy_rra x = ; rra_xff= x.rra_xff ; rra_data= Array.map Fring.copy x.rra_data ; rra_cdps= Array.map copy_cdp_prep x.rra_cdps - ; rra_updatehook= x.rra_updatehook } let copy_ds x = @@ -322,7 +319,6 @@ let rra_update rrd proc_pdp_st elapsed_pdp_st pdps = ) rra.rra_cdps ; do_cfs rra new_start_pdp_offset pdps ; - match rra.rra_updatehook with None -> () | Some f -> f rrd rra_step_cnt ) in Array.iter updatefn rrd.rrd_rras @@ -486,7 +482,6 @@ let rra_create cf row_cnt pdp_cnt xff = ; rra_cdps= [||] (* defer creation of the data until we know how many dss we're storing *) - ; rra_updatehook= None (* DEPRECATED *) } let ds_create name ty ?(min = neg_infinity) ?(max = infinity) ?(mrhb = infinity) @@ -784,7 +779,6 @@ let from_xml input = ; rra_xff= float_of_string xff ; rra_data= database ; rra_cdps= Array.of_list cdps - ; rra_updatehook= None } ) i From cacf52a5101ddae7d9940e8385ae415b8327a4b0 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 18 Oct 2024 11:25:32 +0100 Subject: [PATCH 012/121] CA-391651: Make timestamps of data collectors in xcp-rrdd independent Instead of getting one timestamp for all collectors, get them closer to the actual measurement time. 
Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index f48a7a62171..c0ed92d05fc 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -466,7 +466,6 @@ let domain_snapshot xc = let domains = Xenctrl.domain_getinfolist xc 0 |> List.filter_map metadata_of_domain in - let timestamp = Unix.gettimeofday () in let domain_paused (d, uuid, _) = if d.Xenctrl.paused then Some uuid else None in @@ -474,7 +473,7 @@ let domain_snapshot xc = let domids = List.map (fun (_, _, i) -> i) domains |> IntSet.of_list in let domains_only k v = Option.map (Fun.const v) (IntSet.find_opt k domids) in Hashtbl.filter_map_inplace domains_only Rrdd_shared.memory_targets ; - (timestamp, domains, paused_uuids) + (domains, paused_uuids) let dom0_stat_generators = [ @@ -484,13 +483,16 @@ let dom0_stat_generators = ; ("cache", fun _ timestamp _ -> dss_cache timestamp) ] -let generate_all_dom0_stats xc timestamp domains = +let generate_all_dom0_stats xc domains = let handle_generator (name, generator) = - (name, handle_exn name (fun _ -> generator xc timestamp domains) []) + let timestamp = Unix.gettimeofday () in + ( name + , (timestamp, handle_exn name (fun _ -> generator xc timestamp domains) []) + ) in List.map handle_generator dom0_stat_generators -let write_dom0_stats writers timestamp tagged_dss = +let write_dom0_stats writers tagged_dss = let write_dss (name, writer) = match List.assoc_opt name tagged_dss with | None -> @@ -498,20 +500,23 @@ let write_dom0_stats writers timestamp tagged_dss = "Could not write stats for \"%s\": no stats were associated with \ this name" name - | Some dss -> + | Some (timestamp, dss) -> + let timestamp = Int64.of_float timestamp in writer.Rrd_writer.write_payload {timestamp; datasources= dss} in List.iter write_dss writers let 
do_monitor_write xc writers = Rrdd_libs.Stats.time_this "monitor" (fun _ -> - let timestamp, domains, my_paused_vms = domain_snapshot xc in - let tagged_dom0_stats = generate_all_dom0_stats xc timestamp domains in - write_dom0_stats writers (Int64.of_float timestamp) tagged_dom0_stats ; + let domains, my_paused_vms = domain_snapshot xc in + let tagged_dom0_stats = generate_all_dom0_stats xc domains in + write_dom0_stats writers tagged_dom0_stats ; let dom0_stats = tagged_dom0_stats |> List.to_seq - |> Seq.flat_map (fun l -> l |> snd |> List.to_seq) + |> Seq.flat_map (fun (name, (timestamp, dss)) -> + (timestamp, List.to_seq dss) + ) in let plugins_stats = Rrdd_server.Plugin.read_stats () in let stats = Seq.append plugins_stats dom0_stats in From a7bc62d321492ef8ff326196907c3eed5bde467f Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 18 Oct 2024 11:44:21 +0100 Subject: [PATCH 013/121] CA-391651: rrdd_server - read plugins' timestamps, don't just ignore them Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 8 +++++--- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index bd625bdae30..2b2a6cca004 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -764,7 +764,7 @@ module Plugin = struct ) (* Read, parse, and combine metrics from all registered plugins. 
*) - let read_stats () : (Rrd.ds_owner * Ds.ds) Seq.t = + let read_stats () = let plugins = with_lock registered_m (fun _ -> List.of_seq (Hashtbl.to_seq registered) @@ -773,7 +773,9 @@ module Plugin = struct let process_plugin (uid, plugin) = try let payload = get_payload ~uid plugin in - List.to_seq payload.Rrd_protocol.datasources + let timestamp = payload.Rrd_protocol.timestamp |> Int64.to_float in + let dss = List.to_seq payload.Rrd_protocol.datasources in + (timestamp, dss) with _ -> Seq.empty in List.iter decr_skip_count plugins ; @@ -806,7 +808,7 @@ module Plugin = struct let deregister = Local.deregister (* Read, parse, and combine metrics from all registered plugins. *) - let read_stats () : (Rrd.ds_owner * Ds.ds) Seq.t = Local.read_stats () + let read_stats () = Local.read_stats () end module HA = struct diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli index 2911895a4a3..d32540390c0 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli @@ -69,7 +69,7 @@ module Plugin : sig val next_reading : string -> float - val read_stats : unit -> (Rrd.ds_owner * Ds.ds) Seq.t + val read_stats : unit -> float * (Rrd.ds_owner * Ds.ds) Seq.t module Local : sig val register : From fdcb38651fa35706c24c11f7eda408a6184f492e Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 18 Oct 2024 15:12:51 +0100 Subject: [PATCH 014/121] CA-391651: Propagate the timestamp inside RRD. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of timestamps taken at read time in xcp-rrdd, propagate timestamps taken by plugins (or collectors in xcp-rrdd itself) and use these when updating values. Also process datasources without flattening them all into one list. This allows to process datasources with the same timestamp (provided by the plugin or dom0 collector in xcp_rrdd) at once. 
Co-authored-by: Edwin Török Signed-off-by: Edwin Török Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 49 ++++---- ocaml/xcp-rrdd/bin/rrdd/rrdd_http_handler.ml | 4 +- ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 111 ++++++++++--------- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 58 +++++----- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli | 2 +- ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml | 9 +- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 13 ++- 7 files changed, 130 insertions(+), 116 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 7189b48753f..845f49aebc7 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -318,7 +318,7 @@ let rra_update rrd proc_pdp_st elapsed_pdp_st pdps = cdp.cdp_value <- cdp_init ) rra.rra_cdps ; - do_cfs rra new_start_pdp_offset pdps ; + do_cfs rra new_start_pdp_offset pdps ) in Array.iter updatefn rrd.rrd_rras @@ -363,7 +363,12 @@ let process_ds_value ds value interval new_domid = rate let ds_update rrd timestamp values transforms new_domid = - (* Interval is the time between this and the last update *) + (* Interval is the time between this and the last update + + Currently ds_update is called with datasources that belong to a single + plugin, correspondingly they all have the same timestamp. + Further refactoring is needed if timestamps per measurement are to be + introduced. *) let interval = timestamp -. rrd.last_updated in (* Work around the clock going backwards *) let interval = if interval < 0. then 5. 
else interval in @@ -452,11 +457,10 @@ let ds_update rrd timestamp values transforms new_domid = v2s ) -(** Update the rrd with named values rather than just an ordered array *) -let ds_update_named rrd timestamp ~new_domid valuesandtransforms = - let valuesandtransforms = - valuesandtransforms |> StringMap.of_seq - in +(** Update the rrd with named values rather than just an ordered array + Must be called with datasources coming from a single plugin, with + [timestamp] and [uid] representing it *) +let ds_update_named rrd ~new_domid timestamp valuesandtransforms = let get_value_and_transform {ds_name; _} = Option.value ~default:(VT_Unknown, Identity) (StringMap.find_opt ds_name valuesandtransforms) @@ -497,8 +501,7 @@ let ds_create name ty ?(min = neg_infinity) ?(max = infinity) ?(mrhb = infinity) ; ds_unknown_sec= 0.0 } -let rrd_create dss rras timestep inittime = - (* Use the standard update routines to initialise everything to correct values *) +let rrd_create dss rras timestep timestamp = let rrd = { last_updated= 0.0 @@ -510,16 +513,16 @@ let rrd_create dss rras timestep inittime = { rra with rra_data= - Array.init (Array.length dss) (fun i -> - let ds = dss.(i) in - Fring.make rra.rra_row_cnt nan ds.ds_min ds.ds_max - ) + Array.map + (fun ds -> Fring.make rra.rra_row_cnt nan ds.ds_min ds.ds_max) + dss ; rra_cdps= - Array.init (Array.length dss) (fun i -> - let ds = dss.(i) in + Array.map + (fun ds -> let cdp_init = cf_init_value rra.rra_cf ds in {cdp_value= cdp_init; cdp_unknown_pdps= 0} - ) + ) + dss } ) rras @@ -527,21 +530,20 @@ let rrd_create dss rras timestep inittime = in let values = Array.map (fun ds -> ds.ds_last) dss in let transforms = Array.make (Array.length values) Identity in - ds_update rrd inittime values transforms true ; + (* Use the standard update routines to initialise everything to correct values *) + ds_update rrd timestamp values transforms true ; rrd (** Add in a new DS into a pre-existing RRD. 
Preserves data of all the other archives and fills the new one full of NaNs. Note that this doesn't fill in the CDP values correctly at the moment! - - @param now = Unix.gettimeofday () *) -let rrd_add_ds rrd now newds = +let rrd_add_ds rrd timestamp newds = if List.mem newds.ds_name (ds_names rrd) then rrd else - let npdps = Int64.of_float now /// rrd.timestep in + let npdps = Int64.of_float timestamp /// rrd.timestep in { rrd with rrd_dss= Array.append rrd.rrd_dss [|newds|] @@ -631,15 +633,14 @@ let find_best_rras rrd pdp_interval cf start = in List.filter (contains_time newstarttime) rras -(* now = Unix.gettimeofday () *) -let query_named_ds rrd now ds_name cf = +let query_named_ds rrd as_of_time ds_name cf = let n = Utils.array_index ds_name (Array.map (fun ds -> ds.ds_name) rrd.rrd_dss) in if n = -1 then raise (Invalid_data_source ds_name) else - let rras = find_best_rras rrd 0 (Some cf) (Int64.of_float now) in + let rras = find_best_rras rrd 0 (Some cf) (Int64.of_float as_of_time) in match rras with | [] -> raise No_RRA_Available diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_http_handler.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_http_handler.ml index 4cf580ed590..60f4c75dac0 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_http_handler.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_http_handler.ml @@ -228,5 +228,7 @@ let put_rrd_handler (req : Http.Request.t) (s : Unix.file_descr) _ = ) else ( debug "Receiving RRD for resident VM uuid=%s. Replacing in hashtable." 
uuid ; let domid = int_of_string (List.assoc "domid" query) in - with_lock mutex (fun _ -> Hashtbl.replace vm_rrds uuid {rrd; dss= []; domid}) + with_lock mutex (fun _ -> + Hashtbl.replace vm_rrds uuid {rrd; dss= Rrd.StringMap.empty; domid} + ) ) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index 89ccd03f943..2a91ddafdad 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -26,7 +26,7 @@ let create_rras use_min_max = let step = 5L (** Create a rrd *) -let create_fresh_rrd use_min_max dss = +let create_fresh_rrd use_min_max dss timestamp = let rras = create_rras use_min_max in let dss = Array.of_list @@ -43,24 +43,36 @@ let create_fresh_rrd use_min_max dss = dss ) in - Rrd.rrd_create dss rras step (Unix.gettimeofday ()) + Rrd.rrd_create dss rras step timestamp -let merge_new_dss rrd dss = - let should_enable_ds ds = !Rrdd_shared.enable_all_dss || ds.ds_default in - let enabled_dss = List.filter should_enable_ds dss in - let current_dss = Rrd.ds_names rrd |> StringSet.of_list in +(* Check if new (enabled) datasources appeared, and add them to the RRD *) +let merge_new_dss rrdi dss = + let should_enable_ds _ (_, ds) = + !Rrdd_shared.enable_all_dss || ds.ds_default + in + let default_dss = StringMap.filter should_enable_ds dss in + (* NOTE: It's enough to check if all the default datasources have been added + to the RRD_INFO, because if a non-default one has been enabled at runtime, + it's added to the RRD immediately and we don't need to bother *) let new_dss = - List.filter - (fun ds -> not (StringSet.mem ds.ds_name current_dss)) - enabled_dss + StringMap.filter + (fun ds_name _ -> not (StringMap.mem ds_name rrdi.dss)) + default_dss in - let now = Unix.gettimeofday () in - List.fold_left - (fun rrd ds -> - rrd_add_ds rrd now - (Rrd.ds_create ds.ds_name ds.Ds.ds_type ~mrhb:300.0 Rrd.VT_Unknown) - ) - rrd new_dss + (* fold on Map is not tail-recursive, but the depth of 
the stack should be + log of the number of entries at worst, so this should be alright. + Previous conversions to List are also not tail-recursive with identical + stack depth *) + let merge_keys _key a _b = Some a in + let updated_dss = StringMap.union merge_keys dss rrdi.dss in + ( updated_dss + , StringMap.fold + (fun _key (timestamp, ds) rrd -> + rrd_add_ds rrd timestamp + (Rrd.ds_create ds.ds_name ds.Ds.ds_type ~mrhb:300.0 Rrd.VT_Unknown) + ) + new_dss rrdi.rrd + ) module OwnerMap = Map.Make (struct type t = ds_owner @@ -84,11 +96,11 @@ end) update, we assume that the domain has gone and we stream the RRD to the master. We also have a list of the currently rebooting VMs to ensure we don't accidentally archive the RRD. *) -let update_rrds timestamp dss uuid_domids paused_vms = +let update_rrds uuid_domids paused_vms (timestamp, dss) = let uuid_domids = List.to_seq uuid_domids |> StringMap.of_seq in let paused_vms = List.to_seq paused_vms |> StringSet.of_seq in let consolidate all (owner, ds) = - let add_ds_to = StringMap.add ds.ds_name ds in + let add_ds_to = StringMap.add ds.ds_name (timestamp, ds) in let merge = function | None -> Some (add_ds_to StringMap.empty) @@ -98,10 +110,9 @@ let update_rrds timestamp dss uuid_domids paused_vms = OwnerMap.update owner merge all in let dss = Seq.fold_left consolidate OwnerMap.empty dss in - - (* the first parameter and ds.ds_name are equivalent *) - let to_named_updates (_, ds) = - (ds.ds_name, (ds.ds_value, ds.ds_pdp_transform_function)) + let to_named_updates (_, ds) = (ds.ds_value, ds.ds_pdp_transform_function) in + let map_keys_to_list dss = + StringMap.bindings dss |> List.map snd |> List.map snd in (* Here we do the synchronising between the dom0 view of the world and our @@ -109,12 +120,13 @@ let update_rrds timestamp dss uuid_domids paused_vms = the world *) Xapi_stdext_threads.Threadext.Mutex.execute mutex (fun _ -> let out_of_date, by_how_much = + let reading_timestamp = Unix.gettimeofday () in match !host_rrd 
with | None -> (false, 0.) | Some rrdi -> - ( rrdi.rrd.Rrd.last_updated > timestamp - , abs_float (timestamp -. rrdi.rrd.Rrd.last_updated) + ( rrdi.rrd.Rrd.last_updated > reading_timestamp + , abs_float (reading_timestamp -. rrdi.rrd.Rrd.last_updated) ) in if out_of_date then @@ -122,30 +134,26 @@ let update_rrds timestamp dss uuid_domids paused_vms = "Clock just went backwards by %.0f seconds: RRD data may now be \ unreliable" by_how_much ; - let process_vm vm_uuid dss = - let named_updates = StringMap.to_seq dss |> Seq.map to_named_updates in - let dss = StringMap.to_seq dss |> Seq.map snd |> List.of_seq in - + let process_vm vm_uuid (dss : (float * Ds.ds) Rrd.StringMap.t) = match StringMap.find_opt vm_uuid uuid_domids with | Some domid -> ( (* First, potentially update the rrd with any new default dss *) match Hashtbl.find_opt vm_rrds vm_uuid with | Some rrdi -> - let rrd = merge_new_dss rrdi.rrd dss in - Hashtbl.replace vm_rrds vm_uuid {rrd; dss; domid} ; + let updated_dss, rrd = merge_new_dss rrdi dss in + Hashtbl.replace vm_rrds vm_uuid {rrd; dss= updated_dss; domid} ; (* CA-34383: Memory updates from paused domains serve no useful purpose. During a migrate such updates can also cause undesirable discontinuities in the observed value of memory_actual. 
Hence, we ignore changes from paused domains: *) - if not (StringSet.mem vm_uuid paused_vms) then ( - Rrd.ds_update_named rrd timestamp - ~new_domid:(domid <> rrdi.domid) named_updates ; - rrdi.dss <- dss ; - rrdi.domid <- domid - ) + let named_updates = StringMap.map to_named_updates dss in + if not (StringSet.mem vm_uuid paused_vms) then + Rrd.ds_update_named rrd ~new_domid:(domid <> rrdi.domid) + timestamp named_updates | None -> debug "%s: Creating fresh RRD for VM uuid=%s" __FUNCTION__ vm_uuid ; - let rrd = create_fresh_rrd !use_min_max dss in + let dss_list = map_keys_to_list dss in + let rrd = create_fresh_rrd !use_min_max dss_list timestamp in Hashtbl.replace vm_rrds vm_uuid {rrd; dss; domid} ) | None -> @@ -153,39 +161,36 @@ let update_rrds timestamp dss uuid_domids paused_vms = __FUNCTION__ vm_uuid in let process_sr sr_uuid dss = - let named_updates = StringMap.to_seq dss |> Seq.map to_named_updates in - let dss = StringMap.to_seq dss |> Seq.map snd |> List.of_seq in try (* First, potentially update the rrd with any new default dss *) match Hashtbl.find_opt sr_rrds sr_uuid with | Some rrdi -> - let rrd = merge_new_dss rrdi.rrd dss in - Hashtbl.replace sr_rrds sr_uuid {rrd; dss; domid= 0} ; - Rrd.ds_update_named rrd timestamp ~new_domid:false named_updates ; - rrdi.dss <- dss ; - rrdi.domid <- 0 + let updated_dss, rrd = merge_new_dss rrdi dss in + Hashtbl.replace sr_rrds sr_uuid {rrd; dss= updated_dss; domid= 0} ; + let named_updates = StringMap.map to_named_updates dss in + Rrd.ds_update_named rrd ~new_domid:false timestamp named_updates | None -> debug "%s: Creating fresh RRD for SR uuid=%s" __FUNCTION__ sr_uuid ; - let rrd = create_fresh_rrd !use_min_max dss in + let dss_list = map_keys_to_list dss in + let rrd = create_fresh_rrd !use_min_max dss_list timestamp in Hashtbl.replace sr_rrds sr_uuid {rrd; dss; domid= 0} with _ -> log_backtrace () in let process_host dss = - let named_updates = StringMap.to_seq dss |> Seq.map to_named_updates in - let dss = 
StringMap.to_seq dss |> Seq.map snd |> List.of_seq in - match !host_rrd with | None -> debug "%s: Creating fresh RRD for localhost" __FUNCTION__ ; - let rrd = create_fresh_rrd true dss in + let dss_list = map_keys_to_list dss in + let rrd = create_fresh_rrd true dss_list timestamp in (* Always always create localhost rrds with min/max enabled *) host_rrd := Some {rrd; dss; domid= 0} | Some rrdi -> - rrdi.dss <- dss ; - let rrd = merge_new_dss rrdi.rrd dss in - host_rrd := Some {rrd; dss; domid= 0} ; - Rrd.ds_update_named rrd timestamp ~new_domid:false named_updates + let updated_dss, rrd = merge_new_dss rrdi dss in + host_rrd := Some {rrd; dss= updated_dss; domid= 0} ; + let named_updates = StringMap.map to_named_updates dss in + Rrd.ds_update_named rrd ~new_domid:false timestamp named_updates in + let process_dss ds_owner dss = match ds_owner with | Host -> diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index 2b2a6cca004..c0adc41dfc4 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -59,7 +59,8 @@ let push_sr_rrd (sr_uuid : string) (path : string) : unit = | Some rrd -> debug "Pushing RRD for SR uuid=%s locally" sr_uuid ; with_lock mutex (fun _ -> - Hashtbl.replace sr_rrds sr_uuid {rrd; dss= []; domid= 0} + Hashtbl.replace sr_rrds sr_uuid + {rrd; dss= Rrd.StringMap.empty; domid= 0} ) | None -> () @@ -256,7 +257,9 @@ module Deprecated = struct ) ) in - with_lock mutex (fun () -> host_rrd := Some {rrd; dss= []; domid= 0}) + with_lock mutex (fun () -> + host_rrd := Some {rrd; dss= Rrd.StringMap.empty; domid= 0} + ) with _ -> () end @@ -264,7 +267,9 @@ let push_rrd_local uuid domid : unit = try let rrd = get_rrd ~uuid in debug "Pushing RRD for VM uuid=%s locally" uuid ; - with_lock mutex (fun _ -> Hashtbl.replace vm_rrds uuid {rrd; dss= []; domid}) + with_lock mutex (fun _ -> + Hashtbl.replace vm_rrds uuid {rrd; dss= Rrd.StringMap.empty; domid} + ) with _ -> () let 
push_rrd_remote uuid member_address : unit = @@ -345,12 +350,11 @@ let fail_missing name = raise (Rrdd_error (Datasource_missing name)) name {ds_name}. The operation fails if rrdi does not contain any live datasource with the name {ds_name} *) let add_ds ~rrdi ~ds_name = - match List.find_opt (fun ds -> ds.Ds.ds_name = ds_name) rrdi.dss with + match Rrd.StringMap.find_opt ds_name rrdi.dss with | None -> fail_missing ds_name - | Some ds -> - let now = Unix.gettimeofday () in - Rrd.rrd_add_ds rrdi.rrd now + | Some (timestamp, ds) -> + Rrd.rrd_add_ds rrdi.rrd timestamp (Rrd.ds_create ds.ds_name ds.ds_type ~mrhb:300.0 Rrd.VT_Unknown) let add rrds uuid domid ds_name rrdi = @@ -391,7 +395,6 @@ let query_possible_dss rrdi = 'live' ds, then it is enabled if it exists in the set rrdi.rrd. If we have an 'archival' ds, then it is enabled if it is also an enabled 'live' ds, otherwise it is disabled. *) - let module SMap = Map.Make (String) in let module SSet = Set.Make (String) in let open Ds in let open Data_source in @@ -401,26 +404,22 @@ let query_possible_dss rrdi = let enabled_names = Rrd.ds_names rrdi.rrd |> SSet.of_list in let is_live_ds_enabled ds = SSet.mem ds.ds_name enabled_names in live_sources - |> List.to_seq - |> Seq.map (fun ds -> - ( ds.ds_name - , { - name= ds.ds_name - ; description= ds.ds_description - ; enabled= is_live_ds_enabled ds - ; standard= ds.ds_default - ; min= ds.ds_min - ; max= ds.ds_max - ; units= ds.ds_units - } - ) + |> Rrd.StringMap.map (fun (_timestamp, ds) -> + { + name= ds.ds_name + ; description= ds.ds_description + ; enabled= is_live_ds_enabled ds + ; standard= ds.ds_default + ; min= ds.ds_min + ; max= ds.ds_max + ; units= ds.ds_units + } ) - |> SMap.of_seq in let name_to_disabled_dss = archival_sources |> Seq.filter_map (fun ds -> - if SMap.mem ds.Rrd.ds_name name_to_live_dss then + if Rrd.StringMap.mem ds.Rrd.ds_name name_to_live_dss then None else Some @@ -437,10 +436,9 @@ let query_possible_dss rrdi = ) ) in - SMap.add_seq 
name_to_disabled_dss name_to_live_dss - |> SMap.to_seq - |> Seq.map snd - |> List.of_seq + Rrd.StringMap.add_seq name_to_disabled_dss name_to_live_dss + |> Rrd.StringMap.bindings + |> List.map snd let query_possible_host_dss () : Data_source.t list = with_lock mutex (fun () -> @@ -775,14 +773,14 @@ module Plugin = struct let payload = get_payload ~uid plugin in let timestamp = payload.Rrd_protocol.timestamp |> Int64.to_float in let dss = List.to_seq payload.Rrd_protocol.datasources in - (timestamp, dss) - with _ -> Seq.empty + Some (timestamp, dss) + with _ -> None in List.iter decr_skip_count plugins ; plugins |> List.to_seq |> Seq.filter (Fun.negate skip) - |> Seq.flat_map process_plugin + |> Seq.filter_map process_plugin end module Local = Make (struct diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli index d32540390c0..971cdc29860 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli @@ -69,7 +69,7 @@ module Plugin : sig val next_reading : string -> float - val read_stats : unit -> float * (Rrd.ds_owner * Ds.ds) Seq.t + val read_stats : unit -> (float * (Rrd.ds_owner * Ds.ds) Seq.t) Seq.t module Local : sig val register : diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml index 0dc1a82ce2f..4c1f85b140f 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml @@ -75,7 +75,14 @@ let use_min_max = ref false let mutex = Mutex.create () -type rrd_info = {rrd: Rrd.rrd; mutable dss: Ds.ds list; mutable domid: int} +type rrd_info = { + rrd: Rrd.rrd + ; mutable dss: (float * Ds.ds) Rrd.StringMap.t + (* Important: this must contain the entire list of datasources associated + with the RRD, even the ones disabled by default, as rrd_add_ds calls + can enable DSs at runtime *) + ; mutable domid: int +} (* RRDs *) let vm_rrds : (string, rrd_info) Hashtbl.t = Hashtbl.create 32 diff --git 
a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index c0ed92d05fc..94ed27765bd 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -514,15 +514,15 @@ let do_monitor_write xc writers = let dom0_stats = tagged_dom0_stats |> List.to_seq - |> Seq.flat_map (fun (name, (timestamp, dss)) -> - (timestamp, List.to_seq dss) - ) + |> Seq.map (fun (name, (timestamp, dss)) -> (timestamp, List.to_seq dss)) in let plugins_stats = Rrdd_server.Plugin.read_stats () in let stats = Seq.append plugins_stats dom0_stats in Rrdd_stats.print_snapshot () ; let uuid_domids = List.map (fun (_, u, i) -> (u, i)) domains in - Rrdd_monitor.update_rrds timestamp stats uuid_domids my_paused_vms ; + + (* stats are grouped per plugin, which provides its timestamp *) + Seq.iter (Rrdd_monitor.update_rrds uuid_domids my_paused_vms) stats ; Rrdd_libs.Constants.datasource_dump_file |> Rrdd_server.dump_host_dss_to_file ; @@ -541,10 +541,11 @@ let monitor_write_loop writers = Rrdd_shared.last_loop_end_time := Unix.gettimeofday () ) ; Thread.delay !Rrdd_shared.timeslice - with _ -> + with e -> debug "Monitor/write thread caught an exception. Pausing for 10s, \ - then restarting." ; + then restarting: %s" + (Printexc.to_string e) ; log_backtrace () ; Thread.delay 10. done From 0a6daf2d3cd74ddd8b0db9be8e4262a26279e329 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 18 Oct 2024 13:32:06 +0100 Subject: [PATCH 015/121] CA-391651: Rename 'new_domid' parameter to 'new_rrd' RRDs can have different owners, and new ones can show up without new domids being involved (SRs, VMs are also owner types). 
Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 12 ++++++------ ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 845f49aebc7..0ff4ac59f82 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -327,7 +327,7 @@ let rra_update rrd proc_pdp_st elapsed_pdp_st pdps = it's dependent on the time interval between updates. To be able to deal with gauge DSs, we multiply by the interval so that it cancels the subsequent divide by interval later on *) -let process_ds_value ds value interval new_domid = +let process_ds_value ds value interval new_rrd = if interval > ds.ds_mrhb then nan else @@ -342,7 +342,7 @@ let process_ds_value ds value interval new_domid = in let rate = - match (ds.ds_ty, new_domid) with + match (ds.ds_ty, new_rrd) with | Absolute, _ | Derive, true -> value_raw | Gauge, _ -> @@ -362,7 +362,7 @@ let process_ds_value ds value interval new_domid = ds.ds_last <- value ; rate -let ds_update rrd timestamp values transforms new_domid = +let ds_update rrd timestamp values transforms new_rrd = (* Interval is the time between this and the last update Currently ds_update is called with datasources that belong to a single @@ -400,7 +400,7 @@ let ds_update rrd timestamp values transforms new_domid = (* Calculate the values we're going to store based on the input data and the type of the DS *) let v2s = Array.mapi - (fun i value -> process_ds_value rrd.rrd_dss.(i) value interval new_domid) + (fun i value -> process_ds_value rrd.rrd_dss.(i) value interval new_rrd) values in (* Update the PDP accumulators up until the most recent PDP *) @@ -460,7 +460,7 @@ let ds_update rrd timestamp values transforms new_domid = (** Update the rrd with named values rather than just an ordered array Must be called with datasources coming from a single plugin, with [timestamp] and [uid] representing it *) 
-let ds_update_named rrd ~new_domid timestamp valuesandtransforms = +let ds_update_named rrd ~new_rrd timestamp valuesandtransforms = let get_value_and_transform {ds_name; _} = Option.value ~default:(VT_Unknown, Identity) (StringMap.find_opt ds_name valuesandtransforms) @@ -468,7 +468,7 @@ let ds_update_named rrd ~new_domid timestamp valuesandtransforms = let ds_values, ds_transforms = Array.split (Array.map get_value_and_transform rrd.rrd_dss) in - ds_update rrd timestamp ds_values ds_transforms new_domid + ds_update rrd timestamp ds_values ds_transforms new_rrd (** Get registered DS names *) let ds_names rrd = Array.to_list (Array.map (fun ds -> ds.ds_name) rrd.rrd_dss) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index 2a91ddafdad..ea2101c1c5e 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -148,8 +148,8 @@ let update_rrds uuid_domids paused_vms (timestamp, dss) = ignore changes from paused domains: *) let named_updates = StringMap.map to_named_updates dss in if not (StringSet.mem vm_uuid paused_vms) then - Rrd.ds_update_named rrd ~new_domid:(domid <> rrdi.domid) - timestamp named_updates + Rrd.ds_update_named rrd ~new_rrd:(domid <> rrdi.domid) timestamp + named_updates | None -> debug "%s: Creating fresh RRD for VM uuid=%s" __FUNCTION__ vm_uuid ; let dss_list = map_keys_to_list dss in @@ -168,7 +168,7 @@ let update_rrds uuid_domids paused_vms (timestamp, dss) = let updated_dss, rrd = merge_new_dss rrdi dss in Hashtbl.replace sr_rrds sr_uuid {rrd; dss= updated_dss; domid= 0} ; let named_updates = StringMap.map to_named_updates dss in - Rrd.ds_update_named rrd ~new_domid:false timestamp named_updates + Rrd.ds_update_named rrd ~new_rrd:false timestamp named_updates | None -> debug "%s: Creating fresh RRD for SR uuid=%s" __FUNCTION__ sr_uuid ; let dss_list = map_keys_to_list dss in @@ -188,7 +188,7 @@ let update_rrds uuid_domids paused_vms (timestamp, dss) = let 
updated_dss, rrd = merge_new_dss rrdi dss in host_rrd := Some {rrd; dss= updated_dss; domid= 0} ; let named_updates = StringMap.map to_named_updates dss in - Rrd.ds_update_named rrd ~new_domid:false timestamp named_updates + Rrd.ds_update_named rrd ~new_rrd:false timestamp named_updates in let process_dss ds_owner dss = From 8aa596d708bb9d8b21122c4d579832e2bf702012 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 18 Oct 2024 15:25:59 +0100 Subject: [PATCH 016/121] CA-391651 - rrd: Carry indices with datasources ds_update* functions previously relied on being called for the entirety of datasources of a certain RRD. Instead, operate on chunks of datasources provided by the same plugin. These chunks are not contiguous (more detailed explanation in the comments in the code), so indices to rrd_dss and the associated structures need to be carried with each datasource. Also turns the value*transform tuple into a type for more explicit accesses. Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 182 +++++++++++++++--------- ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 4 +- 2 files changed, 119 insertions(+), 67 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 0ff4ac59f82..176cd36289a 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -172,6 +172,13 @@ and rrd = { ; rrd_rras: rra array } +(** Parts of the datasources used in updating RRDs to minimize transferred data *) + +and ds_value_and_transform = { + value: ds_value_type + ; transform: ds_transform_function +} + let copy_cdp_prep x = {cdp_value= x.cdp_value; cdp_unknown_pdps= x.cdp_unknown_pdps} @@ -228,41 +235,42 @@ let get_times time timestep = (** Update the CDP value with a number (start_pdp_offset) of PDPs. *) let do_cfs rra start_pdp_offset pdps = - for i = 0 to Array.length pdps - 1 do - let cdp = rra.rra_cdps.(i) in - if Utils.isnan pdps.(i) then ( - (* CDP is an accumulator for the average. 
If we've got some unknowns, we need to - renormalize. ie, CDP contains \sum_{i=0}^j{ (1/n) x_i} where n is the number of - values we expect to have. If we have unknowns, we need to multiply the whole - thing by \frac{n_{old}}{n_{new}} *) - let olddiv = rra.rra_pdp_cnt - cdp.cdp_unknown_pdps in - let newdiv = olddiv - start_pdp_offset in - if newdiv > 0 then ( - cdp.cdp_value <- - cdp.cdp_value *. float_of_int olddiv /. float_of_int newdiv ; - cdp.cdp_unknown_pdps <- cdp.cdp_unknown_pdps + start_pdp_offset - ) - ) else - let cdpv = cdp.cdp_value in - cdp.cdp_value <- - ( match rra.rra_cf with - | CF_Average -> - cdpv - +. pdps.(i) - *. float_of_int start_pdp_offset - /. float_of_int rra.rra_pdp_cnt - | CF_Min -> - min cdpv pdps.(i) - | CF_Max -> - max cdpv pdps.(i) - | CF_Last -> - pdps.(i) + Array.iter + (fun (i, pdp) -> + let cdp = rra.rra_cdps.(i) in + if Utils.isnan pdp then ( + (* CDP is an accumulator for the average. If we've got some unknowns, we need to + renormalize. ie, CDP contains \sum_{i=0}^j{ (1/n) x_i} where n is the number of + values we expect to have. If we have unknowns, we need to multiply the whole + thing by \frac{n_{old}}{n_{new}} *) + let olddiv = rra.rra_pdp_cnt - cdp.cdp_unknown_pdps in + let newdiv = olddiv - start_pdp_offset in + if newdiv > 0 then ( + cdp.cdp_value <- + cdp.cdp_value *. float_of_int olddiv /. float_of_int newdiv ; + cdp.cdp_unknown_pdps <- cdp.cdp_unknown_pdps + start_pdp_offset ) - done + ) else + let cdpv = cdp.cdp_value in + cdp.cdp_value <- + ( match rra.rra_cf with + | CF_Average -> + cdpv + +. pdp + *. float_of_int start_pdp_offset + /. float_of_int rra.rra_pdp_cnt + | CF_Min -> + min cdpv pdp + | CF_Max -> + max cdpv pdp + | CF_Last -> + pdp + ) + ) + pdps (** Update the RRAs with a number of PDPs. 
*) let rra_update rrd proc_pdp_st elapsed_pdp_st pdps = - (* debug "rra_update";*) let updatefn rra = let start_pdp_offset = rra.rra_pdp_cnt @@ -287,37 +295,39 @@ let rra_update rrd proc_pdp_st elapsed_pdp_st pdps = repeated values is simply the value itself. *) let primaries = Array.map - (fun cdp -> + (fun (i, _) -> + let cdp = rra.rra_cdps.(i) in if cdp.cdp_unknown_pdps <= int_of_float (rra.rra_xff *. float_of_int rra.rra_pdp_cnt) then - cdp.cdp_value + (i, cdp.cdp_value) else - nan + (i, nan) ) - rra.rra_cdps + pdps in let secondaries = pdps in - let push i value = Fring.push rra.rra_data.(i) value in - Array.iteri push primaries ; + let push (i, value) = Fring.push rra.rra_data.(i) value in + Array.iter push primaries ; for _ = 1 to min (rra_step_cnt - 1) rra.rra_row_cnt do - Array.iteri push secondaries + Array.iter push secondaries done ; (* Reinitialise the CDP preparation area *) let new_start_pdp_offset = (elapsed_pdp_st - start_pdp_offset) mod rra.rra_pdp_cnt in - Array.iteri - (fun i cdp -> + Array.iter + (fun (i, _) -> + let cdp = rra.rra_cdps.(i) in let ds = rrd.rrd_dss.(i) in let cdp_init = cf_init_value rra.rra_cf ds in cdp.cdp_unknown_pdps <- 0 ; cdp.cdp_value <- cdp_init ) - rra.rra_cdps ; + pdps ; do_cfs rra new_start_pdp_offset pdps ) in @@ -362,7 +372,7 @@ let process_ds_value ds value interval new_rrd = ds.ds_last <- value ; rate -let ds_update rrd timestamp values transforms new_rrd = +let ds_update rrd timestamp valuesandtransforms new_rrd = (* Interval is the time between this and the last update Currently ds_update is called with datasources that belong to a single @@ -399,13 +409,16 @@ let ds_update rrd timestamp values transforms new_rrd = (* Calculate the values we're going to store based on the input data and the type of the DS *) let v2s = - Array.mapi - (fun i value -> process_ds_value rrd.rrd_dss.(i) value interval new_rrd) - values + Array.map + (fun (i, {value; _}) -> + let v = process_ds_value rrd.rrd_dss.(i) value interval 
new_rrd in + (i, v) + ) + valuesandtransforms in (* Update the PDP accumulators up until the most recent PDP *) - Array.iteri - (fun i value -> + Array.iter + (fun (i, value) -> let ds = rrd.rrd_dss.(i) in if Utils.isnan value then ds.ds_unknown_sec <- pre_int @@ -418,10 +431,11 @@ let ds_update rrd timestamp values transforms new_rrd = if elapsed_pdp_st > 0 then ( (* Calculate the PDPs for each DS *) let pdps = - Array.mapi - (fun i ds -> + Array.map + (fun (i, {transform; _}) -> + let ds = rrd.rrd_dss.(i) in if interval > ds.ds_mrhb then - nan + (i, nan) else let raw = ds.ds_value @@ -430,21 +444,21 @@ let ds_update rrd timestamp values transforms new_rrd = ) in (* Apply the transform after the raw value has been calculated *) - let raw = apply_transform_function transforms.(i) raw in + let raw = apply_transform_function transform raw in (* Make sure the values are not out of bounds after all the processing *) if raw < ds.ds_min || raw > ds.ds_max then - nan + (i, nan) else - raw + (i, raw) ) - rrd.rrd_dss + valuesandtransforms in rra_update rrd proc_pdp_st elapsed_pdp_st pdps ; (* Reset the PDP accumulators *) - Array.iteri - (fun i value -> + Array.iter + (fun (i, value) -> let ds = rrd.rrd_dss.(i) in if Utils.isnan value then ( ds.ds_value <- 0.0 ; @@ -461,14 +475,49 @@ let ds_update rrd timestamp values transforms new_rrd = Must be called with datasources coming from a single plugin, with [timestamp] and [uid] representing it *) let ds_update_named rrd ~new_rrd timestamp valuesandtransforms = - let get_value_and_transform {ds_name; _} = - Option.value ~default:(VT_Unknown, Identity) - (StringMap.find_opt ds_name valuesandtransforms) - in - let ds_values, ds_transforms = - Array.split (Array.map get_value_and_transform rrd.rrd_dss) + (* NOTE: + RRD data is stored in several arrays, with the same index pointing to the + same datasource's data in different arrays. This dependency is not always + obvious and doesn't apply to everything, i.e. 
'rrd_dss' stores datasources + one after another, but the 'rrd_rras' are actually sideways matrices, + with rrd_rras.(i).rra_data containing Frings for _all_ datasources, not + just the i-th datasource. So if one datasource is removed or adjusted, + one needs to update RRAs by iterating over all 'rrd_rras', not just + changing the i-th array. + + rrdd_monitor processes datasources per plugin (and then per owner), so the + list of 'valuesandtransforms' all come with a single timestamp. But these + datasources can be located all over the 'rrd_dss' array, not necessarily + consecutively. Non-exhaustive examples of why that can happen: + 1) initially disabled datasources can be enabled at runtime behind our + back, which adds them to the end of the rrd_dss array + 2) on toolstack restart, RRDs are restored from the filesystem, but the + new order of registration of plugins might not necessarily be the same + as the one before the restart (so they might be consecutive, but static + chunk indexes can't be assumed) + 3) rrd_monitor iterates over the hash table of registered plugins, which + means that plugins registered later can end up earlier in its ordering + + All this means that plugin's datasources can not be assumed to be + consecutive and each datasource should carry its index in rrd's arrays + with itself, they can't just be processed in chunks. 
+ + (This is due to how this used to be organized historically, with all of + the RRD's datasources processed at once with the server's timestamp, even + though they could have come from different plugins originally) + *) + let arr, _ = + Array.fold_left + (fun (arr, i) {ds_name; _} -> + match StringMap.find_opt ds_name valuesandtransforms with + | Some ds -> + (Array.append arr [|(i, ds)|], i + 1) + | None -> + (arr, i + 1) + ) + ([||], 0) rrd.rrd_dss in - ds_update rrd timestamp ds_values ds_transforms new_rrd + ds_update rrd timestamp arr new_rrd (** Get registered DS names *) let ds_names rrd = Array.to_list (Array.map (fun ds -> ds.ds_name) rrd.rrd_dss) @@ -528,10 +577,11 @@ let rrd_create dss rras timestep timestamp = rras } in - let values = Array.map (fun ds -> ds.ds_last) dss in - let transforms = Array.make (Array.length values) Identity in + let valuesandtransforms = + Array.mapi (fun i ds -> (i, {value= ds.ds_last; transform= Identity})) dss + in (* Use the standard update routines to initialise everything to correct values *) - ds_update rrd timestamp values transforms true ; + ds_update rrd timestamp valuesandtransforms true ; rrd (** Add in a new DS into a pre-existing RRD. 
Preserves data of all the other archives diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index ea2101c1c5e..2b7c6eda420 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -110,7 +110,9 @@ let update_rrds uuid_domids paused_vms (timestamp, dss) = OwnerMap.update owner merge all in let dss = Seq.fold_left consolidate OwnerMap.empty dss in - let to_named_updates (_, ds) = (ds.ds_value, ds.ds_pdp_transform_function) in + let to_named_updates (_, ds) = + {value= ds.ds_value; transform= ds.ds_pdp_transform_function} + in let map_keys_to_list dss = StringMap.bindings dss |> List.map snd |> List.map snd in From ef071a6e129c8a9cb9afd41bc0b9c69e559d7f0f Mon Sep 17 00:00:00 2001 From: Changlei Li Date: Thu, 24 Oct 2024 14:48:48 +0800 Subject: [PATCH 017/121] CA-400559: API Error too_many_groups is not in go SDK Reason: API errors in go sdk are generated from Api_errors.errors. Error is filled in Api_errors.errors when defined using add_error function. Error too_many_groups is not defined using add_error function. Fix: Define too_many_groups using add_error function. 
Signed-off-by: Changlei Li --- ocaml/xapi-consts/api_errors.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 97880cde57a..ebafbdaa111 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -1394,4 +1394,4 @@ let telemetry_next_collection_too_late = (* FIPS/CC_PREPARATIONS *) let illegal_in_fips_mode = add_error "ILLEGAL_IN_FIPS_MODE" -let too_many_groups = "TOO_MANY_GROUPS" +let too_many_groups = add_error "TOO_MANY_GROUPS" From 0d265537c3d363b3f3b188ca3d98ca5a411e0100 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 22 Oct 2024 16:55:25 +0100 Subject: [PATCH 018/121] chore: annotate types for non-returning functions These become warnings in ocaml 5.0+ Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/xapi.ml | 2 +- ocaml/xapi/xapi_ha.ml | 2 +- ocaml/xenopsd/cli/xn.ml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index ed6323663e3..ca87e740efb 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -180,7 +180,7 @@ let init_args () = Xapi_globs.xenopsd_queues := ["xenopsd"] ) -let wait_to_die () = +let wait_to_die () : unit = (* don't call Thread.join cos this interacts strangely with OCAML runtime and stops the OCAML-level signal handlers ever getting called... Thread.delay is fine tho' *) while true do diff --git a/ocaml/xapi/xapi_ha.ml b/ocaml/xapi/xapi_ha.ml index ddfbc357fb2..b6ba195f823 100644 --- a/ocaml/xapi/xapi_ha.ml +++ b/ocaml/xapi/xapi_ha.ml @@ -130,7 +130,7 @@ let uuid_of_host_address address = let on_master_failure () = (* The plan is: keep asking if I should be the master. If I'm rejected then query the live set and see if someone else has been marked as master, if so become a slave of them. 
*) - let become_master () = + let become_master () : unit = info "This node will become the master" ; Xapi_pool_transition.become_master () ; info "Waiting for server restart" ; diff --git a/ocaml/xenopsd/cli/xn.ml b/ocaml/xenopsd/cli/xn.ml index 4c1251cccbd..a6ed6a884bd 100644 --- a/ocaml/xenopsd/cli/xn.ml +++ b/ocaml/xenopsd/cli/xn.ml @@ -1051,7 +1051,7 @@ let unix_proxy path = | 0 -> let buf = Bytes.make 16384 '\000' in let accept, _ = Unix.accept listen in - let copy a b = + let copy a b : unit = while true do let n = Unix.read a buf 0 (Bytes.length buf) in if n = 0 then exit 0 ; From c62d74268e9d9a90da68f87bfa5c3bab3633dfd5 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 18 Oct 2024 17:50:49 +0100 Subject: [PATCH 019/121] IH-728: Remove unused copy_into function Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 29 --------------------- ocaml/xapi-idl/storage/storage_skeleton.ml | 1 - ocaml/xapi-storage-script/main.ml | 1 - ocaml/xapi/storage_migrate.ml | 9 ------- ocaml/xapi/storage_mux.ml | 4 --- ocaml/xapi/storage_smapiv1.ml | 4 --- ocaml/xapi/storage_smapiv1_wrapper.ml | 5 ---- 7 files changed, 53 deletions(-) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index f5bd93de60b..30fad50da00 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -976,21 +976,6 @@ module StorageAPI (R : RPC) = struct ~description:["when true, verify remote server certificate"] Types.bool - (** [copy_into task sr vdi url sr2] copies the data from [vdi] into a remote - system [url]'s [sr2] *) - let copy_into = - let dest_vdi_p = Param.mk ~name:"dest_vdi" Vdi.t in - declare "DATA.copy_into" [] - (dbg_p - @-> sr_p - @-> vdi_p - @-> url_p - @-> dest_p - @-> dest_vdi_p - @-> verify_dest_p - @-> returning task_id_p err - ) - let copy = let result_p = Param.mk ~name:"task_id" Task.id in declare "DATA.copy" [] @@ -1344,17 +1329,6 @@ module type 
Server_impl = sig val get_by_name : context -> dbg:debug_info -> name:string -> sr * vdi_info module DATA : sig - val copy_into : - context - -> dbg:debug_info - -> sr:sr - -> vdi:vdi - -> url:string - -> dest:sr - -> dest_vdi:vdi - -> verify_dest:bool - -> Task.id - val copy : context -> dbg:debug_info @@ -1549,9 +1523,6 @@ module Server (Impl : Server_impl) () = struct Impl.VDI.list_changed_blocks () ~dbg ~sr ~vdi_from ~vdi_to ) ; S.get_by_name (fun dbg name -> Impl.get_by_name () ~dbg ~name) ; - S.DATA.copy_into (fun dbg sr vdi url dest dest_vdi verify_dest -> - Impl.DATA.copy_into () ~dbg ~sr ~vdi ~url ~dest ~dest_vdi ~verify_dest - ) ; S.DATA.copy (fun dbg sr vdi dp url dest verify_dest -> Impl.DATA.copy () ~dbg ~sr ~vdi ~dp ~url ~dest ~verify_dest ) ; diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index 25283ed473b..631c4a44d35 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -152,7 +152,6 @@ end let get_by_name ctx ~dbg ~name = u "get_by_name" module DATA = struct - let copy_into ctx ~dbg ~sr ~vdi ~url ~dest ~dest_vdi = u "DATA.copy_into" let copy ctx ~dbg ~sr ~vdi ~dp ~url ~dest = u "DATA.copy" diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 96c68e73a82..c69d28847d1 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1739,7 +1739,6 @@ let bind ~volume_script_dir = S.VDI.get_url (u "VDI.get_url") ; S.DATA.MIRROR.start (u "DATA.MIRROR.start") ; S.Policy.get_backend_vm (u "Policy.get_backend_vm") ; - S.DATA.copy_into (u "DATA.copy_into") ; S.DATA.MIRROR.receive_cancel (u "DATA.MIRROR.receive_cancel") ; S.SR.update_snapshot_info_src (u "SR.update_snapshot_info_src") ; S.DATA.MIRROR.stop (u "DATA.MIRROR.stop") ; diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 468cddb2bf0..5e3c5ca77f4 100644 --- a/ocaml/xapi/storage_migrate.ml +++ 
b/ocaml/xapi/storage_migrate.ml @@ -578,9 +578,6 @@ let copy' ~task ~dbg ~sr ~vdi ~url ~dest ~dest_vdi ~verify_dest = perform_cleanup_actions !on_fail ; raise e -let copy_into ~task ~dbg ~sr ~vdi ~url ~dest ~dest_vdi ~verify_dest = - copy' ~task ~dbg ~sr ~vdi ~url ~dest ~dest_vdi ~verify_dest - let remove_from_sm_config vdi_info key = { vdi_info with @@ -1368,12 +1365,6 @@ let copy ~dbg ~sr ~vdi ~dp ~url ~dest ~verify_dest = copy ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~dp ~url ~dest ~verify_dest ) -let copy_into ~dbg ~sr ~vdi ~url ~dest ~dest_vdi ~verify_dest = - with_task_and_thread ~dbg (fun task -> - copy_into ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~url ~dest ~dest_vdi - ~verify_dest - ) - (* The remote end of this call, SR.update_snapshot_info_dest, is implemented in * the SMAPIv1 section of storage_migrate.ml. It needs to access the setters * for snapshot_of, snapshot_time and is_a_snapshot, which we don't want to add diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index a2cfc468f5f..dc49d2e75b7 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -825,10 +825,6 @@ module Mux = struct let copy () ~dbg = with_dbg ~name:"DATA.copy" ~dbg @@ fun dbg -> Storage_migrate.copy ~dbg - let copy_into () ~dbg = - with_dbg ~name:"DATA.copy_into" ~dbg @@ fun dbg -> - Storage_migrate.copy_into ~dbg - module MIRROR = struct let start () ~dbg = with_dbg ~name:"DATA.MIRROR.start" ~dbg @@ fun dbg -> diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index bc5023006aa..84f1a1e86c4 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -1202,10 +1202,6 @@ module SMAPIv1 : Server_impl = struct let get_by_name _context ~dbg:_ ~name:_ = assert false module DATA = struct - let copy_into _context ~dbg:_ ~sr:_ ~vdi:_ ~url:_ ~dest:_ ~dest_vdi:_ - ~verify_dest:_ = - assert false - let copy _context ~dbg:_ ~sr:_ ~vdi:_ ~dp:_ ~url:_ ~dest:_ ~verify_dest:_ = assert false diff --git 
a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index 469be6a53c1..66ddd55e084 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -920,11 +920,6 @@ functor Impl.get_by_name context ~dbg ~name module DATA = struct - let copy_into context ~dbg ~sr ~vdi ~url ~dest = - info "DATA.copy_into dbg:%s sr:%s vdi:%s url:%s dest:%s" dbg - (s_of_sr sr) (s_of_vdi vdi) url (s_of_sr dest) ; - Impl.DATA.copy_into context ~dbg ~sr ~vdi ~url ~dest - let copy context ~dbg ~sr ~vdi ~dp ~url ~dest = info "DATA.copy dbg:%s sr:%s vdi:%s url:%s dest:%s" dbg (s_of_sr sr) (s_of_vdi vdi) url (s_of_sr dest) ; From 14b48b0a5865c74d7a64c1759924b8e1c9431045 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 25 Oct 2024 13:39:20 +0100 Subject: [PATCH 020/121] IH-728: Remove unused `dp` from `copy` function There is no remote invocation of this function, so should be safe. Signed-off-by: Vincent Liu --- ocaml/xapi-idl/storage/storage_interface.ml | 6 ++---- ocaml/xapi-idl/storage/storage_skeleton.ml | 3 +-- ocaml/xapi/storage_migrate.ml | 6 +++--- ocaml/xapi/storage_smapiv1.ml | 2 +- ocaml/xapi/storage_smapiv1_wrapper.ml | 4 ++-- ocaml/xapi/xapi_vm_migrate.ml | 2 +- 6 files changed, 10 insertions(+), 13 deletions(-) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index 30fad50da00..aa5754fabb9 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -982,7 +982,6 @@ module StorageAPI (R : RPC) = struct (dbg_p @-> sr_p @-> vdi_p - @-> dp_p @-> url_p @-> dest_p @-> verify_dest_p @@ -1334,7 +1333,6 @@ module type Server_impl = sig -> dbg:debug_info -> sr:sr -> vdi:vdi - -> dp:dp -> url:string -> dest:sr -> verify_dest:bool @@ -1523,8 +1521,8 @@ module Server (Impl : Server_impl) () = struct Impl.VDI.list_changed_blocks () ~dbg ~sr ~vdi_from ~vdi_to ) ; S.get_by_name (fun dbg name -> Impl.get_by_name () ~dbg ~name) 
; - S.DATA.copy (fun dbg sr vdi dp url dest verify_dest -> - Impl.DATA.copy () ~dbg ~sr ~vdi ~dp ~url ~dest ~verify_dest + S.DATA.copy (fun dbg sr vdi url dest verify_dest -> + Impl.DATA.copy () ~dbg ~sr ~vdi ~url ~dest ~verify_dest ) ; S.DATA.MIRROR.start (fun dbg sr vdi dp url dest verify_dest -> Impl.DATA.MIRROR.start () ~dbg ~sr ~vdi ~dp ~url ~dest ~verify_dest diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index 631c4a44d35..cced1a7f6f5 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -152,8 +152,7 @@ end let get_by_name ctx ~dbg ~name = u "get_by_name" module DATA = struct - - let copy ctx ~dbg ~sr ~vdi ~dp ~url ~dest = u "DATA.copy" + let copy ctx ~dbg ~sr ~vdi ~url ~dest = u "DATA.copy" module MIRROR = struct (** [start task sr vdi url sr2] creates a VDI in remote [url]'s [sr2] and diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 5e3c5ca77f4..1fbb3545767 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -1229,7 +1229,7 @@ let nbd_handler req s sr vdi dp = | None -> () -let copy ~task ~dbg ~sr ~vdi ~dp:_ ~url ~dest ~verify_dest = +let copy ~task ~dbg ~sr ~vdi ~url ~dest ~verify_dest = debug "copy sr:%s vdi:%s url:%s dest:%s verify_dest:%B" (Storage_interface.Sr.string_of sr) (Storage_interface.Vdi.string_of vdi) @@ -1360,9 +1360,9 @@ let start ~dbg ~sr ~vdi ~dp ~url ~dest ~verify_dest = start' ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~dp ~url ~dest ~verify_dest ) -let copy ~dbg ~sr ~vdi ~dp ~url ~dest ~verify_dest = +let copy ~dbg ~sr ~vdi ~url ~dest ~verify_dest = with_task_and_thread ~dbg (fun task -> - copy ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~dp ~url ~dest ~verify_dest + copy ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~url ~dest ~verify_dest ) (* The remote end of this call, SR.update_snapshot_info_dest, is implemented in diff --git a/ocaml/xapi/storage_smapiv1.ml 
b/ocaml/xapi/storage_smapiv1.ml index 84f1a1e86c4..0bb0dd9d267 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -1202,7 +1202,7 @@ module SMAPIv1 : Server_impl = struct let get_by_name _context ~dbg:_ ~name:_ = assert false module DATA = struct - let copy _context ~dbg:_ ~sr:_ ~vdi:_ ~dp:_ ~url:_ ~dest:_ ~verify_dest:_ = + let copy _context ~dbg:_ ~sr:_ ~vdi:_ ~url:_ ~dest:_ ~verify_dest:_ = assert false module MIRROR = struct diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index 66ddd55e084..ae1f21f72f3 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -920,10 +920,10 @@ functor Impl.get_by_name context ~dbg ~name module DATA = struct - let copy context ~dbg ~sr ~vdi ~dp ~url ~dest = + let copy context ~dbg ~sr ~vdi ~url ~dest = info "DATA.copy dbg:%s sr:%s vdi:%s url:%s dest:%s" dbg (s_of_sr sr) (s_of_vdi vdi) url (s_of_sr dest) ; - Impl.DATA.copy context ~dbg ~sr ~vdi ~dp ~url ~dest + Impl.DATA.copy context ~dbg ~sr ~vdi ~url ~dest module MIRROR = struct let start context ~dbg ~sr ~vdi ~dp ~url ~dest = diff --git a/ocaml/xapi/xapi_vm_migrate.ml b/ocaml/xapi/xapi_vm_migrate.ml index d35a6b98718..4ac14efa270 100644 --- a/ocaml/xapi/xapi_vm_migrate.ml +++ b/ocaml/xapi/xapi_vm_migrate.ml @@ -985,7 +985,7 @@ let vdi_copy_fun __context dbg vdi_map remote is_intra_pool remote_vdis so_far let mirror_to_remote new_dp = let task = if not vconf.do_mirror then - SMAPI.DATA.copy dbg vconf.sr vconf.location new_dp remote.sm_url dest_sr + SMAPI.DATA.copy dbg vconf.sr vconf.location remote.sm_url dest_sr is_intra_pool else (* Though we have no intention of "write", here we use the same mode as the From 755764e517958d2370fb49e365c85eb3e252d307 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 25 Oct 2024 13:44:10 +0100 Subject: [PATCH 021/121] IH-728: Give copy(s) better names Rather than copy and copy', rename them into `copy_into_sr` and 
`copy_into_vdi`, which is what they actually do. Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 1fbb3545767..9aa214228ab 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -442,7 +442,8 @@ let progress_callback start len t y = Storage_task.set_state t (Task.Pending new_progress) ; signal (Storage_task.id_of_handle t) -let copy' ~task ~dbg ~sr ~vdi ~url ~dest ~dest_vdi ~verify_dest = +(** [copy_into_vdi] is similar to [copy_into_sr] but requires a [dest_vdi] parameter *) +let copy_into_vdi ~task ~dbg ~sr ~vdi ~url ~dest ~dest_vdi ~verify_dest = let remote_url = Storage_utils.connection_args_of_uri ~verify_dest url in let module Remote = StorageAPI (Idl.Exn.GenClient (struct let rpc = @@ -837,7 +838,7 @@ let start' ~task ~dbg:_ ~sr ~vdi ~dp ~url ~dest ~verify_dest = (* Copy the snapshot to the remote *) let new_parent = Storage_task.with_subtask task "copy" (fun () -> - copy' ~task ~dbg ~sr ~vdi:snapshot.vdi ~url ~dest + copy_into_vdi ~task ~dbg ~sr ~vdi:snapshot.vdi ~url ~dest ~dest_vdi:result.Mirror.copy_diffs_to ~verify_dest ) |> vdi_info @@ -1229,7 +1230,10 @@ let nbd_handler req s sr vdi dp = | None -> () -let copy ~task ~dbg ~sr ~vdi ~url ~dest ~verify_dest = +(** [copy_into_sr] does not requires a dest vdi to be provided, instead, it will + find the nearest vdi on the [dest] sr, and if there is no such vdi, it will + create one. 
*) +let copy_into_sr ~task ~dbg ~sr ~vdi ~url ~dest ~verify_dest = debug "copy sr:%s vdi:%s url:%s dest:%s verify_dest:%B" (Storage_interface.Sr.string_of sr) (Storage_interface.Vdi.string_of vdi) @@ -1314,7 +1318,7 @@ let copy ~task ~dbg ~sr ~vdi ~url ~dest ~verify_dest = Remote.VDI.create dbg dest {local_vdi with sm_config= []} in let remote_copy = - copy' ~task ~dbg ~sr ~vdi ~url ~dest ~dest_vdi:remote_base.vdi + copy_into_vdi ~task ~dbg ~sr ~vdi ~url ~dest ~dest_vdi:remote_base.vdi ~verify_dest |> vdi_info in @@ -1362,7 +1366,8 @@ let start ~dbg ~sr ~vdi ~dp ~url ~dest ~verify_dest = let copy ~dbg ~sr ~vdi ~url ~dest ~verify_dest = with_task_and_thread ~dbg (fun task -> - copy ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~url ~dest ~verify_dest + copy_into_sr ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~url ~dest + ~verify_dest ) (* The remote end of this call, SR.update_snapshot_info_dest, is implemented in From c11556801ffaedc7ad160614e3474d0add2db656 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 18 Oct 2024 17:58:19 +0100 Subject: [PATCH 022/121] IH-728: Explicitly log important steps in SXM Also make the logging information more verbose. 
Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 9aa214228ab..06ab90a4f19 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -16,7 +16,11 @@ module D = Debug.Make (struct let name = "storage_migrate" end) open D -module SMPERF = Debug.Make (struct let name = "SMPERF" end) +(** As SXM is such a long running process, we dedicate this to log important + milestones during the SXM process *) +module SXM = Debug.Make (struct + let name = "SXM" +end) module Listext = Xapi_stdext_std.Listext open Xapi_stdext_pervasives.Pervasiveext @@ -534,7 +538,7 @@ let copy_into_vdi ~task ~dbg ~sr ~vdi ~url ~dest ~dest_vdi ~verify_dest = ; verify_dest } ) ; - SMPERF.debug "mirror.copy: copy initiated local_vdi:%s dest_vdi:%s" + SXM.info "%s: copy initiated local_vdi:%s dest_vdi:%s" __FUNCTION__ (Storage_interface.Vdi.string_of vdi) (Storage_interface.Vdi.string_of dest_vdi) ; finally @@ -567,7 +571,9 @@ let copy_into_vdi ~task ~dbg ~sr ~vdi ~url ~dest ~dest_vdi ~verify_dest = Remote.DP.destroy dbg remote_dp false ; State.remove_copy id ) ; - SMPERF.debug "mirror.copy: copy complete" ; + SXM.info "%s: copy complete for local_vdi:%s dest_vdi:%s" __FUNCTION__ + (Storage_interface.Vdi.string_of vdi) + (Storage_interface.Vdi.string_of dest_vdi) ; debug "setting remote content_id <- %s" local_vdi.content_id ; Remote.VDI.set_content_id dbg dest dest_vdi local_vdi.content_id ; (* PR-1255: XXX: this is useful because we don't have content_ids by default *) @@ -658,13 +664,7 @@ let dbg_and_tracing_of_task task = |> Debug_info.to_string let start' ~task ~dbg:_ ~sr ~vdi ~dp ~url ~dest ~verify_dest = - debug "Mirror.start sr:%s vdi:%s url:%s dest:%s verify_dest:%B" - (Storage_interface.Sr.string_of sr) - (Storage_interface.Vdi.string_of vdi) - url - (Storage_interface.Sr.string_of dest) - 
verify_dest ; - SMPERF.debug "mirror.start called sr:%s vdi:%s url:%s dest:%s verify_dest:%B" + SXM.info "%s sr:%s vdi:%s url:%s dest:%s verify_dest:%B" __FUNCTION__ (Storage_interface.Sr.string_of sr) (Storage_interface.Vdi.string_of vdi) url @@ -794,7 +794,8 @@ let start' ~task ~dbg:_ ~sr ~vdi ~dp ~url ~dest ~verify_dest = State.add id (State.Send_op alm) ; debug "Updated" ; - debug "About to snapshot VDI = %s" (string_of_vdi_info local_vdi) ; + SXM.info "%s About to snapshot VDI = %s" __FUNCTION__ + (string_of_vdi_info local_vdi) ; let local_vdi = add_to_sm_config local_vdi "mirror" ("nbd:" ^ dp) in let local_vdi = add_to_sm_config local_vdi "base_mirror" id in let snapshot = @@ -810,8 +811,8 @@ let start' ~task ~dbg:_ ~sr ~vdi ~dp ~url ~dest ~verify_dest = | e -> raise e in - SMPERF.debug - "mirror.start: snapshot created, mirror initiated vdi:%s snapshot_of:%s" + SXM.info "%s: snapshot created, mirror initiated vdi:%s snapshot_of:%s" + __FUNCTION__ (Storage_interface.Vdi.string_of snapshot.vdi) (Storage_interface.Vdi.string_of local_vdi.vdi) ; on_fail := (fun () -> Local.VDI.destroy dbg sr snapshot.vdi) :: !on_fail ; From 34ec0967c39c9d824c406f26f88cb9b8319ccfbc Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 25 Oct 2024 13:54:22 +0100 Subject: [PATCH 023/121] IH-728: Restructure the Storage_migrate code - Introduce new `MigrateLocal` and `MigrateRemote` modules which contains the main implementations of the migration logic. - Move the actual exposed SMAPIv2 functions in one place towards the end of the file, rather than spreading across the entire file. These refactoring should all be statically verifiable by the compiler. 
Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 1502 +++++++++++++++++---------------- 1 file changed, 770 insertions(+), 732 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 06ab90a4f19..297601da1f1 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -29,8 +29,6 @@ open Xmlrpc_client open Storage_interface open Storage_task -let vm_of_s = Storage_interface.Vm.of_string - module State = struct module Receive_state = struct type t = { @@ -364,6 +362,8 @@ let tapdisk_of_attach_info (backend : Storage_interface.backend) = (Storage_interface.(rpc_of backend) backend |> Rpc.to_string) ; None +let vm_of_s = Storage_interface.Vm.of_string + let with_activated_disk ~dbg ~sr ~vdi ~dp f = let attached_vdi = Option.map @@ -446,145 +446,6 @@ let progress_callback start len t y = Storage_task.set_state t (Task.Pending new_progress) ; signal (Storage_task.id_of_handle t) -(** [copy_into_vdi] is similar to [copy_into_sr] but requires a [dest_vdi] parameter *) -let copy_into_vdi ~task ~dbg ~sr ~vdi ~url ~dest ~dest_vdi ~verify_dest = - let remote_url = Storage_utils.connection_args_of_uri ~verify_dest url in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url - end)) in - debug "copy local=%s/%s url=%s remote=%s/%s verify_dest=%B" - (Storage_interface.Sr.string_of sr) - (Storage_interface.Vdi.string_of vdi) - url - (Storage_interface.Sr.string_of dest) - (Storage_interface.Vdi.string_of dest_vdi) - verify_dest ; - (* Check the remote SR exists *) - let srs = Remote.SR.list dbg in - if not (List.mem dest srs) then - failwith - (Printf.sprintf "Remote SR %s not found" - (Storage_interface.Sr.string_of dest) - ) ; - let vdis = Remote.SR.scan dbg dest in - let remote_vdi = - try List.find (fun x -> x.vdi = dest_vdi) vdis - with Not_found -> - failwith - (Printf.sprintf "Remote VDI %s not found" - 
(Storage_interface.Vdi.string_of dest_vdi) - ) - in - let dest_content_id = remote_vdi.content_id in - (* Find the local VDI *) - let vdis = Local.SR.scan dbg sr in - let local_vdi = - try List.find (fun x -> x.vdi = vdi) vdis - with Not_found -> - failwith - (Printf.sprintf "Local VDI %s not found" - (Storage_interface.Vdi.string_of vdi) - ) - in - debug "copy local content_id=%s" local_vdi.content_id ; - debug "copy remote content_id=%s" dest_content_id ; - if local_vdi.virtual_size > remote_vdi.virtual_size then ( - (* This should never happen provided the higher-level logic is working properly *) - error "copy local virtual_size=%Ld > remote virtual_size = %Ld" - local_vdi.virtual_size remote_vdi.virtual_size ; - failwith "local VDI is larger than the remote VDI" - ) ; - let on_fail : (unit -> unit) list ref = ref [] in - let base_vdi = - try - let x = (List.find (fun x -> x.content_id = dest_content_id) vdis).vdi in - debug "local VDI has content_id = %s; we will perform an incremental copy" - dest_content_id ; - Some x - with _ -> - debug "no local VDI has content_id = %s; we will perform a full copy" - dest_content_id ; - None - in - try - let remote_dp = Uuidx.(to_string (make ())) in - let base_dp = Uuidx.(to_string (make ())) in - let leaf_dp = Uuidx.(to_string (make ())) in - let dest_vdi_url = - let url' = Http.Url.of_string url in - Http.Url.set_uri url' - (Printf.sprintf "%s/nbd/%s/%s/%s" (Http.Url.get_uri url') - (Storage_interface.Sr.string_of dest) - (Storage_interface.Vdi.string_of dest_vdi) - remote_dp - ) - |> Http.Url.to_string - in - debug "copy remote NBD URL = %s" dest_vdi_url ; - let id = State.copy_id_of (sr, vdi) in - debug "Persisting state for copy (id=%s)" id ; - State.add id - State.( - Copy_op - Copy_state. 
- { - base_dp - ; leaf_dp - ; remote_dp - ; dest_sr= dest - ; copy_vdi= remote_vdi.vdi - ; remote_url= url - ; verify_dest - } - ) ; - SXM.info "%s: copy initiated local_vdi:%s dest_vdi:%s" __FUNCTION__ - (Storage_interface.Vdi.string_of vdi) - (Storage_interface.Vdi.string_of dest_vdi) ; - finally - (fun () -> - debug "activating RW datapath %s on remote" remote_dp ; - ignore (Remote.VDI.attach2 dbg remote_dp dest dest_vdi true) ; - Remote.VDI.activate dbg remote_dp dest dest_vdi ; - with_activated_disk ~dbg ~sr ~vdi:base_vdi ~dp:base_dp (fun base_path -> - with_activated_disk ~dbg ~sr ~vdi:(Some vdi) ~dp:leaf_dp (fun src -> - let verify_cert = - if verify_dest then Stunnel_client.pool () else None - in - let dd = - Sparse_dd_wrapper.start - ~progress_cb:(progress_callback 0.05 0.9 task) - ~verify_cert ?base:base_path true (Option.get src) - dest_vdi_url remote_vdi.virtual_size - in - Storage_task.with_cancel task - (fun () -> Sparse_dd_wrapper.cancel dd) - (fun () -> - try Sparse_dd_wrapper.wait dd - with Sparse_dd_wrapper.Cancelled -> - Storage_task.raise_cancelled task - ) - ) - ) - ) - (fun () -> - Remote.DP.destroy dbg remote_dp false ; - State.remove_copy id - ) ; - SXM.info "%s: copy complete for local_vdi:%s dest_vdi:%s" __FUNCTION__ - (Storage_interface.Vdi.string_of vdi) - (Storage_interface.Vdi.string_of dest_vdi) ; - debug "setting remote content_id <- %s" local_vdi.content_id ; - Remote.VDI.set_content_id dbg dest dest_vdi local_vdi.content_id ; - (* PR-1255: XXX: this is useful because we don't have content_ids by default *) - debug "setting local content_id <- %s" local_vdi.content_id ; - Local.VDI.set_content_id dbg sr local_vdi.vdi local_vdi.content_id ; - Some (Vdi_info remote_vdi) - with e -> - error "Caught %s: performing cleanup actions" (Printexc.to_string e) ; - perform_cleanup_actions !on_fail ; - raise e - let remove_from_sm_config vdi_info key = { vdi_info with @@ -595,522 +456,777 @@ let add_to_sm_config vdi_info key value = let vdi_info 
= remove_from_sm_config vdi_info key in {vdi_info with sm_config= (key, value) :: vdi_info.sm_config} -let stop ~dbg ~id = - (* Find the local VDI *) - let alm = State.find_active_local_mirror id in - match alm with - | Some alm -> - ( match alm.State.Send_state.remote_info with - | Some remote_info -> ( - let sr, vdi = State.of_mirror_id id in - let vdis = Local.SR.scan dbg sr in - let local_vdi = - try List.find (fun x -> x.vdi = vdi) vdis - with Not_found -> - failwith - (Printf.sprintf "Local VDI %s not found" - (Storage_interface.Vdi.string_of vdi) - ) - in - let local_vdi = add_to_sm_config local_vdi "mirror" "null" in - let local_vdi = remove_from_sm_config local_vdi "base_mirror" in - (* Disable mirroring on the local machine *) - let snapshot = Local.VDI.snapshot dbg sr local_vdi in - Local.VDI.destroy dbg sr snapshot.vdi ; - (* Destroy the snapshot, if it still exists *) - let snap = - try - Some - (List.find - (fun x -> - List.mem_assoc "base_mirror" x.sm_config - && List.assoc "base_mirror" x.sm_config = id - ) - vdis - ) - with _ -> None - in - ( match snap with - | Some s -> - debug "Found snapshot VDI: %s" - (Storage_interface.Vdi.string_of s.vdi) ; - Local.VDI.destroy dbg sr s.vdi - | None -> - debug "Snapshot VDI already cleaned up" - ) ; - let remote_url = - Storage_utils.connection_args_of_uri - ~verify_dest:remote_info.State.Send_state.verify_dest - remote_info.State.Send_state.url - in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" - remote_url - end)) in - try Remote.DATA.MIRROR.receive_cancel dbg id with _ -> () - ) - | None -> - () - ) ; - State.remove_local_mirror id - | None -> - raise (Storage_interface.Storage_error (Does_not_exist ("mirror", id))) - let dbg_and_tracing_of_task task = Debug_info.make ~log:(Storage_task.get_dbg task) ~tracing:(Storage_task.tracing task) |> Debug_info.to_string -let start' ~task ~dbg:_ ~sr ~vdi ~dp ~url ~dest ~verify_dest 
= - SXM.info "%s sr:%s vdi:%s url:%s dest:%s verify_dest:%B" __FUNCTION__ - (Storage_interface.Sr.string_of sr) - (Storage_interface.Vdi.string_of vdi) - url - (Storage_interface.Sr.string_of dest) - verify_dest ; - let remote_url = Http.Url.of_string url in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" - (Storage_utils.connection_args_of_uri ~verify_dest url) - end)) in - (* Find the local VDI *) - let dbg = dbg_and_tracing_of_task task in - let vdis = Local.SR.scan dbg sr in - let local_vdi = - try List.find (fun x -> x.vdi = vdi) vdis - with Not_found -> failwith "Local VDI not found" - in - let id = State.mirror_id_of (sr, local_vdi.vdi) in - debug "Adding to active local mirrors before sending: id=%s" id ; - let alm = - State.Send_state. - { - url - ; dest_sr= dest - ; remote_info= None - ; local_dp= dp - ; tapdev= None - ; failed= false - ; watchdog= None - } - in - - State.add id (State.Send_op alm) ; - debug "Added" ; - (* A list of cleanup actions to perform if the operation should fail. *) - let on_fail : (unit -> unit) list ref = ref [] in - try - let similar_vdis = Local.VDI.similar_content dbg sr vdi in - let similars = - List.filter - (fun x -> x <> "") - (List.map (fun vdi -> vdi.content_id) similar_vdis) - in - debug "Similar VDIs to = [ %s ]" - (String.concat "; " - (List.map - (fun x -> - Printf.sprintf "(vdi=%s,content_id=%s)" - (Storage_interface.Vdi.string_of x.vdi) - x.content_id - ) - similar_vdis - ) - ) ; - let result_ty = - Remote.DATA.MIRROR.receive_start dbg dest local_vdi id similars +(** This module [MigrateLocal] consists of the concrete implementations of the +migration part of SMAPI. Functions inside this module are sender driven, which means +they tend to be executed on the sender side. although there is not a hard rule +on what is executed on the sender side, this provides some heuristics. 
*) +module MigrateLocal = struct + (** [copy_into_vdi] is similar to [copy_into_sr] but requires a [dest_vdi] parameter *) + let copy_into_vdi ~task ~dbg ~sr ~vdi ~url ~dest ~dest_vdi ~verify_dest = + let remote_url = Storage_utils.connection_args_of_uri ~verify_dest url in + let module Remote = StorageAPI (Idl.Exn.GenClient (struct + let rpc = + Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url + end)) in + debug "copy local=%s/%s url=%s remote=%s/%s verify_dest=%B" + (Storage_interface.Sr.string_of sr) + (Storage_interface.Vdi.string_of vdi) + url + (Storage_interface.Sr.string_of dest) + (Storage_interface.Vdi.string_of dest_vdi) + verify_dest ; + (* Check the remote SR exists *) + let srs = Remote.SR.list dbg in + if not (List.mem dest srs) then + failwith + (Printf.sprintf "Remote SR %s not found" + (Storage_interface.Sr.string_of dest) + ) ; + let vdis = Remote.SR.scan dbg dest in + let remote_vdi = + try List.find (fun x -> x.vdi = dest_vdi) vdis + with Not_found -> + failwith + (Printf.sprintf "Remote VDI %s not found" + (Storage_interface.Vdi.string_of dest_vdi) + ) in - let result = match result_ty with Mirror.Vhd_mirror x -> x in - (* Enable mirroring on the local machine *) - let mirror_dp = result.Mirror.mirror_datapath in - let uri = - Printf.sprintf "/services/SM/nbd/%s/%s/%s" - (Storage_interface.Sr.string_of dest) - (Storage_interface.Vdi.string_of result.Mirror.mirror_vdi.vdi) - mirror_dp + let dest_content_id = remote_vdi.content_id in + (* Find the local VDI *) + let vdis = Local.SR.scan dbg sr in + let local_vdi = + try List.find (fun x -> x.vdi = vdi) vdis + with Not_found -> + failwith + (Printf.sprintf "Local VDI %s not found" + (Storage_interface.Vdi.string_of vdi) + ) in - let dest_url = Http.Url.set_uri remote_url uri in - let request = - Http.Request.make - ~query:(Http.Url.get_query_params dest_url) - ~version:"1.0" ~user_agent:"smapiv2" Http.Put uri + debug "copy local content_id=%s" local_vdi.content_id ; + debug 
"copy remote content_id=%s" dest_content_id ; + if local_vdi.virtual_size > remote_vdi.virtual_size then ( + (* This should never happen provided the higher-level logic is working properly *) + error "copy local virtual_size=%Ld > remote virtual_size = %Ld" + local_vdi.virtual_size remote_vdi.virtual_size ; + failwith "local VDI is larger than the remote VDI" + ) ; + let on_fail : (unit -> unit) list ref = ref [] in + let base_vdi = + try + let x = + (List.find (fun x -> x.content_id = dest_content_id) vdis).vdi + in + debug + "local VDI has content_id = %s; we will perform an incremental copy" + dest_content_id ; + Some x + with _ -> + debug "no local VDI has content_id = %s; we will perform a full copy" + dest_content_id ; + None in - let verify_cert = if verify_dest then Stunnel_client.pool () else None in - let transport = Xmlrpc_client.transport_of_url ~verify_cert dest_url in - debug "Searching for data path: %s" dp ; - let attach_info = Local.DP.attach_info "nbd" sr vdi dp in - on_fail := (fun () -> Remote.DATA.MIRROR.receive_cancel dbg id) :: !on_fail ; - let tapdev = - match tapdisk_of_attach_info attach_info with - | Some tapdev -> - let pid = Tapctl.get_tapdisk_pid tapdev in - let path = Printf.sprintf "/var/run/blktap-control/nbdclient%d" pid in - with_transport ~stunnel_wait_disconnect:false transport - (with_http request (fun (_response, s) -> - let control_fd = Unix.socket Unix.PF_UNIX Unix.SOCK_STREAM 0 in - finally - (fun () -> - Unix.connect control_fd (Unix.ADDR_UNIX path) ; - let msg = dp in - let len = String.length msg in - let written = - Unixext.send_fd_substring control_fd msg 0 len [] s - in - if written <> len then ( - error "Failed to transfer fd to %s" path ; - failwith "Internal error transferring fd to tapdisk" - ) - ) - (fun () -> Unix.close control_fd) + try + let remote_dp = Uuidx.(to_string (make ())) in + let base_dp = Uuidx.(to_string (make ())) in + let leaf_dp = Uuidx.(to_string (make ())) in + let dest_vdi_url = + let url' = 
Http.Url.of_string url in + Http.Url.set_uri url' + (Printf.sprintf "%s/nbd/%s/%s/%s" (Http.Url.get_uri url') + (Storage_interface.Sr.string_of dest) + (Storage_interface.Vdi.string_of dest_vdi) + remote_dp + ) + |> Http.Url.to_string + in + debug "copy remote NBD URL = %s" dest_vdi_url ; + let id = State.copy_id_of (sr, vdi) in + debug "Persisting state for copy (id=%s)" id ; + State.add id + State.( + Copy_op + Copy_state. + { + base_dp + ; leaf_dp + ; remote_dp + ; dest_sr= dest + ; copy_vdi= remote_vdi.vdi + ; remote_url= url + ; verify_dest + } + ) ; + SXM.info "%s: copy initiated local_vdi:%s dest_vdi:%s" __FUNCTION__ + (Storage_interface.Vdi.string_of vdi) + (Storage_interface.Vdi.string_of dest_vdi) ; + finally + (fun () -> + debug "activating RW datapath %s on remote" remote_dp ; + ignore (Remote.VDI.attach2 dbg remote_dp dest dest_vdi true) ; + Remote.VDI.activate dbg remote_dp dest dest_vdi ; + with_activated_disk ~dbg ~sr ~vdi:base_vdi ~dp:base_dp + (fun base_path -> + with_activated_disk ~dbg ~sr ~vdi:(Some vdi) ~dp:leaf_dp + (fun src -> + let verify_cert = + if verify_dest then Stunnel_client.pool () else None + in + let dd = + Sparse_dd_wrapper.start + ~progress_cb:(progress_callback 0.05 0.9 task) + ~verify_cert ?base:base_path true (Option.get src) + dest_vdi_url remote_vdi.virtual_size + in + Storage_task.with_cancel task + (fun () -> Sparse_dd_wrapper.cancel dd) + (fun () -> + try Sparse_dd_wrapper.wait dd + with Sparse_dd_wrapper.Cancelled -> + Storage_task.raise_cancelled task + ) + ) + ) + ) + (fun () -> + Remote.DP.destroy dbg remote_dp false ; + State.remove_copy id + ) ; + SXM.info "%s: copy complete for local_vdi:%s dest_vdi:%s" __FUNCTION__ + (Storage_interface.Vdi.string_of vdi) + (Storage_interface.Vdi.string_of dest_vdi) ; + debug "setting remote content_id <- %s" local_vdi.content_id ; + Remote.VDI.set_content_id dbg dest dest_vdi local_vdi.content_id ; + (* PR-1255: XXX: this is useful because we don't have content_ids by default *) 
+ debug "setting local content_id <- %s" local_vdi.content_id ; + Local.VDI.set_content_id dbg sr local_vdi.vdi local_vdi.content_id ; + Some (Vdi_info remote_vdi) + with e -> + error "Caught %s: performing cleanup actions" (Printexc.to_string e) ; + perform_cleanup_actions !on_fail ; + raise e + + (** [copy_into_sr] does not requires a dest vdi to be provided, instead, it will + find the nearest vdi on the [dest] sr, and if there is no such vdi, it will + create one. *) + let copy_into_sr ~task ~dbg ~sr ~vdi ~url ~dest ~verify_dest = + debug "copy sr:%s vdi:%s url:%s dest:%s verify_dest:%B" + (Storage_interface.Sr.string_of sr) + (Storage_interface.Vdi.string_of vdi) + url + (Storage_interface.Sr.string_of dest) + verify_dest ; + let remote_url = Storage_utils.connection_args_of_uri ~verify_dest url in + let module Remote = StorageAPI (Idl.Exn.GenClient (struct + let rpc = + Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url + end)) in + (* Find the local VDI *) + try + let vdis = Local.SR.scan dbg sr in + let local_vdi = + try List.find (fun x -> x.vdi = vdi) vdis + with Not_found -> failwith (Printf.sprintf "Local VDI not found") + in + try + let similar_vdis = Local.VDI.similar_content dbg sr vdi in + let similars = List.map (fun vdi -> vdi.content_id) similar_vdis in + debug "Similar VDIs = [ %s ]" + (String.concat "; " + (List.map + (fun x -> + Printf.sprintf "(vdi=%s,content_id=%s)" + (Storage_interface.Vdi.string_of x.vdi) + x.content_id + ) + similar_vdis ) - ) ; - tapdev - | None -> - failwith "Not attached" + ) ; + let remote_vdis = Remote.SR.scan dbg dest in + (* We drop cbt_metadata VDIs that do not have any actual data *) + let remote_vdis = + List.filter (fun vdi -> vdi.ty <> "cbt_metadata") remote_vdis + in + let nearest = + List.fold_left + (fun acc content_id -> + match acc with + | Some _ -> + acc + | None -> ( + try + Some + (List.find + (fun vdi -> + vdi.content_id = content_id + && vdi.virtual_size <= local_vdi.virtual_size + 
) + remote_vdis + ) + with Not_found -> None + ) + ) + None similars + in + debug "Nearest VDI: content_id=%s vdi=%s" + (Option.fold ~none:"None" ~some:(fun x -> x.content_id) nearest) + (Option.fold ~none:"None" + ~some:(fun x -> Storage_interface.Vdi.string_of x.vdi) + nearest + ) ; + let remote_base = + match nearest with + | Some vdi -> + debug "Cloning VDI" ; + let vdi_clone = Remote.VDI.clone dbg dest vdi in + debug "Clone: %s" (Storage_interface.Vdi.string_of vdi_clone.vdi) ; + ( if vdi_clone.virtual_size <> local_vdi.virtual_size then + let new_size = + Remote.VDI.resize dbg dest vdi_clone.vdi + local_vdi.virtual_size + in + debug "Resize remote clone VDI to %Ld: result %Ld" + local_vdi.virtual_size new_size + ) ; + vdi_clone + | None -> + debug "Creating a blank remote VDI" ; + Remote.VDI.create dbg dest {local_vdi with sm_config= []} + in + let remote_copy = + copy_into_vdi ~task ~dbg ~sr ~vdi ~url ~dest ~dest_vdi:remote_base.vdi + ~verify_dest + |> vdi_info + in + let snapshot = Remote.VDI.snapshot dbg dest remote_copy in + Remote.VDI.destroy dbg dest remote_copy.vdi ; + Some (Vdi_info snapshot) + with e -> + error "Caught %s: copying snapshots vdi" (Printexc.to_string e) ; + raise (Storage_error (Internal_error (Printexc.to_string e))) + with + | Storage_error (Backend_error (code, params)) + | Api_errors.Server_error (code, params) -> + raise (Storage_error (Backend_error (code, params))) + | e -> + raise (Storage_error (Internal_error (Printexc.to_string e))) + + let start ~task ~dbg:_ ~sr ~vdi ~dp ~url ~dest ~verify_dest = + SXM.info "%s sr:%s vdi:%s url:%s dest:%s verify_dest:%B" __FUNCTION__ + (Storage_interface.Sr.string_of sr) + (Storage_interface.Vdi.string_of vdi) + url + (Storage_interface.Sr.string_of dest) + verify_dest ; + let remote_url = Http.Url.of_string url in + let module Remote = StorageAPI (Idl.Exn.GenClient (struct + let rpc = + Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" + (Storage_utils.connection_args_of_uri 
~verify_dest url) + end)) in + (* Find the local VDI *) + let dbg = dbg_and_tracing_of_task task in + let vdis = Local.SR.scan dbg sr in + let local_vdi = + try List.find (fun x -> x.vdi = vdi) vdis + with Not_found -> failwith "Local VDI not found" in - debug "Updating active local mirrors: id=%s" id ; + let id = State.mirror_id_of (sr, local_vdi.vdi) in + debug "Adding to active local mirrors before sending: id=%s" id ; let alm = State.Send_state. { url ; dest_sr= dest - ; remote_info= - Some - { - dp= mirror_dp - ; vdi= result.Mirror.mirror_vdi.vdi - ; url - ; verify_dest - } + ; remote_info= None ; local_dp= dp - ; tapdev= Some tapdev + ; tapdev= None ; failed= false ; watchdog= None } in State.add id (State.Send_op alm) ; - debug "Updated" ; - SXM.info "%s About to snapshot VDI = %s" __FUNCTION__ - (string_of_vdi_info local_vdi) ; - let local_vdi = add_to_sm_config local_vdi "mirror" ("nbd:" ^ dp) in - let local_vdi = add_to_sm_config local_vdi "base_mirror" id in - let snapshot = - try Local.VDI.snapshot dbg sr local_vdi with - | Storage_interface.Storage_error (Backend_error (code, _)) - when code = "SR_BACKEND_FAILURE_44" -> - raise - (Api_errors.Server_error - ( Api_errors.sr_source_space_insufficient - , [Storage_interface.Sr.string_of sr] - ) - ) - | e -> - raise e - in - SXM.info "%s: snapshot created, mirror initiated vdi:%s snapshot_of:%s" - __FUNCTION__ - (Storage_interface.Vdi.string_of snapshot.vdi) - (Storage_interface.Vdi.string_of local_vdi.vdi) ; - on_fail := (fun () -> Local.VDI.destroy dbg sr snapshot.vdi) :: !on_fail ; - (let rec inner () = - let alm_opt = State.find_active_local_mirror id in - match alm_opt with - | Some alm -> - let stats = Tapctl.stats (Tapctl.create ()) tapdev in - if stats.Tapctl.Stats.nbd_mirror_failed = 1 then ( - error "Tapdisk mirroring has failed" ; - Updates.add (Dynamic.Mirror id) updates - ) ; - alm.State.Send_state.watchdog <- - Some - (Scheduler.one_shot scheduler (Scheduler.Delta 5) - "tapdisk_watchdog" inner 
+ debug "Added" ; + (* A list of cleanup actions to perform if the operation should fail. *) + let on_fail : (unit -> unit) list ref = ref [] in + try + let similar_vdis = Local.VDI.similar_content dbg sr vdi in + let similars = + List.filter + (fun x -> x <> "") + (List.map (fun vdi -> vdi.content_id) similar_vdis) + in + debug "Similar VDIs to = [ %s ]" + (String.concat "; " + (List.map + (fun x -> + Printf.sprintf "(vdi=%s,content_id=%s)" + (Storage_interface.Vdi.string_of x.vdi) + x.content_id + ) + similar_vdis + ) + ) ; + let result_ty = + Remote.DATA.MIRROR.receive_start dbg dest local_vdi id similars + in + let result = match result_ty with Mirror.Vhd_mirror x -> x in + (* Enable mirroring on the local machine *) + let mirror_dp = result.Mirror.mirror_datapath in + let uri = + Printf.sprintf "/services/SM/nbd/%s/%s/%s" + (Storage_interface.Sr.string_of dest) + (Storage_interface.Vdi.string_of result.Mirror.mirror_vdi.vdi) + mirror_dp + in + let dest_url = Http.Url.set_uri remote_url uri in + let request = + Http.Request.make + ~query:(Http.Url.get_query_params dest_url) + ~version:"1.0" ~user_agent:"smapiv2" Http.Put uri + in + let verify_cert = if verify_dest then Stunnel_client.pool () else None in + let transport = Xmlrpc_client.transport_of_url ~verify_cert dest_url in + debug "Searching for data path: %s" dp ; + let attach_info = Local.DP.attach_info "nbd" sr vdi dp in + on_fail := + (fun () -> Remote.DATA.MIRROR.receive_cancel dbg id) :: !on_fail ; + let tapdev = + match tapdisk_of_attach_info attach_info with + | Some tapdev -> + let pid = Tapctl.get_tapdisk_pid tapdev in + let path = + Printf.sprintf "/var/run/blktap-control/nbdclient%d" pid + in + with_transport ~stunnel_wait_disconnect:false transport + (with_http request (fun (_response, s) -> + let control_fd = + Unix.socket Unix.PF_UNIX Unix.SOCK_STREAM 0 + in + finally + (fun () -> + Unix.connect control_fd (Unix.ADDR_UNIX path) ; + let msg = dp in + let len = String.length msg in + let 
written = + Unixext.send_fd_substring control_fd msg 0 len [] s + in + if written <> len then ( + error "Failed to transfer fd to %s" path ; + failwith "Internal error transferring fd to tapdisk" + ) + ) + (fun () -> Unix.close control_fd) ) - | None -> - () - in - inner () - ) ; - on_fail := (fun () -> Local.DATA.MIRROR.stop dbg id) :: !on_fail ; - (* Copy the snapshot to the remote *) - let new_parent = - Storage_task.with_subtask task "copy" (fun () -> - copy_into_vdi ~task ~dbg ~sr ~vdi:snapshot.vdi ~url ~dest - ~dest_vdi:result.Mirror.copy_diffs_to ~verify_dest - ) - |> vdi_info - in - debug "Local VDI %s = remote VDI %s" - (Storage_interface.Vdi.string_of snapshot.vdi) - (Storage_interface.Vdi.string_of new_parent.vdi) ; - Remote.VDI.compose dbg dest result.Mirror.copy_diffs_to - result.Mirror.mirror_vdi.vdi ; - Remote.VDI.remove_from_sm_config dbg dest result.Mirror.mirror_vdi.vdi - "base_mirror" ; - debug "Local VDI %s now mirrored to remote VDI: %s" - (Storage_interface.Vdi.string_of local_vdi.vdi) - (Storage_interface.Vdi.string_of result.Mirror.mirror_vdi.vdi) ; - debug "Destroying dummy VDI on remote" ; - Remote.VDI.destroy dbg dest result.Mirror.dummy_vdi ; - debug "Destroying snapshot on src" ; - Local.VDI.destroy dbg sr snapshot.vdi ; - Some (Mirror_id id) - with - | Storage_error (Sr_not_attached sr_uuid) -> - error " Caught exception %s:%s. Performing cleanup." 
- Api_errors.sr_not_attached sr_uuid ; - perform_cleanup_actions !on_fail ; - raise (Api_errors.Server_error (Api_errors.sr_not_attached, [sr_uuid])) - | e -> - error "Caught %s: performing cleanup actions" (Api_errors.to_string e) ; - perform_cleanup_actions !on_fail ; - raise e - -(* XXX: PR-1255: copy the xenopsd 'raise Exception' pattern *) -let stop ~dbg ~id = - try stop ~dbg ~id with - | Storage_error (Backend_error (code, params)) - | Api_errors.Server_error (code, params) -> - raise (Storage_error (Backend_error (code, params))) - | e -> - raise e + ) ; + tapdev + | None -> + failwith "Not attached" + in + debug "Updating active local mirrors: id=%s" id ; + let alm = + State.Send_state. + { + url + ; dest_sr= dest + ; remote_info= + Some + { + dp= mirror_dp + ; vdi= result.Mirror.mirror_vdi.vdi + ; url + ; verify_dest + } + ; local_dp= dp + ; tapdev= Some tapdev + ; failed= false + ; watchdog= None + } + in -let stat ~dbg:_ ~id = - let recv_opt = State.find_active_receive_mirror id in - let send_opt = State.find_active_local_mirror id in - let copy_opt = State.find_active_copy id in - let open State in - let failed = - match send_opt with - | Some send_state -> - let failed = - match send_state.Send_state.tapdev with - | Some tapdev -> ( - try - let stats = Tapctl.stats (Tapctl.create ()) tapdev in - stats.Tapctl.Stats.nbd_mirror_failed = 1 - with _ -> - debug "Using cached copy of failure status" ; - send_state.Send_state.failed - ) - | None -> - false - in - send_state.Send_state.failed <- failed ; - failed - | None -> - false - in - let state = - (match recv_opt with Some _ -> [Mirror.Receiving] | None -> []) - @ (match send_opt with Some _ -> [Mirror.Sending] | None -> []) - @ match copy_opt with Some _ -> [Mirror.Copying] | None -> [] - in - if state = [] then raise (Storage_error (Does_not_exist ("mirror", id))) ; - let src, dst = - match (recv_opt, send_opt, copy_opt) with - | Some receive_state, _, _ -> - ( receive_state.Receive_state.remote_vdi - , 
receive_state.Receive_state.leaf_vdi + State.add id (State.Send_op alm) ; + debug "Updated" ; + SXM.info "%s About to snapshot VDI = %s" __FUNCTION__ + (string_of_vdi_info local_vdi) ; + let local_vdi = add_to_sm_config local_vdi "mirror" ("nbd:" ^ dp) in + let local_vdi = add_to_sm_config local_vdi "base_mirror" id in + let snapshot = + try Local.VDI.snapshot dbg sr local_vdi with + | Storage_interface.Storage_error (Backend_error (code, _)) + when code = "SR_BACKEND_FAILURE_44" -> + raise + (Api_errors.Server_error + ( Api_errors.sr_source_space_insufficient + , [Storage_interface.Sr.string_of sr] + ) + ) + | e -> + raise e + in + SXM.info "%s: snapshot created, mirror initiated vdi:%s snapshot_of:%s" + __FUNCTION__ + (Storage_interface.Vdi.string_of snapshot.vdi) + (Storage_interface.Vdi.string_of local_vdi.vdi) ; + on_fail := (fun () -> Local.VDI.destroy dbg sr snapshot.vdi) :: !on_fail ; + (let rec inner () = + let alm_opt = State.find_active_local_mirror id in + match alm_opt with + | Some alm -> + let stats = Tapctl.stats (Tapctl.create ()) tapdev in + if stats.Tapctl.Stats.nbd_mirror_failed = 1 then ( + error "Tapdisk mirroring has failed" ; + Updates.add (Dynamic.Mirror id) updates + ) ; + alm.State.Send_state.watchdog <- + Some + (Scheduler.one_shot scheduler (Scheduler.Delta 5) + "tapdisk_watchdog" inner + ) + | None -> + () + in + inner () + ) ; + on_fail := (fun () -> Local.DATA.MIRROR.stop dbg id) :: !on_fail ; + (* Copy the snapshot to the remote *) + let new_parent = + Storage_task.with_subtask task "copy" (fun () -> + copy_into_vdi ~task ~dbg ~sr ~vdi:snapshot.vdi ~url ~dest + ~dest_vdi:result.Mirror.copy_diffs_to ~verify_dest ) - | _, Some send_state, _ -> - let dst_vdi = - match send_state.Send_state.remote_info with - | Some remote_info -> - remote_info.Send_state.vdi - | None -> - Storage_interface.Vdi.of_string "" - in - (snd (of_mirror_id id), dst_vdi) - | _, _, Some copy_state -> - (snd (of_copy_id id), copy_state.Copy_state.copy_vdi) - | _ 
-> - failwith "Invalid" - in - {Mirror.source_vdi= src; dest_vdi= dst; state; failed} - -let list ~dbg = - let send_ops, recv_ops, copy_ops = State.map_of () in - let get_ids map = List.map fst map in - let ids = - get_ids send_ops @ get_ids recv_ops @ get_ids copy_ops - |> Listext.List.setify - in - List.map (fun id -> (id, stat ~dbg ~id)) ids - -let killall ~dbg = - let send_ops, recv_ops, copy_ops = State.map_of () in - List.iter - (fun (id, send_state) -> - debug "Send in progress: %s" id ; - List.iter log_and_ignore_exn - [ - (fun () -> stop ~dbg ~id) - ; (fun () -> - Local.DP.destroy dbg send_state.State.Send_state.local_dp true - ) - ] - ) - send_ops ; - List.iter - (fun (id, copy_state) -> - debug "Copy in progress: %s" id ; - List.iter log_and_ignore_exn - [ - (fun () -> - Local.DP.destroy dbg copy_state.State.Copy_state.leaf_dp true - ) - ; (fun () -> - Local.DP.destroy dbg copy_state.State.Copy_state.base_dp true - ) - ] ; - let remote_url = - Storage_utils.connection_args_of_uri - ~verify_dest:copy_state.State.Copy_state.verify_dest - copy_state.State.Copy_state.remote_url + |> vdi_info in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url - end)) in - List.iter log_and_ignore_exn - [ - (fun () -> - Remote.DP.destroy dbg copy_state.State.Copy_state.remote_dp true - ) - ; (fun () -> - Remote.VDI.destroy dbg copy_state.State.Copy_state.dest_sr - copy_state.State.Copy_state.copy_vdi + debug "Local VDI %s = remote VDI %s" + (Storage_interface.Vdi.string_of snapshot.vdi) + (Storage_interface.Vdi.string_of new_parent.vdi) ; + Remote.VDI.compose dbg dest result.Mirror.copy_diffs_to + result.Mirror.mirror_vdi.vdi ; + Remote.VDI.remove_from_sm_config dbg dest result.Mirror.mirror_vdi.vdi + "base_mirror" ; + debug "Local VDI %s now mirrored to remote VDI: %s" + (Storage_interface.Vdi.string_of local_vdi.vdi) + (Storage_interface.Vdi.string_of result.Mirror.mirror_vdi.vdi) 
; + debug "Destroying dummy VDI on remote" ; + Remote.VDI.destroy dbg dest result.Mirror.dummy_vdi ; + debug "Destroying snapshot on src" ; + Local.VDI.destroy dbg sr snapshot.vdi ; + Some (Mirror_id id) + with + | Storage_error (Sr_not_attached sr_uuid) -> + error " Caught exception %s:%s. Performing cleanup." + Api_errors.sr_not_attached sr_uuid ; + perform_cleanup_actions !on_fail ; + raise (Api_errors.Server_error (Api_errors.sr_not_attached, [sr_uuid])) + | e -> + error "Caught %s: performing cleanup actions" (Api_errors.to_string e) ; + perform_cleanup_actions !on_fail ; + raise e + + let stop ~dbg ~id = + (* Find the local VDI *) + let alm = State.find_active_local_mirror id in + match alm with + | Some alm -> + ( match alm.State.Send_state.remote_info with + | Some remote_info -> ( + let sr, vdi = State.of_mirror_id id in + let vdis = Local.SR.scan dbg sr in + let local_vdi = + try List.find (fun x -> x.vdi = vdi) vdis + with Not_found -> + failwith + (Printf.sprintf "Local VDI %s not found" + (Storage_interface.Vdi.string_of vdi) + ) + in + let local_vdi = add_to_sm_config local_vdi "mirror" "null" in + let local_vdi = remove_from_sm_config local_vdi "base_mirror" in + (* Disable mirroring on the local machine *) + let snapshot = Local.VDI.snapshot dbg sr local_vdi in + Local.VDI.destroy dbg sr snapshot.vdi ; + (* Destroy the snapshot, if it still exists *) + let snap = + try + Some + (List.find + (fun x -> + List.mem_assoc "base_mirror" x.sm_config + && List.assoc "base_mirror" x.sm_config = id + ) + vdis + ) + with _ -> None + in + ( match snap with + | Some s -> + debug "Found snapshot VDI: %s" + (Storage_interface.Vdi.string_of s.vdi) ; + Local.VDI.destroy dbg sr s.vdi + | None -> + debug "Snapshot VDI already cleaned up" + ) ; + let remote_url = + Storage_utils.connection_args_of_uri + ~verify_dest:remote_info.State.Send_state.verify_dest + remote_info.State.Send_state.url + in + let module Remote = StorageAPI (Idl.Exn.GenClient (struct + let rpc = + 
Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" + remote_url + end)) in + try Remote.DATA.MIRROR.receive_cancel dbg id with _ -> () ) - ] - ) - copy_ops ; - List.iter - (fun (id, _recv_state) -> - debug "Receive in progress: %s" id ; - log_and_ignore_exn (fun () -> Local.DATA.MIRROR.receive_cancel dbg id) - ) - recv_ops ; - State.clear () - -let receive_start ~dbg ~sr ~vdi_info ~id ~similar = - let on_fail : (unit -> unit) list ref = ref [] in - let vdis = Local.SR.scan dbg sr in - (* We drop cbt_metadata VDIs that do not have any actual data *) - let vdis = List.filter (fun vdi -> vdi.ty <> "cbt_metadata") vdis in - let leaf_dp = Local.DP.create dbg Uuidx.(to_string (make ())) in - try - let vdi_info = {vdi_info with sm_config= [("base_mirror", id)]} in - let leaf = Local.VDI.create dbg sr vdi_info in - info "Created leaf VDI for mirror receive: %s" (string_of_vdi_info leaf) ; - on_fail := (fun () -> Local.VDI.destroy dbg sr leaf.vdi) :: !on_fail ; - let dummy = Local.VDI.snapshot dbg sr leaf in - on_fail := (fun () -> Local.VDI.destroy dbg sr dummy.vdi) :: !on_fail ; - debug "Created dummy snapshot for mirror receive: %s" - (string_of_vdi_info dummy) ; - let _ = Local.VDI.attach3 dbg leaf_dp sr leaf.vdi (vm_of_s "0") true in - Local.VDI.activate3 dbg leaf_dp sr leaf.vdi (vm_of_s "0") ; - let nearest = - List.fold_left - (fun acc content_id -> - match acc with - | Some _ -> - acc - | None -> ( - try - Some - (List.find - (fun vdi -> - vdi.content_id = content_id - && vdi.virtual_size <= vdi_info.virtual_size - ) - vdis - ) - with Not_found -> None + | None -> + () + ) ; + State.remove_local_mirror id + | None -> + raise (Storage_interface.Storage_error (Does_not_exist ("mirror", id))) + + let stat ~dbg:_ ~id = + let recv_opt = State.find_active_receive_mirror id in + let send_opt = State.find_active_local_mirror id in + let copy_opt = State.find_active_copy id in + let open State in + let failed = + match send_opt with + | Some send_state -> + let failed 
= + match send_state.Send_state.tapdev with + | Some tapdev -> ( + try + let stats = Tapctl.stats (Tapctl.create ()) tapdev in + stats.Tapctl.Stats.nbd_mirror_failed = 1 + with _ -> + debug "Using cached copy of failure status" ; + send_state.Send_state.failed + ) + | None -> + false + in + send_state.Send_state.failed <- failed ; + failed + | None -> + false + in + let state = + (match recv_opt with Some _ -> [Mirror.Receiving] | None -> []) + @ (match send_opt with Some _ -> [Mirror.Sending] | None -> []) + @ match copy_opt with Some _ -> [Mirror.Copying] | None -> [] + in + if state = [] then raise (Storage_error (Does_not_exist ("mirror", id))) ; + let src, dst = + match (recv_opt, send_opt, copy_opt) with + | Some receive_state, _, _ -> + ( receive_state.Receive_state.remote_vdi + , receive_state.Receive_state.leaf_vdi ) - ) - None similar + | _, Some send_state, _ -> + let dst_vdi = + match send_state.Send_state.remote_info with + | Some remote_info -> + remote_info.Send_state.vdi + | None -> + Storage_interface.Vdi.of_string "" + in + (snd (of_mirror_id id), dst_vdi) + | _, _, Some copy_state -> + (snd (of_copy_id id), copy_state.Copy_state.copy_vdi) + | _ -> + failwith "Invalid" in - debug "Nearest VDI: content_id=%s vdi=%s" - (Option.fold ~none:"None" ~some:(fun x -> x.content_id) nearest) - (Option.fold ~none:"None" - ~some:(fun x -> Storage_interface.Vdi.string_of x.vdi) - nearest - ) ; - let parent = - match nearest with - | Some vdi -> - debug "Cloning VDI" ; - let vdi = add_to_sm_config vdi "base_mirror" id in - let vdi_clone = Local.VDI.clone dbg sr vdi in - debug "Clone: %s" (Storage_interface.Vdi.string_of vdi_clone.vdi) ; - ( if vdi_clone.virtual_size <> vdi_info.virtual_size then - let new_size = - Local.VDI.resize dbg sr vdi_clone.vdi vdi_info.virtual_size - in - debug "Resize local clone VDI to %Ld: result %Ld" - vdi_info.virtual_size new_size - ) ; - vdi_clone - | None -> - debug "Creating a blank remote VDI" ; - Local.VDI.create dbg sr 
vdi_info + {Mirror.source_vdi= src; dest_vdi= dst; state; failed} + + let list ~dbg = + let send_ops, recv_ops, copy_ops = State.map_of () in + let get_ids map = List.map fst map in + let ids = + get_ids send_ops @ get_ids recv_ops @ get_ids copy_ops + |> Listext.List.setify in - debug "Parent disk content_id=%s" parent.content_id ; - State.add id - State.( - Recv_op - Receive_state. - { - sr - ; dummy_vdi= dummy.vdi - ; leaf_vdi= leaf.vdi - ; leaf_dp - ; parent_vdi= parent.vdi - ; remote_vdi= vdi_info.vdi - } - ) ; - let nearest_content_id = Option.map (fun x -> x.content_id) nearest in - Mirror.Vhd_mirror - { - Mirror.mirror_vdi= leaf - ; mirror_datapath= leaf_dp - ; copy_diffs_from= nearest_content_id - ; copy_diffs_to= parent.vdi - ; dummy_vdi= dummy.vdi - } - with e -> + List.map (fun id -> (id, stat ~dbg ~id)) ids + + let killall ~dbg = + let send_ops, recv_ops, copy_ops = State.map_of () in List.iter - (fun op -> - try op () - with e -> - debug "Caught exception in on_fail: %s" (Printexc.to_string e) + (fun (id, send_state) -> + debug "Send in progress: %s" id ; + List.iter log_and_ignore_exn + [ + (fun () -> stop ~dbg ~id) + ; (fun () -> + Local.DP.destroy dbg send_state.State.Send_state.local_dp true + ) + ] ) - !on_fail ; - raise e - -let receive_finalize ~dbg ~id = - let recv_state = State.find_active_receive_mirror id in - let open State.Receive_state in - Option.iter (fun r -> Local.DP.destroy dbg r.leaf_dp false) recv_state ; - State.remove_receive_mirror id - -let receive_cancel ~dbg ~id = - let receive_state = State.find_active_receive_mirror id in - let open State.Receive_state in - Option.iter - (fun r -> - log_and_ignore_exn (fun () -> Local.DP.destroy dbg r.leaf_dp false) ; + send_ops ; + List.iter + (fun (id, copy_state) -> + debug "Copy in progress: %s" id ; + List.iter log_and_ignore_exn + [ + (fun () -> + Local.DP.destroy dbg copy_state.State.Copy_state.leaf_dp true + ) + ; (fun () -> + Local.DP.destroy dbg 
copy_state.State.Copy_state.base_dp true + ) + ] ; + let remote_url = + Storage_utils.connection_args_of_uri + ~verify_dest:copy_state.State.Copy_state.verify_dest + copy_state.State.Copy_state.remote_url + in + let module Remote = StorageAPI (Idl.Exn.GenClient (struct + let rpc = + Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url + end)) in + List.iter log_and_ignore_exn + [ + (fun () -> + Remote.DP.destroy dbg copy_state.State.Copy_state.remote_dp true + ) + ; (fun () -> + Remote.VDI.destroy dbg copy_state.State.Copy_state.dest_sr + copy_state.State.Copy_state.copy_vdi + ) + ] + ) + copy_ops ; + List.iter + (fun (id, _recv_state) -> + debug "Receive in progress: %s" id ; + log_and_ignore_exn (fun () -> Local.DATA.MIRROR.receive_cancel dbg id) + ) + recv_ops ; + State.clear () +end + +(** module [MigrateRemote] is similar to [MigrateLocal], but most of these functions +tend to be executed on the receiver side. *) +module MigrateRemote = struct + let receive_start ~dbg ~sr ~vdi_info ~id ~similar = + let on_fail : (unit -> unit) list ref = ref [] in + let vdis = Local.SR.scan dbg sr in + (* We drop cbt_metadata VDIs that do not have any actual data *) + let vdis = List.filter (fun vdi -> vdi.ty <> "cbt_metadata") vdis in + let leaf_dp = Local.DP.create dbg Uuidx.(to_string (make ())) in + try + let vdi_info = {vdi_info with sm_config= [("base_mirror", id)]} in + let leaf = Local.VDI.create dbg sr vdi_info in + info "Created leaf VDI for mirror receive: %s" (string_of_vdi_info leaf) ; + on_fail := (fun () -> Local.VDI.destroy dbg sr leaf.vdi) :: !on_fail ; + let dummy = Local.VDI.snapshot dbg sr leaf in + on_fail := (fun () -> Local.VDI.destroy dbg sr dummy.vdi) :: !on_fail ; + debug "Created dummy snapshot for mirror receive: %s" + (string_of_vdi_info dummy) ; + let _ = Local.VDI.attach3 dbg leaf_dp sr leaf.vdi (vm_of_s "0") true in + Local.VDI.activate3 dbg leaf_dp sr leaf.vdi (vm_of_s "0") ; + let nearest = + List.fold_left + (fun acc 
content_id -> + match acc with + | Some _ -> + acc + | None -> ( + try + Some + (List.find + (fun vdi -> + vdi.content_id = content_id + && vdi.virtual_size <= vdi_info.virtual_size + ) + vdis + ) + with Not_found -> None + ) + ) + None similar + in + debug "Nearest VDI: content_id=%s vdi=%s" + (Option.fold ~none:"None" ~some:(fun x -> x.content_id) nearest) + (Option.fold ~none:"None" + ~some:(fun x -> Storage_interface.Vdi.string_of x.vdi) + nearest + ) ; + let parent = + match nearest with + | Some vdi -> + debug "Cloning VDI" ; + let vdi = add_to_sm_config vdi "base_mirror" id in + let vdi_clone = Local.VDI.clone dbg sr vdi in + debug "Clone: %s" (Storage_interface.Vdi.string_of vdi_clone.vdi) ; + ( if vdi_clone.virtual_size <> vdi_info.virtual_size then + let new_size = + Local.VDI.resize dbg sr vdi_clone.vdi vdi_info.virtual_size + in + debug "Resize local clone VDI to %Ld: result %Ld" + vdi_info.virtual_size new_size + ) ; + vdi_clone + | None -> + debug "Creating a blank remote VDI" ; + Local.VDI.create dbg sr vdi_info + in + debug "Parent disk content_id=%s" parent.content_id ; + State.add id + State.( + Recv_op + Receive_state. 
+ { + sr + ; dummy_vdi= dummy.vdi + ; leaf_vdi= leaf.vdi + ; leaf_dp + ; parent_vdi= parent.vdi + ; remote_vdi= vdi_info.vdi + } + ) ; + let nearest_content_id = Option.map (fun x -> x.content_id) nearest in + Mirror.Vhd_mirror + { + Mirror.mirror_vdi= leaf + ; mirror_datapath= leaf_dp + ; copy_diffs_from= nearest_content_id + ; copy_diffs_to= parent.vdi + ; dummy_vdi= dummy.vdi + } + with e -> List.iter - (fun v -> log_and_ignore_exn (fun () -> Local.VDI.destroy dbg r.sr v)) - [r.dummy_vdi; r.leaf_vdi; r.parent_vdi] - ) - receive_state ; - State.remove_receive_mirror id + (fun op -> + try op () + with e -> + debug "Caught exception in on_fail: %s" (Printexc.to_string e) + ) + !on_fail ; + raise e + + let receive_finalize ~dbg ~id = + let recv_state = State.find_active_receive_mirror id in + let open State.Receive_state in + Option.iter (fun r -> Local.DP.destroy dbg r.leaf_dp false) recv_state ; + State.remove_receive_mirror id + + let receive_cancel ~dbg ~id = + let receive_state = State.find_active_receive_mirror id in + let open State.Receive_state in + Option.iter + (fun r -> + log_and_ignore_exn (fun () -> Local.DP.destroy dbg r.leaf_dp false) ; + List.iter + (fun v -> log_and_ignore_exn (fun () -> Local.VDI.destroy dbg r.sr v)) + [r.dummy_vdi; r.leaf_vdi; r.parent_vdi] + ) + receive_state ; + State.remove_receive_mirror id +end exception Timeout of Mtime.Span.t @@ -1231,111 +1347,6 @@ let nbd_handler req s sr vdi dp = | None -> () -(** [copy_into_sr] does not requires a dest vdi to be provided, instead, it will - find the nearest vdi on the [dest] sr, and if there is no such vdi, it will - create one. 
*) -let copy_into_sr ~task ~dbg ~sr ~vdi ~url ~dest ~verify_dest = - debug "copy sr:%s vdi:%s url:%s dest:%s verify_dest:%B" - (Storage_interface.Sr.string_of sr) - (Storage_interface.Vdi.string_of vdi) - url - (Storage_interface.Sr.string_of dest) - verify_dest ; - let remote_url = Storage_utils.connection_args_of_uri ~verify_dest url in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url - end)) in - (* Find the local VDI *) - try - let vdis = Local.SR.scan dbg sr in - let local_vdi = - try List.find (fun x -> x.vdi = vdi) vdis - with Not_found -> failwith (Printf.sprintf "Local VDI not found") - in - try - let similar_vdis = Local.VDI.similar_content dbg sr vdi in - let similars = List.map (fun vdi -> vdi.content_id) similar_vdis in - debug "Similar VDIs = [ %s ]" - (String.concat "; " - (List.map - (fun x -> - Printf.sprintf "(vdi=%s,content_id=%s)" - (Storage_interface.Vdi.string_of x.vdi) - x.content_id - ) - similar_vdis - ) - ) ; - let remote_vdis = Remote.SR.scan dbg dest in - (* We drop cbt_metadata VDIs that do not have any actual data *) - let remote_vdis = - List.filter (fun vdi -> vdi.ty <> "cbt_metadata") remote_vdis - in - let nearest = - List.fold_left - (fun acc content_id -> - match acc with - | Some _ -> - acc - | None -> ( - try - Some - (List.find - (fun vdi -> - vdi.content_id = content_id - && vdi.virtual_size <= local_vdi.virtual_size - ) - remote_vdis - ) - with Not_found -> None - ) - ) - None similars - in - debug "Nearest VDI: content_id=%s vdi=%s" - (Option.fold ~none:"None" ~some:(fun x -> x.content_id) nearest) - (Option.fold ~none:"None" - ~some:(fun x -> Storage_interface.Vdi.string_of x.vdi) - nearest - ) ; - let remote_base = - match nearest with - | Some vdi -> - debug "Cloning VDI" ; - let vdi_clone = Remote.VDI.clone dbg dest vdi in - debug "Clone: %s" (Storage_interface.Vdi.string_of vdi_clone.vdi) ; - ( if vdi_clone.virtual_size <> 
local_vdi.virtual_size then - let new_size = - Remote.VDI.resize dbg dest vdi_clone.vdi - local_vdi.virtual_size - in - debug "Resize remote clone VDI to %Ld: result %Ld" - local_vdi.virtual_size new_size - ) ; - vdi_clone - | None -> - debug "Creating a blank remote VDI" ; - Remote.VDI.create dbg dest {local_vdi with sm_config= []} - in - let remote_copy = - copy_into_vdi ~task ~dbg ~sr ~vdi ~url ~dest ~dest_vdi:remote_base.vdi - ~verify_dest - |> vdi_info - in - let snapshot = Remote.VDI.snapshot dbg dest remote_copy in - Remote.VDI.destroy dbg dest remote_copy.vdi ; - Some (Vdi_info snapshot) - with e -> - error "Caught %s: copying snapshots vdi" (Printexc.to_string e) ; - raise (Storage_error (Internal_error (Printexc.to_string e))) - with - | Storage_error (Backend_error (code, params)) - | Api_errors.Server_error (code, params) -> - raise (Storage_error (Backend_error (code, params))) - | e -> - raise (Storage_error (Internal_error (Printexc.to_string e))) - let with_task_and_thread ~dbg f = let task = Storage_task.add tasks dbg.Debug_info.log (fun task -> @@ -1360,17 +1371,44 @@ let with_task_and_thread ~dbg f = in Storage_task.id_of_handle task -let start ~dbg ~sr ~vdi ~dp ~url ~dest ~verify_dest = +(* The following functions acts as wrappers of the migration part of SMAPIv2. Some of + them are just direct calling of the functions inside the Migrate module. Leave it + this way so that they all stay in one place rather than being spread around the + file. 
*) + +let copy ~dbg ~sr ~vdi ~url ~dest ~verify_dest = with_task_and_thread ~dbg (fun task -> - start' ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~dp ~url ~dest ~verify_dest + MigrateLocal.copy_into_sr ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~url + ~dest ~verify_dest ) -let copy ~dbg ~sr ~vdi ~url ~dest ~verify_dest = +let start ~dbg ~sr ~vdi ~dp ~url ~dest ~verify_dest = with_task_and_thread ~dbg (fun task -> - copy_into_sr ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~url ~dest + MigrateLocal.start ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~dp ~url ~dest ~verify_dest ) +(* XXX: PR-1255: copy the xenopsd 'raise Exception' pattern *) +let stop ~dbg ~id = + try MigrateLocal.stop ~dbg ~id with + | Storage_error (Backend_error (code, params)) + | Api_errors.Server_error (code, params) -> + raise (Storage_error (Backend_error (code, params))) + | e -> + raise e + +let list = MigrateLocal.list + +let killall = MigrateLocal.killall + +let stat = MigrateLocal.stat + +let receive_start = MigrateRemote.receive_start + +let receive_finalize = MigrateRemote.receive_finalize + +let receive_cancel = MigrateRemote.receive_cancel + (* The remote end of this call, SR.update_snapshot_info_dest, is implemented in * the SMAPIv1 section of storage_migrate.ml. 
It needs to access the setters * for snapshot_of, snapshot_time and is_a_snapshot, which we don't want to add From 4ca9fa34ab0d9cbed31f439cc963f0649a98e5d9 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 18 Oct 2024 19:11:48 +0100 Subject: [PATCH 024/121] IH-728: Be more explicit about `mirror_id` and log messages Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 297601da1f1..3609a8afb8b 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -738,6 +738,9 @@ module MigrateLocal = struct in let id = State.mirror_id_of (sr, local_vdi.vdi) in debug "Adding to active local mirrors before sending: id=%s" id ; + let mirror_id = State.mirror_id_of (sr, local_vdi.vdi) in + debug "%s: Adding to active local mirrors before sending: id=%s" + __FUNCTION__ mirror_id ; let alm = State.Send_state. { @@ -751,8 +754,8 @@ module MigrateLocal = struct } in - State.add id (State.Send_op alm) ; - debug "Added" ; + State.add mirror_id (State.Send_op alm) ; + debug "%s Added mirror %s to active local mirrors" __FUNCTION__ mirror_id ; (* A list of cleanup actions to perform if the operation should fail. 
*) let on_fail : (unit -> unit) list ref = ref [] in try @@ -774,7 +777,7 @@ module MigrateLocal = struct ) ) ; let result_ty = - Remote.DATA.MIRROR.receive_start dbg dest local_vdi id similars + Remote.DATA.MIRROR.receive_start dbg dest local_vdi mirror_id similars in let result = match result_ty with Mirror.Vhd_mirror x -> x in (* Enable mirroring on the local machine *) @@ -796,7 +799,7 @@ module MigrateLocal = struct debug "Searching for data path: %s" dp ; let attach_info = Local.DP.attach_info "nbd" sr vdi dp in on_fail := - (fun () -> Remote.DATA.MIRROR.receive_cancel dbg id) :: !on_fail ; + (fun () -> Remote.DATA.MIRROR.receive_cancel dbg mirror_id) :: !on_fail ; let tapdev = match tapdisk_of_attach_info attach_info with | Some tapdev -> @@ -829,7 +832,7 @@ module MigrateLocal = struct | None -> failwith "Not attached" in - debug "Updating active local mirrors: id=%s" id ; + debug "%s Updating active local mirrors: id=%s" __FUNCTION__ mirror_id ; let alm = State.Send_state. { @@ -850,12 +853,14 @@ module MigrateLocal = struct } in - State.add id (State.Send_op alm) ; - debug "Updated" ; + State.add mirror_id (State.Send_op alm) ; + debug "%s Updated mirror_id %s in the active local mirror" __FUNCTION__ + mirror_id ; + SXM.info "%s About to snapshot VDI = %s" __FUNCTION__ (string_of_vdi_info local_vdi) ; let local_vdi = add_to_sm_config local_vdi "mirror" ("nbd:" ^ dp) in - let local_vdi = add_to_sm_config local_vdi "base_mirror" id in + let local_vdi = add_to_sm_config local_vdi "base_mirror" mirror_id in let snapshot = try Local.VDI.snapshot dbg sr local_vdi with | Storage_interface.Storage_error (Backend_error (code, _)) @@ -875,13 +880,13 @@ module MigrateLocal = struct (Storage_interface.Vdi.string_of local_vdi.vdi) ; on_fail := (fun () -> Local.VDI.destroy dbg sr snapshot.vdi) :: !on_fail ; (let rec inner () = - let alm_opt = State.find_active_local_mirror id in + let alm_opt = State.find_active_local_mirror mirror_id in match alm_opt with | Some alm 
-> let stats = Tapctl.stats (Tapctl.create ()) tapdev in if stats.Tapctl.Stats.nbd_mirror_failed = 1 then ( error "Tapdisk mirroring has failed" ; - Updates.add (Dynamic.Mirror id) updates + Updates.add (Dynamic.Mirror mirror_id) updates ) ; alm.State.Send_state.watchdog <- Some @@ -893,7 +898,7 @@ module MigrateLocal = struct in inner () ) ; - on_fail := (fun () -> Local.DATA.MIRROR.stop dbg id) :: !on_fail ; + on_fail := (fun () -> Local.DATA.MIRROR.stop dbg mirror_id) :: !on_fail ; (* Copy the snapshot to the remote *) let new_parent = Storage_task.with_subtask task "copy" (fun () -> @@ -916,7 +921,7 @@ module MigrateLocal = struct Remote.VDI.destroy dbg dest result.Mirror.dummy_vdi ; debug "Destroying snapshot on src" ; Local.VDI.destroy dbg sr snapshot.vdi ; - Some (Mirror_id id) + Some (Mirror_id mirror_id) with | Storage_error (Sr_not_attached sr_uuid) -> error " Caught exception %s:%s. Performing cleanup." From 20fb26595944080c7615b07256fa4fa7362d985a Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 25 Oct 2024 12:24:56 +0100 Subject: [PATCH 025/121] IH-728: Refactor tracing logic Previously a task was constructed based on the log and tracing of a dbg of the type Debug_info.t, and then later on a dbg is constructed based on the previously constructued task. Instead of that, just convert the first dbg into a string and thread it through the call. 
Signed-off-by: Vincent Liu --- ocaml/xapi/storage_migrate.ml | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 3609a8afb8b..3279846a5a5 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -456,12 +456,6 @@ let add_to_sm_config vdi_info key value = let vdi_info = remove_from_sm_config vdi_info key in {vdi_info with sm_config= (key, value) :: vdi_info.sm_config} -let dbg_and_tracing_of_task task = - Debug_info.make - ~log:(Storage_task.get_dbg task) - ~tracing:(Storage_task.tracing task) - |> Debug_info.to_string - (** This module [MigrateLocal] consists of the concrete implementations of the migration part of SMAPI. Functions inside this module are sender driven, which means they tend to be executed on the sender side. although there is not a hard rule @@ -716,7 +710,7 @@ module MigrateLocal = struct | e -> raise (Storage_error (Internal_error (Printexc.to_string e))) - let start ~task ~dbg:_ ~sr ~vdi ~dp ~url ~dest ~verify_dest = + let start ~task ~dbg ~sr ~vdi ~dp ~url ~dest ~verify_dest = SXM.info "%s sr:%s vdi:%s url:%s dest:%s verify_dest:%B" __FUNCTION__ (Storage_interface.Sr.string_of sr) (Storage_interface.Vdi.string_of vdi) @@ -730,7 +724,6 @@ module MigrateLocal = struct (Storage_utils.connection_args_of_uri ~verify_dest url) end)) in (* Find the local VDI *) - let dbg = dbg_and_tracing_of_task task in let vdis = Local.SR.scan dbg sr in let local_vdi = try List.find (fun x -> x.vdi = vdi) vdis @@ -1383,14 +1376,14 @@ let with_task_and_thread ~dbg f = let copy ~dbg ~sr ~vdi ~url ~dest ~verify_dest = with_task_and_thread ~dbg (fun task -> - MigrateLocal.copy_into_sr ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~url - ~dest ~verify_dest + MigrateLocal.copy_into_sr ~task ~dbg:(Debug_info.to_string dbg) ~sr ~vdi + ~url ~dest ~verify_dest ) let start ~dbg ~sr ~vdi ~dp ~url ~dest ~verify_dest = with_task_and_thread ~dbg (fun task -> - 
MigrateLocal.start ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~dp ~url ~dest - ~verify_dest + MigrateLocal.start ~task ~dbg:(Debug_info.to_string dbg) ~sr ~vdi ~dp ~url + ~dest ~verify_dest ) (* XXX: PR-1255: copy the xenopsd 'raise Exception' pattern *) From 2c99f2da9581a44d23badbb2450cc7b3598d56ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 25 Oct 2024 15:52:42 +0100 Subject: [PATCH 026/121] CA-400199: open /dev/urandom on first use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2 recent optimizations have changed the Uuidx module to open /dev/urandom once on startup, instead of every time a value was requested. However 'networkd_db' runs in the installer environment, inside a chroot where /dev/urandom is not available. Open /dev/urandom on first use instead. Simplify the code and use a single implementation for both fast and secure urandom generation: * use a mutex to protect accesses to global urandom state * use an input channel, rather than a Unix file descriptor, this allows us to read many bytes in one go, and then generate multiple random numbers without having to make syscalls that often (syscalls are slow in this case because they require releasing the runtime mutex, which gives another thread the opportunity to run for 50ms). Fixes: a0176da73 ("CP-49135: open /dev/urandom just once") Fixes: a2d9fbe39 ("IH-577 Implement v7 UUID generation") Fixes: 6635a00d6 ("CP-49136: Introduce PRNG for generating non-secret UUIDs") This is slightly slower than before, but still fast enough: ``` │ uuidx creation/Uuidx.make │ 0.0004 mjw/run│ 16.0001 mnw/run│ 105.8801 ns/run│ │ uuidx creation/Uuidx.make_uuid_urnd │ 0.0004 mjw/run│ 16.0001 mnw/run│ 105.1474 ns/run│ ``` Previously this used to take ~88ns, so in fact the difference is barely noticable. 
Also remove the feature flag: the previous change was feature flagged too, but broke master anyway (I wouldn't have though anything *doesn't* have /dev/urandom available, and didn't feature flag that part, because in general it is not possible to feature flag startup code without races) Signed-off-by: Edwin Török --- ocaml/forkexecd/test/fe_test.ml | 2 +- ocaml/libs/uuid/uuidx.ml | 71 ++++++++++++++------------------- ocaml/libs/uuid/uuidx.mli | 5 --- ocaml/tests/bench/bench_uuid.ml | 2 - ocaml/xapi/xapi_globs.ml | 6 --- quality-gate.sh | 2 +- 6 files changed, 33 insertions(+), 55 deletions(-) diff --git a/ocaml/forkexecd/test/fe_test.ml b/ocaml/forkexecd/test/fe_test.ml index 1c5e46bc1f9..870ac591601 100644 --- a/ocaml/forkexecd/test/fe_test.ml +++ b/ocaml/forkexecd/test/fe_test.ml @@ -292,7 +292,7 @@ let slave = function (* Printf.fprintf stderr "%s %d\n" total_fds (List.length present - 1) *) - if total_fds + 1 (* Uuid.dev_urandom *) <> List.length filtered then + if total_fds <> List.length filtered then fail "Expected %d fds; /proc/self/fd has %d: %s" total_fds (List.length filtered) ls diff --git a/ocaml/libs/uuid/uuidx.ml b/ocaml/libs/uuid/uuidx.ml index 65392ef4485..7bcb74aae04 100644 --- a/ocaml/libs/uuid/uuidx.ml +++ b/ocaml/libs/uuid/uuidx.ml @@ -116,48 +116,39 @@ let is_uuid str = match of_string str with None -> false | Some _ -> true let dev_urandom = "/dev/urandom" -let dev_urandom_fd = Unix.openfile dev_urandom [Unix.O_RDONLY] 0o640 -(* we can't close this in at_exit, because Crowbar runs at_exit, and - it'll fail because this FD will then be closed -*) - -let read_bytes dev n = - let buf = Bytes.create n in - let read = Unix.read dev buf 0 n in - if read <> n then - raise End_of_file - else - Bytes.to_string buf - -let make_uuid_urnd () = of_bytes (read_bytes dev_urandom_fd 16) |> Option.get - -(* State for random number generation. Random.State.t isn't thread safe, so - only use this via with_non_csprng_state, which takes care of this. 
-*) -let rstate = Random.State.make_self_init () - -let rstate_m = Mutex.create () - -let with_non_csprng_state = - (* On OCaml 5 we could use Random.State.split instead, - and on OCaml 4 the mutex may not be strictly needed - *) - let finally () = Mutex.unlock rstate_m in - fun f -> - Mutex.lock rstate_m ; - Fun.protect ~finally (f rstate) - -(** Use non-CSPRNG by default, for CSPRNG see {!val:make_uuid_urnd} *) -let make_uuid_fast () = with_non_csprng_state Uuidm.v4_gen - -let make_default = ref make_uuid_urnd - -let make () = !make_default () +let generate = + let mutex = Mutex.create () in + let dev_urandom_ic = ref None in + let finally () = Mutex.unlock mutex in + let with_mutex fn = Mutex.lock mutex ; Fun.protect ~finally fn in + let close_ic () = + with_mutex @@ fun () -> + !dev_urandom_ic |> Option.iter close_in_noerr ; + dev_urandom_ic := None + in + fun n -> + with_mutex @@ fun () -> + let ic = + match !dev_urandom_ic with + | None -> + let ic = open_in_bin dev_urandom in + at_exit close_ic ; + dev_urandom_ic := Some ic ; + ic + | Some ic -> + ic + in + really_input_string ic n + +let make_uuid_urnd () = of_bytes (generate 16) |> Option.get + +let make_uuid_fast = make_uuid_urnd + +let make = make_uuid_urnd let make_v7_uuid_from_parts time_ns rand_b = Uuidm.v7_ns ~time_ns ~rand_b -let rand64 () = - with_non_csprng_state (fun rstate () -> Random.State.bits64 rstate) +let rand64 () = String.get_int64_ne (generate 8) 0 let now_ns = let start = Mtime_clock.counter () in @@ -174,7 +165,7 @@ let make_v7_uuid () = make_v7_uuid_from_parts (now_ns ()) (rand64 ()) type cookie = string let make_cookie () = - read_bytes dev_urandom_fd 64 + generate 64 |> String.to_seq |> Seq.map (fun c -> Printf.sprintf "%1x" (int_of_char c)) |> List.of_seq diff --git a/ocaml/libs/uuid/uuidx.mli b/ocaml/libs/uuid/uuidx.mli index 1e1ebc3251c..8561a975cc1 100644 --- a/ocaml/libs/uuid/uuidx.mli +++ b/ocaml/libs/uuid/uuidx.mli @@ -194,8 +194,3 @@ module Hash : sig (* UUID Version 5 
derived from argument string and namespace UUID *) val string : string -> [< not_secret] t end - -(**/**) - -(* just for feature flag, to be removed *) -val make_default : (unit -> [< not_secret] t) ref diff --git a/ocaml/tests/bench/bench_uuid.ml b/ocaml/tests/bench/bench_uuid.ml index a04ff192d76..f13118e48db 100644 --- a/ocaml/tests/bench/bench_uuid.ml +++ b/ocaml/tests/bench/bench_uuid.ml @@ -1,7 +1,5 @@ open Bechamel -let () = Uuidx.make_default := Uuidx.make_uuid_fast - let benchmarks = Test.make_grouped ~name:"uuidx creation" [ diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 5407faf3bf4..9a461a4e7bb 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1612,12 +1612,6 @@ let other_options = , (fun () -> string_of_bool !disable_webserver) , "Disable the host webserver" ) - ; ( "use-prng-uuid-gen" - (* eventually this'll be the default, except for Sessions *) - , Arg.Unit (fun () -> Uuidx.make_default := Uuidx.make_uuid_fast) - , (fun () -> !Uuidx.make_default == Uuidx.make_uuid_fast |> string_of_bool) - , "Use PRNG based UUID generator instead of CSPRNG" - ) ] (* The options can be set with the variable xapiflags in /etc/sysconfig/xapi. 
diff --git a/quality-gate.sh b/quality-gate.sh index c1d122efd72..16a90270b17 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -40,7 +40,7 @@ mli-files () { } structural-equality () { - N=10 + N=9 EQ=$(git grep -r --count ' == ' -- '**/*.ml' ':!ocaml/sdk-gen/**/*.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$EQ" -eq "$N" ]; then echo "OK counted $EQ usages of ' == '" From 760c355e0aa685b58198cd46d3aeebabf00d03ef Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 22 Oct 2024 17:17:06 +0100 Subject: [PATCH 027/121] CP-51938: Generate XML alert for cluster health Signed-off-by: Vincent Liu --- ocaml/xapi/xapi_cluster_helpers.ml | 60 +++++++++++++++++++----------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/ocaml/xapi/xapi_cluster_helpers.ml b/ocaml/xapi/xapi_cluster_helpers.ml index 2582790e929..954b946b0fa 100644 --- a/ocaml/xapi/xapi_cluster_helpers.ml +++ b/ocaml/xapi/xapi_cluster_helpers.ml @@ -112,35 +112,45 @@ let corosync3_enabled ~__context = let maybe_generate_alert ~__context ~num_hosts ~hosts_left ~hosts_joined ~quorum = let generate_alert join cluster_host = + let generate_alert_body host num_hosts quorum join = + let num_hosts = string_of_int num_hosts in + let quorum = string_of_int quorum in + let msg = + if join then + "Host has joined the cluster" + else + "Host has left the cluster" + in + String.concat "" + [ + "" + ; msg + ; "" + ; host + ; "" + ; "" + ; num_hosts + ; "" + ; "" + ; quorum + ; "" + ; "" + ] + in let host = Db.Cluster_host.get_host ~__context ~self:cluster_host in let host_uuid = Db.Host.get_uuid ~__context ~self:host in let host_name = Db.Host.get_name_label ~__context ~self:host in let body, name, priority = + let body = generate_alert_body host_name num_hosts quorum join in match join with | true -> - let body = - Printf.sprintf - "Host %s has joined the cluster, there are now %d host(s) in \ - cluster and %d host(s) are required to form a quorum" - host_name num_hosts quorum - in let name, priority 
= Api_messages.cluster_host_joining in (body, name, priority) | false -> - let body = - Printf.sprintf - "Host %s has left the cluster, there are now %d host(s) in \ - cluster and %d host(s) are required to form a quorum" - host_name num_hosts quorum - in let name, priority = Api_messages.cluster_host_leaving in (body, name, priority) in - Helpers.call_api_functions ~__context (fun rpc session_id -> - ignore - @@ Client.Client.Message.create ~rpc ~session_id ~name ~priority - ~cls:`Host ~obj_uuid:host_uuid ~body - ) + Xapi_alert.add ~msg:(name, priority) ~cls:`Host ~obj_uuid:host_uuid ~body in List.iter (generate_alert false) hosts_left ; List.iter (generate_alert true) hosts_joined ; @@ -150,10 +160,18 @@ let maybe_generate_alert ~__context ~num_hosts ~hosts_left ~hosts_joined ~quorum let pool_uuid = Db.Pool.get_uuid ~__context ~self:pool in let name, priority = Api_messages.cluster_quorum_approaching_lost in let body = - Printf.sprintf - "The cluster is losing quorum: currently %d host(s), need %d host(s) \ - for a quorum" - num_hosts quorum + String.concat "" + [ + "" + ; "Cluster is losing quorum" + ; "" + ; string_of_int num_hosts + ; "" + ; "" + ; string_of_int quorum + ; "" + ; "" + ] in Helpers.call_api_functions ~__context (fun rpc session_id -> ignore From c122bc404c4560a323cd6413de45ea9711a873a1 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Tue, 29 Oct 2024 04:20:18 +0000 Subject: [PATCH 028/121] CP-50546: Remove initscripts family initscripts family are legacy and want to be removed `service iptables save` call /usr/libexec/initscripts/legacy-actions/iptables/save, which call `exec /usr/libexec/iptables/iptables.init save`, to save iptables rules and remove initscripts, we call following directly `/usr/libexec/iptables/iptables.init save` `service` command are also updated to `systemctl` Signed-off-by: Lin Liu --- scripts/plugins/firewall-port | 4 ++-- scripts/xe-syslog-reconfigure | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/scripts/plugins/firewall-port b/scripts/plugins/firewall-port index 820a0608d94..b06707dbd28 100644 --- a/scripts/plugins/firewall-port +++ b/scripts/plugins/firewall-port @@ -37,14 +37,14 @@ case "${OP}" in iptables -I INPUT -j "${CHAIN}" fi # asuume chain is used if it exists iptables -I "${CHAIN}" $RULE - service iptables save + /usr/libexec/iptables/iptables.init save fi ;; close) if iptables -C $CHAIN $RULE 2>/dev/null then # close port if it was opened iptables -D $CHAIN $RULE - service iptables save + /usr/libexec/iptables/iptables.init save fi ;; check) diff --git a/scripts/xe-syslog-reconfigure b/scripts/xe-syslog-reconfigure index f9e7d3bd649..cc64a303044 100644 --- a/scripts/xe-syslog-reconfigure +++ b/scripts/xe-syslog-reconfigure @@ -42,4 +42,4 @@ else fi [ -s /etc/syslog.$$ ] && mv -f /etc/syslog.$$ $conf_file -service $service restart +systemctl restart $service From 36f9993f4b9e964fe2d56bc78bf88a9efda896bd Mon Sep 17 00:00:00 2001 From: Colin James Date: Tue, 29 Oct 2024 13:06:49 +0000 Subject: [PATCH 029/121] Remove notion of weird string from sexpr library Removes the remnants of an escaping mechanism once used in xapi. It appears that the format "< --- ocaml/libs/sexpr/sExpr.ml | 35 +++----------------------------- ocaml/libs/sexpr/sExpr.mli | 8 +------- ocaml/libs/sexpr/sExprLexer.mli | 7 ------- ocaml/libs/sexpr/sExprLexer.mll | 7 +------ ocaml/libs/sexpr/sExprParser.mly | 14 ++++--------- 5 files changed, 9 insertions(+), 62 deletions(-) delete mode 100644 ocaml/libs/sexpr/sExprLexer.mli diff --git a/ocaml/libs/sexpr/sExpr.ml b/ocaml/libs/sexpr/sExpr.ml index ec354e373b1..488142898c2 100644 --- a/ocaml/libs/sexpr/sExpr.ml +++ b/ocaml/libs/sexpr/sExpr.ml @@ -11,11 +11,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. 
*) -type t = - | Node of t list - | Symbol of string - | String of string - | WeirdString of string * string +type t = Node of t list | Symbol of string | String of string let unescape_buf buf s = let aux esc = function @@ -84,31 +80,13 @@ let string_of sexpr = List.iter (fun i -> Buffer.add_char buf ' ' ; __string_of_rec i) l ) ; Buffer.add_char buf ')' - | Symbol s | String s | WeirdString (_, s) -> + | Symbol s | String s -> Buffer.add_string buf "\'" ; Buffer.add_string buf (escape s) ; Buffer.add_string buf "\'" in __string_of_rec sexpr ; Buffer.contents buf -let weird_of_string x = - let random_chars = "abcdefghijklmnopqrstuvwxyz" in - let randchar () = - String.sub random_chars (Random.int (String.length random_chars)) 1 - in - (* true if the parent string contains child as a substring, starting the - search forward from offset *) - let rec has_substring parent offset child = - String.length parent - offset >= String.length child - && (String.sub parent offset (String.length child) = child - || has_substring parent (offset + 1) child - ) - in - let rec find delim = - if has_substring x 0 delim then find (delim ^ randchar ()) else delim - in - WeirdString (find "xxx", x) - let rec output_fmt ff = function | Node list -> let rec aux ?(first = true) = function @@ -121,12 +99,5 @@ let rec output_fmt ff = function aux ~first t in Format.fprintf ff "@[(" ; aux list ; Format.fprintf ff ")@]" - | Symbol s | String s | WeirdString (_, s) -> + | Symbol s | String s -> Format.fprintf ff "\"%s\"" (escape s) - -(* - | Symbol s -> - Format.fprintf ff "%s" s - | WeirdString(tag, s) -> - Format.fprintf ff "<<%s<%s<%s<" tag s tag -*) diff --git a/ocaml/libs/sexpr/sExpr.mli b/ocaml/libs/sexpr/sExpr.mli index 28c3b8219cb..e7ab5c68a1a 100644 --- a/ocaml/libs/sexpr/sExpr.mli +++ b/ocaml/libs/sexpr/sExpr.mli @@ -11,16 +11,10 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. 
*) -type t = - | Node of t list - | Symbol of string - | String of string - | WeirdString of string * string +type t = Node of t list | Symbol of string | String of string val mkstring : string -> t val string_of : t -> string -val weird_of_string : string -> t - val output_fmt : Format.formatter -> t -> unit diff --git a/ocaml/libs/sexpr/sExprLexer.mli b/ocaml/libs/sexpr/sExprLexer.mli deleted file mode 100644 index 8d017ea982d..00000000000 --- a/ocaml/libs/sexpr/sExprLexer.mli +++ /dev/null @@ -1,7 +0,0 @@ -val line : int ref - -val __ocaml_lex_tables : Lexing.lex_tables - -val token : Lexing.lexbuf -> SExprParser.token - -val __ocaml_lex_token_rec : Lexing.lexbuf -> int -> SExprParser.token diff --git a/ocaml/libs/sexpr/sExprLexer.mll b/ocaml/libs/sexpr/sExprLexer.mll index 94d72de1935..bc674d77103 100644 --- a/ocaml/libs/sexpr/sExprLexer.mll +++ b/ocaml/libs/sexpr/sExprLexer.mll @@ -1,14 +1,9 @@ { open SExprParser - let line = ref 1 } rule token = parse - | [' ' '\t' '\r'] { token lexbuf } - | ';' [^ '\n']* '\n' { incr line; token lexbuf } - | '\n' { incr line; token lexbuf } - | "<<" ([^ '<']+ as tag1) '<' ([^ '<']* as s) '<' ([^ '<']+ as tag2) '<' - { if tag1=tag2 then WEIRD(tag1, s) else invalid_arg "Weird tag" } + | [' ' '\t' '\r' '\n']+ | ';' [^ '\n']* '\n' { token lexbuf } | '"' (([^ '"' '\\'] | ('\\' _))* as s) '"' { STRING s } | '\'' (([^ '\'' '\\'] | ('\\' _))* as s) '\'' { STRING s } | [^ '"' ' ' '\t' '\n' '(' ')']+ as s { SYMBOL s } diff --git a/ocaml/libs/sexpr/sExprParser.mly b/ocaml/libs/sexpr/sExprParser.mly index a18a62bd7e5..3dbceb467af 100644 --- a/ocaml/libs/sexpr/sExprParser.mly +++ b/ocaml/libs/sexpr/sExprParser.mly @@ -1,17 +1,11 @@ %token SYMBOL STRING -%token WEIRD %token OPEN CLOSE -%start expr -%type expr +%start expr %% -expr_list: { [] } -| expr expr_list { $1 :: $2 }; - expr: -| OPEN expr_list CLOSE { SExpr.Node $2 } -| SYMBOL { SExpr.Symbol $1 } -| STRING { SExpr.mkstring $1 } -| WEIRD { (fun (tag, s) -> SExpr.WeirdString(tag, s)) 
$1 }; +| OPEN es = list(expr) CLOSE { SExpr.Node es } +| s = SYMBOL { SExpr.Symbol s } +| s = STRING { SExpr.mkstring s } From a897b53abca2c0dc1f71967a914bd10a60a6299e Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 25 Oct 2024 15:10:38 +0100 Subject: [PATCH 030/121] CA-391651: Use per-datasource last_updated timestamp during updating and archiving This ensures that the interval between the current timestamp and previous update is calculated correctly. Keep the same external interface through rrdd_http_handler by adding an 'internal' parameter to xml exporting functions. Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 40 +++++++++++++++++--------- ocaml/libs/xapi-rrd/unix/rrd_unix.ml | 7 +++-- ocaml/libs/xapi-rrd/unix/rrd_unix.mli | 13 ++++++--- ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml | 5 ++-- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 4 ++- ocaml/xcp-rrdd/bin/rrddump/rrddump.ml | 5 ++-- 6 files changed, 48 insertions(+), 26 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 176cd36289a..962353971ac 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -129,6 +129,7 @@ type ds = { ; mutable ds_value: float (** Current calculated rate of the PDP *) ; mutable ds_unknown_sec: float (** Number of seconds that are unknown in the current PDP *) + ; mutable ds_last_updated: float (** Last time this datasource was updated *) } [@@deriving rpc] @@ -202,6 +203,7 @@ let copy_ds x = ; ds_last= x.ds_last ; ds_value= x.ds_value ; ds_unknown_sec= x.ds_unknown_sec + ; ds_last_updated= x.ds_last_updated } let copy_rrd x = @@ -379,12 +381,14 @@ let ds_update rrd timestamp valuesandtransforms new_rrd = plugin, correspondingly they all have the same timestamp. Further refactoring is needed if timestamps per measurement are to be introduced. *) - let interval = timestamp -. 
rrd.last_updated in + let first_ds_index, _ = valuesandtransforms.(0) in + let last_updated = rrd.rrd_dss.(first_ds_index).ds_last_updated in + let interval = timestamp -. last_updated in (* Work around the clock going backwards *) let interval = if interval < 0. then 5. else interval in (* start time (st) and age of the last processed pdp and the currently occupied one *) - let proc_pdp_st, _proc_pdp_age = get_times rrd.last_updated rrd.timestep in + let proc_pdp_st, _proc_pdp_age = get_times last_updated rrd.timestep in let occu_pdp_st, occu_pdp_age = get_times timestamp rrd.timestep in (* The number of pdps that should result from this update *) @@ -412,6 +416,7 @@ let ds_update rrd timestamp valuesandtransforms new_rrd = Array.map (fun (i, {value; _}) -> let v = process_ds_value rrd.rrd_dss.(i) value interval new_rrd in + rrd.rrd_dss.(i).ds_last_updated <- timestamp ; (i, v) ) valuesandtransforms @@ -548,6 +553,7 @@ let ds_create name ty ?(min = neg_infinity) ?(max = infinity) ?(mrhb = infinity) ; ds_last= init ; ds_value= 0.0 ; ds_unknown_sec= 0.0 + ; ds_last_updated= 0.0 } let rrd_create dss rras timestep timestamp = @@ -706,11 +712,11 @@ let from_xml input = let read_header i = ignore (get_el "version" i) ; let step = get_el "step" i in - let last_update = get_el "lastupdate" i in + let last_update = float_of_string (get_el "lastupdate" i) in (step, last_update) in - let read_dss i = + let read_dss i rrd_last_update = let read_ds i = read_block "ds" (fun i -> @@ -722,6 +728,10 @@ let from_xml input = ignore (get_el "last_ds" i) ; let value = get_el "value" i in let unknown_sec = get_el "unknown_sec" i in + let last_updated = + try float_of_string (get_el "last_updated" i) + with _ -> rrd_last_update + in { ds_name= name ; ds_ty= @@ -742,11 +752,12 @@ let from_xml input = ; (* float_of_string "last_ds"; *) ds_value= float_of_string value ; ds_unknown_sec= float_of_string unknown_sec + ; ds_last_updated= last_updated } ) i in - let dss = read_all "ds" read_ds 
i [] in + let dss = Array.of_list (read_all "ds" read_ds i []) in dss in @@ -791,7 +802,7 @@ let from_xml input = let cols = try Array.length data.(0) with _ -> -1 in let db = Array.init cols (fun i -> - let ds = List.nth dss i in + let ds = dss.(i) in Fring.make rows nan ds.ds_min ds.ds_max ) in @@ -844,13 +855,13 @@ let from_xml input = read_block "rrd" (fun i -> let step, last_update = read_header i in - let dss = read_dss i in + let dss = read_dss i last_update in let rras = read_rras dss i in let rrd = { - last_updated= float_of_string last_update + last_updated= last_update ; timestep= Int64.of_string step - ; rrd_dss= Array.of_list dss + ; rrd_dss= dss ; rrd_rras= Array.of_list rras } in @@ -884,7 +895,7 @@ let from_xml input = ) input -let xml_to_output rrd output = +let xml_to_output internal rrd output = (* We use an output channel for Xmlm-compat buffered output. Provided we flush at the end we should be safe. *) let tag n fn output = @@ -906,7 +917,9 @@ let xml_to_output rrd output = tag "value" (data (Utils.f_to_s ds.ds_value)) output ; tag "unknown_sec" (data (Printf.sprintf "%d" (int_of_float ds.ds_unknown_sec))) - output + output ; + if internal then + tag "last_updated" (data (Utils.f_to_s ds.ds_last_updated)) output ) output in @@ -968,9 +981,7 @@ let xml_to_output rrd output = (fun output -> tag "version" (data "0003") output ; tag "step" (data (Int64.to_string rrd.timestep)) output ; - tag "lastupdate" - (data (Printf.sprintf "%Ld" (Int64.of_float rrd.last_updated))) - output ; + tag "lastupdate" (data (Utils.f_to_s rrd.last_updated)) output ; do_dss rrd.rrd_dss output ; do_rras rrd.rrd_rras output ) @@ -1002,6 +1013,7 @@ module Json = struct ; ("last_ds", string (ds_value_to_string ds.ds_last)) ; ("value", float ds.ds_value) ; ("unknown_sec", float ds.ds_unknown_sec) + ; ("last_updated", float ds.ds_last_updated) ] let cdp x = diff --git a/ocaml/libs/xapi-rrd/unix/rrd_unix.ml b/ocaml/libs/xapi-rrd/unix/rrd_unix.ml index da91c99fd65..745361fb31d 
100644 --- a/ocaml/libs/xapi-rrd/unix/rrd_unix.ml +++ b/ocaml/libs/xapi-rrd/unix/rrd_unix.ml @@ -30,12 +30,13 @@ let with_out_channel_output fd f = ) (fun () -> Out_channel.close_noerr oc) -let xml_to_fd rrd fd = with_out_channel_output fd (Rrd.xml_to_output rrd) +let xml_to_fd internal rrd fd = + with_out_channel_output fd (Rrd.xml_to_output internal rrd) let json_to_fd rrd fd = let payload = Rrd.json_to_string rrd |> Bytes.unsafe_of_string in let len = Bytes.length payload in Unix.write fd payload 0 len |> ignore -let to_fd ?(json = false) rrd fd = - (if json then json_to_fd else xml_to_fd) rrd fd +let to_fd ?(json = false) ?(internal = false) rrd fd = + (if json then json_to_fd else xml_to_fd internal) rrd fd diff --git a/ocaml/libs/xapi-rrd/unix/rrd_unix.mli b/ocaml/libs/xapi-rrd/unix/rrd_unix.mli index bddb4553413..eb06cde2119 100644 --- a/ocaml/libs/xapi-rrd/unix/rrd_unix.mli +++ b/ocaml/libs/xapi-rrd/unix/rrd_unix.mli @@ -1,11 +1,11 @@ (* Copyright (C) Citrix Systems Inc. - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; version 2.1 only. with the special exception on linking described in file LICENSE. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -15,5 +15,10 @@ This module provides Unix tools for dealing with RRDs *) -val to_fd : ?json:bool -> Rrd.rrd -> Unix.file_descr -> unit -(** Serialize the rrd to xml / json and offer it through a file descriptor *) +val to_fd : ?json:bool -> ?internal:bool -> Rrd.rrd -> Unix.file_descr -> unit +(** Serialize the rrd to xml / json and offer it through a file descriptor. + + If [internal] is true (false is the default), then the output is not + guaranteed to be compatible with external tools, and can only be parsed + by xcp-rrdd. 
+ *) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml index 4c1f85b140f..08807e39b74 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml @@ -137,7 +137,7 @@ let send_rrd ?(session_id : string option) let open Xmlrpc_client in with_transport transport (with_http request (fun (_response, fd) -> - try Rrd_unix.to_fd rrd fd with _ -> log_backtrace () + try Rrd_unix.to_fd ~internal:true rrd fd with _ -> log_backtrace () ) ) ; debug "Sending RRD complete." @@ -161,7 +161,8 @@ let archive_rrd_internal ?(transport = None) ~uuid ~rrd () = 0o755 ; let base_filename = Rrdd_libs.Constants.rrd_location ^ "/" ^ uuid in Xapi_stdext_unix.Unixext.atomic_write_to_file (base_filename ^ ".gz") - 0o644 (fun fd -> Gzip.Default.compress fd (Rrd_unix.to_fd rrd) + 0o644 (fun fd -> + Gzip.Default.compress fd (Rrd_unix.to_fd ~internal:true rrd) ) ; (* If there's an uncompressed one hanging around, remove it. *) Xapi_stdext_unix.Unixext.unlink_safe base_filename diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index 94ed27765bd..50000b2bee0 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -514,7 +514,9 @@ let do_monitor_write xc writers = let dom0_stats = tagged_dom0_stats |> List.to_seq - |> Seq.map (fun (name, (timestamp, dss)) -> (timestamp, List.to_seq dss)) + |> Seq.map (fun (_name, (timestamp, dss)) -> + (timestamp, List.to_seq dss) + ) in let plugins_stats = Rrdd_server.Plugin.read_stats () in let stats = Seq.append plugins_stats dom0_stats in diff --git a/ocaml/xcp-rrdd/bin/rrddump/rrddump.ml b/ocaml/xcp-rrdd/bin/rrddump/rrddump.ml index cd0f1675f0d..8d759fed20b 100644 --- a/ocaml/xcp-rrdd/bin/rrddump/rrddump.ml +++ b/ocaml/xcp-rrdd/bin/rrddump/rrddump.ml @@ -32,10 +32,11 @@ let text_export rrd = Int64.sub last_cdp_time (Int64.mul (Int64.of_int i) rra_timestep) in for j = 0 to Array.length rrd.rrd_dss - 1 do - 
Printf.printf "Doing ds: %s\n" rrd.rrd_dss.(j).ds_name ; + let ds = rrd.rrd_dss.(j) in + Printf.printf "Doing ds: %s\n" ds.ds_name ; let oc = open_out - (Printf.sprintf "rrd_data_%s_%s_%Ld.dat" rrd.rrd_dss.(j).ds_name + (Printf.sprintf "rrd_data_%s_%s_%Ld.dat" ds.ds_name (cf_type_to_string rra.rra_cf) (Int64.mul (Int64.of_int (rra.rra_pdp_cnt * rra.rra_row_cnt)) From 7052ddd926b00c2299fe5fd8e0ab3104bc54cc89 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 21 Oct 2024 08:59:27 +0100 Subject: [PATCH 031/121] CA-391651: rrd - don't iterate over lists needlessly Add an "unsafe" version of rrd_add_ds to avoid checking if the datasource already exists twice. Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 52 +++++++++++++------------ ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 4 +- 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 962353971ac..6f264627066 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -590,6 +590,33 @@ let rrd_create dss rras timestep timestamp = ds_update rrd timestamp valuesandtransforms true ; rrd +(** Add the datasource even if it exists in the RRD already. *) +let rrd_add_ds_unsafe rrd timestamp newds = + let npdps = Int64.of_float timestamp /// rrd.timestep in + { + rrd with + rrd_dss= Array.append rrd.rrd_dss [|newds|] + ; rrd_rras= + Array.map + (fun rra -> + let cdp_init = cf_init_value rra.rra_cf newds in + let fring = + Fring.make rra.rra_row_cnt nan newds.ds_min newds.ds_max + in + let nunknowns = + Int64.to_int (Int64.rem npdps (Int64.of_int rra.rra_pdp_cnt)) + in + { + rra with + rra_data= Array.append rra.rra_data [|fring|] + ; rra_cdps= + Array.append rra.rra_cdps + [|{cdp_value= cdp_init; cdp_unknown_pdps= nunknowns}|] + } + ) + rrd.rrd_rras + } + (** Add in a new DS into a pre-existing RRD. Preserves data of all the other archives and fills the new one full of NaNs. 
Note that this doesn't fill in the CDP values correctly at the moment! @@ -599,30 +626,7 @@ let rrd_add_ds rrd timestamp newds = if List.mem newds.ds_name (ds_names rrd) then rrd else - let npdps = Int64.of_float timestamp /// rrd.timestep in - { - rrd with - rrd_dss= Array.append rrd.rrd_dss [|newds|] - ; rrd_rras= - Array.map - (fun rra -> - let cdp_init = cf_init_value rra.rra_cf newds in - let fring = - Fring.make rra.rra_row_cnt nan newds.ds_min newds.ds_max - in - let nunknowns = - Int64.to_int (Int64.rem npdps (Int64.of_int rra.rra_pdp_cnt)) - in - { - rra with - rra_data= Array.append rra.rra_data [|fring|] - ; rra_cdps= - Array.append rra.rra_cdps - [|{cdp_value= cdp_init; cdp_unknown_pdps= nunknowns}|] - } - ) - rrd.rrd_rras - } + rrd_add_ds_unsafe rrd timestamp newds (** Remove the named DS from an RRD. Removes all of the data associated with it, too *) let rrd_remove_ds rrd ds_name = diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index 2b7c6eda420..df4fa798785 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -68,7 +68,9 @@ let merge_new_dss rrdi dss = ( updated_dss , StringMap.fold (fun _key (timestamp, ds) rrd -> - rrd_add_ds rrd timestamp + (* SAFETY: verified that these datasources aren't enabled above + already, in a more efficient way than RRD does it *) + rrd_add_ds_unsafe rrd timestamp (Rrd.ds_create ds.ds_name ds.Ds.ds_type ~mrhb:300.0 Rrd.VT_Unknown) ) new_dss rrdi.rrd From 65d16f5d4dc77e3a03748a077476f4cc56259800 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 23 Oct 2024 13:59:16 +0100 Subject: [PATCH 032/121] CA-391651: rrdd_monitor - Handle missing datasources by resetting them explicitly If RRD's datasource disappears and is not reported by the plugin (or collected by xcp-rrdd), then it's not going to be refreshed, keeping its old value and timestamp. 
Previously, this was handled nicely because the whole RRD was updated at once, and updates were defaulted to VT_Unknown, Identity before being assigned actually collected values. After RRD collection was changed to process datasources in chunks per source, we need to explicitly find the ones that weren't updated on this iteration, and explicitly reset them. (This is not just a theoretical exercise, it can happen when a VIF gets unplugged or destroyed, for example, with its stats not being reset and continuing to display old values) Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 267 ++++++++++++++++++------ ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 2 +- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli | 2 +- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 6 +- 4 files changed, 209 insertions(+), 68 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index df4fa798785..c46a33d6f96 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -91,27 +91,67 @@ module OwnerMap = Map.Make (struct String.compare a b end) +(** Converts all the updates collected from various sources in the form of + (uid * timestamp * (ds_owner * ds) Seq.t) Seq.t + into two OwnerMaps, one mapping an owner to a (flattened) Set of its + datasources (used to determine missing datasources), and another mapping + the owner to a Map of datasources grouped by plugin (used during updates) + *) +let convert_to_owner_map dss = + let consolidate (per_owner_map, per_plugin_map) (source_uid, timestamp, dss) = + let add_to_plugin (per_owner_map, per_plugin_map) (owner, ds) = + let add_dsts_to = StringMap.add ds.ds_name (timestamp, ds) in + let add_ds_to = StringSet.add ds.ds_name in + let merge = function + | None -> + Some (add_ds_to StringSet.empty) + | Some dss -> + Some (add_ds_to dss) + in + let per_owner_map = OwnerMap.update owner merge per_owner_map in + let add_plugin_ds_to = + 
StringMap.update source_uid (function + | None -> + Some (timestamp, add_dsts_to StringMap.empty) + | Some (timestamp, dss) -> + Some (timestamp, add_dsts_to dss) + ) + in + let plugin_merge = function + | None -> + Some (add_plugin_ds_to StringMap.empty) + | Some plugins_dss -> + Some (add_plugin_ds_to plugins_dss) + in + let per_plugin_map : + (float * (float * ds) StringMap.t) StringMap.t OwnerMap.t = + OwnerMap.update owner plugin_merge per_plugin_map + in + (per_owner_map, per_plugin_map) + in + Seq.fold_left add_to_plugin (per_owner_map, per_plugin_map) dss + in + let per_owner_map, per_plugin_map = + Seq.fold_left consolidate (OwnerMap.empty, OwnerMap.empty) dss + in + (per_owner_map, per_plugin_map) + (** Updates all of the hosts rrds. We are passed a list of uuids that is used as the primary source for which VMs are resident on us. When a new uuid turns up that we haven't got an RRD for in our hashtbl, we create a new one. When a uuid for which we have an RRD for doesn't appear to have any stats this update, we assume that the domain has gone and we stream the RRD to the master. We also have a list of the currently rebooting VMs to ensure we - don't accidentally archive the RRD. *) -let update_rrds uuid_domids paused_vms (timestamp, dss) = + don't accidentally archive the RRD. + Also resets the value of datasources that are enabled in the RRD, but + weren't updated on this refresh cycle. 
+ *) +let update_rrds uuid_domids paused_vms plugins_dss = let uuid_domids = List.to_seq uuid_domids |> StringMap.of_seq in let paused_vms = List.to_seq paused_vms |> StringSet.of_seq in - let consolidate all (owner, ds) = - let add_ds_to = StringMap.add ds.ds_name (timestamp, ds) in - let merge = function - | None -> - Some (add_ds_to StringMap.empty) - | Some dss -> - Some (add_ds_to dss) - in - OwnerMap.update owner merge all + let per_owner_flattened_map, per_plugin_map = + convert_to_owner_map plugins_dss in - let dss = Seq.fold_left consolidate OwnerMap.empty dss in let to_named_updates (_, ds) = {value= ds.ds_value; transform= ds.ds_pdp_transform_function} in @@ -119,6 +159,37 @@ let update_rrds uuid_domids paused_vms (timestamp, dss) = StringMap.bindings dss |> List.map snd |> List.map snd in + (* Determine datasources missing from this batch for this RRD, reset + them to default Unknown values *) + let handle_missing_stats rrd dss = + let named_update = {value= VT_Unknown; transform= Identity} in + (* Check which of the enabled data sources are missing from the update batch *) + let missing_dss = + Array.fold_left + (fun missing (ds : Rrd.ds) -> + if StringSet.mem ds.ds_name dss then + missing + else + StringMap.add ds.ds_name named_update missing + ) + StringMap.empty rrd.rrd_dss + in + missing_dss + in + let reset_missing_data = + (* NOTE: This processes already added and enabled datasources that have + not been provided a value on this refresh cycle, so no data sources need + to be added to RRDs *) + (* NOTE: new_rrd is always false, since it's only 'true' currently if a VM's + domid does not correspond to rrdi.domid, which would already have been + fixed by replacing rrdi.domid with the current domid when updating with + provided datasources before this function is called *) + let missing_data_timestamp = Unix.gettimeofday () in + fun rrd dss -> + if not (StringMap.is_empty dss) then + Rrd.ds_update_named rrd ~new_rrd:false missing_data_timestamp dss + 
in + (* Here we do the synchronising between the dom0 view of the world and our Hashtbl. By the end of this execute block, the Hashtbl correctly represents the world *) @@ -138,71 +209,141 @@ let update_rrds uuid_domids paused_vms (timestamp, dss) = "Clock just went backwards by %.0f seconds: RRD data may now be \ unreliable" by_how_much ; - let process_vm vm_uuid (dss : (float * Ds.ds) Rrd.StringMap.t) = + let process_vm vm_uuid + (plugins_dss : (float * (float * Ds.ds) Rrd.StringMap.t) StringMap.t) + available_dss = match StringMap.find_opt vm_uuid uuid_domids with - | Some domid -> ( - (* First, potentially update the rrd with any new default dss *) - match Hashtbl.find_opt vm_rrds vm_uuid with - | Some rrdi -> - let updated_dss, rrd = merge_new_dss rrdi dss in - Hashtbl.replace vm_rrds vm_uuid {rrd; dss= updated_dss; domid} ; - (* CA-34383: Memory updates from paused domains serve no useful - purpose. During a migrate such updates can also cause undesirable - discontinuities in the observed value of memory_actual. Hence, we - ignore changes from paused domains: *) - let named_updates = StringMap.map to_named_updates dss in - if not (StringSet.mem vm_uuid paused_vms) then - Rrd.ds_update_named rrd ~new_rrd:(domid <> rrdi.domid) timestamp - named_updates - | None -> - debug "%s: Creating fresh RRD for VM uuid=%s" __FUNCTION__ vm_uuid ; - let dss_list = map_keys_to_list dss in - let rrd = create_fresh_rrd !use_min_max dss_list timestamp in - Hashtbl.replace vm_rrds vm_uuid {rrd; dss; domid} - ) + | Some domid -> + (* Deal with datasources per plugin *) + let vm_rrdi = Hashtbl.find_opt vm_rrds vm_uuid in + let vm_rrdi = + (* SAFETY: Entries in String/OwnerMap are only present if + they contain a list of datasources, and thus the rrd is + definitely Some after .fold above. 
+ This applies to all such constructs in process_* functions *) + Option.get + (StringMap.fold + (fun _uid (timestamp, dss) vm_rrd -> + (* First, potentially update the rrd with any new default dss *) + match vm_rrd with + | Some rrdi -> + let updated_dss, rrd = merge_new_dss rrdi dss in + (* CA-34383: Memory updates from paused domains serve no useful + purpose. During a migrate such updates can also cause undesirable + discontinuities in the observed value of memory_actual. Hence, we + ignore changes from paused domains: *) + ( if not (StringSet.mem vm_uuid paused_vms) then + let named_updates = + StringMap.map to_named_updates dss + in + Rrd.ds_update_named rrd + ~new_rrd:(domid <> rrdi.domid) timestamp + named_updates + ) ; + Some {rrd; dss= updated_dss; domid} + | None -> + debug "%s: Creating fresh RRD for VM uuid=%s" + __FUNCTION__ vm_uuid ; + let dss_list = map_keys_to_list dss in + let rrd = + create_fresh_rrd !use_min_max dss_list timestamp + in + Some {rrd; dss; domid} + ) + plugins_dss vm_rrdi + ) + in + let missing_updates = + handle_missing_stats vm_rrdi.rrd available_dss + in + reset_missing_data vm_rrdi.rrd missing_updates ; + + Hashtbl.replace vm_rrds vm_uuid vm_rrdi | None -> info "%s: VM uuid=%s is not resident in this host, ignoring rrds" __FUNCTION__ vm_uuid in - let process_sr sr_uuid dss = + let process_sr sr_uuid plugins_dss available_dss = try - (* First, potentially update the rrd with any new default dss *) - match Hashtbl.find_opt sr_rrds sr_uuid with - | Some rrdi -> - let updated_dss, rrd = merge_new_dss rrdi dss in - Hashtbl.replace sr_rrds sr_uuid {rrd; dss= updated_dss; domid= 0} ; - let named_updates = StringMap.map to_named_updates dss in - Rrd.ds_update_named rrd ~new_rrd:false timestamp named_updates - | None -> - debug "%s: Creating fresh RRD for SR uuid=%s" __FUNCTION__ sr_uuid ; - let dss_list = map_keys_to_list dss in - let rrd = create_fresh_rrd !use_min_max dss_list timestamp in - Hashtbl.replace sr_rrds sr_uuid {rrd; dss; 
domid= 0} + let sr_rrdi = Hashtbl.find_opt sr_rrds sr_uuid in + (* Deal with datasources per plugin *) + let sr_rrdi = + Option.get + (StringMap.fold + (fun _uid (timestamp, dss) sr_rrdi -> + (* First, potentially update the rrd with any new default dss *) + match sr_rrdi with + | Some rrdi -> + let updated_dss, rrd = merge_new_dss rrdi dss in + let named_updates = StringMap.map to_named_updates dss in + Rrd.ds_update_named rrd ~new_rrd:false timestamp + named_updates ; + Some {rrd; dss= updated_dss; domid= 0} + | None -> + debug "%s: Creating fresh RRD for SR uuid=%s" + __FUNCTION__ sr_uuid ; + let dss_list = map_keys_to_list dss in + let rrd = + create_fresh_rrd !use_min_max dss_list timestamp + in + Some {rrd; dss; domid= 0} + ) + plugins_dss sr_rrdi + ) + in + let missing_updates = + handle_missing_stats sr_rrdi.rrd available_dss + in + reset_missing_data sr_rrdi.rrd missing_updates ; + + Hashtbl.replace sr_rrds sr_uuid sr_rrdi with _ -> log_backtrace () in - let process_host dss = - match !host_rrd with - | None -> - debug "%s: Creating fresh RRD for localhost" __FUNCTION__ ; - let dss_list = map_keys_to_list dss in - let rrd = create_fresh_rrd true dss_list timestamp in - (* Always always create localhost rrds with min/max enabled *) - host_rrd := Some {rrd; dss; domid= 0} - | Some rrdi -> - let updated_dss, rrd = merge_new_dss rrdi dss in - host_rrd := Some {rrd; dss= updated_dss; domid= 0} ; - let named_updates = StringMap.map to_named_updates dss in - Rrd.ds_update_named rrd ~new_rrd:false timestamp named_updates + let process_host plugins_dss available_dss = + let host_rrdi = !host_rrd in + (* Deal with datasources per plugin *) + let host_rrdi = + Option.get + (StringMap.fold + (fun _uid (timestamp, dss) host_rrdi -> + match host_rrdi with + | None -> + debug "%s: Creating fresh RRD for localhost" __FUNCTION__ ; + let dss_list = map_keys_to_list dss in + let rrd = create_fresh_rrd true dss_list timestamp in + (* Always always create localhost rrds with 
min/max enabled *) + Some {rrd; dss; domid= 0} + | Some rrdi -> + let updated_dss, rrd = merge_new_dss rrdi dss in + let named_updates = StringMap.map to_named_updates dss in + Rrd.ds_update_named rrd ~new_rrd:false timestamp + named_updates ; + Some {rrd; dss= updated_dss; domid= 0} + ) + plugins_dss host_rrdi + ) + in + let missing_updates = + handle_missing_stats host_rrdi.rrd available_dss + in + reset_missing_data host_rrdi.rrd missing_updates ; + + host_rrd := Some host_rrdi in let process_dss ds_owner dss = + (* Flattened list of all datasources for this RRD owner, used to + determine which datasources have gone missing. Not to be used in + actual update process, since these mix up datasources with different + timestamps *) + let available_dss = OwnerMap.find ds_owner per_owner_flattened_map in match ds_owner with | Host -> - process_host dss + process_host dss available_dss | VM uuid -> - process_vm uuid dss + process_vm uuid dss available_dss | SR uuid -> - process_sr uuid dss + process_sr uuid dss available_dss in - OwnerMap.iter process_dss dss + OwnerMap.iter process_dss per_plugin_map ) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index c0adc41dfc4..eb206455684 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -773,7 +773,7 @@ module Plugin = struct let payload = get_payload ~uid plugin in let timestamp = payload.Rrd_protocol.timestamp |> Int64.to_float in let dss = List.to_seq payload.Rrd_protocol.datasources in - Some (timestamp, dss) + Some (P.string_of_uid ~uid, timestamp, dss) with _ -> None in List.iter decr_skip_count plugins ; diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli index 971cdc29860..000c53de121 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli @@ -69,7 +69,7 @@ module Plugin : sig val next_reading : string -> float - val read_stats : unit -> 
(float * (Rrd.ds_owner * Ds.ds) Seq.t) Seq.t + val read_stats : unit -> (string * float * (Rrd.ds_owner * Ds.ds) Seq.t) Seq.t module Local : sig val register : diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index 50000b2bee0..cd19875ee42 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -514,8 +514,8 @@ let do_monitor_write xc writers = let dom0_stats = tagged_dom0_stats |> List.to_seq - |> Seq.map (fun (_name, (timestamp, dss)) -> - (timestamp, List.to_seq dss) + |> Seq.map (fun (name, (timestamp, dss)) -> + (name, timestamp, List.to_seq dss) ) in let plugins_stats = Rrdd_server.Plugin.read_stats () in @@ -524,7 +524,7 @@ let do_monitor_write xc writers = let uuid_domids = List.map (fun (_, u, i) -> (u, i)) domains in (* stats are grouped per plugin, which provides its timestamp *) - Seq.iter (Rrdd_monitor.update_rrds uuid_domids my_paused_vms) stats ; + Rrdd_monitor.update_rrds uuid_domids my_paused_vms stats ; Rrdd_libs.Constants.datasource_dump_file |> Rrdd_server.dump_host_dss_to_file ; From 8d7c057b730e591a2fd54a5e96ded14a814c04b5 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 25 Oct 2024 12:47:37 +0100 Subject: [PATCH 033/121] CA-391651 - rrd protocol: Stop truncating timestamps to seconds Instead of writing Int64 (truncated from timestamp floats) into the memory-mapped files, keep the precision of the timestamp. 
Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 2 +- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 1 - ocaml/xcp-rrdd/bin/transport-rw/reader_commands.ml | 2 +- ocaml/xcp-rrdd/bin/transport-rw/writer_commands.ml | 4 ++-- ocaml/xcp-rrdd/lib/plugin/rrdd_plugin.mli | 2 +- ocaml/xcp-rrdd/lib/plugin/utils.ml | 2 +- ocaml/xcp-rrdd/lib/plugin/utils.mli | 2 +- ocaml/xcp-rrdd/lib/transport/base/rrd_json.ml | 2 +- ocaml/xcp-rrdd/lib/transport/base/rrd_json.mli | 2 +- ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.ml | 2 +- ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.mli | 2 +- ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v1.ml | 4 +--- ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v2.ml | 6 ++++-- ocaml/xcp-rrdd/test/transport/test_common.ml | 5 ++--- ocaml/xcp-rrdd/test/transport/test_scale.ml | 2 +- ocaml/xcp-rrdd/test/transport/test_unit.ml | 2 +- 16 files changed, 20 insertions(+), 22 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index eb206455684..f8f3c99bf8b 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -771,7 +771,7 @@ module Plugin = struct let process_plugin (uid, plugin) = try let payload = get_payload ~uid plugin in - let timestamp = payload.Rrd_protocol.timestamp |> Int64.to_float in + let timestamp = payload.Rrd_protocol.timestamp in let dss = List.to_seq payload.Rrd_protocol.datasources in Some (P.string_of_uid ~uid, timestamp, dss) with _ -> None diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index cd19875ee42..455723633bb 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -501,7 +501,6 @@ let write_dom0_stats writers tagged_dss = this name" name | Some (timestamp, dss) -> - let timestamp = Int64.of_float timestamp in writer.Rrd_writer.write_payload {timestamp; datasources= dss} in List.iter write_dss writers diff --git 
a/ocaml/xcp-rrdd/bin/transport-rw/reader_commands.ml b/ocaml/xcp-rrdd/bin/transport-rw/reader_commands.ml index 8736bca234b..c15bb594231 100644 --- a/ocaml/xcp-rrdd/bin/transport-rw/reader_commands.ml +++ b/ocaml/xcp-rrdd/bin/transport-rw/reader_commands.ml @@ -47,7 +47,7 @@ let string_of_data_source owner ds = let interpret_payload payload = print_endline "------------ Metadata ------------" ; - Printf.printf "timestamp = %Ld\n%!" payload.timestamp ; + Printf.printf "timestamp = %f\n%!" payload.timestamp ; print_endline "---------- Data sources ----------" ; List.iter (fun (owner, ds) -> diff --git a/ocaml/xcp-rrdd/bin/transport-rw/writer_commands.ml b/ocaml/xcp-rrdd/bin/transport-rw/writer_commands.ml index c3061349ccf..4e3ac899e1f 100644 --- a/ocaml/xcp-rrdd/bin/transport-rw/writer_commands.ml +++ b/ocaml/xcp-rrdd/bin/transport-rw/writer_commands.ml @@ -14,7 +14,7 @@ open Rrd_protocol -let now () = Int64.of_float (Unix.gettimeofday ()) +let now () = Unix.gettimeofday () let get_extra_data_sources_flag = let counter = ref 0 in @@ -27,7 +27,7 @@ let generate_time_data_source () = let current_time = now () in ( Rrd.Host , Ds.ds_make ~name:"current_time" ~description:"The current time" - ~value:(Rrd.VT_Int64 current_time) ~ty:Rrd.Gauge ~default:true + ~value:(Rrd.VT_Float current_time) ~ty:Rrd.Gauge ~default:true ~units:"seconds" () ) diff --git a/ocaml/xcp-rrdd/lib/plugin/rrdd_plugin.mli b/ocaml/xcp-rrdd/lib/plugin/rrdd_plugin.mli index e4eaaeecd2c..a237868c873 100644 --- a/ocaml/xcp-rrdd/lib/plugin/rrdd_plugin.mli +++ b/ocaml/xcp-rrdd/lib/plugin/rrdd_plugin.mli @@ -16,7 +16,7 @@ (** Utility functions useful for rrdd plugins. *) module Utils : sig - val now : unit -> int64 + val now : unit -> float (** Return the current unix epoch as an int64. 
*) val cut : string -> string list diff --git a/ocaml/xcp-rrdd/lib/plugin/utils.ml b/ocaml/xcp-rrdd/lib/plugin/utils.ml index 5744fa5578b..d647c25fd67 100644 --- a/ocaml/xcp-rrdd/lib/plugin/utils.ml +++ b/ocaml/xcp-rrdd/lib/plugin/utils.ml @@ -12,7 +12,7 @@ * GNU Lesser General Public License for more details. *) -let now () = Int64.of_float (Unix.gettimeofday ()) +let now () = Unix.gettimeofday () let cut str = Astring.String.fields ~empty:false ~is_sep:(fun c -> c = ' ' || c = '\t') str diff --git a/ocaml/xcp-rrdd/lib/plugin/utils.mli b/ocaml/xcp-rrdd/lib/plugin/utils.mli index 7f797b2232c..c13901ff5fe 100644 --- a/ocaml/xcp-rrdd/lib/plugin/utils.mli +++ b/ocaml/xcp-rrdd/lib/plugin/utils.mli @@ -13,7 +13,7 @@ *) (** Utility functions useful for rrdd plugins. *) -val now : unit -> int64 +val now : unit -> float (** Return the current unix epoch as an int64. *) val cut : string -> string list diff --git a/ocaml/xcp-rrdd/lib/transport/base/rrd_json.ml b/ocaml/xcp-rrdd/lib/transport/base/rrd_json.ml index 15f95e3de46..f34bad05747 100644 --- a/ocaml/xcp-rrdd/lib/transport/base/rrd_json.ml +++ b/ocaml/xcp-rrdd/lib/transport/base/rrd_json.ml @@ -88,7 +88,7 @@ let dss_to_json ~header timestamp dss = let payload = record [ - ("timestamp", `Float (Int64.to_float timestamp)) + ("timestamp", `Float timestamp) ; ("datasources", record @@ List.map ds_to_json dss) ] in diff --git a/ocaml/xcp-rrdd/lib/transport/base/rrd_json.mli b/ocaml/xcp-rrdd/lib/transport/base/rrd_json.mli index 16559121168..27d0e3b4aba 100644 --- a/ocaml/xcp-rrdd/lib/transport/base/rrd_json.mli +++ b/ocaml/xcp-rrdd/lib/transport/base/rrd_json.mli @@ -13,6 +13,6 @@ *) val json_of_dss : - header:string -> int64 -> (Rrd.ds_owner * Ds.ds) list -> string + header:string -> float -> (Rrd.ds_owner * Ds.ds) list -> string val json_metadata_of_dss : (Rrd.ds_owner * Ds.ds) list -> string diff --git a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.ml b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.ml index 
310a9442392..247f0691e2f 100644 --- a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.ml +++ b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.ml @@ -26,7 +26,7 @@ exception Payload_too_large exception Read_error -type payload = {timestamp: int64; datasources: (Rrd.ds_owner * Ds.ds) list} +type payload = {timestamp: float; datasources: (Rrd.ds_owner * Ds.ds) list} type protocol = { make_payload_reader: unit -> Cstruct.t -> payload diff --git a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.mli b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.mli index 310a9442392..247f0691e2f 100644 --- a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.mli +++ b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.mli @@ -26,7 +26,7 @@ exception Payload_too_large exception Read_error -type payload = {timestamp: int64; datasources: (Rrd.ds_owner * Ds.ds) list} +type payload = {timestamp: float; datasources: (Rrd.ds_owner * Ds.ds) list} type protocol = { make_payload_reader: unit -> Cstruct.t -> payload diff --git a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v1.ml b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v1.ml index 834ddd3106b..af187fa8ae7 100644 --- a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v1.ml +++ b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v1.ml @@ -94,9 +94,7 @@ let parse_payload ~(json : string) : payload = try let rpc = Jsonrpc.of_string json in let kvs = Rrd_rpc.dict_of_rpc ~rpc in - let timestamp = - Rpc.float_of_rpc (List.assoc "timestamp" kvs) |> Int64.of_float - in + let timestamp = Rpc.float_of_rpc (List.assoc "timestamp" kvs) in let datasource_rpcs = Rrd_rpc.dict_of_rpc ~rpc:(List.assoc "datasources" kvs) in diff --git a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v2.ml b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v2.ml index 1c6774d525a..3c8cafbd8a7 100644 --- a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v2.ml +++ b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v2.ml @@ -75,7 +75,8 @@ module Read = struct let datasource_count cs = 
Int32.to_int (Cstruct.BE.get_uint32 cs datasource_count_start) - let timestamp cs = Cstruct.BE.get_uint64 cs timestamp_start + let timestamp cs = + Int64.float_of_bits (Cstruct.BE.get_uint64 cs timestamp_start) let datasource_values cs cached_datasources = let rec aux start acc = function @@ -125,7 +126,8 @@ module Write = struct let datasource_count cs value = Cstruct.BE.set_uint32 cs datasource_count_start (Int32.of_int value) - let timestamp cs value = Cstruct.BE.set_uint64 cs timestamp_start value + let timestamp cs value = + Cstruct.BE.set_uint64 cs timestamp_start (Int64.bits_of_float value) let datasource_values cs values = let rec aux start = function diff --git a/ocaml/xcp-rrdd/test/transport/test_common.ml b/ocaml/xcp-rrdd/test/transport/test_common.ml index de083183f1e..bd877281946 100644 --- a/ocaml/xcp-rrdd/test/transport/test_common.ml +++ b/ocaml/xcp-rrdd/test/transport/test_common.ml @@ -1,7 +1,7 @@ let test_payload = Rrd_protocol. { - timestamp= 1387867223L + timestamp= 1387867223.2 ; datasources= [ ( Rrd.Host @@ -133,8 +133,7 @@ let assert_ds_equal (owner1, ds1) (owner2, ds2) = let assert_payloads_equal payload1 payload2 = let open Rrd_protocol in - Alcotest.(check int64) - "Timestamps match" payload1.timestamp payload2.timestamp ; + compare_float "Timestamps match" payload1.timestamp payload2.timestamp ; Alcotest.(check int) "Number of datasources read matches written ones" (List.length payload1.datasources) diff --git a/ocaml/xcp-rrdd/test/transport/test_scale.ml b/ocaml/xcp-rrdd/test/transport/test_scale.ml index ddfe2d02a30..35ab60f600a 100644 --- a/ocaml/xcp-rrdd/test/transport/test_scale.ml +++ b/ocaml/xcp-rrdd/test/transport/test_scale.ml @@ -79,7 +79,7 @@ let write_payloads deliveries protocol sock = let run_tests shared_file_count protocol = Random.self_init () ; - let timestamp = Int64.of_float (Unix.gettimeofday ()) in + let timestamp = Unix.gettimeofday () in let deliveries = List.init shared_file_count (fun k -> { diff --git 
a/ocaml/xcp-rrdd/test/transport/test_unit.ml b/ocaml/xcp-rrdd/test/transport/test_unit.ml index 050eaccedcf..784fb356b7e 100644 --- a/ocaml/xcp-rrdd/test/transport/test_unit.ml +++ b/ocaml/xcp-rrdd/test/transport/test_unit.ml @@ -114,7 +114,7 @@ let test_reader_state protocol = payload again. *) let open Rrd_protocol in writer.Rrd_writer.write_payload - {test_payload with timestamp= Int64.add test_payload.timestamp 5L} ; + {test_payload with timestamp= test_payload.timestamp +. 5.} ; let (_ : Rrd_protocol.payload) = reader.Rrd_reader.read_payload () in () ) From f0d252c0c492700cfb5bb835d497bb5b65957015 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 29 Oct 2024 14:15:03 +0000 Subject: [PATCH 034/121] CA-391651 - rrdd.py: Stop truncating timestamps to seconds RRDD, aside from the OCaml library, exposes two interfaces. This changes the Python one. The C one lives in rrd-client-lib and needs to be changed at the same time and coordinated during updating. Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/scripts/rrdd/rrdd.py | 6 +++--- .../scripts/rrdd/test_api_wait_until_next_reading.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ocaml/xcp-rrdd/scripts/rrdd/rrdd.py b/ocaml/xcp-rrdd/scripts/rrdd/rrdd.py index 1132fa92b53..76dc4fd7974 100644 --- a/ocaml/xcp-rrdd/scripts/rrdd/rrdd.py +++ b/ocaml/xcp-rrdd/scripts/rrdd/rrdd.py @@ -296,10 +296,10 @@ def update(self): """Write all datasources specified (via set_datasource) since the last call to this function. 
The datasources are written together with the relevant metadata into the file agreed with rrdd.""" - timestamp = int(time.time()) + timestamp = time.time() data_values = [] combined = dict() - data_checksum = crc32(pack(">Q", timestamp)) & 0xffffffff + data_checksum = crc32(pack(">d", timestamp)) & 0xffffffff for ds in sorted(self.datasources, key=lambda source: source.name): value = self.pack_data(ds) @@ -326,7 +326,7 @@ def update(self): # Now write the updated header self.dest.seek(0) self.dest.write(encoded_datasource_header) - self.dest.write(pack(">LLLQ", + self.dest.write(pack(">LLLd", data_checksum, metadata_checksum, len(self.datasources), diff --git a/ocaml/xcp-rrdd/scripts/rrdd/test_api_wait_until_next_reading.py b/ocaml/xcp-rrdd/scripts/rrdd/test_api_wait_until_next_reading.py index a038513e230..be946674618 100644 --- a/ocaml/xcp-rrdd/scripts/rrdd/test_api_wait_until_next_reading.py +++ b/ocaml/xcp-rrdd/scripts/rrdd/test_api_wait_until_next_reading.py @@ -160,7 +160,7 @@ def pack_data(self, ds: MockDataSource): unpacked_metadata_checksum, unpacked_num_datasources, unpacked_timestamp, - ) = unpack(">LLLQ", header[11:]) + ) = unpack(">LLLd", header[11:]) # Assert the expected unpacked header value assert header.startswith(b"DATASOURCES") @@ -172,7 +172,7 @@ def pack_data(self, ds: MockDataSource): # # Initialize the expected checksum with the fixed time - expected_checksum = checksum(pack(">Q", fixed_time)) + expected_checksum = checksum(pack(">d", fixed_time)) # Loop over the datasources and assert the packed data testee.dest.seek(header_len) # sourcery skip: no-loop-in-tests From b3da1c9fcf94c4b47a6a740e9142f949b4e9249e Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 25 Oct 2024 14:05:11 +0100 Subject: [PATCH 035/121] CA-391651 rrd: Don't truncate timestamps when calculating values Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git 
a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 6f264627066..c9d646345cd 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -235,6 +235,11 @@ let get_times time timestep = let age = time -. Int64.to_float starttime in (starttime, age) +let get_float_time time timestep = + let timestep = Int64.to_float timestep in + let starttime = timestep *. (time /. timestep) in + starttime + (** Update the CDP value with a number (start_pdp_offset) of PDPs. *) let do_cfs rra start_pdp_offset pdps = Array.iter @@ -443,10 +448,9 @@ let ds_update rrd timestamp valuesandtransforms new_rrd = (i, nan) else let raw = - ds.ds_value - /. (Int64.to_float (occu_pdp_st --- proc_pdp_st) - -. ds.ds_unknown_sec - ) + let proc_pdp_st = get_float_time last_updated rrd.timestep in + let occu_pdp_st = get_float_time timestamp rrd.timestep in + ds.ds_value /. (occu_pdp_st -. proc_pdp_st -. ds.ds_unknown_sec) in (* Apply the transform after the raw value has been calculated *) let raw = apply_transform_function transform raw in From 76e317c797dc6b8f4c64b931b48c67069ba8d81f Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 18 Oct 2024 13:18:48 +0100 Subject: [PATCH 036/121] CA-391651: Update RRD tests to the new interfaces Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib_test/crowbar_tests.ml | 9 +- .../xapi-rrd/lib_test/test_data/flip_flop.xml | 2 +- ocaml/libs/xapi-rrd/lib_test/unit_tests.ml | 92 ++++++++----------- ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml | 38 +++++--- 4 files changed, 69 insertions(+), 72 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib_test/crowbar_tests.ml b/ocaml/libs/xapi-rrd/lib_test/crowbar_tests.ml index 6ff917eccfc..243b4d6a4e4 100644 --- a/ocaml/libs/xapi-rrd/lib_test/crowbar_tests.ml +++ b/ocaml/libs/xapi-rrd/lib_test/crowbar_tests.ml @@ -74,14 +74,13 @@ let ds = let rrd = Cb.(map [list1 int64; rra; ds]) (fun values rra ds -> let open Rrd in - let init_time = 0. 
in - - let rrd = rrd_create [|ds|] [|rra|] 5L init_time in + let rrd = rrd_create [|ds|] [|rra|] 5L 0. in List.iteri (fun i v -> - let t = 5. *. (init_time +. float_of_int i) in - ds_update rrd t [|VT_Int64 v|] [|Identity|] (i = 0) + let timestamp = 5. *. float_of_int i in + let arr = [|(0, {value= VT_Int64 v; transform= Identity})|] in + ds_update rrd timestamp arr (i = 0) ) values ; rrd diff --git a/ocaml/libs/xapi-rrd/lib_test/test_data/flip_flop.xml b/ocaml/libs/xapi-rrd/lib_test/test_data/flip_flop.xml index 8e368ed41b7..77e42106881 100644 --- a/ocaml/libs/xapi-rrd/lib_test/test_data/flip_flop.xml +++ b/ocaml/libs/xapi-rrd/lib_test/test_data/flip_flop.xml @@ -1,2 +1,2 @@ -00035100flip_flopDERIVEInfinity0Infinity00.00AVERAGE10.50000.00.00.00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.01.0000-3.00003.0000-5.00005.0000-7.00007.0000-9.00009.0000-11.000011.0000-13.000013.0000-15.000015.0000-17.000017.0000-19.000019.0000MIN10.50000.00.019.00000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.01.0000-3.0000-3.0000-5.0000-5.0000-7.0000-7.0000-9.0000-9.0000-11.0000-11.0000-13.0000-13.0000-15.0000-15.0000-17.0000-17.0000-19.0000-19.0000MAX10.50000.00.019.00000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.01.00001.00003.00003.00005.00005.00007.00007.00009.00009.000011.000011.000013.000013.000015.000015.000017.000017.000019.0000 
+00035100flip_flopDERIVEInfinity0Infinity00.000.0AVERAGE10.50000.00.00.00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.01.0000-3.00003.0000-5.00005.0000-7.00007.0000-9.00009.0000-11.000011.0000-13.000013.0000-15.000015.0000-17.000017.0000-19.000019.0000MIN10.50000.00.019.00000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.01.0000-3.0000-3.0000-5.0000-5.0000-7.0000-7.0000-9.0000-9.0000-11.0000-11.0000-13.0000-13.0000-15.0000-15.0000-17.0000-17.0000-19.0000-19.0000MAX10.50000.00.019.00000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.01.00001.00003.00003.00005.00005.00007.00007.00009.00009.000011.000011.000013.000013.000015.000015.000017.000017.000019.0000 diff --git a/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml b/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml index 089d8047468..f9cb5765b9f 100644 --- a/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml +++ b/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml @@ -15,10 +15,7 @@ let assert_ds_equal d1 d2 = compare_float __LOC__ d1.ds_value d2.ds_value ; compare_float __LOC__ d1.ds_unknown_sec d2.ds_unknown_sec -let assert_dss_equal d1s d2s = - let d1s = Array.to_list d1s in - let d2s = Array.to_list d2s in - List.iter2 assert_ds_equal d1s d2s +let assert_dss_equal d1s d2s = Array.iter2 assert_ds_equal d1s d2s let assert_cdp_prep_equal c1 c2 = compare_float __LOC__ c1.cdp_value c2.cdp_value ; @@ -37,15 +34,10 @@ let assert_rra_equal a1 a2 = Alcotest.(check int) 
__LOC__ a1.rra_row_cnt a2.rra_row_cnt ; Alcotest.(check int) __LOC__ a1.rra_pdp_cnt a2.rra_pdp_cnt ; compare_float __LOC__ a1.rra_xff a2.rra_xff ; - List.iter2 assert_cdp_prep_equal - (Array.to_list a1.rra_cdps) - (Array.to_list a2.rra_cdps) ; - List.iter2 assert_fring_equal - (Array.to_list a1.rra_data) - (Array.to_list a2.rra_data) + Array.iter2 assert_cdp_prep_equal a1.rra_cdps a2.rra_cdps ; + Array.iter2 assert_fring_equal a1.rra_data a2.rra_data -let assert_rras_equal a1s a2s = - List.iter2 assert_rra_equal (Array.to_list a1s) (Array.to_list a2s) +let assert_rras_equal a1s a2s = Array.iter2 assert_rra_equal a1s a2s let assert_rrds_equal r1 r2 = compare_float __LOC__ r1.last_updated r2.last_updated ; @@ -74,9 +66,7 @@ let test_ranges rrd () = let in_range_fring ds fring = in_range ds.ds_min ds.ds_max (fring_to_list fring) in - let in_range_rra dss rra = - List.iter2 in_range_fring dss (Array.to_list rra.rra_data) - in + let in_range_rra dss rra = Array.iter2 in_range_fring dss rra.rra_data in let range_is_not_empty ds = Alcotest.(check bool) (Printf.sprintf "min (%f) < max (%f); " ds.ds_min ds.ds_max) @@ -84,9 +74,7 @@ let test_ranges rrd () = in Array.iter range_is_not_empty rrd.rrd_dss ; - List.iter - (in_range_rra @@ Array.to_list rrd.rrd_dss) - (Array.to_list rrd.rrd_rras) + Array.iter (in_range_rra @@ rrd.rrd_dss) rrd.rrd_rras let test_marshall rrd ~json () = ignore @@ -94,13 +82,13 @@ let test_marshall rrd ~json () = Rrd.json_to_string rrd else let out = Buffer.create 2048 in - Rrd.xml_to_output rrd (Xmlm.make_output (`Buffer out)) ; + Rrd.xml_to_output true rrd (Xmlm.make_output (`Buffer out)) ; Buffer.contents out ) let test_marshall_unmarshall rrd () = let out = Buffer.create 2048 in - Rrd.xml_to_output rrd (Xmlm.make_output (`Buffer out)) ; + Rrd.xml_to_output true rrd (Xmlm.make_output (`Buffer out)) ; let contents = Buffer.contents out in let xml = Xmlm.make_input (`String (0, contents)) in let rrd' = Rrd.from_xml xml in @@ -124,21 +112,28 @@ let 
gauge_rrd = let rra2 = rra_create CF_Average 100 10 0.5 in let rra3 = rra_create CF_Average 100 100 0.5 in let rra4 = rra_create CF_Average 100 1000 0.5 in + let ts = 1000000000.0 in let ds = ds_create "foo" Gauge ~mrhb:10.0 (VT_Float 0.0) in let ds2 = ds_create "bar" Gauge ~mrhb:10.0 (VT_Float 0.0) in let ds3 = ds_create "baz" Gauge ~mrhb:10.0 (VT_Float 0.0) in let ds4 = ds_create "boo" Gauge ~mrhb:10.0 (VT_Float 0.0) in - let rrd = - rrd_create [|ds; ds2; ds3; ds4|] [|rra; rra2; rra3; rra4|] 1L 1000000000.0 - in + let rrd = rrd_create [|ds; ds2; ds3; ds4|] [|rra; rra2; rra3; rra4|] 1L ts in let id = Identity in for i = 1 to 100000 do - let t = 1000000000.0 +. (0.7 *. float_of_int i) in - let v1 = VT_Float (0.5 +. (0.5 *. sin (t /. 10.0))) in - let v2 = VT_Float (1.5 +. (0.5 *. cos (t /. 80.0))) in - let v3 = VT_Float (3.5 +. (0.5 *. sin (t /. 700.0))) in - let v4 = VT_Float (6.5 +. (0.5 *. cos (t /. 5000.0))) in - ds_update rrd t [|v1; v2; v3; v4|] [|id; id; id; id|] false + let t = 1000000.0 +. (0.7 *. float_of_int i) in + let v1 = + (0, {value= VT_Float (0.5 +. (0.5 *. sin (t /. 10.0))); transform= id}) + in + let v2 = + (1, {value= VT_Float (1.5 +. (0.5 *. cos (t /. 80.0))); transform= id}) + in + let v3 = + (2, {value= VT_Float (3.5 +. (0.5 *. sin (t /. 700.0))); transform= id}) + in + let v4 = + (3, {value= VT_Float (6.5 +. (0.5 *. cos (t /. 5000.0))); transform= id}) + in + ds_update rrd t [|v1; v2; v3; v4|] false done ; rrd @@ -150,66 +145,60 @@ let of_file filename = (* Used to generate flip_flop.xml for test_ca_325844, then gets edited manually to set min to 0 *) let _deserialize_verify_rrd = - let init_time = 0. in - let rra1 = rra_create CF_Average 100 1 0.5 in let rra2 = rra_create CF_Min 100 1 0.5 in let rra3 = rra_create CF_Max 100 1 0.5 in let ds = ds_create "flip_flop" Derive (VT_Int64 0L) in - let rrd = rrd_create [|ds|] [|rra1; rra2; rra3|] 5L init_time in + let rrd = rrd_create [|ds|] [|rra1; rra2; rra3|] 5L 0. 
in let id = Identity in for i = 1 to 100 do - let t = init_time +. float_of_int i in + let t = float_of_int i in let t64 = Int64.of_float t in - let v = VT_Int64 Int64.(mul t64 (mul (-1L) (rem t64 2L))) in - ds_update rrd t [|v|] [|id|] false + let value = VT_Int64 Int64.(mul t64 (mul (-1L) (rem t64 2L))) in + ds_update rrd t [|(0, {value; transform= id})|] false done ; rrd let ca_322008_rrd = - let init_time = 0. in - let rra1 = rra_create CF_Average 100 1 0.5 in let rra2 = rra_create CF_Min 100 1 0.5 in let rra3 = rra_create CF_Max 100 1 0.5 in let ds = ds_create "even or zero" Derive ~min:0. (VT_Int64 0L) in - let rrd = rrd_create [|ds|] [|rra1; rra2; rra3|] 5L init_time in + let rrd = rrd_create [|ds|] [|rra1; rra2; rra3|] 5L 0. in let id = Identity in for i = 1 to 100000 do - let t = init_time +. float_of_int i in + let t = float_of_int i in let t64 = Int64.of_float t in - let v = VT_Int64 (Int64.mul t64 (Int64.rem t64 2L)) in - ds_update rrd t [|v|] [|id|] false + let value = VT_Int64 (Int64.mul t64 (Int64.rem t64 2L)) in + ds_update rrd t [|(0, {value; transform= id})|] false done ; rrd let ca_329043_rrd_1 = - let init_time = 0. in - let rra1 = rra_create CF_Average 3 1 0.5 in let rra2 = rra_create CF_Min 3 1 0.5 in let rra3 = rra_create CF_Max 3 1 0.5 in let ds = ds_create "derive_with_min" ~min:0. ~max:1. Derive VT_Unknown in - let rrd = rrd_create [|ds|] [|rra1; rra2; rra3|] 5L init_time in + let rrd = rrd_create [|ds|] [|rra1; rra2; rra3|] 5L 0. in let id = Identity in let time_value_of_i i = - let t = 5. *. (init_time +. float_of_int i) in + let t = 5. *. float_of_int i in if i = 1 then (t, VT_Int64 0L) else (t, VT_Int64 Int64.(of_float t)) in for i = 0 to 4 do - let t, v = time_value_of_i i in - ds_update rrd t [|v|] [|id|] (i = 0) + let t, value = time_value_of_i i in + ds_update rrd t [|(0, {value; transform= id})|] (i = 0) done ; rrd @@ -233,7 +222,7 @@ let create_rrd ?(rows = 2) values min max = List.iteri (fun i v -> let t = 5. *. (init_time +. 
float_of_int i) in - ds_update rrd t [|VT_Int64 v|] [|id; id; id; id|] (i = 0) + ds_update rrd t [|(0, {value= VT_Int64 v; transform= id})|] (i = 0) ) values ; rrd @@ -258,11 +247,8 @@ let test_ca_322008 () = let in_range_fring ds fring = in_range ds.ds_min rrd.last_updated (fring_to_list fring) in - let in_range_rra dss rra = - List.iter2 in_range_fring dss (Array.to_list rra.rra_data) - in - List.iter (in_range_rra @@ Array.to_list rrd.rrd_dss) - @@ Array.to_list rrd.rrd_rras + let in_range_rra dss rra = Array.iter2 in_range_fring dss rra.rra_data in + Array.iter (in_range_rra @@ rrd.rrd_dss) @@ rrd.rrd_rras let test_ca_325844 () = let rrd = of_file (Filename.concat "test_data" "flip_flop.xml") in diff --git a/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml b/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml index 27d64870fe4..bb0f726b5eb 100644 --- a/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml @@ -36,7 +36,18 @@ let check_datasources kind rdds expected_dss = | None -> () | Some actual_rdds -> - let actual_dss = dss_of_rrds actual_rdds in + let actual_dss = + dss_of_rrds actual_rdds + |> List.map (fun (name, dss) -> + ( name + , Rrd.StringMap.( + map (fun (_timestamp, ds) -> ds) dss + |> bindings + |> List.map snd + ) + ) + ) + in let expected_dss = List.fast_sort Stdlib.compare expected_dss in Alcotest.(check @@ list @@ pair string (list ds)) (Printf.sprintf "%s rrds are not expected" kind) @@ -45,15 +56,16 @@ let check_datasources kind rdds expected_dss = let host_rrds rrd_info = Option.bind rrd_info @@ fun rrd_info -> let h = Hashtbl.create 1 in - if rrd_info.Rrdd_shared.dss <> [] then + if rrd_info.Rrdd_shared.dss <> Rrd.StringMap.empty then Hashtbl.add h "host" rrd_info ; Some h -let update_rrds_test ~dss ~uuid_domids ~paused_vms ~expected_vm_rrds +let update_rrds_test ~timestamp ~dss ~uuid_domids ~paused_vms ~expected_vm_rrds ~expected_sr_rrds ~expected_host_dss = let test () = reset_rrdd_shared_state () ; - 
Rrdd_monitor.update_rrds 12345.0 (List.to_seq dss) uuid_domids paused_vms ; + Rrdd_monitor.update_rrds uuid_domids paused_vms + (List.to_seq [("update_rrds_test", timestamp, List.to_seq dss)]) ; check_datasources "VM" (Some Rrdd_shared.vm_rrds) expected_vm_rrds ; check_datasources "SR" (Some Rrdd_shared.sr_rrds) expected_sr_rrds ; check_datasources "Host" (host_rrds !Rrdd_shared.host_rrd) expected_host_dss @@ -64,35 +76,35 @@ let update_rrds = let open Rrd in [ ( "Null update" - , update_rrds_test ~dss:[] ~uuid_domids:[] ~paused_vms:[] + , update_rrds_test ~timestamp:0. ~dss:[] ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Single host update" - , update_rrds_test + , update_rrds_test ~timestamp:0. ~dss:[(Host, ds_a)] ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[("host", [ds_a])] ) ; ( "Multiple host updates" - , update_rrds_test + , update_rrds_test ~timestamp:0. ~dss:[(Host, ds_a); (Host, ds_b)] ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[("host", [ds_a; ds_b])] ) ; ( "Single non-resident VM update" - , update_rrds_test + , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a)] ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple non-resident VM updates" - , update_rrds_test + , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a); (VM "b", ds_a)] ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Single resident VM update" - , update_rrds_test + , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a)] ~uuid_domids:[("a", 1)] ~paused_vms:[] @@ -100,7 +112,7 @@ let update_rrds = ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple resident VM updates" - , update_rrds_test + , update_rrds_test ~timestamp:0. 
~dss:[(VM "a", ds_a); (VM "b", ds_a); (VM "b", ds_b)] ~uuid_domids:[("a", 1); ("b", 1)] ~paused_vms:[] @@ -108,7 +120,7 @@ let update_rrds = ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple resident and non-resident VM updates" - , update_rrds_test + , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a); (VM "b", ds_a); (VM "c", ds_a)] ~uuid_domids:[("a", 1); ("b", 1)] ~paused_vms:[] @@ -116,7 +128,7 @@ let update_rrds = ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple SR updates" - , update_rrds_test + , update_rrds_test ~timestamp:0. ~dss:[(SR "a", ds_a); (SR "b", ds_a); (SR "b", ds_b)] ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[("a", [ds_a]); ("b", [ds_a; ds_b])] From 1d7fe35eddcacf071e2690cf5e3cc3381f27b342 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 29 Oct 2024 15:05:21 +0000 Subject: [PATCH 037/121] CA-391651 - docs: Update RRD design pages Signed-off-by: Andrii Sultanov --- doc/content/design/plugin-protocol-v2.md | 9 +-- .../xcp-rrdd/design/plugin-protocol-v2.md | 57 +++++++++++++++---- 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/doc/content/design/plugin-protocol-v2.md b/doc/content/design/plugin-protocol-v2.md index 8c02b85c61f..e27f3bec887 100644 --- a/doc/content/design/plugin-protocol-v2.md +++ b/doc/content/design/plugin-protocol-v2.md @@ -20,7 +20,7 @@ DATASOURCES 000001e4 dba4bf7a84b6d11d565d19ef91f7906e { - "timestamp": 1339685573, + "timestamp": 1339685573.245, "data_sources": { "cpu-temp-cpu0": { "description": "Temperature of CPU 0", @@ -62,7 +62,7 @@ reported datasources. 
### Example ``` { - "timestamp": 1339685573, + "timestamp": 1339685573.245, "data_sources": { "cpu-temp-cpu0": { "description": "Temperature of CPU 0", @@ -96,7 +96,7 @@ Protocol V2 |data checksum |32 |int32 |binary-encoded crc32 of the concatenation of the encoded timestamp and datasource values| |metadata checksum |32 |int32 |binary-encoded crc32 of the metadata string (see below) | |number of datasources|32 |int32 |only needed if the metadata has changed - otherwise RRDD can use a cached value | -|timestamp |64 |int64 |Unix epoch | +|timestamp |64 |double|Unix epoch | |datasource values |n * 64 |int64 \| double |n is the number of datasources exported by the plugin, type dependent on the setting in the metadata for value_type [int64\|float] | |metadata length |32 |int32 | | |metadata |(string length)*8|string| | @@ -193,6 +193,3 @@ This means that for a normal update, RRDD will only have to read the header plus the first (16 + 16 + 4 + 8 + 8*n) bytes of data, where n is the number of datasources exported by the plugin. If the metadata changes RRDD will have to read all the data (and parse the metadata). - -n.b. the timestamp reported by plugins is not currently used by RRDD - it uses -its own global timestamp. diff --git a/doc/content/xcp-rrdd/design/plugin-protocol-v2.md b/doc/content/xcp-rrdd/design/plugin-protocol-v2.md index c8581a2aad3..e27f3bec887 100644 --- a/doc/content/xcp-rrdd/design/plugin-protocol-v2.md +++ b/doc/content/xcp-rrdd/design/plugin-protocol-v2.md @@ -1,5 +1,6 @@ --- title: RRDD plugin protocol v2 +layout: default design_doc: true revision: 1 status: released (7.0) @@ -19,7 +20,7 @@ DATASOURCES 000001e4 dba4bf7a84b6d11d565d19ef91f7906e { - "timestamp": 1339685573, + "timestamp": 1339685573.245, "data_sources": { "cpu-temp-cpu0": { "description": "Temperature of CPU 0", @@ -58,9 +59,10 @@ This should always be present. * The JSON data itself, encoding the values and metadata associated with the reported datasources. 
+### Example ``` { - "timestamp": 1339685573, + "timestamp": 1339685573.245, "data_sources": { "cpu-temp-cpu0": { "description": "Temperature of CPU 0", @@ -90,19 +92,32 @@ Protocol V2 |value|bits|format|notes| |-----|----|------|-----| -|header string |(string length)*8|string|"Datasources" as in the V1 protocol | +|header string |(string length)*8|string|"DATASOURCES" as in the V1 protocol | |data checksum |32 |int32 |binary-encoded crc32 of the concatenation of the encoded timestamp and datasource values| |metadata checksum |32 |int32 |binary-encoded crc32 of the metadata string (see below) | |number of datasources|32 |int32 |only needed if the metadata has changed - otherwise RRDD can use a cached value | -|timestamp |64 |int64 |Unix epoch | -|datasource values |n * 64 |int64 |n is the number of datasources exported by the plugin | +|timestamp |64 |double|Unix epoch | +|datasource values |n * 64 |int64 \| double |n is the number of datasources exported by the plugin, type dependent on the setting in the metadata for value_type [int64\|float] | |metadata length |32 |int32 | | |metadata |(string length)*8|string| | -All integers are bigendian. The metadata will have the same JSON-based format as +All integers/double are bigendian. The metadata will have the same JSON-based format as in the V1 protocol, minus the timestamp and `value` key-value pair for each -datasource, for example: +datasource. +| field | values | notes | required | +|-------|--------|-------|----------| +|description|string|Description of the datasource|no| +|owner|host \| vm \| sr|The object to which the data relates|no, default host| +|value_type|int64 \| float|The type of the datasource|yes| +|type|absolute \| derive \| gauge|The type of measurement being sent. 
Absolute for counters which are reset on reading, derive stores the derivative of the recorded values (useful for metrics which continually increase like amount of data written since start), gauge for things like temperature|no, default absolute| +|default|true \| false|Whether the source is default enabled or not|no, default false| +|units||The units the data should be displayed in|no| +|min||The minimum value for the datasource|no, default -infinity| +|max||The maximum value for the datasource|no, default +infinity| + + +### Example ``` { "datasources": { @@ -125,6 +140,27 @@ datasource, for example: "units":"B", "min":"-inf", "max":"inf" + }, + { + "cpu-temp-cpu0": { + "description": "Temperature of CPU 0", + "owner":"host", + "value_type": "float", + "type": "absolute", + "default":"true", + "units": "degC", + "min":"-inf", + "max":"inf" + }, + "cpu-temp-cpu1": { + "description": "Temperature of CPU 1", + "owner":"host", + "value_type": "float", + "type": "absolute", + "default":"true", + "units": "degC", + "min":"-inf", + "max":"inf" } } } @@ -140,13 +176,13 @@ if header != expected_header: raise InvalidHeader() if data_checksum == last_data_checksum: raise NoUpdate() -if data_checksum != md5sum(encoded_timestamp_and_values): +if data_checksum != crc32(encoded_timestamp_and_values): raise InvalidChecksum() if metadata_checksum == last_metadata_checksum: for datasource, value in cached_datasources, values: update(datasource, value) else: - if metadata_checksum != md5sum(metadata): + if metadata_checksum != crc32(metadata): raise InvalidChecksum() cached_datasources = create_datasources(metadata) for datasource, value in cached_datasources, values: @@ -157,6 +193,3 @@ This means that for a normal update, RRDD will only have to read the header plus the first (16 + 16 + 4 + 8 + 8*n) bytes of data, where n is the number of datasources exported by the plugin. If the metadata changes RRDD will have to read all the data (and parse the metadata). - -n.b. 
the timestamp reported by plugins is not currently used by RRDD - it uses -its own global timestamp. From 19ad403eb3ea8f76cf2623b7fda7fe0475c55f38 Mon Sep 17 00:00:00 2001 From: Stephen Cheng Date: Wed, 30 Oct 2024 02:24:10 +0000 Subject: [PATCH 038/121] CA-399396: Adjust the jemalloc parameters for memory performance The new version (5.3.0) jemalloc caused a significant increase of memory usage compared to the version 3.6.0. Signed-off-by: Stephen Cheng --- ocaml/xenopsd/scripts/qemu-wrapper | 2 +- scripts/xapi-nbd.service | 2 +- scripts/xcp-networkd.service | 2 +- scripts/xcp-rrdd.service | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ocaml/xenopsd/scripts/qemu-wrapper b/ocaml/xenopsd/scripts/qemu-wrapper index 9d9fc9aef8d..93f5c685eac 100644 --- a/ocaml/xenopsd/scripts/qemu-wrapper +++ b/ocaml/xenopsd/scripts/qemu-wrapper @@ -305,7 +305,7 @@ def main(argv): qemu_env["LD_PRELOAD"] = "/usr/lib64/libjemalloc.so.2" else: qemu_env["LD_PRELOAD"] = "/usr/lib64/libjemalloc.so.2:" + qemu_env["LD_PRELOAD"] - qemu_env["MALLOC_CONF"] = "narenas:1,tcache:false" + qemu_env["MALLOC_CONF"] = "background_thread:true,dirty_decay_ms:100,narenas:1,tcache:false" sys.stdout.flush() sys.stderr.flush() diff --git a/scripts/xapi-nbd.service b/scripts/xapi-nbd.service index bca7b551a14..6aabf845fe9 100644 --- a/scripts/xapi-nbd.service +++ b/scripts/xapi-nbd.service @@ -5,7 +5,7 @@ Wants=xapi.service message-switch.service syslog.target [Service] Environment="LD_PRELOAD=/usr/lib64/libjemalloc.so.2" -Environment="MALLOC_CONF=narenas:1,tcache:false" +Environment="MALLOC_CONF=background_thread:true,dirty_decay_ms:100,narenas:1,tcache:false" Environment=OCAMLRUNPARAM=b # The --certfile option must match the server-cert-path in xapi.conf # and the PathExists in xapi-nbd.path: any change must be made in all three files. 
diff --git a/scripts/xcp-networkd.service b/scripts/xcp-networkd.service index eb49512cf24..6f5bebddfd5 100644 --- a/scripts/xcp-networkd.service +++ b/scripts/xcp-networkd.service @@ -7,7 +7,7 @@ Wants=forkexecd.service message-switch.service syslog.target [Service] Type=notify Environment="LD_PRELOAD=/usr/lib64/libjemalloc.so.2" -Environment="MALLOC_CONF=narenas:1,tcache:false" +Environment="MALLOC_CONF=background_thread:true,dirty_decay_ms:100,narenas:1,tcache:false" Environment=OCAMLRUNPARAM=b EnvironmentFile=-/etc/sysconfig/xcp-networkd ExecStart=/usr/sbin/xcp-networkd $XCP_NETWORKD_OPTIONS diff --git a/scripts/xcp-rrdd.service b/scripts/xcp-rrdd.service index 81e4d78df68..ea5e42ad909 100644 --- a/scripts/xcp-rrdd.service +++ b/scripts/xcp-rrdd.service @@ -6,7 +6,7 @@ Wants=forkexecd.service xenstored.service message-switch.service syslog.target [Service] Type=notify Environment="LD_PRELOAD=/usr/lib64/libjemalloc.so.2" -Environment="MALLOC_CONF=narenas:1,tcache:false" +Environment="MALLOC_CONF=background_thread:true,dirty_decay_ms:100,narenas:1,tcache:false" Environment=OCAMLRUNPARAM=b EnvironmentFile=-/etc/sysconfig/xcp-rrdd ExecStart=/usr/sbin/xcp-rrdd $XCP_RRDD_OPTIONS From 176c9e34bafdd39e89a78ac140ae38881388258d Mon Sep 17 00:00:00 2001 From: Colin James Date: Tue, 29 Oct 2024 15:01:06 +0000 Subject: [PATCH 039/121] CP-52039: Drop Semaphore from Xapi_stdext_threads Prior to version 4.12, OCaml's standard threads library (systhreads) had no builtin concept of a semaphore, so one was implemented in Xapi_stdext_threads. We replace all usages of this with Semaphore.Counting from the standard library and remove the implementation from Xapi_stdext_threads. Technically, the interface provided by the previous semaphore is more general: it permits arbitrary adjustments to the semaphore's counter, allowing for a "weighted" style of locking. However, this is only used in one place (with a weight value of 1, which is the same decrement/increment value as normal). 
Signed-off-by: Colin James --- ocaml/libs/http-lib/http_svr.ml | 2 +- ocaml/libs/http-lib/server_io.ml | 6 +- ocaml/libs/http-lib/server_io.mli | 2 +- .../lib/xapi-stdext-threads/semaphore.ml | 57 ------------------- .../lib/xapi-stdext-threads/semaphore.mli | 40 ------------- .../lib/xapi-stdext-threads/threadext.ml | 12 +++- .../lib/xapi-stdext-threads/threadext.mli | 4 ++ ocaml/networkd/lib/network_utils.ml | 7 ++- ocaml/xapi-aux/throttle.ml | 8 ++- ocaml/xapi/xapi_sr.ml | 1 - 10 files changed, 28 insertions(+), 111 deletions(-) delete mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.ml delete mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.mli diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index 3c8ec7facbb..54a8b96ba73 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -648,7 +648,7 @@ let start ?header_read_timeout ?header_total_timeout ?max_header_length ; body= handle_connection ~header_read_timeout ~header_total_timeout ~max_header_length x - ; lock= Xapi_stdext_threads.Semaphore.create conn_limit + ; lock= Semaphore.Counting.make conn_limit } in let server = Server_io.server handler socket in diff --git a/ocaml/libs/http-lib/server_io.ml b/ocaml/libs/http-lib/server_io.ml index 09abf253ee1..c821a27c024 100644 --- a/ocaml/libs/http-lib/server_io.ml +++ b/ocaml/libs/http-lib/server_io.ml @@ -23,7 +23,7 @@ type handler = { name: string ; (* body should close the provided fd *) body: Unix.sockaddr -> Unix.file_descr -> unit - ; lock: Xapi_stdext_threads.Semaphore.t + ; lock: Semaphore.Counting.t } let handler_by_thread (h : handler) (s : Unix.file_descr) @@ -31,7 +31,7 @@ let handler_by_thread (h : handler) (s : Unix.file_descr) Thread.create (fun () -> Fun.protect - ~finally:(fun () -> Xapi_stdext_threads.Semaphore.release h.lock 1) + ~finally:(fun () -> Semaphore.Counting.release h.lock) (Debug.with_thread_named h.name (fun () -> h.body caller s)) ) 
() @@ -49,7 +49,7 @@ let establish_server ?(signal_fds = []) forker handler sock = @@ Polly.wait epoll 2 (-1) (fun _ fd _ -> (* If any of the signal_fd is active then bail out *) if List.mem fd signal_fds then raise PleaseClose ; - Xapi_stdext_threads.Semaphore.acquire handler.lock 1 ; + Semaphore.Counting.acquire handler.lock ; let s, caller = Unix.accept ~cloexec:true sock in try ignore (forker handler s caller) with exc -> diff --git a/ocaml/libs/http-lib/server_io.mli b/ocaml/libs/http-lib/server_io.mli index 3aca0234743..3c52f53a804 100644 --- a/ocaml/libs/http-lib/server_io.mli +++ b/ocaml/libs/http-lib/server_io.mli @@ -16,7 +16,7 @@ type handler = { name: string (** used for naming the thread *) ; body: Unix.sockaddr -> Unix.file_descr -> unit (** function called in a thread for each connection*) - ; lock: Xapi_stdext_threads.Semaphore.t + ; lock: Semaphore.Counting.t } type server = { diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.ml deleted file mode 100644 index 06621049c91..00000000000 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.ml +++ /dev/null @@ -1,57 +0,0 @@ -(* - * Copyright (C) Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- *) - -type t = {mutable n: int; m: Mutex.t; c: Condition.t} - -let create n = - if n <= 0 then - invalid_arg (Printf.sprintf "Semaphore value must be positive, got %d" n) ; - let m = Mutex.create () and c = Condition.create () in - {n; m; c} - -exception Inconsistent_state of string - -let inconsistent_state fmt = - Printf.ksprintf (fun msg -> raise (Inconsistent_state msg)) fmt - -let acquire s k = - if k <= 0 then - invalid_arg - (Printf.sprintf "Semaphore acquisition requires a positive value, got %d" - k - ) ; - Mutex.lock s.m ; - while s.n < k do - Condition.wait s.c s.m - done ; - if not (s.n >= k) then - inconsistent_state "Semaphore value cannot be smaller than %d, got %d" k s.n ; - s.n <- s.n - k ; - Condition.signal s.c ; - Mutex.unlock s.m - -let release s k = - if k <= 0 then - invalid_arg - (Printf.sprintf "Semaphore release requires a positive value, got %d" k) ; - Mutex.lock s.m ; - s.n <- s.n + k ; - Condition.signal s.c ; - Mutex.unlock s.m - -let execute_with_weight s k f = - acquire s k ; - Xapi_stdext_pervasives.Pervasiveext.finally f (fun () -> release s k) - -let execute s f = execute_with_weight s 1 f diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.mli deleted file mode 100644 index 207e612032d..00000000000 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/semaphore.mli +++ /dev/null @@ -1,40 +0,0 @@ -(* - * Copyright (C) Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser General Public License for more details. - *) - -type t - -exception Inconsistent_state of string - -val create : int -> t -(** [create n] create a semaphore with initial value [n] (a positive integer). - Raise {!Invalid_argument} if [n] <= 0 *) - -val acquire : t -> int -> unit -(** [acquire k s] block until the semaphore value is >= [k] (a positive integer), - then atomically decrement the semaphore value by [k]. - Raise {!Invalid_argument} if [k] <= 0 *) - -val release : t -> int -> unit -(** [release k s] atomically increment the semaphore value by [k] (a positive - integer). - Raise {!Invalid_argument} if [k] <= 0 *) - -val execute_with_weight : t -> int -> (unit -> 'a) -> 'a -(** [execute_with_weight s k f] {!acquire} the semaphore with [k], - then run [f ()], and finally {!release} the semaphore with the same value [k] - (even in case of failure in the execution of [f]). - Return the value of [f ()] or re-raise the exception if any. *) - -val execute : t -> (unit -> 'a) -> 'a -(** [execute s f] same as [{execute_with_weight} s 1 f] *) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml index 1ca5e916ef4..311d985ca69 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml @@ -14,11 +14,20 @@ module M = Mutex +let finally = Xapi_stdext_pervasives.Pervasiveext.finally + module Mutex = struct (** execute the function f with the mutex hold *) let execute lock f = Mutex.lock lock ; - Xapi_stdext_pervasives.Pervasiveext.finally f (fun () -> Mutex.unlock lock) + finally f (fun () -> Mutex.unlock lock) +end + +module Semaphore = struct + let execute s f = + let module Semaphore = Semaphore.Counting in + Semaphore.acquire s ; + finally f (fun () -> Semaphore.release s) end (** Parallel List.iter. 
Remembers all exceptions and returns an association list mapping input x to an exception. @@ -60,7 +69,6 @@ module Delay = struct exception Pre_signalled let wait (x : t) (seconds : float) = - let finally = Xapi_stdext_pervasives.Pervasiveext.finally in let to_close = ref [] in let close' fd = if List.mem fd !to_close then Unix.close fd ; diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli index 057aedfa700..b5edcff21b8 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli @@ -15,6 +15,10 @@ module Mutex : sig val execute : Mutex.t -> (unit -> 'a) -> 'a end +module Semaphore : sig + val execute : Semaphore.Counting.t -> (unit -> 'a) -> 'a +end + val thread_iter_all_exns : ('a -> unit) -> 'a list -> ('a * exn) list val thread_iter : ('a -> unit) -> 'a list -> unit diff --git a/ocaml/networkd/lib/network_utils.ml b/ocaml/networkd/lib/network_utils.ml index 39417cf1177..4a473b29579 100644 --- a/ocaml/networkd/lib/network_utils.ml +++ b/ocaml/networkd/lib/network_utils.ml @@ -1197,12 +1197,13 @@ module Ovs = struct val appctl : ?log:bool -> string list -> string end = struct - module Semaphore = Xapi_stdext_threads.Semaphore + module Semaphore = Semaphore.Counting - let s = Semaphore.create 5 + let s = Semaphore.make 5 let vsctl ?log args = - Semaphore.execute s (fun () -> + let execute = Xapi_stdext_threads.Threadext.Semaphore.execute in + execute s (fun () -> call_script ~on_error:error_handler ?log ovs_vsctl ("--timeout=20" :: args) ) diff --git a/ocaml/xapi-aux/throttle.ml b/ocaml/xapi-aux/throttle.ml index 7be2ac9bd48..a9dacf7f164 100644 --- a/ocaml/xapi-aux/throttle.ml +++ b/ocaml/xapi-aux/throttle.ml @@ -17,10 +17,12 @@ module type SIZE = sig end module Make (Size : SIZE) = struct - module Semaphore = Xapi_stdext_threads.Semaphore + module Semaphore = Semaphore.Counting let with_lock = 
Xapi_stdext_threads.Threadext.Mutex.execute + let execute = Xapi_stdext_threads.Threadext.Semaphore.execute + let semaphore = ref None let m = Mutex.create () @@ -29,11 +31,11 @@ module Make (Size : SIZE) = struct with_lock m @@ fun () -> match !semaphore with | None -> - let result = Semaphore.create (Size.n ()) in + let result = Semaphore.make (Size.n ()) in semaphore := Some result ; result | Some s -> s - let execute f = Semaphore.execute (get_semaphore ()) f + let execute f = execute (get_semaphore ()) f end diff --git a/ocaml/xapi/xapi_sr.ml b/ocaml/xapi/xapi_sr.ml index d572660e72d..7a83493b2de 100644 --- a/ocaml/xapi/xapi_sr.ml +++ b/ocaml/xapi/xapi_sr.ml @@ -20,7 +20,6 @@ module Rrdd = Rrd_client.Client let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute module Listext = Xapi_stdext_std.Listext -module Semaphore = Xapi_stdext_threads.Semaphore module Unixext = Xapi_stdext_unix.Unixext let finally = Xapi_stdext_pervasives.Pervasiveext.finally From 395d5ad6ebc75353cab2f35c511d5b7ed0ce7b9f Mon Sep 17 00:00:00 2001 From: Changlei Li Date: Tue, 29 Oct 2024 18:01:40 +0800 Subject: [PATCH 040/121] CA-400560: Fix version segment division error For example "1.2.3a" will be divided to [Int 1; Int 2; Str 3a] and "1.2.3" is divided to [Int 1; Int 2; Int 3]. It leads to "1.2.3" > "1.2.3a" which is incorrect. After fix, "1.2.3a" will be divided to [Int 1; Int 2; Int 3; Str a], then we can get the right compare result. 
Signed-off-by: Changlei Li --- ocaml/tests/test_rpm.ml | 6 +-- ocaml/xapi/rpm.ml | 95 ++++++++++++++++++++++------------------- 2 files changed, 55 insertions(+), 46 deletions(-) diff --git a/ocaml/tests/test_rpm.ml b/ocaml/tests/test_rpm.ml index da47d9a0ce8..5b80215521d 100644 --- a/ocaml/tests/test_rpm.ml +++ b/ocaml/tests/test_rpm.ml @@ -163,14 +163,14 @@ module PkgCompareVersionStringsTest = Generic.MakeStateless (struct ; (("1.0", "1.a"), ">") ; (("2.50", "2.5"), ">") ; (("XS3", "xs2"), "<") - ; (("1.2.3", "1.2.3a"), ">") + ; (("1.2.3", "1.2.3a"), "<") ; (("xs4", "xs.4"), "=") ; (("2a", "2.0"), "<") ; (("2a", "2b"), "<") ; (("1.0", "1.xs2"), ">") ; (("1.0_xs", "1.0.xs"), "=") - ; (("1.0x3", "1.0x04"), ">") - ; (("1.0O3", "1.0O04"), ">") + ; (("1.0x3", "1.0x04"), "<") + ; (("1.0O3", "1.0O04"), "<") ] end) diff --git a/ocaml/xapi/rpm.ml b/ocaml/xapi/rpm.ml index dc0838b9ef1..18d4fa627b9 100644 --- a/ocaml/xapi/rpm.ml +++ b/ocaml/xapi/rpm.ml @@ -52,10 +52,24 @@ module Pkg = struct type order = LT | EQ | GT - type segment_of_version = Int of int | Str of string + type version_segment = Int of int | Str of string let string_of_order = function LT -> "<" | EQ -> "=" | GT -> ">" + let order_of_int = function 0 -> EQ | r when r > 0 -> GT | _ -> LT + + let version_segment_of_string s = + let is_all_number str = + let r = Re.Posix.compile_pat {|^[0-9]+$|} in + Re.execp r str + in + match s with + | _ when is_all_number s -> ( + try Int (int_of_string s) with _ -> Str s + ) + | _ -> + Str s + let error_msg = Printf.sprintf "Failed to parse '%s'" let parse_epoch_version_release epoch_ver_rel = @@ -157,9 +171,40 @@ module Pkg = struct | None, None -> EQ + let compare_version_segment s1 s2 = + match (s1, s2) with + | Int i1, Int i2 -> + Int.compare i1 i2 |> order_of_int + | Str s1, Str s2 -> + String.compare s1 s2 |> order_of_int + | Int _, Str _ -> + GT + | Str _, Int _ -> + LT + + let split_version_string s = + let r = Re.Posix.compile_pat {|([0-9]+|[a-zA-Z]+|~)|} in + 
let len = String.length s in + let rec aux acc pos = + if pos >= len then + List.rev acc + else + match Re.exec_opt ~pos r s with + | Some groups -> + let matched = Re.Group.get groups 0 in + let next_pos = Re.Group.stop groups 0 in + aux (matched :: acc) next_pos + | None -> + List.rev acc + in + aux [] 0 + + let normalize v = split_version_string v |> List.map version_segment_of_string + let compare_version_strings s1 s2 = (* Compare versions or releases of RPM packages - * I.E. for "libpath-utils-0.2.1-29.el7.x86_64" and "libpath-utils-0.2.1a-30.el7.x86_64", + * I.E. for "libpath-utils-0.2.1-29.el7.x86_64" and + * "libpath-utils-0.2.1a-30.el7.x86_64", * this function compares: * versions between "0.2.1" and "0.2.1a", or * releases between "29.el7" and "30.el7". @@ -180,50 +225,14 @@ module Pkg = struct * "1.0" ">" "1.xs2" * "1.0_xs" "=" "1.0.xs" *) - let normalize v = - let split_letters_and_numbers s = - let r = Re.Posix.compile_pat {|^([^0-9]+)([0-9]+)$|} in - match Re.exec_opt r s with - | Some groups -> - [Re.Group.get groups 1; Re.Group.get groups 2] - | None -> - [s] - in - let number = Re.Posix.compile_pat "^[0-9]+$" in - v - |> Astring.String.cuts ~sep:"." 
- |> List.concat_map (fun s -> Astring.String.cuts ~sep:"_" s) - |> List.concat_map (fun s -> split_letters_and_numbers s) - |> List.map (fun s -> - if Re.execp number s then - match int_of_string s with i -> Int i | exception _ -> Str s - else - Str s - ) - in let rec compare_segments l1 l2 = match (l1, l2) with | c1 :: t1, c2 :: t2 -> ( - match (c1, c2) with - | Int s1, Int s2 -> - if s1 > s2 then - GT - else if s1 = s2 then - compare_segments t1 t2 - else - LT - | Int _, Str _ -> - GT - | Str _, Int _ -> - LT - | Str s1, Str s2 -> - let r = String.compare s1 s2 in - if r < 0 then - LT - else if r > 0 then - GT - else - compare_segments t1 t2 + match compare_version_segment c1 c2 with + | EQ -> + compare_segments t1 t2 + | r -> + r ) | _ :: _, [] -> GT From 5609e49cad79a892350aff5fba7775cf672c953b Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 30 Oct 2024 15:15:26 +0000 Subject: [PATCH 041/121] Do not include xapi-clusterd.service in toolstack.target This service is not enabled and started by default, but used on-demand whenever it is needed for clustering. The current Wants= option in the toolstack.target is causing xapi-clusterd.service to be started by xe-toolstack-restart even if it is not enabled. The fix is to replace Wants= in toolstack.target with WantedBy= in xapi-clusterd.service, as the latter only installs the dependency when enabling the service. 
Signed-off-by: Rob Hoes --- scripts/toolstack.target | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/toolstack.target b/scripts/toolstack.target index c4019a4d232..c49701c2850 100644 --- a/scripts/toolstack.target +++ b/scripts/toolstack.target @@ -20,7 +20,6 @@ Wants=xcp-networkd.service Wants=xenopsd-xc.service Wants=squeezed.service Wants=xapi-storage-script.service -Wants=xapi-clusterd.service Wants=varstored-guard.service [Install] From c0482fefd643494d7bfa8fd4702352fb17c1a46e Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Wed, 30 Oct 2024 17:28:18 +0000 Subject: [PATCH 042/121] CA-401324: Update pvsproxy socket location A previous commit changed the socket location to /run/pvsproxy but this is problematic because the pvsproxy daemon runs as a deprivileged user and cannot create the socket. Instead, update the path to a location that the daemon has permission to create. Add a fallback to the original path to cope with older pvsproxy daemons. This fallback can be removed in the future. 
Co-developed-by: Pau Ruiz Safont Signed-off-by: Ross Lagerwall --- ocaml/networkd/bin/network_server.ml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/ocaml/networkd/bin/network_server.ml b/ocaml/networkd/bin/network_server.ml index 289ef665932..b398ca93b8c 100644 --- a/ocaml/networkd/bin/network_server.ml +++ b/ocaml/networkd/bin/network_server.ml @@ -1474,10 +1474,21 @@ end module PVS_proxy = struct open S.PVS_proxy - let path = ref "/run/pvsproxy" + let path = ref "" + + let depriv_path = "/run/pvsproxy-state/socket" + + let legacy_path = "/opt/citrix/pvsproxy/socket/pvsproxy" + + let default_path () = + if Sys.file_exists depriv_path then + depriv_path + else + legacy_path let do_call call = - try Jsonrpc_client.with_rpc ~path:!path ~call () + let p = match !path with "" -> default_path () | path -> path in + try Jsonrpc_client.with_rpc ~path:p ~call () with e -> error "Error when calling PVS proxy: %s" (Printexc.to_string e) ; raise (Network_error PVS_proxy_connection_error) From a6187d53b7ce8b57c3304daf7961511c90f6d107 Mon Sep 17 00:00:00 2001 From: Changlei Li Date: Wed, 30 Oct 2024 19:34:09 +0800 Subject: [PATCH 043/121] CA-400560: Support tilde in RPM version/release comparison Tilde `~` used in RPM version stands for pre-release. So version with `~` is earlier than the same version without `~`. For example: 1.2.3~beta < 1.2.3 1.xs8 > 1.xs8~2_1 Signed-off-by: Changlei Li --- ocaml/tests/test_rpm.ml | 22 +++++++++++++++ ocaml/xapi/rpm.ml | 61 ++++++++++++++++++++--------------------- 2 files changed, 52 insertions(+), 31 deletions(-) diff --git a/ocaml/tests/test_rpm.ml b/ocaml/tests/test_rpm.ml index 5b80215521d..983d9b7398e 100644 --- a/ocaml/tests/test_rpm.ml +++ b/ocaml/tests/test_rpm.ml @@ -130,6 +130,19 @@ module PkgOfFullnameTest = Generic.MakeStateless (struct } ) ) + ; ( Io.Line "libpath-utils-2:0.2.1~rc1-29.xs8~2_1.x86_64" + , Ok + (Some + Pkg. 
+ { + name= "libpath-utils" + ; epoch= Some 2 + ; version= "0.2.1~rc1" + ; release= "29.xs8~2_1" + ; arch= "x86_64" + } + ) + ) ; (Io.Line "libpath-utils-:0.2.1-29.el7.x86_64", Ok None) ; (Io.Line "libpath-utils-2:0.2.1-29.el7x86_64", Ok None) ; (* all RPM packages installed by default *) @@ -171,6 +184,15 @@ module PkgCompareVersionStringsTest = Generic.MakeStateless (struct ; (("1.0_xs", "1.0.xs"), "=") ; (("1.0x3", "1.0x04"), "<") ; (("1.0O3", "1.0O04"), "<") + ; (("1.2.3", "1.2.3~rc1"), ">") + ; (("1.2.3~rc1", "1.2.3~rc2"), "<") + ; (("1.2.3~rc1", "1.2.3~rc1"), "=") + ; (("1.2.3~rc1", "1.2.3~rc1.1"), "<") + ; (("1.2.3~rc1.1", "1.2.3~rc1.2"), "<") + ; (("1.2.3~rc1.1", "1.2.3~rc1_1"), "=") + ; (("1.2.3.xs8", "1.2.3.xs8~2_1"), ">") + ; (("1.2.3.xs8~2_1", "1.2.3.xs8~2_1~beta"), ">") + ; (("1.2.3.xs8~", "1.2.3.xs8"), "<") ] end) diff --git a/ocaml/xapi/rpm.ml b/ocaml/xapi/rpm.ml index 18d4fa627b9..c9823170ae6 100644 --- a/ocaml/xapi/rpm.ml +++ b/ocaml/xapi/rpm.ml @@ -52,24 +52,12 @@ module Pkg = struct type order = LT | EQ | GT - type version_segment = Int of int | Str of string + type version_segment = Int of int | Str of string | Tilde let string_of_order = function LT -> "<" | EQ -> "=" | GT -> ">" let order_of_int = function 0 -> EQ | r when r > 0 -> GT | _ -> LT - let version_segment_of_string s = - let is_all_number str = - let r = Re.Posix.compile_pat {|^[0-9]+$|} in - Re.execp r str - in - match s with - | _ when is_all_number s -> ( - try Int (int_of_string s) with _ -> Str s - ) - | _ -> - Str s - let error_msg = Printf.sprintf "Failed to parse '%s'" let parse_epoch_version_release epoch_ver_rel = @@ -177,29 +165,30 @@ module Pkg = struct Int.compare i1 i2 |> order_of_int | Str s1, Str s2 -> String.compare s1 s2 |> order_of_int + | Tilde, Tilde -> + EQ | Int _, Str _ -> GT | Str _, Int _ -> LT + | Tilde, _ -> + LT + | _, Tilde -> + GT - let split_version_string s = - let r = Re.Posix.compile_pat {|([0-9]+|[a-zA-Z]+|~)|} in - let len = String.length s in - 
let rec aux acc pos = - if pos >= len then - List.rev acc - else - match Re.exec_opt ~pos r s with - | Some groups -> - let matched = Re.Group.get groups 0 in - let next_pos = Re.Group.stop groups 0 in - aux (matched :: acc) next_pos - | None -> - List.rev acc - in - aux [] 0 + let split_version_string = + let r = Re.Posix.compile_pat {|[a-zA-Z]+|[0-9]+|~|} in + fun s -> s |> Re.all r |> List.map (fun g -> Re.Group.get g 0) - let normalize v = split_version_string v |> List.map version_segment_of_string + let normalize v = + let version_segment_of_string = function + | "~" -> + Tilde + | s -> ( + try Int (int_of_string s) with _ -> Str s + ) + in + v |> split_version_string |> List.map version_segment_of_string let compare_version_strings s1 s2 = (* Compare versions or releases of RPM packages @@ -218,12 +207,18 @@ module Pkg = struct * "1.0" ">" "1.a" * "2.50" ">" "2.5" * "XS3" "<" "xs2" - * "1.2.3" ">" "1.2.3a" + * "1.2.3" "<" "1.2.3a" * "xs4" "=" "xs.4" * "2a" "<" "2.0" * "2a" "<" "2b" * "1.0" ">" "1.xs2" * "1.0_xs" "=" "1.0.xs" + * "1.xs8" ">" "1.xs8~2_1" + * "1.2.3" ">" "1.2.3~beta" + * Some corner cases that don't follow standard RPM versioning conventions + * with tilde: + * "1.2.3~rc1~beta" "<" "1.2.3~rc1" + * "1.2.3~" "<" "1.2.3" *) let rec compare_segments l1 l2 = match (l1, l2) with @@ -234,6 +229,10 @@ module Pkg = struct | r -> r ) + | Tilde :: _, [] -> + LT + | [], Tilde :: _ -> + GT | _ :: _, [] -> GT | [], _ :: _ -> From e376e96fb53565221bccb97dca50887a08777394 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Mon, 4 Nov 2024 09:36:31 +0000 Subject: [PATCH 044/121] CA-401404: Only check previous active service status `systemctl list-dependencies --plain --no-pager` list uncessary xapi-clusterd service when xapi-clusterd-shutdown is started. Here instead of checking the status of all dependencies, we only check the status of previous enabled dependencies. 
This also complies with the behavior before toolstack.target Signed-off-by: Lin Liu --- scripts/xe-toolstack-restart | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/xe-toolstack-restart b/scripts/xe-toolstack-restart index d377ae7acbd..55e82e8f3d8 100755 --- a/scripts/xe-toolstack-restart +++ b/scripts/xe-toolstack-restart @@ -43,7 +43,11 @@ set -e systemctl restart $MPATHALERT toolstack.target # Check the status of toolstack services -for service in $(systemctl list-dependencies --plain --no-pager toolstack.target); do +for service in $(systemctl list-dependencies --plain --no-pager toolstack.target) $MPATHALERT; do + + # Skip check if the service is not enabled + systemctl is-enabled "$service" >/dev/null 2>&1 || continue + # During system bootup, xcp-rrdd-dcmi.service often fail as # `ipmitool dcmi discover` discover nothing, just ignore it for now if [ "$service" == "xcp-rrdd-dcmi.service" ]; then From 0149ee5c2433b8995d8b75f28c2037c9da4cba80 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 29 Oct 2024 15:28:13 +0000 Subject: [PATCH 045/121] CA-401242: avoid long-running, idle connections on VDI.pool_migrate When a VDI.pool_migrate starts at a pool member, a connection between the coordinator and that host remains open for the duration of the migration. This connection is completely idle. If the migration lasts for more than 12 hours, stunnel closes the connection due to inactivity, which cancels the migration. To avoid this use an internal API that uses short-running connection whenever possible to avoid interrupting the migration. 
Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/message_forwarding.ml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 17ff3de0261..cb0b82aa7fd 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -5501,14 +5501,22 @@ functor in (snapshot, host) in + let op session_id rpc = + let sync_op () = + Client.VDI.pool_migrate ~rpc ~session_id ~vdi ~sr ~options + in + let async_op () = + Client.InternalAsync.VDI.pool_migrate ~rpc ~session_id ~vdi ~sr + ~options + in + Helpers.try_internal_async ~__context API.ref_VDI_of_rpc async_op + sync_op + in VM.reserve_memory_for_vm ~__context ~vm ~host ~snapshot ~host_op:`vm_migrate (fun () -> with_sr_andor_vdi ~__context ~vdi:(vdi, `mirror) ~doc:"VDI.mirror" (fun () -> - do_op_on ~local_fn ~__context ~host (fun session_id rpc -> - Client.VDI.pool_migrate ~rpc ~session_id ~vdi ~sr - ~options - ) + do_op_on ~local_fn ~__context ~host op ) ) ) From 3ca39ecb584662320c8185dc84af7649fe2b6fb3 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 30 Oct 2024 14:31:53 +0000 Subject: [PATCH 046/121] xapi_vdi: replaces nested if-elses with monadic Result This allows to reduce most of the indentation in check_operation_error, which returns searches for a single error and returns it. 
Signed-off-by: Pau Ruiz Safont --- ocaml/tests/test_vdi_allowed_operations.ml | 79 +-- ocaml/xapi/xapi_vdi.ml | 782 ++++++++++----------- ocaml/xapi/xapi_vdi.mli | 2 +- 3 files changed, 422 insertions(+), 441 deletions(-) diff --git a/ocaml/tests/test_vdi_allowed_operations.ml b/ocaml/tests/test_vdi_allowed_operations.ml index 579cf7331c8..877b4fa48e5 100644 --- a/ocaml/tests/test_vdi_allowed_operations.ml +++ b/ocaml/tests/test_vdi_allowed_operations.ml @@ -30,9 +30,8 @@ let setup_test ~__context ?sm_fun ?vdi_fun () = (vdi_ref, vdi_record) let check_same_error_code = - let open Alcotest in - let open Alcotest_comparators in - check (option error_code) "Same error code" + Alcotest.(check @@ result unit Alcotest_comparators.error_code) + "Same error code" let run_assert_equal_with_vdi ~__context ?(ha_enabled = false) ?sm_fun ?vdi_fun op exc = @@ -52,7 +51,7 @@ let test_ca98944 () = () ) `update - (Some (Api_errors.vdi_in_use, [])) ; + (Error (Api_errors.vdi_in_use, [])) ; (* Should raise vdi_in_use *) run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi_ref -> @@ -61,7 +60,7 @@ let test_ca98944 () = () ) `update - (Some (Api_errors.vdi_in_use, [])) ; + (Error (Api_errors.vdi_in_use, [])) ; (* Should raise vdi_in_use *) run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi_ref -> @@ -70,7 +69,7 @@ let test_ca98944 () = () ) `update - (Some (Api_errors.vdi_in_use, [])) ; + (Error (Api_errors.vdi_in_use, [])) ; (* Should raise other_operation_in_progress *) run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi_ref -> @@ -79,14 +78,14 @@ let test_ca98944 () = () ) `update - (Some (Api_errors.other_operation_in_progress, [])) ; + (Error (Api_errors.other_operation_in_progress, [])) ; (* Should pass *) run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi_ref -> make_vbd ~vDI:vdi_ref ~__context ~reserved:false ~currently_attached:false ~current_operations:[] () ) - `forget None + `forget (Ok ()) (* VDI.copy should be allowed if all attached VBDs are read-only. 
*) let test_ca101669 () = @@ -97,15 +96,15 @@ let test_ca101669 () = make_vbd ~__context ~vDI:vdi_ref ~currently_attached:true ~mode:`RW () ) `copy - (Some (Api_errors.vdi_in_use, [])) ; + (Error (Api_errors.vdi_in_use, [])) ; (* Attempting to copy a RO-attached VDI should pass. *) run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi_ref -> make_vbd ~__context ~vDI:vdi_ref ~currently_attached:true ~mode:`RO () ) - `copy None ; + `copy (Ok ()) ; (* Attempting to copy an unattached VDI should pass. *) - run_assert_equal_with_vdi ~__context `copy None ; + run_assert_equal_with_vdi ~__context `copy (Ok ()) ; (* Attempting to copy RW- and RO-attached VDIs should fail with VDI_IN_USE. *) run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi_ref -> @@ -115,7 +114,7 @@ let test_ca101669 () = make_vbd ~__context ~vDI:vdi_ref ~currently_attached:true ~mode:`RO () ) `copy - (Some (Api_errors.vdi_in_use, [])) + (Error (Api_errors.vdi_in_use, [])) let test_ca125187 () = let __context = Test_common.make_test_database () in @@ -128,7 +127,7 @@ let test_ca125187 () = Db.VDI.set_current_operations ~__context ~self:vdi_ref ~value:[("mytask", `copy)] ) - `copy None ; + `copy (Ok ()) ; (* A VBD can be plugged to a VDI which is being copied. This is required as * the VBD is plugged after the VDI is marked with the copy operation. *) let _, _ = @@ -162,7 +161,7 @@ let test_ca126097 () = Db.VDI.set_current_operations ~__context ~self:vdi_ref ~value:[("mytask", `copy)] ) - `clone None ; + `clone (Ok ()) ; (* Attempting to snapshot a VDI being copied should be allowed. 
*) run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi_ref -> @@ -173,7 +172,7 @@ let test_ca126097 () = ~value:[("mytask", `copy)] ) `snapshot - (Some (Api_errors.operation_not_allowed, [])) + (Error (Api_errors.operation_not_allowed, [])) (** Tests for the checks related to changed block tracking *) let test_cbt = @@ -189,7 +188,7 @@ let test_cbt = Db.SM.remove_from_features ~__context ~self:sm ~key:"VDI_CONFIG_CBT" ) op - (Some (Api_errors.sr_operation_not_supported, [])) + (Error (Api_errors.sr_operation_not_supported, [])) in let test_sm_feature_check = for_vdi_operations all_cbt_operations test_sm_feature_check @@ -202,7 +201,7 @@ let test_cbt = Db.VDI.set_is_a_snapshot ~__context ~self:vdi ~value:true ) op - (Some (Api_errors.operation_not_allowed, [])) + (Error (Api_errors.operation_not_allowed, [])) ) in let test_cbt_enable_disable_vdi_type_check = @@ -213,21 +212,21 @@ let test_cbt = Db.VDI.set_type ~__context ~self:vdi ~value:`metadata ) op - (Some (Api_errors.vdi_incompatible_type, [])) ; + (Error (Api_errors.vdi_incompatible_type, [])) ; run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi -> Db.VDI.set_type ~__context ~self:vdi ~value:`redo_log ) op - (Some (Api_errors.vdi_incompatible_type, [])) ; + (Error (Api_errors.vdi_incompatible_type, [])) ; run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi -> Db.VDI.set_type ~__context ~self:vdi ~value:`user) - op None ; + op (Ok ()) ; run_assert_equal_with_vdi ~__context ~vdi_fun:(fun vdi -> Db.VDI.set_type ~__context ~self:vdi ~value:`system ) - op None + op (Ok ()) ) in let test_cbt_enable_disable_not_allowed_for_reset_on_boot = @@ -238,7 +237,7 @@ let test_cbt = Db.VDI.set_on_boot ~__context ~self:vdi ~value:`reset ) op - (Some (Api_errors.vdi_on_boot_mode_incompatible_with_operation, [])) + (Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, [])) ) in let test_cbt_enable_disable_can_be_performed_live = @@ -249,7 +248,7 @@ let test_cbt = Test_common.make_vbd ~__context ~vDI:vdi 
~currently_attached:true ~mode:`RW () ) - op None + op (Ok ()) ) in let test_cbt_metadata_vdi_type_check = @@ -273,7 +272,7 @@ let test_cbt = Db.VDI.set_type ~__context ~self:vdi ~value:`cbt_metadata ) op - (Some (Api_errors.vdi_incompatible_type, [])) + (Error (Api_errors.vdi_incompatible_type, [])) ) in let test_vdi_cbt_enabled_check = @@ -288,7 +287,7 @@ let test_cbt = Db.VDI.set_cbt_enabled ~__context ~self:vdi ~value:true ) op - (Some (Api_errors.vdi_cbt_enabled, [])) + (Error (Api_errors.vdi_cbt_enabled, [])) ) in let test_vdi_data_destroy () = @@ -308,31 +307,31 @@ let test_cbt = ) (* ensure VDI.data_destroy works before introducing errors *) [ - ((fun vdi -> pass_data_destroy vdi), None) + ((fun vdi -> pass_data_destroy vdi), Ok ()) ; ( (fun vdi -> pass_data_destroy vdi ; Db.VDI.set_is_a_snapshot ~__context ~self:vdi ~value:false ) - , Some (Api_errors.operation_not_allowed, []) + , Error (Api_errors.operation_not_allowed, []) ) ; ( (fun vdi -> pass_data_destroy vdi ; let sr = Db.VDI.get_SR ~__context ~self:vdi in Db.SR.set_is_tools_sr ~__context ~self:sr ~value:true ) - , Some (Api_errors.sr_operation_not_supported, []) + , Error (Api_errors.sr_operation_not_supported, []) ) ; ( (fun vdi -> pass_data_destroy vdi ; Db.VDI.set_cbt_enabled ~__context ~self:vdi ~value:false ) - , Some (Api_errors.vdi_no_cbt_metadata, []) + , Error (Api_errors.vdi_no_cbt_metadata, []) ) ; ( (fun vdi -> pass_data_destroy vdi ; Db.VDI.set_type ~__context ~self:vdi ~value:`cbt_metadata ) - , None + , Ok () ) ; (* VDI.data_destroy should wait a bit for the VDIs to be unplugged and destroyed, instead of failing immediately in check_operation_error, @@ -346,7 +345,7 @@ let test_cbt = in pass_data_destroy vdi ) - , None + , Ok () ) ; ( (fun vdi -> (* Set up the fields corresponding to a VM snapshot *) @@ -359,7 +358,7 @@ let test_cbt = in pass_data_destroy vdi ) - , None + , Ok () ) ; ( (fun vdi -> let vM = Test_common.make_vm ~__context () in @@ -369,7 +368,7 @@ let test_cbt = in 
pass_data_destroy vdi ) - , None + , Ok () ) ] in @@ -389,7 +388,7 @@ let test_cbt = Db.VDI.set_cbt_enabled ~__context ~self:vDI ~value:true ; Db.VDI.set_is_a_snapshot ~__context ~self:vDI ~value:true ) - , None + , Ok () ) in List.iter @@ -407,17 +406,17 @@ let test_cbt = in () ) - , Some (Api_errors.vdi_in_use, []) + , Error (Api_errors.vdi_in_use, []) ) ; (* positive test checks no errors thrown for cbt_metadata or cbt_enabled VDIs *) ( (fun vDI -> Db.VDI.set_cbt_enabled ~__context ~self:vDI ~value:true ; Db.VDI.set_type ~__context ~self:vDI ~value:`cbt_metadata ) - , None + , Ok () ) ; ( (fun vDI -> Db.VDI.set_cbt_enabled ~__context ~self:vDI ~value:true) - , None + , Ok () ) ; test_cbt_enabled_snapshot_vdi_linked_to_vm_snapshot ~vbd_currently_attached:false @@ -467,14 +466,14 @@ let test_operations_restricted_during_rpu = Db.SM.set_features ~__context ~self:sm ~value:[("VDI_MIRROR", 1L)] ) `mirror - (Some (Api_errors.not_supported_during_upgrade, [])) ; + (Error (Api_errors.not_supported_during_upgrade, [])) ; Db.Pool.remove_from_other_config ~__context ~self:pool ~key:Xapi_globs.rolling_upgrade_in_progress ; run_assert_equal_with_vdi ~__context ~sm_fun:(fun sm -> Db.SM.set_features ~__context ~self:sm ~value:[("VDI_MIRROR", 1L)] ) - `mirror None + `mirror (Ok ()) in let test_update_allowed_operations () = let __context = Mock.make_context_with_new_db "Mock context" in @@ -523,7 +522,7 @@ let test_null_vm = () in (* This shouldn't throw an exception *) - let (_ : _ option) = + let (_ : _ result) = Xapi_vdi.check_operation_error ~__context false vdi_record vdi_ref op in () diff --git a/ocaml/xapi/xapi_vdi.ml b/ocaml/xapi/xapi_vdi.ml index ab8c543a36a..a2978de0b7f 100644 --- a/ocaml/xapi/xapi_vdi.ml +++ b/ocaml/xapi/xapi_vdi.ml @@ -22,49 +22,49 @@ open D (**************************************************************************************) (* current/allowed operations checking *) +let feature_of_op = + let open Smint in + function + | `forget | `copy | 
`force_unlock | `blocked -> + None + | `snapshot -> + Some Vdi_snapshot + | `destroy -> + Some Vdi_delete + | `resize -> + Some Vdi_resize + | `update -> + Some Vdi_update + | `resize_online -> + Some Vdi_resize_online + | `generate_config -> + Some Vdi_generate_config + | `clone -> + Some Vdi_clone + | `mirror -> + Some Vdi_mirror + | `enable_cbt | `disable_cbt | `data_destroy | `list_changed_blocks -> + Some Vdi_configure_cbt + | `set_on_boot -> + Some Vdi_reset_on_boot + let check_sm_feature_error (op : API.vdi_operations) sm_features sr = - let required_sm_feature = - Smint.( - match op with - | `forget | `copy | `force_unlock | `blocked -> - None - | `snapshot -> - Some Vdi_snapshot - | `destroy -> - Some Vdi_delete - | `resize -> - Some Vdi_resize - | `update -> - Some Vdi_update - | `resize_online -> - Some Vdi_resize_online - | `generate_config -> - Some Vdi_generate_config - | `clone -> - Some Vdi_clone - | `mirror -> - Some Vdi_mirror - | `enable_cbt | `disable_cbt | `data_destroy | `list_changed_blocks -> - Some Vdi_configure_cbt - | `set_on_boot -> - Some Vdi_reset_on_boot - ) - in - match required_sm_feature with + match feature_of_op op with | None -> - None + Ok () | Some feature -> if Smint.(has_capability feature sm_features) then - None + Ok () else - Some (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) + Error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) -(** Checks to see if an operation is valid in this state. Returns [Some exception] - if not and [None] if everything is ok. If the [vbd_records] parameter is +(** Checks to see if an operation is valid in this state. Returns [Error exception] + if not and [Ok ()] if everything is ok. If the [vbd_records] parameter is specified, it should contain at least all the VBD records from the database that are linked to this VDI. 
*) let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) ?vbd_records ha_enabled record _ref' op = + let ( let* ) = Result.bind in let _ref = Ref.string_of _ref' in let current_ops = record.Db_actions.vDI_current_operations in let reset_on_boot = record.Db_actions.vDI_on_boot = `reset in @@ -83,14 +83,18 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) 5. HA prevents you from deleting statefiles or metadata volumes 6. During rolling pool upgrade, only operations known by older releases are allowed *) - if - Helpers.rolling_upgrade_in_progress ~__context - && not (List.mem op Xapi_globs.rpu_allowed_vdi_operations) - then - Some (Api_errors.not_supported_during_upgrade, []) - else - (* Don't fail with other_operation_in_progress if VDI mirroring is in progress - * and destroy is called as part of VDI mirroring *) + let* () = + if + Helpers.rolling_upgrade_in_progress ~__context + && not (List.mem op Xapi_globs.rpu_allowed_vdi_operations) + then + Error (Api_errors.not_supported_during_upgrade, []) + else + Ok () + in + let* () = + (* Don't fail with other_operation_in_progress if VDI mirroring is in + progress and destroy is called as part of VDI mirroring *) let is_vdi_mirroring_in_progress = List.exists (fun (_, op) -> op = `mirror) current_ops && op = `destroy in @@ -98,373 +102,351 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) List.exists (fun (_, op) -> op <> `copy) current_ops && not is_vdi_mirroring_in_progress then - Some (Api_errors.other_operation_in_progress, ["VDI"; _ref]) - else (* check to see whether it's a local cd drive *) - let sr = record.Db_actions.vDI_SR in - let sr_type = Db.SR.get_type ~__context ~self:sr in - let is_tools_sr = Db.SR.get_is_tools_sr ~__context ~self:sr in - (* Check to see if any PBDs are attached *) - let open Xapi_database.Db_filter_types in - let pbds_attached = - match pbd_records with - | [] -> - Db.PBD.get_records_where ~__context - ~expr: - (And - ( Eq 
(Field "SR", Literal (Ref.string_of sr)) - , Eq (Field "currently_attached", Literal "true") - ) - ) - | _ -> - List.filter - (fun (_, pbd_record) -> - pbd_record.API.pBD_SR = sr - && pbd_record.API.pBD_currently_attached - ) - pbd_records - in - if pbds_attached = [] && List.mem op [`resize] then - Some (Api_errors.sr_no_pbds, [Ref.string_of sr]) - else - (* check to see whether VBDs exist which are using this VDI *) - - (* Only a 'live' operation can be performed if there are active (even RO) devices *) - let my_active_vbd_records = - match vbd_records with - | None -> - List.map snd - (Db.VBD.get_internal_records_where ~__context - ~expr: - (And - ( Eq (Field "VDI", Literal _ref) - , Or - ( Eq (Field "currently_attached", Literal "true") - , Eq (Field "reserved", Literal "true") - ) - ) - ) - ) - | Some records -> - List.map snd - (List.filter - (fun (_, vbd_record) -> - vbd_record.Db_actions.vBD_VDI = _ref' - && (vbd_record.Db_actions.vBD_currently_attached - || vbd_record.Db_actions.vBD_reserved - ) - ) - records - ) - in - let my_active_rw_vbd_records = - List.filter - (fun vbd -> vbd.Db_actions.vBD_mode = `RW) - my_active_vbd_records - in - (* VBD operations (plug/unplug) (which should be transient) cause us to serialise *) - let my_has_current_operation_vbd_records = - match vbd_records with - | None -> - List.map snd - (Db.VBD.get_internal_records_where ~__context - ~expr: - (And - ( Eq (Field "VDI", Literal _ref) - , Not (Eq (Field "current_operations", Literal "()")) - ) - ) - ) - | Some records -> - List.map snd - (List.filter - (fun (_, vbd_record) -> - vbd_record.Db_actions.vBD_VDI = _ref' - && vbd_record.Db_actions.vBD_current_operations <> [] - ) - records - ) - in - (* If the VBD is currently_attached then some operations can still be performed ie: - VDI.clone (if the VM is suspended we have to have the 'allow_clone_suspended_vm'' flag) - VDI.snapshot; VDI.resize_online; 'blocked' (CP-831) - VDI.data_destroy: it is not allowed on VDIs linked to a 
VM, but the - implementation first waits for the VDI's VBDs to be unplugged and - destroyed, and the checks are performed there. - *) - let operation_can_be_performed_live = - match op with - | `snapshot - | `resize_online - | `blocked - | `clone - | `mirror - | `enable_cbt - | `disable_cbt - | `data_destroy -> - true - | _ -> - false - in - let operation_can_be_performed_with_ro_attach = - operation_can_be_performed_live - || match op with `copy -> true | _ -> false - in - (* NB RO vs RW sharing checks are done in xapi_vbd.ml *) - let blocked_by_attach = - let blocked_by_attach = - if operation_can_be_performed_live then - false - else if operation_can_be_performed_with_ro_attach then - my_active_rw_vbd_records <> [] - else - my_active_vbd_records <> [] - in - let allow_attached_vbds = - (* We use Valid_ref_list.list to ignore exceptions due to invalid references that - could propagate to the message forwarding layer, which calls this - function to check for errors - these exceptions would prevent the - actual XenAPI function from being run. Checks called from the - message forwarding layer should not fail with an exception. *) - let true_for_all_active_vbds f = - Valid_ref_list.for_all f my_active_vbd_records - in - match op with - | `list_changed_blocks -> - let vbd_connected_to_vm_snapshot vbd = - let vm = vbd.Db_actions.vBD_VM in - Db.is_valid_ref __context vm - && Db.VM.get_is_a_snapshot ~__context ~self:vm - in - (* We allow list_changed_blocks on VDIs attached to snapshot VMs, - because VM.checkpoint may set the currently_attached fields of the - snapshot's VBDs to true, and this would block list_changed_blocks. 
*) - true_for_all_active_vbds vbd_connected_to_vm_snapshot - | _ -> - false - in - blocked_by_attach && not allow_attached_vbds - in - if blocked_by_attach then - Some - ( Api_errors.vdi_in_use - , [_ref; Record_util.vdi_operations_to_string op] + Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) + else + Ok () + in + (* check to see whether it's a local cd drive *) + let sr = record.Db_actions.vDI_SR in + let sr_type = Db.SR.get_type ~__context ~self:sr in + let is_tools_sr = Db.SR.get_is_tools_sr ~__context ~self:sr in + (* Check to see if any PBDs are attached *) + let open Xapi_database.Db_filter_types in + let pbds_attached = + match pbd_records with + | [] -> + Db.PBD.get_records_where ~__context + ~expr: + (And + ( Eq (Field "SR", Literal (Ref.string_of sr)) + , Eq (Field "currently_attached", Literal "true") + ) ) - else if - (* data_destroy first waits for all the VBDs to disappear in its - implementation, so it is harmless to allow it when any of the VDI's - VBDs have operations in progress. This ensures that we avoid the retry - mechanism of message forwarding and only use the event loop. 
*) - my_has_current_operation_vbd_records <> [] && op <> `data_destroy - then - Some (Api_errors.other_operation_in_progress, ["VDI"; _ref]) - else - let sm_features = - Xapi_sr_operations.features_of_sr_internal ~__context ~_type:sr_type - in - let sm_feature_error = check_sm_feature_error op sm_features sr in - if sm_feature_error <> None then - sm_feature_error - else - let allowed_for_cbt_metadata_vdi = - match op with - | `clone - | `copy - | `disable_cbt - | `enable_cbt - | `mirror - | `resize - | `resize_online - | `snapshot - | `set_on_boot -> - false - | `blocked - | `data_destroy - | `destroy - | `list_changed_blocks - | `force_unlock - | `forget - | `generate_config - | `update -> - true - in - if - (not allowed_for_cbt_metadata_vdi) - && record.Db_actions.vDI_type = `cbt_metadata - then - Some - ( Api_errors.vdi_incompatible_type - , [_ref; Record_util.vdi_type_to_string `cbt_metadata] - ) - else - let allowed_when_cbt_enabled = - match op with - | `mirror | `set_on_boot -> - false - | `blocked - | `clone - | `copy - | `data_destroy - | `destroy - | `disable_cbt - | `enable_cbt - | `list_changed_blocks - | `force_unlock - | `forget - | `generate_config - | `resize - | `resize_online - | `snapshot - | `update -> - true - in - if - (not allowed_when_cbt_enabled) - && record.Db_actions.vDI_cbt_enabled - then - Some (Api_errors.vdi_cbt_enabled, [_ref]) - else - let check_destroy () = - if sr_type = "udev" then - Some (Api_errors.vdi_is_a_physical_device, [_ref]) - else if is_tools_sr then - Some - (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) - else if List.mem record.Db_actions.vDI_type [`rrd] then - Some (Api_errors.vdi_has_rrds, [_ref]) - else if - ha_enabled - && List.mem record.Db_actions.vDI_type - [`ha_statefile; `redo_log] - then - Some (Api_errors.ha_is_enabled, []) - else if - List.mem record.Db_actions.vDI_type - [`ha_statefile; `metadata] - && Xapi_pool_helpers.ha_enable_in_progress ~__context - then - Some 
(Api_errors.ha_enable_in_progress, []) - else if - List.mem record.Db_actions.vDI_type - [`ha_statefile; `metadata] - && Xapi_pool_helpers.ha_disable_in_progress ~__context - then - Some (Api_errors.ha_disable_in_progress, []) - else - None - in - match op with - | `forget -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type - [`ha_statefile; `redo_log] - then - Some (Api_errors.ha_is_enabled, []) - else if List.mem record.Db_actions.vDI_type [`rrd] then - Some (Api_errors.vdi_has_rrds, [_ref]) - else - None - | `destroy -> - check_destroy () - | `data_destroy -> - if not record.Db_actions.vDI_is_a_snapshot then - Some - ( Api_errors.operation_not_allowed - , ["VDI is not a snapshot: " ^ _ref] - ) - else if not record.Db_actions.vDI_cbt_enabled then - Some (Api_errors.vdi_no_cbt_metadata, [_ref]) - else - check_destroy () - | `resize -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type - [`ha_statefile; `redo_log] - then - Some (Api_errors.ha_is_enabled, []) - else - None - | `resize_online -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type - [`ha_statefile; `redo_log] - then - Some (Api_errors.ha_is_enabled, []) - else - None - | `snapshot when record.Db_actions.vDI_sharable -> - Some (Api_errors.vdi_is_sharable, [_ref]) - | (`snapshot | `copy) when reset_on_boot -> - Some - ( Api_errors.vdi_on_boot_mode_incompatible_with_operation - , [] + | _ -> + List.filter + (fun (_, pbd_record) -> + pbd_record.API.pBD_SR = sr && pbd_record.API.pBD_currently_attached + ) + pbd_records + in + let* () = + if pbds_attached = [] && List.mem op [`resize] then + Error (Api_errors.sr_no_pbds, [Ref.string_of sr]) + else + Ok () + in + + (* check to see whether VBDs exist which are using this VDI *) + + (* Only a 'live' operation can be performed if there are active (even RO) devices *) + let my_active_vbd_records = + match vbd_records with + | None -> + List.map snd + (Db.VBD.get_internal_records_where ~__context + ~expr: + (And + ( Eq (Field "VDI", 
Literal _ref) + , Or + ( Eq (Field "currently_attached", Literal "true") + , Eq (Field "reserved", Literal "true") ) - | `snapshot -> - if List.exists (fun (_, op) -> op = `copy) current_ops then - Some - ( Api_errors.operation_not_allowed - , ["Snapshot operation not allowed during copy."] - ) - else - None - | `copy -> - if - List.mem record.Db_actions.vDI_type - [`ha_statefile; `redo_log] - then - Some - ( Api_errors.operation_not_allowed - , [ - "VDI containing HA statefile or redo log cannot be \ - copied (check the VDI's allowed operations)." - ] - ) - else - None - | `enable_cbt | `disable_cbt -> - if record.Db_actions.vDI_is_a_snapshot then - Some - ( Api_errors.operation_not_allowed - , ["VDI is a snapshot: " ^ _ref] - ) - else if - not (List.mem record.Db_actions.vDI_type [`user; `system]) - then - Some - ( Api_errors.vdi_incompatible_type - , [ - _ref - ; Record_util.vdi_type_to_string - record.Db_actions.vDI_type - ] - ) - else if reset_on_boot then - Some - ( Api_errors.vdi_on_boot_mode_incompatible_with_operation - , [] - ) - else - None - | `mirror - | `clone - | `generate_config - | `force_unlock - | `set_on_boot - | `list_changed_blocks - | `blocked - | `update -> - None + ) + ) + ) + | Some records -> + List.map snd + (List.filter + (fun (_, vbd_record) -> + vbd_record.Db_actions.vBD_VDI = _ref' + && (vbd_record.Db_actions.vBD_currently_attached + || vbd_record.Db_actions.vBD_reserved + ) + ) + records + ) + in + let my_active_rw_vbd_records = + List.filter (fun vbd -> vbd.Db_actions.vBD_mode = `RW) my_active_vbd_records + in + (* VBD operations (plug/unplug) (which should be transient) cause us to serialise *) + let my_has_current_operation_vbd_records = + match vbd_records with + | None -> + List.map snd + (Db.VBD.get_internal_records_where ~__context + ~expr: + (And + ( Eq (Field "VDI", Literal _ref) + , Not (Eq (Field "current_operations", Literal "()")) + ) + ) + ) + | Some records -> + List.map snd + (List.filter + (fun (_, vbd_record) -> + 
vbd_record.Db_actions.vBD_VDI = _ref' + && vbd_record.Db_actions.vBD_current_operations <> [] + ) + records + ) + in + (* If the VBD is currently_attached then some operations can still be + performed ie: VDI.clone (if the VM is suspended we have to have the + 'allow_clone_suspended_vm' flag); VDI.snapshot; VDI.resize_online; + 'blocked' (CP-831); VDI.data_destroy: it is not allowed on VDIs linked + to a VM, but the implementation first waits for the VDI's VBDs to be + unplugged and destroyed, and the checks are performed there. + *) + let operation_can_be_performed_live = + match op with + | `snapshot + | `resize_online + | `blocked + | `clone + | `mirror + | `enable_cbt + | `disable_cbt + | `data_destroy -> + true + | _ -> + false + in + let operation_can_be_performed_with_ro_attach = + operation_can_be_performed_live + || match op with `copy -> true | _ -> false + in + (* NB RO vs RW sharing checks are done in xapi_vbd.ml *) + let blocked_by_attach = + let blocked_by_attach = + if operation_can_be_performed_live then + false + else if operation_can_be_performed_with_ro_attach then + my_active_rw_vbd_records <> [] + else + my_active_vbd_records <> [] + in + let allow_attached_vbds = + (* We use Valid_ref_list.list to ignore exceptions due to invalid + references that could propagate to the message forwarding layer, which + calls this function to check for errors - these exceptions would + prevent the actual XenAPI function from being run. Checks called from + the message forwarding layer should not fail with an exception. 
*) + let true_for_all_active_vbds f = + Valid_ref_list.for_all f my_active_vbd_records + in + match op with + | `list_changed_blocks -> + let vbd_connected_to_vm_snapshot vbd = + let vm = vbd.Db_actions.vBD_VM in + Db.is_valid_ref __context vm + && Db.VM.get_is_a_snapshot ~__context ~self:vm + in + (* We allow list_changed_blocks on VDIs attached to snapshot VMs, + because VM.checkpoint may set the currently_attached fields of the + snapshot's VBDs to true, and this would block list_changed_blocks. *) + true_for_all_active_vbds vbd_connected_to_vm_snapshot + | _ -> + false + in + blocked_by_attach && not allow_attached_vbds + in + let* () = + if blocked_by_attach then + Error + (Api_errors.vdi_in_use, [_ref; Record_util.vdi_operations_to_string op]) + else if + (* data_destroy first waits for all the VBDs to disappear in its + implementation, so it is harmless to allow it when any of the VDI's + VBDs have operations in progress. This ensures that we avoid the retry + mechanism of message forwarding and only use the event loop. 
*) + my_has_current_operation_vbd_records <> [] && op <> `data_destroy + then + Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) + else + Ok () + in + let sm_features = + Xapi_sr_operations.features_of_sr_internal ~__context ~_type:sr_type + in + let* () = check_sm_feature_error op sm_features sr in + let allowed_for_cbt_metadata_vdi = + match op with + | `clone + | `copy + | `disable_cbt + | `enable_cbt + | `mirror + | `resize + | `resize_online + | `snapshot + | `set_on_boot -> + false + | `blocked + | `data_destroy + | `destroy + | `list_changed_blocks + | `force_unlock + | `forget + | `generate_config + | `update -> + true + in + let* () = + if + (not allowed_for_cbt_metadata_vdi) + && record.Db_actions.vDI_type = `cbt_metadata + then + Error + ( Api_errors.vdi_incompatible_type + , [_ref; Record_util.vdi_type_to_string `cbt_metadata] + ) + else + Ok () + in + let allowed_when_cbt_enabled = + match op with + | `mirror | `set_on_boot -> + false + | `blocked + | `clone + | `copy + | `data_destroy + | `destroy + | `disable_cbt + | `enable_cbt + | `list_changed_blocks + | `force_unlock + | `forget + | `generate_config + | `resize + | `resize_online + | `snapshot + | `update -> + true + in + let* () = + if (not allowed_when_cbt_enabled) && record.Db_actions.vDI_cbt_enabled then + Error (Api_errors.vdi_cbt_enabled, [_ref]) + else + Ok () + in + let check_destroy () = + if sr_type = "udev" then + Error (Api_errors.vdi_is_a_physical_device, [_ref]) + else if is_tools_sr then + Error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) + else if List.mem record.Db_actions.vDI_type [`rrd] then + Error (Api_errors.vdi_has_rrds, [_ref]) + else if + ha_enabled + && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + then + Error (Api_errors.ha_is_enabled, []) + else if + List.mem record.Db_actions.vDI_type [`ha_statefile; `metadata] + && Xapi_pool_helpers.ha_enable_in_progress ~__context + then + Error (Api_errors.ha_enable_in_progress, []) 
+ else if + List.mem record.Db_actions.vDI_type [`ha_statefile; `metadata] + && Xapi_pool_helpers.ha_disable_in_progress ~__context + then + Error (Api_errors.ha_disable_in_progress, []) + else + Ok () + in + match op with + | `forget -> + if + ha_enabled + && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + then + Error (Api_errors.ha_is_enabled, []) + else if List.mem record.Db_actions.vDI_type [`rrd] then + Error (Api_errors.vdi_has_rrds, [_ref]) + else + Ok () + | `destroy -> + check_destroy () + | `data_destroy -> + if not record.Db_actions.vDI_is_a_snapshot then + Error + (Api_errors.operation_not_allowed, ["VDI is not a snapshot: " ^ _ref]) + else if not record.Db_actions.vDI_cbt_enabled then + Error (Api_errors.vdi_no_cbt_metadata, [_ref]) + else + check_destroy () + | `resize -> + if + ha_enabled + && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + then + Error (Api_errors.ha_is_enabled, []) + else + Ok () + | `resize_online -> + if + ha_enabled + && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + then + Error (Api_errors.ha_is_enabled, []) + else + Ok () + | `snapshot when record.Db_actions.vDI_sharable -> + Error (Api_errors.vdi_is_sharable, [_ref]) + | (`snapshot | `copy) when reset_on_boot -> + Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, []) + | `snapshot -> + if List.exists (fun (_, op) -> op = `copy) current_ops then + Error + ( Api_errors.operation_not_allowed + , ["Snapshot operation not allowed during copy."] + ) + else + Ok () + | `copy -> + if List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] then + Error + ( Api_errors.operation_not_allowed + , [ + "VDI containing HA statefile or redo log cannot be copied (check \ + the VDI's allowed operations)." 
+ ] + ) + else + Ok () + | `enable_cbt | `disable_cbt -> + if record.Db_actions.vDI_is_a_snapshot then + Error (Api_errors.operation_not_allowed, ["VDI is a snapshot: " ^ _ref]) + else if not (List.mem record.Db_actions.vDI_type [`user; `system]) then + Error + ( Api_errors.vdi_incompatible_type + , [_ref; Record_util.vdi_type_to_string record.Db_actions.vDI_type] + ) + else if reset_on_boot then + Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, []) + else + Ok () + | `mirror + | `clone + | `generate_config + | `force_unlock + | `set_on_boot + | `list_changed_blocks + | `blocked + | `update -> + Ok () let assert_operation_valid ~__context ~self ~(op : API.vdi_operations) = let pool = Helpers.get_pool ~__context in let ha_enabled = Db.Pool.get_ha_enabled ~__context ~self:pool in let all = Db.VDI.get_record_internal ~__context ~self in match check_operation_error ~__context ha_enabled all self op with - | None -> + | Ok () -> () - | Some (a, b) -> + | Error (a, b) -> raise (Api_errors.Server_error (a, b)) let update_allowed_operations_internal ~__context ~self ~sr_records ~pbd_records @@ -501,7 +483,7 @@ let update_allowed_operations_internal ~__context ~self ~sr_records ~pbd_records check_operation_error ~__context ~sr_records ~pbd_records ?vbd_records ha_enabled all self x with - | None -> + | Ok () -> [x] | _ -> [] diff --git a/ocaml/xapi/xapi_vdi.mli b/ocaml/xapi/xapi_vdi.mli index 0731a5f6082..45569a12fde 100644 --- a/ocaml/xapi/xapi_vdi.mli +++ b/ocaml/xapi/xapi_vdi.mli @@ -28,7 +28,7 @@ val check_operation_error : -> Db_actions.vDI_t -> API.ref_VDI -> API.vdi_operations - -> (string * string list) option + -> (unit, string * string list) Result.t (** Checks to see if an operation is valid in this state. Returns Some exception if not and None if everything is ok. 
*) From e40b3fc95601abd99d16e2c405c1878467f446f7 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 31 Oct 2024 14:12:58 +0000 Subject: [PATCH 047/121] datamodel: Add all VDI operations to the SR operations variant There's no seeming reason these were missing, and they need to be added to be able to map the VDI operations to SR ones for better error messages Signed-off-by: Pau Ruiz Safont --- ocaml/idl/datamodel.ml | 7 +++++++ ocaml/idl/datamodel_common.ml | 2 +- ocaml/idl/schematest.ml | 2 +- ocaml/tests/record_util/old_record_util.ml | 15 +++++++++++++++ ocaml/xapi-cli-server/record_util.ml | 14 ++++++++++++++ 5 files changed, 38 insertions(+), 2 deletions(-) diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index 5fb25cd26a0..e21369be258 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -4181,6 +4181,13 @@ module SR = struct , "Exporting a bitmap that shows the changed blocks between two VDIs" ) ; ("vdi_set_on_boot", "Setting the on_boot field of the VDI") + ; ("vdi_blocked", "Blocking other operations for a VDI") + ; ("vdi_copy", "Copying the VDI") + ; ("vdi_force_unlock", "Forcefully unlocking the VDI") + ; ("vdi_forget", "Forgetting about the VDI") + ; ("vdi_generate_config", "Generating the configuration of the VDI") + ; ("vdi_resize_online", "Resizing the VDI online") + ; ("vdi_update", "Refreshing the fields on the VDI") ; ("pbd_create", "Creating a PBD for this SR") ; ("pbd_destroy", "Destroying one of this SR's PBDs") ] diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml index 3fb163cc961..a5fb8bd381a 100644 --- a/ocaml/idl/datamodel_common.ml +++ b/ocaml/idl/datamodel_common.ml @@ -10,7 +10,7 @@ open Datamodel_roles to leave a gap for potential hotfixes needing to increment the schema version.*) let schema_major_vsn = 5 -let schema_minor_vsn = 783 +let schema_minor_vsn = 784 (* Historical schema versions just in case this is useful later *) let rio_schema_major_vsn = 5 diff --git 
a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index 016a90960f3..595289dfd24 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "8fcd8892ec0c7d130b0da44c5fd3990b" +let last_known_schema_hash = "b427bac09aca4eabc9407738a9155326" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/tests/record_util/old_record_util.ml b/ocaml/tests/record_util/old_record_util.ml index c854f27f5aa..855a2b74b7e 100644 --- a/ocaml/tests/record_util/old_record_util.ml +++ b/ocaml/tests/record_util/old_record_util.ml @@ -341,6 +341,21 @@ let sr_operation_to_string : API.storage_operations -> string = function "PBD.create" | `pbd_destroy -> "PBD.destroy" + (* The following ones were added after the file got introduced *) + | `vdi_blocked -> + "VDI.blocked" + | `vdi_copy -> + "VDI.copy" + | `vdi_force_unlock -> + "VDI.force_unlock" + | `vdi_forget -> + "VDI.forget" + | `vdi_generate_config -> + "VDI.generate_config" + | `vdi_resize_online -> + "VDI.resize_online" + | `vdi_update -> + "VDI.update" let vbd_operation_to_string = function | `attach -> diff --git a/ocaml/xapi-cli-server/record_util.ml b/ocaml/xapi-cli-server/record_util.ml index a7a4dd2ec72..d28b6b5f763 100644 --- a/ocaml/xapi-cli-server/record_util.ml +++ b/ocaml/xapi-cli-server/record_util.ml @@ -160,6 +160,20 @@ let sr_operation_to_string : API.storage_operations -> string = function "VDI.data_destroy" | `vdi_list_changed_blocks -> "VDI.list_changed_blocks" + | `vdi_blocked -> + "VDI.blocked" + | `vdi_copy -> + "VDI.copy" + | `vdi_force_unlock -> + "VDI.force_unlock" + | `vdi_forget -> + "VDI.forget" + | `vdi_generate_config -> + "VDI.generate_config" + | `vdi_resize_online -> + "VDI.resize_online" + | `vdi_update -> + "VDI.update" | `pbd_create -> 
"PBD.create" | `pbd_destroy -> From 8b7cfd6f90e80588cb09f9db073695e1fe90f7c6 Mon Sep 17 00:00:00 2001 From: Changlei Li Date: Wed, 6 Nov 2024 16:41:11 +0800 Subject: [PATCH 048/121] CA-401498: Fix test_systemd occasional timeout CI check "Run OCaml tests" on github failed occasionally. The cause is test_systemd timeout. Add sleep 1 between server start and client to fix it. Signed-off-by: Changlei Li --- ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t index bfa73c84c63..e3b19dbaff3 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/test/test_systemd.t @@ -10,6 +10,7 @@ $ ./test_systemd.exe --server & @systemd.socket READY=1 + $ sleep 1 $ ./test_systemd.exe --notify $ wait From 6a666827dc6caae0cffb645f8a8511be8e04ffee Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 5 Nov 2024 17:06:10 +0000 Subject: [PATCH 049/121] CA-399629: make daily-license-check aware of never The license field is not always a valid date, sometimes it contains a special value to signify there's no expiry date. Signed-off-by: Pau Ruiz Safont --- ocaml/license/daily_license_check.ml | 37 +++++++++++-------- .../tests/alerts/test_daily_license_check.ml | 1 + 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/ocaml/license/daily_license_check.ml b/ocaml/license/daily_license_check.ml index 3b6edecbb3e..9a376d0e591 100644 --- a/ocaml/license/daily_license_check.ml +++ b/ocaml/license/daily_license_check.ml @@ -9,28 +9,35 @@ let seconds_per_30_days = 30. *. seconds_per_day let days_to_expiry now expiry = (expiry /. seconds_per_day) -. (now /. 
seconds_per_day) +let get_expiry_date license = + List.assoc_opt "expiry" license + |> Fun.flip Option.bind (fun e -> if e = "never" then None else Some e) + |> Option.map Xapi_stdext_date.Date.of_iso8601 + |> Option.map Xapi_stdext_date.Date.to_unix_time + let get_hosts all_license_params threshold = List.fold_left (fun acc (name_label, license_params) -> - let expiry = List.assoc "expiry" license_params in - let expiry = Xapi_stdext_date.Date.(to_unix_time (of_iso8601 expiry)) in - if expiry < threshold then - name_label :: acc - else - acc + match get_expiry_date license_params with + | Some expiry when expiry < threshold -> + name_label :: acc + | _ -> + acc ) [] all_license_params let check_license now pool_license_state all_license_params = - let expiry = List.assoc "expiry" pool_license_state in - let expiry = Xapi_stdext_date.Date.(to_unix_time (of_iso8601 expiry)) in - let days = days_to_expiry now expiry in - if days <= 0. then - Expired (get_hosts all_license_params now) - else if days <= 30. then - Expiring (get_hosts all_license_params (now +. seconds_per_30_days)) - else - Good + match get_expiry_date pool_license_state with + | Some expiry -> + let days = days_to_expiry now expiry in + if days <= 0. then + Expired (get_hosts all_license_params now) + else if days <= 30. then + Expiring (get_hosts all_license_params (now +. 
seconds_per_30_days)) + else + Good + | None -> + Good let get_info_from_db rpc session_id = let pool = List.hd (XenAPI.Pool.get_all ~rpc ~session_id) in diff --git a/ocaml/tests/alerts/test_daily_license_check.ml b/ocaml/tests/alerts/test_daily_license_check.ml index 067d93288ce..025ad19ef8d 100644 --- a/ocaml/tests/alerts/test_daily_license_check.ml +++ b/ocaml/tests/alerts/test_daily_license_check.ml @@ -47,6 +47,7 @@ let expiry_samples = [ (([("expiry", "20170101T00:00:00Z")], []), Good) ; (([("expiry", "20160701T04:01:00Z")], []), Good) + ; (([("expiry", "never")], []), Good) ; (([("expiry", "20160701T04:00:00Z")], []), Expiring []) ; (([("expiry", "20160616T00:00:00Z")], []), Expiring []) ; (([("expiry", "20160601T04:00:01Z")], []), Expiring []) From b4f90280404abbc28e9920b1b40ab0c880aee8b3 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 30 Sep 2024 17:48:40 +0100 Subject: [PATCH 050/121] license_check: clean up interface Internalize the concept of expiration dates, including "never", and use Date to manage all the dates, instead of using unix time Signed-off-by: Pau Ruiz Safont --- ocaml/tests/test_pool_license.ml | 11 +---------- ocaml/xapi/license_check.ml | 27 +++++++++++++++++---------- ocaml/xapi/license_check.mli | 5 +++-- ocaml/xapi/xapi_pool.ml | 11 +---------- 4 files changed, 22 insertions(+), 32 deletions(-) diff --git a/ocaml/tests/test_pool_license.ml b/ocaml/tests/test_pool_license.ml index aad9a145c11..4e0f528e197 100644 --- a/ocaml/tests/test_pool_license.ml +++ b/ocaml/tests/test_pool_license.ml @@ -198,16 +198,7 @@ module PoolLicenseState = Generic.MakeStateful (struct Xapi_pool_license.get_lowest_edition_with_expiry ~__context ~hosts ~edition_to_int in - let pool_expiry = - match expiry with - | None -> - "never" - | Some date -> - if date = Date.of_unix_time License_check.never then - "never" - else - Date.to_rfc3339 date - in + let pool_expiry = License_check.serialize_expiry expiry in (pool_edition, pool_expiry) (* Tuples of 
(host_license_state list, expected pool license state) *) diff --git a/ocaml/xapi/license_check.ml b/ocaml/xapi/license_check.ml index e6df516f353..1d2a4f65eda 100644 --- a/ocaml/xapi/license_check.ml +++ b/ocaml/xapi/license_check.ml @@ -13,27 +13,34 @@ *) module L = Debug.Make (struct let name = "license" end) -let never, _ = - let start_of_epoch = Unix.gmtime 0. in - Unix.mktime {start_of_epoch with Unix.tm_year= 130} +module Date = Xapi_stdext_date.Date + +let never = Ptime.of_year 2030 |> Option.get |> Date.of_ptime + +let serialize_expiry = function + | None -> + "never" + | Some date when Date.equal date never -> + "never" + | Some date -> + Date.to_rfc3339 date let get_expiry_date ~__context ~host = let license = Db.Host.get_license_params ~__context ~self:host in - if List.mem_assoc "expiry" license then - Some (Xapi_stdext_date.Date.of_iso8601 (List.assoc "expiry" license)) - else - None + List.assoc_opt "expiry" license + |> Fun.flip Option.bind (fun e -> if e = "never" then None else Some e) + |> Option.map Xapi_stdext_date.Date.of_iso8601 let check_expiry ~__context ~host = let expired = match get_expiry_date ~__context ~host with | None -> false (* No expiry date means no expiry :) *) - | Some date -> - Unix.time () > Xapi_stdext_date.Date.to_unix_time date + | Some expiry -> + Xapi_stdext_date.Date.(is_later ~than:expiry (now ())) in if expired then - raise (Api_errors.Server_error (Api_errors.license_expired, [])) + raise Api_errors.(Server_error (license_expired, [])) let vm ~__context _vm = (* Here we check that the license is still valid - this should be the only place where this happens *) diff --git a/ocaml/xapi/license_check.mli b/ocaml/xapi/license_check.mli index 610faaf9e0b..10a5ca6aca6 100644 --- a/ocaml/xapi/license_check.mli +++ b/ocaml/xapi/license_check.mli @@ -16,8 +16,9 @@ * @group Licensing *) -val never : float -(** The expiry date that is considered to be "never". 
*) +val serialize_expiry : Xapi_stdext_date.Date.t option -> string +(** Get the string corresponding with the expiry that can be stored in xapi's + DB *) val get_expiry_date : __context:Context.t -> host:API.ref_host -> Xapi_stdext_date.Date.t option diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 044507bc9c2..acb22cdcfcd 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -3179,16 +3179,7 @@ let get_license_state ~__context ~self:_ = Xapi_pool_license.get_lowest_edition_with_expiry ~__context ~hosts ~edition_to_int in - let pool_expiry = - match expiry with - | None -> - "never" - | Some date -> - if date = Date.of_unix_time License_check.never then - "never" - else - Date.to_rfc3339 date - in + let pool_expiry = License_check.serialize_expiry expiry in [("edition", pool_edition); ("expiry", pool_expiry)] let apply_edition ~__context ~self:_ ~edition = From 365af695d8e3882cba2bc338472bc943ac14597c Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 30 Sep 2024 17:50:17 +0100 Subject: [PATCH 051/121] license_check: update the concept of "never" This now matches the concept of xenserver's licensing daemon, which changed it in the last year. 
Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/license_check.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi/license_check.ml b/ocaml/xapi/license_check.ml index 1d2a4f65eda..f5cb38225da 100644 --- a/ocaml/xapi/license_check.ml +++ b/ocaml/xapi/license_check.ml @@ -15,7 +15,7 @@ module L = Debug.Make (struct let name = "license" end) module Date = Xapi_stdext_date.Date -let never = Ptime.of_year 2030 |> Option.get |> Date.of_ptime +let never = Ptime.of_year 2100 |> Option.get |> Date.of_ptime let serialize_expiry = function | None -> From 7db5e8b9d1bb9d6db22a5c18cc5d6a1b8f68ec4d Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 5 Nov 2024 16:59:06 +0000 Subject: [PATCH 052/121] daily_license_check: Do not use floats for handling time Instead use the Date, Ptime and Ptime.Span Signed-off-by: Pau Ruiz Safont --- ocaml/license/daily_license_check.ml | 46 +++++++++++-------- ocaml/license/daily_license_check_main.ml | 2 +- ocaml/license/dune | 1 + .../tests/alerts/test_daily_license_check.ml | 7 ++- 4 files changed, 31 insertions(+), 25 deletions(-) diff --git a/ocaml/license/daily_license_check.ml b/ocaml/license/daily_license_check.ml index 9a376d0e591..9a84a415dea 100644 --- a/ocaml/license/daily_license_check.ml +++ b/ocaml/license/daily_license_check.ml @@ -1,39 +1,45 @@ module XenAPI = Client.Client +module Date = Xapi_stdext_date.Date type result = Good | Expiring of string list | Expired of string list -let seconds_per_day = 3600. *. 24. +let a_month_after date = + let days_30 = Ptime.Span.unsafe_of_d_ps (30, 0L) in + Date.to_ptime date + |> (fun d -> Ptime.add_span d days_30) + |> Option.fold ~none:date ~some:Date.of_ptime -let seconds_per_30_days = 30. *. 
seconds_per_day +let days_to_expiry ~expiry now = + Ptime.diff (Date.to_ptime expiry) (Date.to_ptime now) |> Ptime.Span.to_d_ps + |> fun (days, picosec) -> + let with_fraction = if days < 0 then Fun.id else fun d -> d + 1 in + if picosec = 0L then days else with_fraction days -let days_to_expiry now expiry = - (expiry /. seconds_per_day) -. (now /. seconds_per_day) - -let get_expiry_date license = - List.assoc_opt "expiry" license +let get_expiry_date pool_license = + List.assoc_opt "expiry" pool_license |> Fun.flip Option.bind (fun e -> if e = "never" then None else Some e) |> Option.map Xapi_stdext_date.Date.of_iso8601 - |> Option.map Xapi_stdext_date.Date.to_unix_time let get_hosts all_license_params threshold = - List.fold_left - (fun acc (name_label, license_params) -> - match get_expiry_date license_params with - | Some expiry when expiry < threshold -> - name_label :: acc - | _ -> - acc + List.filter_map + (fun (name_label, license_params) -> + let ( let* ) = Option.bind in + let* expiry = get_expiry_date license_params in + if Date.is_earlier expiry ~than:threshold then + Some name_label + else + None ) - [] all_license_params + all_license_params let check_license now pool_license_state all_license_params = match get_expiry_date pool_license_state with | Some expiry -> - let days = days_to_expiry now expiry in - if days <= 0. then + let days = days_to_expiry ~expiry now in + if days <= 0 then Expired (get_hosts all_license_params now) - else if days <= 30. then - Expiring (get_hosts all_license_params (now +. 
seconds_per_30_days)) + else if days <= 30 then + Expiring (get_hosts all_license_params (a_month_after now)) else Good | None -> diff --git a/ocaml/license/daily_license_check_main.ml b/ocaml/license/daily_license_check_main.ml index 8a2202e2a5d..58ba7258e1c 100644 --- a/ocaml/license/daily_license_check_main.ml +++ b/ocaml/license/daily_license_check_main.ml @@ -14,7 +14,7 @@ let _ = in Xapi_stdext_pervasives.Pervasiveext.finally (fun () -> - let now = Unix.time () in + let now = Xapi_stdext_date.Date.now () in let pool, pool_license_state, all_license_params = Daily_license_check.get_info_from_db rpc session_id in diff --git a/ocaml/license/dune b/ocaml/license/dune index f37d0695981..942f41733f0 100644 --- a/ocaml/license/dune +++ b/ocaml/license/dune @@ -4,6 +4,7 @@ (modules daily_license_check) (libraries http_lib + ptime xapi-consts xapi-client xapi-types diff --git a/ocaml/tests/alerts/test_daily_license_check.ml b/ocaml/tests/alerts/test_daily_license_check.ml index 025ad19ef8d..47a6fb763a9 100644 --- a/ocaml/tests/alerts/test_daily_license_check.ml +++ b/ocaml/tests/alerts/test_daily_license_check.ml @@ -36,8 +36,7 @@ let expiry = in Alcotest.testable pp_expiry equals -let check_time = - Xapi_stdext_date.Date.(to_unix_time (of_iso8601 "20160601T04:00:00Z")) +let check_time = Xapi_stdext_date.Date.(of_iso8601 "20160601T04:00:00Z") let test_expiry ((pool_license_state, all_license_params), expected) () = let result = check_license check_time pool_license_state all_license_params in @@ -59,7 +58,7 @@ let expiry_samples = ; ("host1", [("expiry", "20160615T00:00:00Z")]) ] ) - , Expiring ["host1"; "host0"] + , Expiring ["host0"; "host1"] ) ; ( ( [("expiry", "20160615T00:00:00Z")] , [ @@ -75,7 +74,7 @@ let expiry_samples = ; ("host1", [("expiry", "20150601T00:00:00Z")]) ] ) - , Expired ["host1"; "host0"] + , Expired ["host0"; "host1"] ) ; ( ( [("expiry", "20160101T00:00:00Z")] , [ From 77e3a3e820c19ee5722d0c09f9972f9f762c296f Mon Sep 17 00:00:00 2001 From: 
Vincent Liu Date: Mon, 21 Oct 2024 19:08:14 +0100 Subject: [PATCH 053/121] CA-400060: Introduce new field for sm class `host_pending_features` represents the features that are available on some of the hosts in the pool, but not all of them. Note the way this field is initialised in the `SM.create` code means that it will only contain new features that appear during upgrade. This means a feature that is added into `SM.features` during creation will stay there even if it is not available on all hosts. But we should not end up in this situation in the first place. Also change the meaning of `Sm.features` to be pool-wide. Signed-off-by: Vincent Liu --- ocaml/idl/datamodel.ml | 10 ++++++++++ ocaml/idl/datamodel_common.ml | 2 +- ocaml/idl/datamodel_lifecycle.ml | 2 ++ ocaml/idl/schematest.ml | 2 +- ocaml/sdk-gen/csharp/gen_csharp_binding.ml | 4 ++++ ocaml/tests/common/test_common.ml | 9 +++++---- 6 files changed, 23 insertions(+), 6 deletions(-) diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index e21369be258..83d5d1740c3 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -5001,11 +5001,21 @@ module SM = struct , "capabilities of the SM plugin, with capability version \ numbers" ) + ; ( Changed + , "24.37.0" + , "features are now pool-wide, instead of what is available on \ + the coordinator sm" + ) ] ~ty:(Map (String, Int)) "features" "capabilities of the SM plugin, with capability version numbers" ~default_value:(Some (VMap [])) + ; field ~in_oss_since:None ~qualifier:DynamicRO ~lifecycle:[] + ~ty:(Map (Ref _host, Set String)) + ~internal_only:true "host_pending_features" + "SM features that are waiting to be declared per host." 
+ ~default_value:(Some (VMap [])) ; field ~lifecycle:[(Published, rel_miami, "additional configuration")] ~default_value:(Some (VMap [])) diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml index a5fb8bd381a..80c5076fef7 100644 --- a/ocaml/idl/datamodel_common.ml +++ b/ocaml/idl/datamodel_common.ml @@ -10,7 +10,7 @@ open Datamodel_roles to leave a gap for potential hotfixes needing to increment the schema version.*) let schema_major_vsn = 5 -let schema_minor_vsn = 784 +let schema_minor_vsn = 785 (* Historical schema versions just in case this is useful later *) let rio_schema_major_vsn = 5 diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 60e46afb038..fb728685a55 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -51,6 +51,8 @@ let prototyped_of_field = function Some "22.26.0" | "VTPM", "persistence_backend" -> Some "22.26.0" + | "SM", "host_pending_features" -> + Some "24.36.0-next" | "host", "last_update_hash" -> Some "24.10.0" | "host", "pending_guidances_full" -> diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index 595289dfd24..2c4a87453ba 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "b427bac09aca4eabc9407738a9155326" +let last_known_schema_hash = "18df8c33434e3df1982e11ec55d1f3f8" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/sdk-gen/csharp/gen_csharp_binding.ml b/ocaml/sdk-gen/csharp/gen_csharp_binding.ml index bbf3360c897..c9112b680e3 100644 --- a/ocaml/sdk-gen/csharp/gen_csharp_binding.ml +++ b/ocaml/sdk-gen/csharp/gen_csharp_binding.ml @@ -1184,6 +1184,10 @@ and json_serialization_attr fr = (exposed_class_name v) | Map (String, String) -> sprintf "\n 
[JsonConverter(typeof(StringStringMapConverter))]" + | Map (Ref u, Set String) -> + sprintf + "\n [JsonConverer(typeof(XenRefStringSetMapConverter<%s>))]" + (exposed_class_name u) | Map (Ref _, _) | Map (_, Ref _) -> failwith (sprintf "Need converter for %s" fr.field_name) | _ -> diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index 7908eb4e3ff..297a68398ca 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -342,12 +342,13 @@ let default_sm_features = let make_sm ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ()) ?(_type = "sm") ?(name_label = "") ?(name_description = "") ?(vendor = "") ?(copyright = "") ?(version = "") ?(required_api_version = "") - ?(capabilities = []) ?(features = default_sm_features) ?(configuration = []) - ?(other_config = []) ?(driver_filename = "/dev/null") - ?(required_cluster_stack = []) () = + ?(capabilities = []) ?(features = default_sm_features) + ?(host_pending_features = []) ?(configuration = []) ?(other_config = []) + ?(driver_filename = "/dev/null") ?(required_cluster_stack = []) () = Db.SM.create ~__context ~ref ~uuid ~_type ~name_label ~name_description ~vendor ~copyright ~version ~required_api_version ~capabilities ~features - ~configuration ~other_config ~driver_filename ~required_cluster_stack ; + ~host_pending_features ~configuration ~other_config ~driver_filename + ~required_cluster_stack ; ref let make_sr ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ()) From 7f14bfcc2ee6ed583330263ae47dc87a7ba665ea Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 21 Oct 2024 19:10:44 +0100 Subject: [PATCH 054/121] CA-400060: Sm feature intersection NEW Sm features that are found during an upgrde will now only be available when they are available on all of the hosts. Add logic to keep track of features that are only availabe on some of the hosts in the pool, and declare them in `Sm.feature` only when all of the hosts have declared this. 
Also move `Storage_access.on_xapi_start` to `dbsync_slave` as this needs to be run on all hosts for each sm to get a chance to say what features they have. Signed-off-by: Vincent Liu --- ocaml/tests/test_sm_features.ml | 42 +++++++++++++++++++ ocaml/xapi/dbsync_master.ml | 1 - ocaml/xapi/dbsync_slave.ml | 3 ++ ocaml/xapi/smint.ml | 17 ++++++++ ocaml/xapi/xapi_globs.ml | 2 + ocaml/xapi/xapi_sm.ml | 71 +++++++++++++++++++++++++++++++-- 6 files changed, 131 insertions(+), 5 deletions(-) diff --git a/ocaml/tests/test_sm_features.ml b/ocaml/tests/test_sm_features.ml index a78de4a54a7..091d58d4f6e 100644 --- a/ocaml/tests/test_sm_features.ml +++ b/ocaml/tests/test_sm_features.ml @@ -160,6 +160,21 @@ let test_sequences = } ] +let test_intersection_sequences = + ( { + raw= ["VDI_MIRROR"] + ; smapiv1_features= [(Vdi_mirror, 1L)] + ; smapiv2_features= ["VDI_MIRROR/1"] + ; sm= {capabilities= ["VDI_MIRROR"]; features= [("VDI_MIRROR", 1L)]} + } + , { + raw= ["VDI_MIRROR"] + ; smapiv1_features= [(Vdi_mirror, 2L)] + ; smapiv2_features= ["VDI_MIRROR/2"] + ; sm= {capabilities= ["VDI_MIRROR"]; features= [("VDI_MIRROR", 1L)]} + } + ) + module ParseSMAPIv1Features = Generic.MakeStateless (struct module Io = struct type input_t = string list @@ -249,6 +264,32 @@ module CreateSMObject = Generic.MakeStateful (struct ) end) +module CompatSMFeatures = Generic.MakeStateless (struct + module Io = struct + type input_t = (string * string) list + + type output_t = string list + + let string_of_input_t = Test_printers.(list (fun (x, y) -> x ^ "," ^ y)) + + let string_of_output_t = Test_printers.(list Fun.id) + end + + let transform l = + List.split l |> fun (x, y) -> + (Smint.parse_string_int64_features x, Smint.parse_string_int64_features y) + |> fun (x, y) -> Smint.compat_features x y |> List.map Smint.unparse_feature + + let tests = + let r1, r2 = test_intersection_sequences in + `QuickAndAutoDocumented + [ + ( List.combine r1.smapiv2_features r2.smapiv2_features + , r1.smapiv2_features + ) 
+ ] +end) + let tests = List.map (fun (s, t) -> (Format.sprintf "sm_features_%s" s, t)) @@ -256,4 +297,5 @@ let tests = ("parse_smapiv1_features", ParseSMAPIv1Features.tests) ; ("create_smapiv2_features", CreateSMAPIv2Features.tests) ; ("create_sm_object", CreateSMObject.tests) + ; ("compat_sm_features", CompatSMFeatures.tests) ] diff --git a/ocaml/xapi/dbsync_master.ml b/ocaml/xapi/dbsync_master.ml index aad7434dc02..cac05f37e88 100644 --- a/ocaml/xapi/dbsync_master.ml +++ b/ocaml/xapi/dbsync_master.ml @@ -373,7 +373,6 @@ let update_env __context = in the db for cancelling *) Cancel_tasks.cancel_tasks_on_host ~__context ~host_opt:None ; (* Update the SM plugin table *) - Storage_access.on_xapi_start ~__context ; if !Xapi_globs.create_tools_sr then create_tools_sr_noexn __context ; ensure_vm_metrics_records_exist_noexn __context ; diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 3b90a3a05c3..942d3081071 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -362,6 +362,9 @@ let update_env __context sync_keys = switched_sync Xapi_globs.sync_refresh_localhost_info (fun () -> refresh_localhost_info ~__context info ) ; + switched_sync Xapi_globs.sync_sm_records (fun () -> + Storage_access.on_xapi_start ~__context + ) ; switched_sync Xapi_globs.sync_local_vdi_activations (fun () -> Storage_access.refresh_local_vdi_activations ~__context ) ; diff --git a/ocaml/xapi/smint.ml b/ocaml/xapi/smint.ml index 25019a18294..8797e0d7cf6 100644 --- a/ocaml/xapi/smint.ml +++ b/ocaml/xapi/smint.ml @@ -110,6 +110,8 @@ let capability_of_feature : feature -> capability = fst let known_features = List.map fst string_to_capability_table +let unparse_feature (f, v) = f ^ "/" ^ Int64.to_string v + let parse_string_int64_features features = let scan feature = match String.split_on_char '/' feature with @@ -134,6 +136,21 @@ let parse_string_int64_features features = |> List.filter_map scan |> List.sort_uniq (fun (x, _) (y, _) -> compare x y) +(** 
[compat_features features1 features2] finds the compatible features in the input +features lists. We assume features backwards compatible, i.e. if there are FOO/1 and + FOO/2 are present, then we assume they can both do FOO/1*) +let compat_features features1 features2 = + let features2 = List.to_seq features2 |> Hashtbl.of_seq in + List.filter_map + (fun (f1, v1) -> + match Hashtbl.find_opt features2 f1 with + | Some v2 -> + Some (f1, Int64.min v1 v2) + | None -> + None + ) + features1 + let parse_capability_int64_features strings = List.map (function c, v -> (List.assoc c string_to_capability_table, v)) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 9a461a4e7bb..efdcabfbdb6 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -340,6 +340,8 @@ let sync_switch_off = "nosync" (* dbsync_slave *) let sync_local_vdi_activations = "sync_local_vdi_activations" +let sync_sm_records = "sync_sm_records" + let sync_create_localhost = "sync_create_localhost" let sync_set_cache_sr = "sync_set_cache_sr" diff --git a/ocaml/xapi/xapi_sm.ml b/ocaml/xapi/xapi_sm.ml index ba3d7c8242a..9badc179c06 100644 --- a/ocaml/xapi/xapi_sm.ml +++ b/ocaml/xapi/xapi_sm.ml @@ -18,6 +18,8 @@ (* The SMAPIv1 plugins are a static set in the filesystem. The SMAPIv2 plugins are a dynamic set hosted in driver domains. 
*) +module Listext = Xapi_stdext_std.Listext + let finally = Xapi_stdext_pervasives.Pervasiveext.finally (* We treat versions as '.'-separated integer lists under the usual @@ -36,7 +38,7 @@ let create_from_query_result ~__context q = if String.lowercase_ascii q.driver <> "storage_access" then ( let features = Smint.parse_string_int64_features q.features in let capabilities = List.map fst features in - info "Registering SM plugin %s (version %s)" + info "%s Registering SM plugin %s (version %s)" __FUNCTION__ (String.lowercase_ascii q.driver) q.version ; Db.SM.create ~__context ~ref:r ~uuid:u @@ -44,19 +46,80 @@ let create_from_query_result ~__context q = ~name_label:q.name ~name_description:q.description ~vendor:q.vendor ~copyright:q.copyright ~version:q.version ~required_api_version:q.required_api_version ~capabilities ~features - ~configuration:q.configuration ~other_config:[] + ~host_pending_features:[] ~configuration:q.configuration ~other_config:[] ~driver_filename:(Sm_exec.cmd_name q.driver) ~required_cluster_stack:q.required_cluster_stack ) +let find_pending_features existing_features features = + Listext.List.set_difference features existing_features + +(** [addto_pending_hosts_features ~__context self new_features] will add [new_features] +to pending features of host [self]. 
It then returns a list of currently pending features *) +let addto_pending_hosts_features ~__context self new_features = + let host = Helpers.get_localhost ~__context in + let new_features = + List.map (fun (f, v) -> Smint.unparse_feature (f, v)) new_features + in + let curr_pending_features = + Db.SM.get_host_pending_features ~__context ~self + |> List.remove_assoc host + |> List.cons (host, new_features) + in + Db.SM.set_host_pending_features ~__context ~self ~value:curr_pending_features ; + List.iter + (fun (h, f) -> + debug "%s: current pending features for host %s, sm %s, features %s" + __FUNCTION__ (Ref.string_of h) (Ref.string_of self) (String.concat "," f) + ) + curr_pending_features ; + List.map + (fun (h, f) -> (h, Smint.parse_string_int64_features f)) + curr_pending_features + +let valid_hosts_pending_features ~__context pending_features = + if List.length pending_features <> List.length (Db.Host.get_all ~__context) + then ( + debug "%s: Not enough hosts have registered their sm features" __FUNCTION__ ; + [] + ) else + List.map snd pending_features |> fun l -> + List.fold_left Smint.compat_features + (* The list in theory cannot be empty due to the if condition check, but do + this just in case *) + (List.nth_opt l 0 |> Option.fold ~none:[] ~some:Fun.id) + (List.tl l) + +let remove_valid_features_from_pending ~__context ~self valid_features = + let valid_features = List.map Smint.unparse_feature valid_features in + let new_pending_feature = + Db.SM.get_host_pending_features ~__context ~self + |> List.map (fun (h, pending_features) -> + (h, Listext.List.set_difference pending_features valid_features) + ) + in + Db.SM.set_host_pending_features ~__context ~self ~value:new_pending_feature + let update_from_query_result ~__context (self, r) q_result = let open Storage_interface in let _type = String.lowercase_ascii q_result.driver in if _type <> "storage_access" then ( let driver_filename = Sm_exec.cmd_name q_result.driver in - let features = 
Smint.parse_string_int64_features q_result.features in + let existing_features = Db.SM.get_features ~__context ~self in + let new_features = + Smint.parse_string_int64_features q_result.features + |> find_pending_features existing_features + |> addto_pending_hosts_features ~__context self + |> valid_hosts_pending_features ~__context + in + remove_valid_features_from_pending ~__context ~self new_features ; + let features = existing_features @ new_features in + List.iter + (fun (f, v) -> debug "%s: declaring new features %s:%Ld" __FUNCTION__ f v) + new_features ; + let capabilities = List.map fst features in - info "Registering SM plugin %s (version %s)" + info "%s Registering SM plugin %s (version %s)" __FUNCTION__ (String.lowercase_ascii q_result.driver) q_result.version ; if r.API.sM_type <> _type then From 2ec0ac68689ad24c38cf789bd094f4db058bda19 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 21 Oct 2024 19:14:43 +0100 Subject: [PATCH 055/121] CA-400060: Reject pool join if sm features mismatch Implement a new `assert_sm_features_compatiable` in pre_join_checks so that if the joining host does not have compatible sm features, it will be denied entry into the pool. Signed-off-by: Vincent Liu --- ocaml/idl/datamodel_errors.ml | 7 +++++ ocaml/xapi-consts/api_errors.ml | 3 ++ ocaml/xapi/xapi_pool.ml | 49 ++++++++++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index aead3e0abc4..80b36218f25 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -890,6 +890,13 @@ let _ = "The host joining the pool has different CA certificates from the pool \ coordinator while using the same name, uninstall them and try again." () ; + error Api_errors.pool_joining_sm_features_incompatible + ["pool_sm_ref"; "candidate_sm_ref"] + ~doc: + "The host joining the pool has an incompatible set of sm features from \ + the pool coordinator. 
Make sure the sm are of the same versions and try \ + again." + () ; (* External directory service *) error Api_errors.subject_cannot_be_resolved [] diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index ebafbdaa111..53e9e06176b 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -754,6 +754,9 @@ let pool_joining_host_tls_verification_mismatch = let pool_joining_host_ca_certificates_conflict = add_error "POOL_JOINING_HOST_CA_CERTIFICATES_CONFLICT" +let pool_joining_sm_features_incompatible = + add_error "POOL_JOINING_SM_FEATURES_INCOMPATIBLE" + (*workload balancing*) let wlb_not_initialized = add_error "WLB_NOT_INITIALIZED" diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index acb22cdcfcd..eb716ce766e 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -839,6 +839,52 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = ) ) in + let assert_sm_features_compatible () = + (* We consider the case where the existing pool has FOO/m, and the candidate having FOO/n, + where n >= m, to be compatible. Not vice versa. *) + let features_compatible coor_features candidate_features = + (* The pool features must not be reduced or downgraded, although it is fine + the other way around. 
*) + Smint.compat_features coor_features candidate_features = coor_features + in + + let master_sms = Client.SM.get_all ~rpc ~session_id in + List.iter + (fun sm -> + let master_sm_type = Client.SM.get_type ~rpc ~session_id ~self:sm in + let candidate_sm_ref, candidate_sm_rec = + match + Db.SM.get_records_where ~__context + ~expr:(Eq (Field "type", Literal master_sm_type)) + with + | [(sm_ref, sm_rec)] -> + (sm_ref, sm_rec) + | _ -> + raise + Api_errors.( + Server_error + ( pool_joining_sm_features_incompatible + , [Ref.string_of sm; ""] + ) + ) + in + + let coor_sm_features = + Client.SM.get_features ~rpc ~session_id ~self:sm + in + let candidate_sm_features = candidate_sm_rec.API.sM_features in + if not (features_compatible coor_sm_features candidate_sm_features) then + raise + Api_errors.( + Server_error + ( pool_joining_sm_features_incompatible + , [Ref.string_of sm; Ref.string_of candidate_sm_ref] + ) + ) + ) + master_sms + in + (* call pre-join asserts *) assert_pool_size_unrestricted () ; assert_management_interface_exists () ; @@ -872,7 +918,8 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = assert_tls_verification_matches () ; assert_ca_certificates_compatible () ; assert_not_in_updating_on_me () ; - assert_no_hosts_in_updating () + assert_no_hosts_in_updating () ; + assert_sm_features_compatible () let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : API.ref_host = From 3e2e970af2f1c501b5a8ebb76252df2e2babf3f9 Mon Sep 17 00:00:00 2001 From: Colin James Date: Thu, 7 Nov 2024 13:58:39 +0000 Subject: [PATCH 056/121] Document Rbac module Introduces an interface file for the Rbac module within xapi in order to document the intent of each of its functions. 
Signed-off-by: Colin James --- ocaml/xapi/rbac.ml | 5 --- ocaml/xapi/rbac.mli | 104 ++++++++++++++++++++++++++++++++++++++++++++ quality-gate.sh | 2 +- 3 files changed, 105 insertions(+), 6 deletions(-) create mode 100644 ocaml/xapi/rbac.mli diff --git a/ocaml/xapi/rbac.ml b/ocaml/xapi/rbac.ml index feefcf4143f..2b311a7e56d 100644 --- a/ocaml/xapi/rbac.ml +++ b/ocaml/xapi/rbac.ml @@ -243,11 +243,6 @@ let assert_permission_name ~__context ~permission = let assert_permission ~__context ~permission = assert_permission_name ~__context ~permission:permission.role_name_label -(* this is necessary to break dependency cycle between rbac and taskhelper *) -let init_task_helper_rbac_has_permission_fn = - if !TaskHelper.rbac_assert_permission_fn = None then - TaskHelper.rbac_assert_permission_fn := Some assert_permission - let has_permission_name ~__context ~permission = let session_id = get_session_of_context ~__context ~permission in is_access_allowed ~__context ~session_id ~permission diff --git a/ocaml/xapi/rbac.mli b/ocaml/xapi/rbac.mli new file mode 100644 index 00000000000..6905379a311 --- /dev/null +++ b/ocaml/xapi/rbac.mli @@ -0,0 +1,104 @@ +(* + * Copyright (c) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val is_access_allowed : + __context:Context.t + -> session_id:[`session] Ref.t + -> permission:string + -> bool +(** Determines whether the session associated with the provided + context has the specified permission. 
The permission set is cached + (on the coordinator only) to benefit successive queries for the + same session. *) + +val check : + ?extra_dmsg:string + -> ?extra_msg:string + -> ?args:(string * Rpc.t) list + -> ?keys:string list + -> __context:Context.t + -> fn:(unit -> 'a) + -> [`session] Ref.t + -> string + -> 'a +(** [check] executes a function associated with an action if the + session associated with the provided context is authorised to + perform the action. + + The [?extra_dmsg] and [?extra_msg] parameters allow for extra + information in debugging and error messages. + + The [?keys] parameter specifies which fields of a (string -> _) + map are RBAC-protected. It is primarily associated with + auto-generated methods such as add_to_other_config. However, if + [?keys] is non-empty, then [?args] must also be consulted as the + related methods that require this protection specify their key + name as a parameter. Otherwise, [?args] is mostly used to log + calls within the RBAC audit log. *) + +val check_with_new_task : + ?extra_dmsg:string + -> ?extra_msg:string + -> ?task_desc:string + -> ?args:(string * Rpc.t) list + -> fn:(unit -> 'a) + -> [`session] Ref.t + -> string + -> 'a +(** Defined in terms of [check] but using a context associated with a + freshly-created task. *) + +val assert_permission_name : __context:Context.t -> permission:string -> unit +(** Performs a dry run of the [check] function with a no-op action + guarded by the provided permission (as a name). *) + +val assert_permission : + __context:Context.t -> permission:Db_actions.role_t -> unit +(** Performs a dry run of the [check] function with a no-op action + guarded by the provided permission (as a database role). *) + +val has_permission : __context:Context.t -> permission:Db_actions.role_t -> bool +(** [has_permission ctx p] determines if the session associated with + the context [ctx] is authorised to perform a specific action. 
+ + [p] is of the type defined by the generated [Db_actions] module, + as [Xapi_role] simulates a database for the checking of static + role sets (as emitted in [Rbac_static]) and only appeals to the + xapi DB for additional roles. *) + +val is_rbac_enabled_for_http_action : string -> bool +(** [is_rbac_enabled_for_http_action route] determines whether RBAC + checks should be applied to the provided HTTP [route]. + + Some routes are precluded from RBAC checks because they are + assumed to only be used by code paths where RBAC has already been + checked or will be checked internally (e.g. /post_cli). *) + +val permission_of_action : + ?args:(string * Rpc.t) list -> keys:string list -> string -> string +(** Constructs the name of a permission associated with using an + RBAC-protected key with a specified action. + + For example, if [keys] specifies "folder" as a protected key name + for the action SR.remove_from_other_config, the permission name + associated with that is "SR.remove_from_other_config/key:folder" + - which is consistent with the format that [Rbac_static] contains. *) + +val nofn : unit -> unit +(** Named function that does nothing, e.g. (fun _ -> ()). + Used as a dummy action for RBAC checking. *) + +val destroy_session_permissions_tbl : session_id:[`session] Ref.t -> unit +(** Removes any cached permission set for the given session. This is + called when xapi destroys the DB entry for a session. 
 *)
diff --git a/quality-gate.sh b/quality-gate.sh
index 16a90270b17..db8444b53e0 100755
--- a/quality-gate.sh
+++ b/quality-gate.sh
@@ -25,7 +25,7 @@ verify-cert () {
 }
 
 mli-files () {
-    N=499
+    N=498
     # do not count ml files from the tests in ocaml/{tests/perftest/quicktest}
     MLIS=$(git ls-files -- '**/*.mli' | grep -vE "ocaml/tests|ocaml/perftest|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;)
     MLS=$(git ls-files -- '**/*.ml' | grep -vE "ocaml/tests|ocaml/perftest|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;)

From 801dd96cdbcdf0f1fc412859aed46005040b0c57 Mon Sep 17 00:00:00 2001
From: Thierry Escande
Date: Fri, 8 Nov 2024 16:27:26 +0100
Subject: [PATCH 057/121] Increase wait-init-complete timeout

When a host starts, the systemd service xapi-wait-init-complete, which
waits for the creation of the xapi init cookie file, may time out and
fail by a matter of seconds. This patch adds 1 minute (300 seconds
total) to the timeout passed to the script xapi-wait-init-complete.
Signed-off-by: Thierry Escande --- scripts/xapi-wait-init-complete.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/xapi-wait-init-complete.service b/scripts/xapi-wait-init-complete.service index 03cb7f8e9cd..19691c477e6 100644 --- a/scripts/xapi-wait-init-complete.service +++ b/scripts/xapi-wait-init-complete.service @@ -6,7 +6,7 @@ Before=xapi-init-complete.target [Service] Type=oneshot -ExecStart=@OPTDIR@/bin/xapi-wait-init-complete 240 +ExecStart=@OPTDIR@/bin/xapi-wait-init-complete 300 RemainAfterExit=yes [Install] From 20f4dccff3837992a3ab415f6b5a9d1c6f563139 Mon Sep 17 00:00:00 2001 From: Danilo Del Busso Date: Tue, 1 Oct 2024 07:31:40 +0100 Subject: [PATCH 058/121] CP-51694: Add testing of Go date deserialization Signed-off-by: Danilo Del Busso --- .github/workflows/generate-and-build-sdks.yml | 1 + .github/workflows/go-ci/action.yml | 5 + ocaml/sdk-gen/go/autogen/src/convert_test.go | 91 +++++++++++++++++++ ocaml/sdk-gen/go/autogen/src/export_test.go | 37 ++++++++ 4 files changed, 134 insertions(+) create mode 100644 ocaml/sdk-gen/go/autogen/src/convert_test.go create mode 100644 ocaml/sdk-gen/go/autogen/src/export_test.go diff --git a/.github/workflows/generate-and-build-sdks.yml b/.github/workflows/generate-and-build-sdks.yml index 8187c391508..6a5e2a57bf7 100644 --- a/.github/workflows/generate-and-build-sdks.yml +++ b/.github/workflows/generate-and-build-sdks.yml @@ -54,6 +54,7 @@ jobs: path: | _build/install/default/share/go/* !_build/install/default/share/go/dune + !_build/install/default/share/go/**/*_test.go - name: Store Java SDK source uses: actions/upload-artifact@v4 diff --git a/.github/workflows/go-ci/action.yml b/.github/workflows/go-ci/action.yml index c1b2df7f1e1..30bcbfee923 100644 --- a/.github/workflows/go-ci/action.yml +++ b/.github/workflows/go-ci/action.yml @@ -14,6 +14,11 @@ runs: working-directory: ${{ github.workspace }}/_build/install/default/share/go/src args: --config=${{ github.workspace 
}}/.golangci.yml + - name: Run Go Tests + shell: bash + working-directory: ${{ github.workspace }}/_build/install/default/share/go/src + run: go test -v + - name: Run CI for Go SDK shell: bash run: | diff --git a/ocaml/sdk-gen/go/autogen/src/convert_test.go b/ocaml/sdk-gen/go/autogen/src/convert_test.go new file mode 100644 index 00000000000..48dabc82898 --- /dev/null +++ b/ocaml/sdk-gen/go/autogen/src/convert_test.go @@ -0,0 +1,91 @@ +/* + * Copyright (c) Cloud Software Group, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1) Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2) Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package xenapi_test + +import ( + "testing" + "time" + + "go/xenapi" +) + +func TestDateDeseralization(t *testing.T) { + dates := map[string]time.Time{ + // no dashes, no colons + "20220101T123045": time.Date(2022, 1, 1, 12, 30, 45, 0, time.UTC), + "20220101T123045Z": time.Date(2022, 1, 1, 12, 30, 45, 0, time.UTC), + "20220101T123045+03": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), // +03 timezone + "20220101T123045+0300": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), + "20220101T123045+03:00": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), + + "20220101T123045.123": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.UTC), + "20220101T123045.123Z": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.UTC), + "20220101T123045.123+03": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.FixedZone("", 3*60*60)), + "20220101T123045.123+0300": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.FixedZone("", 3*60*60)), + "20220101T123045.123+03:00": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.FixedZone("", 3*60*60)), + + // no dashes, with colons + "20220101T12:30:45": time.Date(2022, 1, 1, 12, 30, 45, 0, time.UTC), + "20220101T12:30:45Z": time.Date(2022, 1, 1, 12, 30, 45, 0, time.UTC), + "20220101T12:30:45+03": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), + "20220101T12:30:45+0300": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), + "20220101T12:30:45+03:00": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), + + "20220101T12:30:45.123": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.UTC), + "20220101T12:30:45.123Z": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.UTC), + "20220101T12:30:45.123+03": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.FixedZone("", 3*60*60)), + "20220101T12:30:45.123+0300": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.FixedZone("", 3*60*60)), + "20220101T12:30:45.123+03:00": time.Date(2022, 1, 1, 12, 30, 
45, 123000000, time.FixedZone("", 3*60*60)), + + // dashes and colons + "2022-01-01T12:30:45": time.Date(2022, 1, 1, 12, 30, 45, 0, time.UTC), + "2022-01-01T12:30:45Z": time.Date(2022, 1, 1, 12, 30, 45, 0, time.UTC), + "2022-01-01T12:30:45+03": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), + "2022-01-01T12:30:45+0300": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), + "2022-01-01T12:30:45+03:00": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), + + "2022-01-01T12:30:45.123": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.UTC), + "2022-01-01T12:30:45.123Z": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.UTC), + "2022-01-01T12:30:45.123+03": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.FixedZone("", 3*60*60)), + } + for input, expected := range dates { + t.Run("Input:"+input, func(t *testing.T) { + result, err := xenapi.DeserializeTime("", input) + if err == nil { + matching := expected.Equal(result) + if !matching { + t.Fatalf(`Failed to find match for '%s'`, input) + } + } else { + t.Fatalf(`Failed to find match for '%s'`, input) + } + }) + } +} diff --git a/ocaml/sdk-gen/go/autogen/src/export_test.go b/ocaml/sdk-gen/go/autogen/src/export_test.go new file mode 100644 index 00000000000..5dbdbeb47e3 --- /dev/null +++ b/ocaml/sdk-gen/go/autogen/src/export_test.go @@ -0,0 +1,37 @@ +/* + * Copyright (c) Cloud Software Group, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1) Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2) Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// This file contains exports of private functions specifically for testing purposes. +// It allows test code to access and verify the behavior of internal functions within the `xenapi` package. + +package xenapi + +// DeserializeTime is a private function that deserializes a time value. +// It is exported for testing to allow verification of its functionality. +var DeserializeTime = deserializeTime From ca5d3b64d7c48413ff4ab3119fe81cf3c5efb9db Mon Sep 17 00:00:00 2001 From: Danilo Del Busso Date: Tue, 12 Nov 2024 15:19:19 +0000 Subject: [PATCH 059/121] Set non-UTC timezone for date time unit test runners Signed-off-by: Danilo Del Busso --- .github/workflows/generate-and-build-sdks.yml | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/.github/workflows/generate-and-build-sdks.yml b/.github/workflows/generate-and-build-sdks.yml index 6a5e2a57bf7..39645cf68bf 100644 --- a/.github/workflows/generate-and-build-sdks.yml +++ b/.github/workflows/generate-and-build-sdks.yml @@ -24,6 +24,14 @@ jobs: shell: bash run: opam exec -- make sdk + # sdk-ci runs some Go unit tests. 
+ # This setting ensures that SDK date time + # tests are run on a machine that + # isn't using UTC + - name: Set Timezone to Tokyo for datetime tests + run: | + sudo timedatectl set-timezone Asia/Tokyo + - name: Run CI for SDKs uses: ./.github/workflows/sdk-ci @@ -111,6 +119,14 @@ jobs: java-version: '17' distribution: 'temurin' + # Java Tests are run at compile time. + # This setting ensures that SDK date time + # tests are run on a machine that + # isn't using UTC + - name: Set Timezone to Tokyo for datetime tests + run: | + sudo timedatectl set-timezone Asia/Tokyo + - name: Build Java SDK shell: bash run: | @@ -139,6 +155,14 @@ jobs: name: SDK_Source_CSharp path: source/ + # All tests builds and pipelines should + # work on other timezones. This setting ensures that + # SDK date time tests are run on a machine that + # isn't using UTC + - name: Set Timezone to Tokyo for datetime tests + shell: pwsh + run: Set-TimeZone -Id "Tokyo Standard Time" + - name: Test C# SDK shell: pwsh run: | From 5bcef81fe68a1218e62706c8dda9df511d01ee22 Mon Sep 17 00:00:00 2001 From: Danilo Del Busso Date: Wed, 13 Nov 2024 09:32:31 +0000 Subject: [PATCH 060/121] Fix parsing of timezone agnostic date strings in Java deserializer Signed-off-by: Danilo Del Busso --- .../xenapi/CustomDateDeserializer.java | 151 ++++++++++-------- 1 file changed, 85 insertions(+), 66 deletions(-) diff --git a/ocaml/sdk-gen/java/autogen/xen-api/src/main/java/com/xensource/xenapi/CustomDateDeserializer.java b/ocaml/sdk-gen/java/autogen/xen-api/src/main/java/com/xensource/xenapi/CustomDateDeserializer.java index 3ba135e0a40..63be5c1c458 100644 --- a/ocaml/sdk-gen/java/autogen/xen-api/src/main/java/com/xensource/xenapi/CustomDateDeserializer.java +++ b/ocaml/sdk-gen/java/autogen/xen-api/src/main/java/com/xensource/xenapi/CustomDateDeserializer.java @@ -49,7 +49,7 @@ public class CustomDateDeserializer extends StdDeserializer { /** * Array of {@link SimpleDateFormat} objects representing the date formats * used 
in xen-api responses. - * + *
* RFC-3339 date formats can be returned in either Zulu or time zone agnostic. * This list is not an exhaustive list of formats supported by RFC-3339, rather * a set of formats that will enable the deserialization of xen-api dates. @@ -57,17 +57,24 @@ public class CustomDateDeserializer extends StdDeserializer { * to this list, please ensure the order is kept. */ private static final SimpleDateFormat[] dateFormatsUtc = { - // Most commonly returned formats - new SimpleDateFormat("yyyyMMdd'T'HHmmss'Z'"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"), - new SimpleDateFormat("ss.SSS"), - - // Other - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss'Z'"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSS'Z'"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSS'Z'"), - + // Most commonly returned formats + new SimpleDateFormat("yyyyMMdd'T'HHmmss'Z'"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"), + new SimpleDateFormat("ss.SSS"), + + // Other + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss'Z'"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSS'Z'"), + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSS'Z'"), + + // Formats without timezone info default to UTC in xapi + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSS"), + new SimpleDateFormat("yyyyMMdd'T'HHmmss"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSS"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"), }; /** @@ -78,61 +85,55 @@ public class CustomDateDeserializer extends StdDeserializer { * to this list, please ensure the order is kept. 
*/ private static final SimpleDateFormat[] dateFormatsLocal = { - // no dashes, no colons - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSZZZ"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSZZ"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSZ"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSXXX"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSXX"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSX"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSS"), - - new SimpleDateFormat("yyyyMMdd'T'HHmmssZZZ"), - new SimpleDateFormat("yyyyMMdd'T'HHmmssZZ"), - new SimpleDateFormat("yyyyMMdd'T'HHmmssZ"), - new SimpleDateFormat("yyyyMMdd'T'HHmmssXXX"), - new SimpleDateFormat("yyyyMMdd'T'HHmmssXX"), - new SimpleDateFormat("yyyyMMdd'T'HHmmssX"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss"), - - // no dashes, with colons - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSZZZ"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSZZ"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSZ"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSXXX"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSXX"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSX"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSS"), - - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssZZZ"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssZZ"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssZ"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssXXX"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssXX"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssX"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss"), - - // dashes and colons - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZZZ"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZZ"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXX"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSX"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS"), - - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZZZ"), - new 
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZZ"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXX"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"), + // no dashes, no colons + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSZZZ"), + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSZZ"), + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSZ"), + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSXXX"), + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSXX"), + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSX"), + + new SimpleDateFormat("yyyyMMdd'T'HHmmssZZZ"), + new SimpleDateFormat("yyyyMMdd'T'HHmmssZZ"), + new SimpleDateFormat("yyyyMMdd'T'HHmmssZ"), + new SimpleDateFormat("yyyyMMdd'T'HHmmssXXX"), + new SimpleDateFormat("yyyyMMdd'T'HHmmssXX"), + new SimpleDateFormat("yyyyMMdd'T'HHmmssX"), + + // no dashes, with colons + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSZZZ"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSZZ"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSZ"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSXXX"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSXX"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSX"), + + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssZZZ"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssZZ"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssZ"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssXXX"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssXX"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssX"), + + // dashes and colons + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZZZ"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZZ"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXX"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSX"), + + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZZZ"), + new 
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZZ"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXX"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX"), }; /** * Constructs a {@link CustomDateDeserializer} instance. - */ + */ public CustomDateDeserializer() { this(null); } @@ -163,9 +164,13 @@ public CustomDateDeserializer(Class t) { @Override public Date deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException { var text = jsonParser.getText(); + Date localDate = null; + Date utcDate = null; + for (SimpleDateFormat formatter : dateFormatsUtc) { try { - return formatter.parse(text); + utcDate = formatter.parse(text); + break; } catch (ParseException e) { // ignore } @@ -173,12 +178,26 @@ public Date deserialize(JsonParser jsonParser, DeserializationContext deserializ for (SimpleDateFormat formatter : dateFormatsLocal) { try { - return formatter.parse(text); + localDate = formatter.parse(text); + break; } catch (ParseException e) { // ignore } } - throw new IOException("Failed to deserialize a Date value."); + // Some dates such as 20220101T12:30:45.123+03:00 will match both with a UTC + // and local date format. In that case, we pick the date returned by the + // local formatter, as it's more precise. 
+ // This allows us to match strings with no timezone information (such as 20220101T12:30:45.123) + // as UTC, while correctly parsing more precise date representations + if (localDate != null && utcDate != null) { + return localDate; // Prioritize local format if both match + } else if (localDate != null) { + return localDate; + } else if (utcDate != null) { + return utcDate; + } else { + throw new IOException("Failed to deserialize a Date value."); + } } } From b81d11ef4457b919f8f99ecbf2ad631fee3f2a35 Mon Sep 17 00:00:00 2001 From: Danilo Del Busso Date: Wed, 13 Nov 2024 18:28:40 +0900 Subject: [PATCH 061/121] Ensure C# date tests work when running under any timezone Without the change, the tests only pass when running on a runner with UTC as a timezone Signed-off-by: Danilo Del Busso --- .../autogen/XenServerTest/DateTimeTests.cs | 79 +++++++++++-------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs b/ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs index 0bda9474eb0..981204df714 100644 --- a/ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs +++ b/ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs @@ -51,16 +51,25 @@ public class DateTimeTests [TestMethod] [DynamicData(nameof(GetTestData), DynamicDataSourceType.Method, DynamicDataDisplayName = nameof(GetCustomDynamicDataDisplayName))] - public void TestXenDateTimeConverter(string dateString, DateTime expectedDateTime) + public void TestXenDateTimeConverter(string dateString, DateTime expectedDateTime, DateTimeKind expectedDateTimeKind) { try { var jsonDateString = "{ \"Date\" : \"" + dateString + "\" }"; - var actualDateTime = JsonConvert.DeserializeObject(jsonDateString, _settings); + var actualDateTimeObject = JsonConvert.DeserializeObject(jsonDateString, _settings); - Assert.IsNotNull(actualDateTime, $"Failed to convert '{dateString}'"); - Assert.IsTrue(expectedDateTime.Equals(actualDateTime.Date), - 
$"Conversion of '{dateString}' resulted in an incorrect DateTime value"); + + Assert.IsNotNull(actualDateTimeObject?.Date, $"Failed to convert '{dateString}'"); + var actualDateTime = actualDateTimeObject.Date; + Assert.IsTrue(expectedDateTimeKind.Equals(actualDateTime.Kind)); + + // expected times are in UTC to ensure these tests do + // not fail when running in other timezones + if (expectedDateTimeKind == DateTimeKind.Local) + actualDateTime = actualDateTime.ToUniversalTime(); + + Assert.IsTrue(expectedDateTime.Equals(actualDateTime), + $"Conversion of '{dateString}' resulted in an incorrect DateTime value. Expected '{expectedDateTime} but instead received '{actualDateTime}'"); } catch (Exception ex) { @@ -78,62 +87,62 @@ public static string GetCustomDynamicDataDisplayName(MethodInfo methodInfo, obje public static IEnumerable GetTestData() { // no dashes, no colons - yield return new object[] { "20220101T123045", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Unspecified) }; - yield return new object[] { "20220101T123045Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc) }; - yield return new object[] { "20220101T123045+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; - yield return new object[] { "20220101T123045+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; - yield return new object[] { "20220101T123045+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + yield return new object[] { "20220101T123045", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc), DateTimeKind.Unspecified }; + yield return new object[] { "20220101T123045Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc), DateTimeKind.Utc }; + yield return new object[] { "20220101T123045+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; + yield return new object[] { "20220101T123045+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; + yield return new object[] { 
"20220101T123045+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "20220101T123045.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Unspecified) }; + { "20220101T123045.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Unspecified }; yield return new object[] - { "20220101T123045.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc) }; + { "20220101T123045.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Utc }; yield return new object[] - { "20220101T123045.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "20220101T123045.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "20220101T123045.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "20220101T123045.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "20220101T123045.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "20220101T123045.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; // no dashes, with colons yield return new object[] - { "20220101T12:30:45", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Unspecified) }; - yield return new object[] { "20220101T12:30:45Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc) }; - yield return new object[] { "20220101T12:30:45+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; - yield return new object[] { "20220101T12:30:45+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + { "20220101T12:30:45", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc), DateTimeKind.Unspecified }; + yield return new object[] { "20220101T12:30:45Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc), DateTimeKind.Utc }; + yield 
return new object[] { "20220101T12:30:45+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; + yield return new object[] { "20220101T12:30:45+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "20220101T12:30:45+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + { "20220101T12:30:45+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "20220101T12:30:45.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Unspecified) }; + { "20220101T12:30:45.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Unspecified }; yield return new object[] - { "20220101T12:30:45.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc) }; + { "20220101T12:30:45.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Utc }; yield return new object[] - { "20220101T12:30:45.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "20220101T12:30:45.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "20220101T12:30:45.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "20220101T12:30:45.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "20220101T12:30:45.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "20220101T12:30:45.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; // dashes and colons yield return new object[] - { "2022-01-01T12:30:45", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Unspecified) }; - yield return new object[] { "2022-01-01T12:30:45Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc) }; - yield return new object[] { "2022-01-01T12:30:45+03", new DateTime(2022, 
1, 1, 9, 30, 45, DateTimeKind.Local) }; + { "2022-01-01T12:30:45", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc), DateTimeKind.Unspecified }; + yield return new object[] { "2022-01-01T12:30:45Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc), DateTimeKind.Utc }; + yield return new object[] { "2022-01-01T12:30:45+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "2022-01-01T12:30:45+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + { "2022-01-01T12:30:45+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "2022-01-01T12:30:45+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + { "2022-01-01T12:30:45+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "2022-01-01T12:30:45.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Unspecified) }; + { "2022-01-01T12:30:45.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Unspecified }; yield return new object[] - { "2022-01-01T12:30:45.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc) }; + { "2022-01-01T12:30:45.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Utc }; yield return new object[] - { "2022-01-01T12:30:45.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "2022-01-01T12:30:45.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "2022-01-01T12:30:45.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "2022-01-01T12:30:45.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "2022-01-01T12:30:45.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "2022-01-01T12:30:45.123+03:00", 
new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; } } From c77aceb2c73f9ea774fc4ce5ff74c661d532488b Mon Sep 17 00:00:00 2001 From: Colin James Date: Sun, 17 Nov 2024 20:22:06 +0000 Subject: [PATCH 062/121] Update datamodel_lifecycle.ml Signed-off-by: Colin James --- ocaml/idl/datamodel_lifecycle.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index fb728685a55..9e3007f4744 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -52,7 +52,7 @@ let prototyped_of_field = function | "VTPM", "persistence_backend" -> Some "22.26.0" | "SM", "host_pending_features" -> - Some "24.36.0-next" + Some "24.37.0" | "host", "last_update_hash" -> Some "24.10.0" | "host", "pending_guidances_full" -> From 4222704ba517cbd72c4fb491a6970bc9814b7125 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Thu, 31 Oct 2024 05:38:12 +0000 Subject: [PATCH 063/121] CA-401274: Remove external auth limitation during set_hostname_live Now we got winbind as the Active Direcotry backend, the joined netbios name is persisted in smb.conf, thus can be compatible with hostname change This commit just remove the set_hostname_live limitation Signed-off-by: Lin Liu --- ocaml/xapi/xapi_host.ml | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 7958a15a367..9f84923fe2e 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -1337,21 +1337,6 @@ let serialize_host_enable_disable_extauth = Mutex.create () let set_hostname_live ~__context ~host ~hostname = with_lock serialize_host_enable_disable_extauth (fun () -> - let current_auth_type = - Db.Host.get_external_auth_type ~__context ~self:host - in - (* the AD extauth plugin is incompatible with a hostname change *) - ( if current_auth_type = Xapi_globs.auth_type_AD then - let current_service_name = - 
Db.Host.get_external_auth_service_name ~__context ~self:host - in - raise - (Api_errors.Server_error - ( Api_errors.auth_already_enabled - , [current_auth_type; current_service_name] - ) - ) - ) ; (* hostname is valid if contains only alpha, decimals, and hyphen (for hyphens, only in middle position) *) let is_invalid_hostname hostname = From f0003e62da3e44e363d577c2c4d71c4592728c8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Thu, 25 Apr 2024 16:42:28 +0100 Subject: [PATCH 064/121] CP-49134: tracing: do not destroy stacktrace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reraise with the original stacktrace, this requires using the raw backtrace instead of the string one. Signed-off-by: Edwin Török --- ocaml/libs/tracing/tracing.ml | 5 +++-- ocaml/libs/tracing/tracing.mli | 4 +++- ocaml/tests/test_observer.ml | 2 +- ocaml/xapi/context.ml | 2 +- ocaml/xapi/context.mli | 2 +- ocaml/xapi/taskHelper.ml | 2 +- ocaml/xenopsd/lib/xenops_server.ml | 2 +- 7 files changed, 11 insertions(+), 8 deletions(-) diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index ab097253dcb..3f521f6f29c 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -331,6 +331,7 @@ module Span = struct | exn, stacktrace -> ( let msg = Printexc.to_string exn in let exn_type = Printexc.exn_slot_name exn in + let stacktrace = Printexc.raw_backtrace_to_string stacktrace in let _description = Some (Printf.sprintf "Error: %s Type: %s Backtrace: %s" msg exn_type @@ -720,10 +721,10 @@ let with_tracing ?(attributes = []) ?(parent = None) ~name f = ignore @@ Tracer.finish span ; result with exn -> - let backtrace = Printexc.get_backtrace () in + let backtrace = Printexc.get_raw_backtrace () in let error = (exn, backtrace) in ignore @@ Tracer.finish span ~error ; - raise exn + Printexc.raise_with_backtrace exn backtrace ) | Error e -> warn "Failed to start tracing: %s" (Printexc.to_string 
e) ; diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index e78153c9790..18b248cc881 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -163,7 +163,9 @@ module Tracer : sig *) val finish : - ?error:exn * string -> Span.t option -> (Span.t option, exn) result + ?error:exn * Printexc.raw_backtrace + -> Span.t option + -> (Span.t option, exn) result val span_hashtbl_is_empty : unit -> bool diff --git a/ocaml/tests/test_observer.ml b/ocaml/tests/test_observer.ml index 7ea23a05939..2e2f8e6aa29 100644 --- a/ocaml/tests/test_observer.ml +++ b/ocaml/tests/test_observer.ml @@ -466,7 +466,7 @@ let test_tracing_exn_backtraces () = let (_ : int) = test_a () in () with e -> ( - let stacktrace = Printexc.get_backtrace () in + let stacktrace = Printexc.get_raw_backtrace () in let x = Tracer.finish ~error:(e, stacktrace) x in match x with | Ok (Some span) -> diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index 7027caaec67..41faa238bd5 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -517,7 +517,7 @@ let with_tracing ?originator ~__context name f = result with exn -> let backtrace = Printexc.get_raw_backtrace () in - let error = (exn, Printexc.raw_backtrace_to_string backtrace) in + let error = (exn, backtrace) in ignore @@ Tracer.finish span ~error ; Printexc.raise_with_backtrace exn backtrace ) diff --git a/ocaml/xapi/context.mli b/ocaml/xapi/context.mli index 98e04215272..34e51afd2ee 100644 --- a/ocaml/xapi/context.mli +++ b/ocaml/xapi/context.mli @@ -142,7 +142,7 @@ val get_client_ip : t -> string option val get_user_agent : t -> string option -val complete_tracing : ?error:exn * string -> t -> unit +val complete_tracing : ?error:exn * Printexc.raw_backtrace -> t -> unit val tracing_of : t -> Tracing.Span.t option diff --git a/ocaml/xapi/taskHelper.ml b/ocaml/xapi/taskHelper.ml index 30d36c0ed37..465859e7fca 100644 --- a/ocaml/xapi/taskHelper.ml +++ b/ocaml/xapi/taskHelper.ml @@ -265,7 
+265,7 @@ let cancel ~__context = cancel_this ~__context ~self let failed ~__context exn = - let backtrace = Printexc.get_backtrace () in + let backtrace = Printexc.get_raw_backtrace () in let@ () = finally_complete_tracing ~error:(exn, backtrace) __context in let code, params = ExnHelper.error_of_exn exn in let@ self = operate_on_db_task ~__context in diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 579ce5d6f05..e5d8016bedb 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -1840,7 +1840,7 @@ let with_tracing ~name ~task f = Xenops_task.set_tracing task parent ; result with exn -> - let backtrace = Printexc.get_backtrace () in + let backtrace = Printexc.get_raw_backtrace () in let error = (exn, backtrace) in ignore @@ Tracer.finish span ~error ; raise exn From e871ee8a615c6c6c1bee3968eaef0f45cb9ab8d3 Mon Sep 17 00:00:00 2001 From: Colin James Date: Thu, 14 Nov 2024 13:20:46 +0000 Subject: [PATCH 065/121] CP-49078: Preprocess fields into a Hashtbl Flame graphs indicate that, under load created by parallel "xe vm-list" commands, the DB action get_record is hit often. This function constructs an API-level record by marshalling an association list that maps field names to unmarshalled string values. To do this, it serially queries all the field names using List.assoc. This has rather large cost in doing lexicographical string comparisons (caml_compare on string keys). To avoid this, regardless of record size, we preprocess the association lists __regular_fields and __set_refs into a (string, string) Hashtbl.t and query that to construct each record field. 
Signed-off-by: Colin James --- ocaml/idl/ocaml_backend/gen_db_actions.ml | 62 ++++++++++++++++++----- 1 file changed, 49 insertions(+), 13 deletions(-) diff --git a/ocaml/idl/ocaml_backend/gen_db_actions.ml b/ocaml/idl/ocaml_backend/gen_db_actions.ml index 91c1d9a6ad2..06f54f228ba 100644 --- a/ocaml/idl/ocaml_backend/gen_db_actions.ml +++ b/ocaml/idl/ocaml_backend/gen_db_actions.ml @@ -298,35 +298,71 @@ let db_action api : O.Module.t = ~body:(List.concat [open_db_module; body]) () in + let contains_setrefs fields = + let is_referential_field = function + | {DT.ty= DT.Set (DT.Ref _); field_ignore_foreign_key= false; _} -> + true + | _ -> + false + in + List.exists is_referential_field fields + in let get_record_aux_fn_body ?(m = "API.") (obj : obj) (all_fields : field list) = let of_field = function | { - DT.ty= DT.Set (DT.Ref other) + DT.ty= DT.Set (DT.Ref _ as ty) ; full_name ; DT.field_ignore_foreign_key= false ; _ } -> - Printf.sprintf "List.map %s.%s (List.assoc \"%s\" __set_refs)" - _string_to_dm - (OU.alias_of_ty (DT.Ref other)) + let accessor = "find_setref" in + Printf.sprintf "List.map %s.%s (%s \"%s\")" _string_to_dm + (OU.alias_of_ty ty) accessor (Escaping.escape_id full_name) | f -> - _string_to_dm - ^ "." 
- ^ OU.alias_of_ty f.DT.ty - ^ "(List.assoc \"" - ^ Escaping.escape_id f.full_name - ^ "\" __regular_fields)" + let ty_alias = OU.alias_of_ty f.DT.ty in + let accessor = "find_regular" in + let field_name = Escaping.escape_id f.full_name in + Printf.sprintf {|%s.%s (%s "%s")|} _string_to_dm ty_alias accessor + field_name in let make_field f = Printf.sprintf " %s%s = %s;" m (OU.ocaml_of_record_field (obj.DT.name :: f.DT.full_name)) (of_field f) in + + let create_lookup_fn name initial_size kvs = + let indent = " " in + [ + Printf.sprintf "let %s =" name + ; " let module HT = Hashtbl in" + ; Printf.sprintf " let tbl = HT.create %d in" initial_size + ; Printf.sprintf " List.iter (fun (k, v) -> HT.replace tbl k v) %s;" kvs + ; " HT.find tbl" + ; "in" + ] + |> List.map (( ^ ) indent) + in + let populate_regulars_tbl = + create_lookup_fn "find_regular" 256 "__regular_fields" + in + let populate_setrefs_tbl = + if contains_setrefs all_fields then + create_lookup_fn "find_setref" 32 "__set_refs" + else + [] + in let fields = List.map make_field all_fields in - let mk_rec = ["{"] @ fields @ [" }"] in - String.concat "\n" mk_rec + let mk_rec = [" {"] @ fields @ [" }"] in + let body = + "\n" + ^ (populate_regulars_tbl @ populate_setrefs_tbl @ mk_rec + |> String.concat "\n" + ) + in + body in let get_record_aux_fn (obj : obj) = let record_fields = List.filter client_side_field (DU.fields_of_obj obj) in @@ -364,7 +400,7 @@ let db_action api : O.Module.t = expr ; Printf.sprintf "List.map (fun (ref,(__regular_fields,__set_refs)) -> \ - Ref.of_%sstring ref, %s __regular_fields __set_refs) records" + Ref.of_%sstring ref, %s ~__regular_fields ~__set_refs) records" (if obj.DT.name = "session" then "secret_" else "") conversion_fn ] From aa7575ea63d820e9096498c107c40e18e4056440 Mon Sep 17 00:00:00 2001 From: Colin James Date: Fri, 15 Nov 2024 15:49:05 +0000 Subject: [PATCH 066/121] CP-49078: Construct a hash table inside API Rewrite the API.assocer function to internally construct a 
hash table from an association list. Then, shadow its usage in relevant places. Signed-off-by: Colin James --- ocaml/idl/ocaml_backend/gen_api.ml | 31 ++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/ocaml/idl/ocaml_backend/gen_api.ml b/ocaml/idl/ocaml_backend/gen_api.ml index 7bedb49eca8..502e0cd9816 100644 --- a/ocaml/idl/ocaml_backend/gen_api.ml +++ b/ocaml/idl/ocaml_backend/gen_api.ml @@ -241,8 +241,8 @@ let gen_record_type ~with_module highapi tys = [ sprintf "let rpc_of_%s_t x = Rpc.Dict (unbox_list [ %s ])" obj_name (map_fields make_of_field) - ; sprintf "let %s_t_of_rpc x = on_dict (fun x -> { %s }) x" obj_name - (map_fields make_to_field) + ; sprintf "let %s_t_of_rpc x = on_dict (fun x assocer -> { %s }) x" + obj_name (map_fields make_to_field) ; sprintf "type ref_%s_to_%s_t_map = (ref_%s * %s_t) list [@@deriving \ rpc]" @@ -408,10 +408,6 @@ let gen_client_types highapi = x | _ -> failwith \"Date.t_of_rpc\"" ; "end" ] - ; [ - "let on_dict f = function | Rpc.Dict x -> f x | _ -> failwith \ - \"Expected Dictionary\"" - ] ; ["let opt_map f = function | None -> None | Some x -> Some (f x)"] ; [ "let unbox_list = let rec loop aux = function" @@ -421,14 +417,21 @@ let gen_client_types highapi = ; "loop []" ] ; [ - "let assocer key map default = " - ; " try" - ; " List.assoc key map" - ; " with Not_found ->" - ; " match default with" - ; " | Some d -> d" - ; " | None -> failwith (Printf.sprintf \"Field %s not present in \ - rpc\" key)" + "let assocer kvs =" + ; "let tbl = Hashtbl.create 256 in" + ; "List.iter (fun (k, v) -> Hashtbl.replace tbl k v) kvs;" + ; "fun key _ default ->" + ; "match Hashtbl.find_opt tbl key with" + ; "| Some v -> v" + ; "| _ ->" + ; " match default with" + ; " | Some d -> d" + ; " | _ -> failwith (Printf.sprintf \"Field %s not present in rpc\" \ + key)" + ] + ; [ + "let on_dict f = function | Rpc.Dict x -> f x (assocer x) | _ -> \ + failwith \"Expected Dictionary\"" ] ; gen_non_record_type 
all_types ; gen_record_type ~with_module:true highapi From 3a49e86f1ec1c212a6199df15ff659f27194295a Mon Sep 17 00:00:00 2001 From: Colin James Date: Mon, 18 Nov 2024 09:43:23 +0000 Subject: [PATCH 067/121] CP-49078: Use Hashtbl within Schema Speed up "find" operation on Schema Tables and Database. Previously, these would use List.find which involves costly comparisons on string keys. Now, we use a hash table, keyed by names, and use that to identify columns and tables. Signed-off-by: Colin James --- ocaml/database/db_cache_types.ml | 1 + ocaml/database/schema.ml | 99 ++++++++++++++++++++++++++++---- ocaml/database/test_schemas.ml | 49 ++++++++++------ ocaml/idl/datamodel_schema.ml | 21 ++++--- 4 files changed, 134 insertions(+), 36 deletions(-) diff --git a/ocaml/database/db_cache_types.ml b/ocaml/database/db_cache_types.ml index 99190201ffa..d081dbd674c 100644 --- a/ocaml/database/db_cache_types.ml +++ b/ocaml/database/db_cache_types.ml @@ -158,6 +158,7 @@ module Row = struct with Not_found -> raise (DBCache_NotFound ("missing field", key, "")) let add_defaults g (schema : Schema.Table.t) t = + let schema = Schema.Table.t'_of_t schema in List.fold_left (fun t c -> if not (mem c.Schema.Column.name t) then diff --git a/ocaml/database/schema.ml b/ocaml/database/schema.ml index 66d5000d7db..619cba97552 100644 --- a/ocaml/database/schema.ml +++ b/ocaml/database/schema.ml @@ -96,28 +96,104 @@ module Column = struct (** only so we can special case set refs in the interface *) } [@@deriving sexp] + + let name_of t = t.name end +let tabulate ks ~key_fn = + let tbl = Hashtbl.create 64 in + List.iter (fun c -> Hashtbl.replace tbl (key_fn c) c) ks ; + tbl + +let values_of_table tbl = Hashtbl.fold (fun _ v vs -> v :: vs) tbl [] + module Table = struct - type t = {name: string; columns: Column.t list; persistent: bool} + type t' = {name: string; columns: Column.t list; persistent: bool} [@@deriving sexp] - let find name t = - try List.find (fun col -> col.Column.name = name) 
t.columns - with Not_found -> - raise (Db_exn.DBCache_NotFound ("missing column", t.name, name)) + type t = { + name: string + ; columns: (string, Column.t) Hashtbl.t + ; persistent: bool + } + + let t'_of_t : t -> t' = + fun (t : t) -> + let ({name; columns; persistent} : t) = t in + let columns = values_of_table columns in + {name; columns; persistent} + + let t_of_t' : t' -> t = + fun (t' : t') -> + let ({name; columns; persistent} : t') = t' in + let columns = tabulate columns ~key_fn:Column.name_of in + {name; columns; persistent} + + let sexp_of_t t = + let t' = t'_of_t t in + sexp_of_t' t' + + let t_of_sexp s = + let ({name; columns; persistent} : t') = t'_of_sexp s in + let columns = tabulate columns ~key_fn:Column.name_of in + ({name; columns; persistent} : t) + + let find name (t : t) = + match Hashtbl.find_opt t.columns name with + | Some c -> + c + | _ -> + raise (Db_exn.DBCache_NotFound ("missing column", t.name, name)) + + let create ~name ~columns ~persistent : t = + let columns = + let tbl = Hashtbl.create 64 in + List.iter (fun c -> Hashtbl.add tbl c.Column.name c) columns ; + tbl + in + {name; columns; persistent} + + let name_of t = t.name end type relationship = OneToMany of string * string * string * string [@@deriving sexp] module Database = struct - type t = {tables: Table.t list} [@@deriving sexp] + type t' = {tables: Table.t list} [@@deriving sexp] + + type t = {tables: (string, Table.t) Hashtbl.t} + + let t_of_t' : t' -> t = + fun (t' : t') -> + let ({tables} : t') = t' in + let tables = tabulate tables ~key_fn:Table.name_of in + {tables} + + let t'_of_t : t -> t' = + fun (t : t) -> + let ({tables} : t) = t in + let tables = values_of_table tables in + {tables} + + let sexp_of_t t = + let t' = t'_of_t t in + sexp_of_t' t' + + let t_of_sexp s = + let t' = t'_of_sexp s in + t_of_t' t' let find name t = - try List.find (fun tbl -> tbl.Table.name = name) t.tables - with Not_found -> - raise (Db_exn.DBCache_NotFound ("missing table", name, "")) 
+ match Hashtbl.find_opt t.tables name with + | Some tbl -> + tbl + | _ -> + raise (Db_exn.DBCache_NotFound ("missing table", name, "")) + + let of_tables tables = + let tables = tabulate tables ~key_fn:Table.name_of in + {tables} end (** indexed by table name, a list of (this field, foreign table, foreign field) *) @@ -161,7 +237,7 @@ let empty = { major_vsn= 0 ; minor_vsn= 0 - ; database= {Database.tables= []} + ; database= {Database.tables= Hashtbl.create 64} ; one_to_many= ForeignMap.empty ; many_to_many= ForeignMap.empty } @@ -174,7 +250,8 @@ let is_field_persistent schema tblname fldname = tbl.Table.persistent && col.Column.persistent let table_names schema = - List.map (fun t -> t.Table.name) (database schema).Database.tables + let tables = (database schema).Database.tables in + Hashtbl.fold (fun k _ ks -> k :: ks) tables [] let one_to_many tblname schema = (* If there is no entry in the map it means that the table has no one-to-many relationships *) diff --git a/ocaml/database/test_schemas.ml b/ocaml/database/test_schemas.ml index 1886e620732..fa2519b5f61 100644 --- a/ocaml/database/test_schemas.ml +++ b/ocaml/database/test_schemas.ml @@ -99,22 +99,35 @@ let schema = ; issetref= false } in - let vm_table = - { - Schema.Table.name= "VM" - ; columns= - [_ref; uuid; name_label; vbds; pp; name_description; tags; other_config] - ; persistent= true - } + let vm_table : Schema.Table.t = + Schema.Table.t_of_t' + { + Schema.Table.name= "VM" + ; columns= + [ + _ref + ; uuid + ; name_label + ; vbds + ; pp + ; name_description + ; tags + ; other_config + ] + ; persistent= true + } in let vbd_table = - { - Schema.Table.name= "VBD" - ; columns= [_ref; uuid; vm; type'] - ; persistent= true - } + Schema.Table.t_of_t' + { + Schema.Table.name= "VBD" + ; columns= [_ref; uuid; vm; type'] + ; persistent= true + } + in + let database = + Schema.Database.t_of_t' {Schema.Database.tables= [vm_table; vbd_table]} in - let database = {Schema.Database.tables= [vm_table; vbd_table]} in 
let one_to_many = Schema.ForeignMap.add "VBD" [("VM", "VM", "VBDs")] Schema.ForeignMap.empty in @@ -140,12 +153,16 @@ let many_to_many = in let foo_column = {bar_column with Schema.Column.name= "foos"} in let foo_table = - {Schema.Table.name= "foo"; columns= [bar_column]; persistent= true} + Schema.Table.t_of_t' + {Schema.Table.name= "foo"; columns= [bar_column]; persistent= true} in let bar_table = - {Schema.Table.name= "bar"; columns= [foo_column]; persistent= true} + Schema.Table.t_of_t' + {Schema.Table.name= "bar"; columns= [foo_column]; persistent= true} + in + let database = + Schema.Database.t_of_t' {Schema.Database.tables= [foo_table; bar_table]} in - let database = {Schema.Database.tables= [foo_table; bar_table]} in let many_to_many = Schema.ForeignMap.add "foo" [("bars", "bar", "foos")] diff --git a/ocaml/idl/datamodel_schema.ml b/ocaml/idl/datamodel_schema.ml index 32bc3a94fc4..10f20662496 100644 --- a/ocaml/idl/datamodel_schema.ml +++ b/ocaml/idl/datamodel_schema.ml @@ -85,14 +85,16 @@ let of_datamodel () = in let table obj = - { - Table.name= Escaping.escape_obj obj.Datamodel_types.name - ; columns= - _ref - :: List.map (column obj) (flatten_fields obj.Datamodel_types.contents []) - ; persistent= - obj.Datamodel_types.persist = Datamodel_types.PersistEverything - } + Table.t_of_t' + { + Table.name= Escaping.escape_obj obj.Datamodel_types.name + ; columns= + _ref + :: List.map (column obj) + (flatten_fields obj.Datamodel_types.contents []) + ; persistent= + obj.Datamodel_types.persist = Datamodel_types.PersistEverything + } in let is_one_to_many x = match Datamodel_utils.Relations.classify Datamodel.all_api x with @@ -119,7 +121,8 @@ let of_datamodel () = in let database api = - {Database.tables= List.map table (Dm_api.objects_of_api api)} + let tables = List.map table (Dm_api.objects_of_api api) in + Database.of_tables tables in { major_vsn= Datamodel_common.schema_major_vsn From 7ba0031707822332bdbbbc73cd8d2eb051be2b65 Mon Sep 17 00:00:00 2001 From: 
Pau Ruiz Safont Date: Mon, 18 Nov 2024 13:41:00 +0000 Subject: [PATCH 068/121] opam: update vhd packages' opam metadata Sync the metadata with upstream, which is not updated Signed-off-by: Pau Ruiz Safont --- dune-project | 12 ++++++------ vhd-format-lwt.opam | 12 ++++++------ vhd-format.opam | 16 ++++++++-------- vhd-format.opam.template | 16 ++++++++-------- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/dune-project b/dune-project index 4e6e0446c30..15ff4a5fbfa 100644 --- a/dune-project +++ b/dune-project @@ -464,16 +464,16 @@ This package provides an Lwt compatible interface to the library.") (homepage "https://github.com/mirage/ocaml-vhd") (source (github mirage/ocaml-vhd)) (depends - (ocaml (and (>= "4.02.3") (< "5.0.0"))) + (ocaml (>= "4.10.0")) (alcotest :with-test) - (alcotest-lwt :with-test) - bigarray-compat - (cstruct (< "6.1.0")) + (alcotest-lwt (and :with-test (>= "1.0.0"))) + (bigarray-compat (>= "1.1.0")) + (cstruct (>= "6.0.0")) cstruct-lwt (fmt :with-test) (lwt (>= "3.2.0")) - (mirage-block (>= "2.0.1")) - rresult + (mirage-block (>= "3.0.0")) + (rresult (>= "0.7.0")) (vhd-format (= :version)) (io-page (and :with-test (>= "2.4.0"))) ) diff --git a/vhd-format-lwt.opam b/vhd-format-lwt.opam index b2140a2d07e..0c8401f12b9 100644 --- a/vhd-format-lwt.opam +++ b/vhd-format-lwt.opam @@ -17,16 +17,16 @@ homepage: "https://github.com/mirage/ocaml-vhd" bug-reports: "https://github.com/mirage/ocaml-vhd/issues" depends: [ "dune" {>= "3.15"} - "ocaml" {>= "4.02.3" & < "5.0.0"} + "ocaml" {>= "4.10.0"} "alcotest" {with-test} - "alcotest-lwt" {with-test} - "bigarray-compat" - "cstruct" {< "6.1.0"} + "alcotest-lwt" {with-test & >= "1.0.0"} + "bigarray-compat" {>= "1.1.0"} + "cstruct" {>= "6.0.0"} "cstruct-lwt" "fmt" {with-test} "lwt" {>= "3.2.0"} - "mirage-block" {>= "2.0.1"} - "rresult" + "mirage-block" {>= "3.0.0"} + "rresult" {>= "0.7.0"} "vhd-format" {= version} "io-page" {with-test & >= "2.4.0"} "odoc" {with-doc} diff --git a/vhd-format.opam 
b/vhd-format.opam index 59c7d8122a8..d24732c35d5 100644 --- a/vhd-format.opam +++ b/vhd-format.opam @@ -3,12 +3,13 @@ opam-version: "2.0" name: "vhd-format" synopsis: "Pure OCaml library to read/write VHD format data" -description: """\ +description: """ A pure OCaml library to read and write [vhd](http://en.wikipedia.org/wiki/VHD_(file_format)) format data, plus a simple command-line tool which allows vhd files to be interrogated, manipulated, format-converted and streamed to and from files and remote -servers.""" +servers. +""" maintainer: "dave@recoil.org" authors: ["Dave Scott" "Jon Ludlam"] license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" @@ -17,15 +18,14 @@ homepage: "https://github.com/mirage/ocaml-vhd" doc: "https://mirage.github.io/ocaml-vhd/" bug-reports: "https://github.com/mirage/ocaml-vhd/issues" depends: [ - "ocaml" {>= "4.03.0"} - "bigarray-compat" - "cstruct" {>= "1.9" & < "6.1.0"} - "dune" {>= "3.15"} + "ocaml" {>= "4.10.0"} + "bigarray-compat" {>= "1.1.0"} + "cstruct" {>= "6.0.0"} "io-page" "rresult" {>= "0.3.0"} - "uuidm" {>= "0.9.6"} + "uuidm" {>= "0.9.9"} "stdlib-shims" - "dune" {>= "1.0"} + "dune" {>= "2.8"} "ppx_cstruct" {build & >= "3.0.0"} ] available: os = "linux" | os = "macos" diff --git a/vhd-format.opam.template b/vhd-format.opam.template index 382124b10dd..03a5a209cc1 100644 --- a/vhd-format.opam.template +++ b/vhd-format.opam.template @@ -1,12 +1,13 @@ opam-version: "2.0" name: "vhd-format" synopsis: "Pure OCaml library to read/write VHD format data" -description: """\ +description: """ A pure OCaml library to read and write [vhd](http://en.wikipedia.org/wiki/VHD_(file_format)) format data, plus a simple command-line tool which allows vhd files to be interrogated, manipulated, format-converted and streamed to and from files and remote -servers.""" +servers. 
+""" maintainer: "dave@recoil.org" authors: ["Dave Scott" "Jon Ludlam"] license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" @@ -15,15 +16,14 @@ homepage: "https://github.com/mirage/ocaml-vhd" doc: "https://mirage.github.io/ocaml-vhd/" bug-reports: "https://github.com/mirage/ocaml-vhd/issues" depends: [ - "ocaml" {>= "4.03.0"} - "bigarray-compat" - "cstruct" {>= "1.9" & < "6.1.0"} - "dune" {>= "3.15"} + "ocaml" {>= "4.10.0"} + "bigarray-compat" {>= "1.1.0"} + "cstruct" {>= "6.0.0"} "io-page" "rresult" {>= "0.3.0"} - "uuidm" {>= "0.9.6"} + "uuidm" {>= "0.9.9"} "stdlib-shims" - "dune" {>= "1.0"} + "dune" {>= "2.8"} "ppx_cstruct" {build & >= "3.0.0"} ] available: os = "linux" | os = "macos" From d87e81c2ca9fa67c65772d9267915b73a6ab008a Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 18 Nov 2024 13:57:19 +0000 Subject: [PATCH 069/121] maintenance: compatibility with cstruct 6.2.0 The function copy is being dropped because it's to_string, without labeled arguments Signed-off-by: Pau Ruiz Safont --- .../xcp-rrdd/lib/transport/base/rrd_protocol_v1.ml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v1.ml b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v1.ml index 834ddd3106b..0ecf4d5d46a 100644 --- a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v1.ml +++ b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v1.ml @@ -106,15 +106,19 @@ let parse_payload ~(json : string) : payload = let make_payload_reader () = let last_checksum = ref "" in fun cs -> - let header = Cstruct.copy cs 0 header_bytes in + let header = Cstruct.to_string cs ~off:0 ~len:header_bytes in if header <> default_header then raise Invalid_header_string ; - let length = - let length_str = "0x" ^ Cstruct.copy cs length_start length_bytes in + let len = + let length_str = + "0x" ^ Cstruct.to_string cs ~off:length_start ~len:length_bytes + in try int_of_string length_str with _ -> raise Invalid_length in - let 
checksum = Cstruct.copy cs checksum_start checksum_bytes in - let payload_string = Cstruct.copy cs payload_start length in + let checksum = + Cstruct.to_string cs ~off:checksum_start ~len:checksum_bytes + in + let payload_string = Cstruct.to_string cs ~off:payload_start ~len in if payload_string |> Digest.string |> Digest.to_hex <> checksum then raise Invalid_checksum ; if checksum = !last_checksum then From 630aeadb6730dd1b5933d550f696b8584e3a4d31 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 18 Nov 2024 17:39:10 +0000 Subject: [PATCH 070/121] CA-402326: Fetch SM records from the pool to avoid race Previously the SM feature check was done in two parts: fetch all the SM refs from the coordinator, and then fetch their information such as types and features from the coordinator based on the refs. This can cause race conditions, where the previously fetched refs might have been deleted when we fetch the SM features. The deletion might happen due to the way SM registration works for shared SRs such as GFS2, where each `PBD.plug` will trigger a deregister of existing SM and re-register (which will delete existing SMs), and create another (very likely) identical SM. To avoid this race, instead of fetching SM refs and their features separately, do this in one go so we get a consistent snapshot of the db state. Also add a bit more debugging messages. 
Signed-off-by: Vincent Liu --- ocaml/xapi/xapi_pool.ml | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index eb716ce766e..3a7dee78735 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -840,6 +840,10 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = ) in let assert_sm_features_compatible () = + debug + "%s Checking whether SM features on the joining host is compatible with \ + the pool" + __FUNCTION__ ; (* We consider the case where the existing pool has FOO/m, and the candidate having FOO/n, where n >= m, to be compatible. Not vice versa. *) let features_compatible coor_features candidate_features = @@ -847,15 +851,16 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = the other way around. *) Smint.compat_features coor_features candidate_features = coor_features in - - let master_sms = Client.SM.get_all ~rpc ~session_id in + let pool_sms = Client.SM.get_all_records ~rpc ~session_id in List.iter - (fun sm -> - let master_sm_type = Client.SM.get_type ~rpc ~session_id ~self:sm in + (fun (sm_ref, sm_rec) -> + let pool_sm_type = sm_rec.API.sM_type in + debug "%s Checking SM %s of name %s in the pool" __FUNCTION__ + (Ref.string_of sm_ref) sm_rec.sM_name_label ; let candidate_sm_ref, candidate_sm_rec = match Db.SM.get_records_where ~__context - ~expr:(Eq (Field "type", Literal master_sm_type)) + ~expr:(Eq (Field "type", Literal pool_sm_type)) with | [(sm_ref, sm_rec)] -> (sm_ref, sm_rec) @@ -864,25 +869,24 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = Api_errors.( Server_error ( pool_joining_sm_features_incompatible - , [Ref.string_of sm; ""] + , [Ref.string_of sm_ref; ""] ) ) in - let coor_sm_features = - Client.SM.get_features ~rpc ~session_id ~self:sm - in + let pool_sm_features = sm_rec.sM_features in + let candidate_sm_features = candidate_sm_rec.API.sM_features in - if not (features_compatible coor_sm_features 
candidate_sm_features) then + if not (features_compatible pool_sm_features candidate_sm_features) then raise Api_errors.( Server_error ( pool_joining_sm_features_incompatible - , [Ref.string_of sm; Ref.string_of candidate_sm_ref] + , [Ref.string_of sm_ref; Ref.string_of candidate_sm_ref] ) ) ) - master_sms + pool_sms in (* call pre-join asserts *) From 21d6773291fce4e3205dcb8fea18a2fc87c31c60 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 20 Nov 2024 09:36:20 +0000 Subject: [PATCH 071/121] Minimize xenstore accesses during domid-to-uuid lookups Bake in assumptions that have been constant ever since xenstore was created: getdomainpath always returns "/local/domain/", /local/domain/domid/vm returns "/vm/", so there's no need to look up that path to get the uuid again This reduces the number of xenstore accesses from 3 to 1 with no functional change. As suggested in: https://github.com/xapi-project/xen-api/pull/6068#pullrequestreview-2446644349 Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml | 13 +++++++++--- ocaml/xenopsd/xc/xenops_helpers.ml | 20 +++++++++++++------ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml b/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml index c7dab55ac94..bd31674a03a 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml @@ -138,9 +138,16 @@ let generate_netdev_dss () = let uuid_of_domid domid = try Xenstore.with_xs (fun xs -> - let vm = xs.Xenstore.Xs.getdomainpath domid ^ "/vm" in - let vm_dir = xs.Xenstore.Xs.read vm in - xs.Xenstore.Xs.read (vm_dir ^ "/uuid") + let vm_uuid_path = + Printf.sprintf "/local/domain/%d/vm" domid + |> xs.Xenstore.Xs.read + |> String.split_on_char '/' + in + match vm_uuid_path with + | [_; _; uuid] -> + uuid + | _ -> + raise (Invalid_argument "Incorrect xenstore node") ) with e -> fail "Failed to find uuid corresponding to domid: %d (%s)" domid diff 
--git a/ocaml/xenopsd/xc/xenops_helpers.ml b/ocaml/xenopsd/xc/xenops_helpers.ml index 602ef72d40f..383219dd602 100644 --- a/ocaml/xenopsd/xc/xenops_helpers.ml +++ b/ocaml/xenopsd/xc/xenops_helpers.ml @@ -28,12 +28,20 @@ exception Domain_not_found let uuid_of_domid ~xs domid = try - let vm = xs.Xs.getdomainpath domid ^ "/vm" in - let vm_dir = xs.Xs.read vm in - match Uuidx.of_string (xs.Xs.read (vm_dir ^ "/uuid")) with - | Some uuid -> - uuid - | None -> + let vm_uuid_path = + Printf.sprintf "/local/domain/%d/vm" domid + |> xs.Xs.read + |> String.split_on_char '/' + in + match vm_uuid_path with + | [_; _; uuid] -> ( + match Uuidx.of_string uuid with + | Some uuid -> + uuid + | None -> + raise Domain_not_found + ) + | _ -> raise Domain_not_found with _ -> raise Domain_not_found From 228071ae9896941bec3265f9fd0e45a38e6b9fa9 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 18 Nov 2024 15:47:51 +0000 Subject: [PATCH 072/121] CP-52524 - dbsync_slave: stop calculating boot time ourselves Completes the 15+ years old TODO, at the expense of removing an ultimate example of a "not invented here" attitude. Signed-off-by: Andrii Sultanov --- ocaml/xapi/dbsync_slave.ml | 27 +++++++++++++++------------ quality-gate.sh | 2 +- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 942d3081071..80793c06838 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -63,21 +63,24 @@ let create_localhost ~__context info = in () -(* TODO cat /proc/stat for btime ? *) let get_start_time () = try - debug "Calculating boot time..." ; - let now = Unix.time () in - let uptime = Unixext.string_of_file "/proc/uptime" in - let uptime = String.trim uptime in - let uptime = String.split ' ' uptime in - let uptime = List.hd uptime in - let uptime = float_of_string uptime in - let boot_time = Date.of_unix_time (now -. 
uptime) in - debug " system booted at %s" (Date.to_rfc3339 boot_time) ; - boot_time + match + Unixext.string_of_file "/proc/stat" + |> String.trim + |> String.split '\n' + |> List.find (fun s -> String.starts_with ~prefix:"btime" s) + |> String.split ' ' + with + | _ :: btime :: _ -> + let boot_time = Date.of_unix_time (float_of_string btime) in + debug "%s: system booted at %s" __FUNCTION__ (Date.to_rfc3339 boot_time) ; + boot_time + | _ -> + failwith "Couldn't parse /proc/stat" with e -> - debug "Calculating boot time failed with '%s'" (ExnHelper.string_of_exn e) ; + debug "%s: Calculating boot time failed with '%s'" __FUNCTION__ + (ExnHelper.string_of_exn e) ; Date.epoch (* not sufficient just to fill in this data on create time [Xen caps may change if VT enabled in BIOS etc.] *) diff --git a/quality-gate.sh b/quality-gate.sh index db8444b53e0..b1d170041f1 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -3,7 +3,7 @@ set -e list-hd () { - N=294 + N=293 LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$LIST_HD" -eq "$N" ]; then echo "OK counted $LIST_HD List.hd usages" From f0c9b4ce8c22302a213a64ef20d75ed78b7a06cf Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 19 Nov 2024 09:45:05 +0000 Subject: [PATCH 073/121] CP-52524: Generate an alert when various host kernel taints are set Issue an alert about a broken host kernel if bits 4, 5, 7, 9, or 14 are set in /proc/sys/kernel/tainted, indicating some kind of error was encountered and the future behaviour of the kernel might not be predictable or safe anymore (though it generally should reasonably recover). Only one alert per tainted bit per boot can be present (more than one can be issued, if the user dismissed the alerts and restarted the toolstack). 
Distinguish between Major (4,5,7 - these are all things that might cause a host crash, but are unlikely to corrupt whatever data has been written out) and Warning (9, 14 - might be a concern and could be raised to Support but usually are not severe enough to crash the host) levels of errors as suggested by the Foundations team. This should serve as an indicator during issue investigation to look for the cause of the taint. Signed-off-by: Andrii Sultanov --- ocaml/xapi-consts/api_messages.ml | 6 ++ ocaml/xapi/xapi_host.ml | 75 ++++++++++++++++++++++ ocaml/xapi/xapi_host.mli | 2 + ocaml/xapi/xapi_periodic_scheduler_init.ml | 7 ++ 4 files changed, 90 insertions(+) diff --git a/ocaml/xapi-consts/api_messages.ml b/ocaml/xapi-consts/api_messages.ml index ff436199a76..d5215415c15 100644 --- a/ocaml/xapi-consts/api_messages.ml +++ b/ocaml/xapi-consts/api_messages.ml @@ -360,6 +360,12 @@ let host_internal_certificate_expiring_07 = let failed_login_attempts = addMessage "FAILED_LOGIN_ATTEMPTS" 3L +let kernel_is_broken which = + addMessage ("HOST_KERNEL_ENCOUNTERED_ERROR_" ^ which) 2L + +let kernel_is_broken_warning which = + addMessage ("HOST_KERNEL_ENCOUNTERED_WARNING_" ^ which) 3L + let tls_verification_emergency_disabled = addMessage "TLS_VERIFICATION_EMERGENCY_DISABLED" 3L diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 9f84923fe2e..377d97d22fe 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -2923,6 +2923,81 @@ let emergency_reenable_tls_verification ~__context = Helpers.touch_file Constants.verify_certificates_path ; Db.Host.set_tls_verification_enabled ~__context ~self ~value:true +(** Issue an alert if /proc/sys/kernel/tainted indicates particular kernel + errors. 
Will send only one alert per reboot *) +let alert_if_kernel_broken = + let __context = Context.make "host_kernel_error_alert_startup_check" in + (* Only add an alert if + (a) an alert wasn't already issued for the currently booted kernel *) + let possible_alerts = + ref + ( lazy + ((* Check all the alerts since last reboot. Only done once at toolstack + startup, we track if alerts have been issued afterwards internally *) + let self = Helpers.get_localhost ~__context in + let boot_time = + Db.Host.get_other_config ~__context ~self + |> List.assoc "boot_time" + |> float_of_string + in + let all_alerts = + [ + (* processor reported a Machine Check Exception (MCE) *) + (4, Api_messages.kernel_is_broken "MCE") + ; (* bad page referenced or some unexpected page flags *) + (5, Api_messages.kernel_is_broken "BAD_PAGE") + ; (* kernel died recently, i.e. there was an OOPS or BUG *) + (7, Api_messages.kernel_is_broken "BUG") + ; (* kernel issued warning *) + (9, Api_messages.kernel_is_broken_warning "WARN") + ; (* soft lockup occurred *) + (14, Api_messages.kernel_is_broken_warning "SOFT_LOCKUP") + ] + in + all_alerts + |> List.filter (fun (_, alert_message) -> + let alert_already_issued_for_this_boot = + Helpers.call_api_functions ~__context (fun rpc session_id -> + Client.Client.Message.get_all_records ~rpc ~session_id + |> List.exists (fun (_, record) -> + record.API.message_name = fst alert_message + && API.Date.is_later + ~than:(API.Date.of_unix_time boot_time) + record.API.message_timestamp + ) + ) + in + alert_already_issued_for_this_boot + ) + ) + ) + in + (* and (b) if we found a problem *) + fun ~__context -> + let self = Helpers.get_localhost ~__context in + possible_alerts := + Lazy.from_val + (Lazy.force !possible_alerts + |> List.filter (fun (alert_bit, alert_message) -> + let is_bit_tainted = + Unixext.string_of_file "/proc/sys/kernel/tainted" + |> int_of_string + in + let is_bit_tainted = (is_bit_tainted lsr alert_bit) land 1 = 1 in + if is_bit_tainted then ( 
+ let host = Db.Host.get_name_label ~__context ~self in + let body = + Printf.sprintf "%s" host + in + Xapi_alert.add ~msg:alert_message ~cls:`Host + ~obj_uuid:(Db.Host.get_uuid ~__context ~self) + ~body ; + false (* alert issued, remove from the list *) + ) else + true (* keep in the list, alert can be issued later *) + ) + ) + let alert_if_tls_verification_was_emergency_disabled ~__context = let tls_verification_enabled_locally = Stunnel_client.get_verify_by_default () diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index c303ee69597..ec2e3b0fcfd 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -539,6 +539,8 @@ val set_numa_affinity_policy : val emergency_disable_tls_verification : __context:Context.t -> unit +val alert_if_kernel_broken : __context:Context.t -> unit + val alert_if_tls_verification_was_emergency_disabled : __context:Context.t -> unit diff --git a/ocaml/xapi/xapi_periodic_scheduler_init.ml b/ocaml/xapi/xapi_periodic_scheduler_init.ml index 5b49ebcde50..d74b349e240 100644 --- a/ocaml/xapi/xapi_periodic_scheduler_init.ml +++ b/ocaml/xapi/xapi_periodic_scheduler_init.ml @@ -106,6 +106,13 @@ let register ~__context = (Xapi_periodic_scheduler.Periodic freq) freq Xapi_pool.alert_failed_login_attempts ) ; + Xapi_periodic_scheduler.add_to_queue "broken_kernel" + (Xapi_periodic_scheduler.Periodic 600.) 600. (fun () -> + Server_helpers.exec_with_new_task + "Periodic alert if the running kernel is broken in some serious way." + (fun __context -> Xapi_host.alert_if_kernel_broken ~__context + ) + ) ; Xapi_periodic_scheduler.add_to_queue "Period alert if TLS verification emergency disabled" (Xapi_periodic_scheduler.Periodic 600.) 600. 
(fun () -> From aaabb6cad3a01a5418344d95953e176ffd7d9f8c Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 22 Nov 2024 09:14:00 +0000 Subject: [PATCH 074/121] xenopsd: Optimize lazy evaluation The manual notes that `Lazy.from_fun` "should only be used if the function f is already defined. In particular it is always less efficient to write `from_fun (fun () -> expr)` than `lazy expr`. So, replace `Lazy.from_fun` with `lazy` in this particular case Compare the lambda dump for `lazy (fun () -> [| 1;2;3 |] |> Array.map (fun x -> x+1))`: ``` (seq (let (l/269 = (function param/319[int] (apply (field_imm 12 (global Stdlib__Array!)) (function x/318[int] : int (+ x/318 1)) (makearray[int] 1 2 3)))) (setfield_ptr(root-init) 0 (global Main!) l/269)) 0) ``` with the lambda dump of the `let x = Lazy.from_fun (fun () -> [| 1;2;3 |] |> Array.map (fun x -> x+1))`: ``` (seq (let (x/269 = (apply (field_imm 5 (global Stdlib__Lazy!)) (function param/332[int] (apply (field_imm 12 (global Stdlib__Array!)) (function x/331[int] : int (+ x/331 1)) (makearray[int] 1 2 3))))) (setfield_ptr(root-init) 0 (global Main!) 
x/269)) 0) ``` See: https://patricoferris.github.io/js_of_ocamlopt/#code=bGV0IGwgPSBsYXp5IChmdW4gKCkgLT4gW3wgMTsyOzMgfF0gfD4gQXJyYXkubWFwIChmdW4geCAtPiB4KzEpKQoKbGV0IHggPSBMYXp5LmZyb21fZnVuIChmdW4gKCkgLT4gW3wgMTsyOzMgfF0gfD4gQXJyYXkubWFwIChmdW4geCAtPiB4KzEpKQ%3D%3D Signed-off-by: Andrii Sultanov --- ocaml/xenopsd/xc/domain.ml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 7b31011aabe..d33fc482e5f 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -835,12 +835,12 @@ let create_channels ~xc uuid domid = let numa_hierarchy = let open Xenctrlext in let open Topology in - Lazy.from_fun (fun () -> - let xcext = get_handle () in - let distances = (numainfo xcext).distances in - let cpu_to_node = cputopoinfo xcext |> Array.map (fun t -> t.node) in - NUMA.make ~distances ~cpu_to_node - ) + lazy + (let xcext = get_handle () in + let distances = (numainfo xcext).distances in + let cpu_to_node = cputopoinfo xcext |> Array.map (fun t -> t.node) in + NUMA.make ~distances ~cpu_to_node + ) let numa_mutex = Mutex.create () From 860843f5175e1edaeb3d7897ce931f6edd62c457 Mon Sep 17 00:00:00 2001 From: Colin James Date: Fri, 22 Nov 2024 09:26:09 +0000 Subject: [PATCH 075/121] CA-402654: Partially revert 3e2e970af A module binding appeared to be unused but was being evaluated for its effect alone. We reintroduce it here and don't bind a name. Signed-off-by: Colin James --- ocaml/xapi/rbac.ml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ocaml/xapi/rbac.ml b/ocaml/xapi/rbac.ml index 2b311a7e56d..2a8555cc9a9 100644 --- a/ocaml/xapi/rbac.ml +++ b/ocaml/xapi/rbac.ml @@ -243,6 +243,12 @@ let assert_permission_name ~__context ~permission = let assert_permission ~__context ~permission = assert_permission_name ~__context ~permission:permission.role_name_label +(* Populates assert_permission_fn on behalf of TaskHelper to + avoid a dependency cycle. 
 *)
+let () =
+  if !TaskHelper.rbac_assert_permission_fn = None then
+    TaskHelper.rbac_assert_permission_fn := Some assert_permission
+
 let has_permission_name ~__context ~permission =
   let session_id = get_session_of_context ~__context ~permission in
   is_access_allowed ~__context ~session_id ~permission

From 40834941413714b4add385844fc51f1f77feb1ba Mon Sep 17 00:00:00 2001
From: Pau Ruiz Safont
Date: Thu, 21 Nov 2024 17:59:16 +0000
Subject: [PATCH 076/121] CA-402263, xapi_sr_operations: don't include all API
 storage operations in all_ops

Otherwise the allowed_operations field in SRs can contain newly added
operations, which can break cross-pool migrations when using `xe`

Partially reverts 723a4983ab1555b24222b417350cddebbf6dc1f5

Signed-off-by: Pau Ruiz Safont
---
 ocaml/xapi/xapi_sr_operations.ml | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/ocaml/xapi/xapi_sr_operations.ml b/ocaml/xapi/xapi_sr_operations.ml
index 56f4c466ce6..eef09a7d9eb 100644
--- a/ocaml/xapi/xapi_sr_operations.ml
+++ b/ocaml/xapi/xapi_sr_operations.ml
@@ -26,7 +26,32 @@ open Client
 open Record_util
 
-let all_ops = API.storage_operations__all
+(* This is a subset of the API enumeration.
Not all values can be included
+   because older versions which don't have them are unable to migrate VMs to
+   the versions that have new fields in allowed operations *)
+let all_ops : API.storage_operations_set =
+  [
+    `scan
+  ; `destroy
+  ; `forget
+  ; `plug
+  ; `unplug
+  ; `vdi_create
+  ; `vdi_destroy
+  ; `vdi_resize
+  ; `vdi_clone
+  ; `vdi_snapshot
+  ; `vdi_mirror
+  ; `vdi_enable_cbt
+  ; `vdi_disable_cbt
+  ; `vdi_data_destroy
+  ; `vdi_list_changed_blocks
+  ; `vdi_set_on_boot
+  ; `vdi_introduce
+  ; `update
+  ; `pbd_create
+  ; `pbd_destroy
+  ]
 
 (* This list comes from https://github.com/xenserver/xen-api/blob/tampa-bugfix/ocaml/xapi/xapi_sr_operations.ml#L36-L38 *)
 let all_rpu_ops : API.storage_operations_set =

From 3f59ae15f24aaa27b3e9313bcfda856215e43262 Mon Sep 17 00:00:00 2001
From: Bernhard Kaindl
Date: Mon, 25 Nov 2024 12:00:00 +0100
Subject: [PATCH 077/121] NUMA docs: Fix typos and extend the intro for the
 best-effort mode

Signed-off-by: Bernhard Kaindl
---
 doc/content/toolstack/features/NUMA/index.md | 74 +++++++++++++------
 1 file changed, 47 insertions(+), 27 deletions(-)

diff --git a/doc/content/toolstack/features/NUMA/index.md b/doc/content/toolstack/features/NUMA/index.md
index ee7f52c98fe..5f4d3fe41e8 100644
--- a/doc/content/toolstack/features/NUMA/index.md
+++ b/doc/content/toolstack/features/NUMA/index.md
@@ -49,7 +49,7 @@ There is also I/O NUMA where a cost is similarly associated to where a PCIe is p
 NUMA does have advantages though: if each node accesses only its local memory, then each node can independently achieve maximum throughput.
-For best performance we should: +For best performance, we should: - minimize the amount of interconnect bandwidth we are using - run code that accesses memory allocated on the closest NUMA node - maximize the number of NUMA nodes that we use in the system as a whole @@ -62,39 +62,59 @@ The Xen scheduler supports 2 kinds of constraints: * hard pinning: a vCPU may only run on the specified set of pCPUs and nowhere else * soft pinning: a vCPU is *preferably* run on the specified set of pCPUs, but if they are all busy then it may run elsewhere -The former is useful if you want strict separation, but it can potentially leave part of the system idle while another part is bottlenecked with lots of vCPUs all competing for the same limited set of pCPUs. +Hard pinning can be used to partition the system. But, it can potentially leave part of the system idle while another part is bottlenecked by many vCPUs competing for the same limited set of pCPUs. -Xen does not migrate workloads between NUMA nodes on its own (the Linux kernel does), although it is possible to achieve a similar effect with explicit migration. -However migration introduces additional delays and is best avoided for entire VMs. +Xen does not migrate workloads between NUMA nodes on its own (the Linux kernel can). Although, it is possible to achieve a similar effect with explicit migration. +However, migration introduces additional delays and is best avoided for entire VMs. -The latter (soft pinning) is preferred: running a workload now, even on a potentially suboptimal pCPU (higher NUMA latency) is still better than not running it at all and waiting until a pCPU is freed up. +Therefore, soft pinning is preferred: Running on a potentially suboptimal pCPU that uses remote memory could still be better than not running it at all until a pCPU is free to run it. 
-Xen will also allocate memory for the VM according to the vCPU (soft) pinning: if the vCPUs are pinned only to NUMA nodes A and B, then it will allocate the VM's memory from NUMA nodes A and B (in a round-robin way, resulting in interleaving). +Xen will also allocate memory for the VM according to the vCPU (soft) pinning: If the vCPUs are pinned to NUMA nodes A and B, Xen allocates memory from NUMA nodes A and B in a round-robin way, resulting in interleaving. -By default (no pinning) it will interleave memory from all NUMA nodes, which provides average performance, but individual tasks' performance may be significantly higher or lower depending on which NUMA node the application may have "landed" on. -Furthermore restarting processes will speed them up or slow them down as address space randomization picks different memory regions inside a VM. +### Current default: No vCPU pinning + +By default, when no vCPU pinning is used, Xen interleaves memory from all NUMA nodes. This averages the memory performance, but individual tasks' performance may be significantly higher or lower depending on which NUMA node the application may have "landed" on. +As a result, restarting processes will speed them up or slow them down as address space randomization picks different memory regions inside a VM. + +This uses the memory bandwidth of all memory controllers and distributes the load across all nodes. +However, the memory latency is higher as the NUMA interconnects are used for most memory accesses and vCPU synchronization within the Domains. Note that this is not the worst case: the worst case would be for memory to be allocated on one NUMA node, but the vCPU always running on the furthest away NUMA node. ## Best effort NUMA-aware memory allocation for VMs -By default Xen stripes the VM's memory accross all NUMA nodes of the host, which means that every VM has to go through all the interconnects. 
+ +### Summary + +The best-effort mode attempts to fit Domains into NUMA nodes and to balance memory usage. +It soft-pins Domains on the NUMA node with the most available memory when adding the Domain. +Memory is currently allocated when booting the VM (or while constructing the resuming VM). + +Parallel boot issue: Memory is not pre-allocated on creation, but allocated during boot. +The result is that parallel VM creation and boot can exhaust the memory of NUMA nodes. + +### Goals + +By default, Xen stripes the VM's memory across all NUMA nodes of the host, which means that every VM has to go through all the interconnects. The goal here is to find a better allocation than the default, not necessarily an optimal allocation. -An optimal allocation would require knowing what VMs you would start/create in the future, and planning across hosts too. +An optimal allocation would require knowing what VMs you would start/create in the future, and planning across hosts. +This allows the host to use all NUMA nodes to take advantage of the full memory bandwidth available on the pool hosts. -Overall we want to balance the VMs across NUMA nodes, such that we use all NUMA nodes to take advantage of the maximum memory bandwidth available on the system. +Overall, we want to balance the VMs across NUMA nodes, such that we use all NUMA nodes to take advantage of the maximum memory bandwidth available on the system. For now this proposed balancing will be done only by balancing memory usage: always heuristically allocating VMs on the NUMA node that has the most available memory. -Note that this allocation has a race condition for now when multiple VMs are booted in parallel, because we don't wait until Xen has constructed the domain for each one (that'd serialize domain construction, which is currently parallel). 
+For now, this allocation has a race condition: This happens when multiple VMs are booted in parallel, because we don't wait until Xen has constructed the domain for each one (that'd serialize domain construction, which is currently parallel). This may be improved in the future by having an API to query Xen where it has allocated the memory, and to explicitly ask it to place memory on a given NUMA node (instead of best_effort). If a VM doesn't fit into a single node then it is not so clear what the best approach is. One criteria to consider is minimizing the NUMA distance between the nodes chosen for the VM. -Large NUMA systems may not be fully connected in a mesh requiring multiple hops to each a node, or even have assymetric links, or links with different bitwidth. -These tradeoff should be approximatively reflected in the ACPI SLIT tables, as a matrix of distances between nodes. +Large NUMA systems may not be fully connected in a mesh requiring multiple hops to each a node, or even have asymmetric links, or links with different bandwidth. +The specific NUMA topology is provided by the ACPI SLIT table as the matrix of distances between nodes. It is possible that 3 NUMA nodes have a smaller average/maximum distance than 2, so we need to consider all possibilities. For N nodes there would be 2^N possibilities, so [Topology.NUMA.candidates] limits the number of choices to 65520+N (full set of 2^N possibilities for 16 NUMA nodes, and a reduced set of choices for larger systems). +### Implementation + [Topology.NUMA.candidates] is a sorted sequence of node sets, in ascending order of maximum/average distances. Once we've eliminated the candidates not suitable for this VM (that do not have enough total memory/pCPUs) we are left with a monotonically increasing sequence of nodes. There are still multiple possibilities with same average distance. @@ -110,19 +130,19 @@ See page 13 in [^AMD_numa] for a diagram of an AMD Opteron 6272 system. 
* Booting multiple VMs in parallel will result in potentially allocating both on the same NUMA node (race condition) * When we're about to run out of host memory we'll fall back to striping memory again, but the soft affinity mask won't reflect that (this needs an API to query Xen on where it has actually placed the VM, so we can fix up the mask accordingly) -* XAPI is not aware of NUMA balancing across a pool, and choses hosts purely based on total amount of free memory, even if a better NUMA placement could be found on another host +* XAPI is not aware of NUMA balancing across a pool, and chooses NUMA nodes purely based on amount of free memory on the NUMA nodes of the host, even if a better NUMA placement could be found on another host * Very large (>16 NUMA nodes) systems may only explore a limited number of choices (fit into a single node vs fallback to full interleaving) * The exact VM placement is not yet controllable * Microbenchmarks with a single VM on a host show both performance improvements and regressions on memory bandwidth usage: previously a single VM may have been able to take advantage of the bandwidth of both NUMA nodes if it happened to allocate memory from the right places, whereas now it'll be forced to use just a single node. As soon as you have more than 1 VM that is busy on a system enabling NUMA balancing should almost always be an improvement though. -* it is not supported to combine hard vCPU masks with soft affinity: if hard affinities are used then no NUMA scheduling is done by the toolstack and we obey exactly what the user has asked for with hard affinities. +* It is not supported to combine hard vCPU masks with soft affinity: if hard affinities are used, then no NUMA scheduling is done by the toolstack, and we obey exactly what the user has asked for with hard affinities. This shouldn't affect other VMs since the memory used by hard-pinned VMs will still be reflected in overall less memory available on individual NUMA nodes. 
* Corner case: the ACPI standard allows certain NUMA nodes to be unreachable (distance `0xFF` = `-1` in the Xen bindings).
  This is not supported and will cause an exception to be raised.
  If this is an issue in practice the NUMA matrix could be pre-filtered to contain only reachable nodes.
-  NUMA nodes with 0 CPUs *are* accepted (it can result from hard affinity pinnings)
+  NUMA nodes with 0 CPUs *are* accepted (it can result from hard affinity pinning)
* NUMA balancing is not considered during HA planning
-* Dom0 is a single VM that needs to communicate with all other VMs, so NUMA balancing is not applied to it (we'd need to expose NUMA topology to the Dom0 kernel so it can better allocate processes)
+* Dom0 is a single VM that needs to communicate with all other VMs, so NUMA balancing is not applied to it (we'd need to expose NUMA topology to the Dom0 kernel, so it can better allocate processes)
* IO NUMA is out of scope for now

## XAPI datamodel design
@@ -139,9 +159,9 @@ Meaning of the policy:

* `best_effort`: the algorithm described in this document, where soft pinning is used to achieve better balancing and lower latency
* `default_policy`: when the admin hasn't expressed a preference
-* Currently `default_policy` is treated as `any`, but the admin can change it, and then the system will remember that change across upgrades.
+* Currently, `default_policy` is treated as `any`, but the admin can change it, and then the system will remember that change across upgrades.
  If we didn't have a `default_policy` then changing the "default" policy on an upgrade would be tricky: we either risk overriding an explicit choice of the admin, or existing installs cannot take advantage of the improved performance from `best_effort`
-* Future XAPI versions may change `default_policy` to mean `best_effort`.
+* Future XAPI versions may change `default_policy` to mean `best_effort`.
  Admins can still override it to `any` if they wish on a host by host basis.
It is not expected that users would have to change `best_effort`, unless they run very specific workloads, so a pool level control is not provided at this moment. @@ -149,7 +169,7 @@ It is not expected that users would have to change `best_effort`, unless they ru There is also no separate feature flag: this host flag acts as a feature flag that can be set through the API without restarting the toolstack. Although obviously only new VMs will benefit. -Debugging the allocator is done by running `xl vcpu-list` and investigating the soft pinning masks, and by analyzing xensource.log. +Debugging the allocator is done by running `xl vcpu-list` and investigating the soft pinning masks, and by analyzing `xensource.log`. ### Xenopsd implementation @@ -166,18 +186,18 @@ This avoids exponential state space explosion on very large systems (>16 NUMA no * [Topology.NUMA.choose] will choose one NUMA node deterministically, while trying to keep overall NUMA node usage balanced. * [Domain.numa_placement] builds a [NUMARequest] and uses the above [Topology] and [Softaffinity] functions to compute and apply a plan. -We used to have a `xenopsd.conf` configuration option to enable numa placement, for backwards compatibility this is still supported, but only if the admin hasn't set an explicit policy on the Host. +We used to have a `xenopsd.conf` configuration option to enable NUMA placement, for backwards compatibility this is still supported, but only if the admin hasn't set an explicit policy on the Host. It is best to remove the experimental `xenopsd.conf` entry though, a future version may completely drop it. Tests are in [test_topology.ml] which checks balancing properties and whether the plan has improved best/worst/average-case access times in a simulated test based on 2 predefined NUMA distance matrixes (one from Intel and one from an AMD system). 
## Future work -* enable 'best_effort' mode by default once more testing has been done -* an API to query Xen where it has actually allocated the VM's memory. - Currently only an `xl debug-keys` interface exists which is not supported in production as it can result in killing the host via the watchdog, and is not a proper API, but a textual debug output with no stability guarantees. -* more host policies (e.g. `strict`). - Requires the XAPI pool scheduler to be NUMA aware and consider it as part of chosing hosts. +* Enable 'best_effort' mode by default once more testing has been done +* Add an API to query Xen for the NUMA node memory placement (where it has actually allocated the VM's memory). + Currently, only the `xl debug-keys` interface exists which is not supported in production as it can result in killing the host via the watchdog, and is not a proper API, but a textual debug output with no stability guarantees. +* More host policies, e.g. `strict`. + Requires the XAPI pool scheduler to be NUMA aware and consider it as part of choosing hosts. * VM level policy that can set a NUMA affinity index, mapped to a NUMA node modulo NUMA nodes available on the system (this is needed so that after migration we don't end up trying to allocate vCPUs to a non-existent NUMA node) * VM level anti-affinity rules for NUMA placement (can be achieved by setting unique NUMA affinity indexes) From fc6919e193ffbd2456e030d6eff89441a24e7d43 Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 16 Oct 2024 13:58:51 +0100 Subject: [PATCH 078/121] CP-51772: Remove traceparent from Http.Request This is a breaking change to http-lib that removes the special treatment of traceparent from the Http.Request module record. In order to more easily include other tracing-related headers in future we propose that we create an aggregate data structure (named something like "trace context") which can (optionally) contain traceparent, baggage, tracestate, etc. 
and then we "inject" those into the additional_headers of the request we wish to endow with tracing-related information. On the receiving end, a similar "extract" function will perform the dual operation, extracting tracing context from the request: Generally, for some carrier, we intend to provide two operations: val inject : context -> carrier -> carrier val extract : carrier -> context The "carrier" is whatever transport is being used to propagate tracing across service boundaries. In our case, HTTP requests. Signed-off-by: Colin James --- ocaml/libs/http-lib/http.ml | 67 +++++++++++++--------------- ocaml/libs/http-lib/http.mli | 4 -- ocaml/libs/http-lib/http_svr.ml | 2 - ocaml/libs/http-lib/xmlrpc_client.ml | 15 ++++--- 4 files changed, 38 insertions(+), 50 deletions(-) diff --git a/ocaml/libs/http-lib/http.ml b/ocaml/libs/http-lib/http.ml index a19745576ce..75f8a5880b7 100644 --- a/ocaml/libs/http-lib/http.ml +++ b/ocaml/libs/http-lib/http.ml @@ -132,8 +132,6 @@ module Hdr = struct let location = "location" - let traceparent = "traceparent" - let hsts = "strict-transport-security" end @@ -522,7 +520,6 @@ module Request = struct ; mutable close: bool ; additional_headers: (string * string) list ; body: string option - ; traceparent: string option } [@@deriving rpc] @@ -546,12 +543,11 @@ module Request = struct ; close= true ; additional_headers= [] ; body= None - ; traceparent= None } let make ?(frame = false) ?(version = "1.1") ?(keep_alive = true) ?accept ?cookie ?length ?auth ?subtask_of ?body ?(headers = []) ?content_type - ?host ?(query = []) ?traceparent ~user_agent meth path = + ?host ?(query = []) ~user_agent meth path = { empty with version @@ -570,7 +566,6 @@ module Request = struct ; body ; accept ; query - ; traceparent } let get_version x = x.version @@ -582,8 +577,7 @@ module Request = struct Printf.sprintf "{ frame = %b; method = %s; uri = %s; query = [ %s ]; content_length = [ \ %s ]; transfer encoding = %s; version = %s; cookie = [ %s ]; task 
= %s; \ - subtask_of = %s; content-type = %s; host = %s; user_agent = %s; \ - traceparent = %s }" + subtask_of = %s; content-type = %s; host = %s; user_agent = %s; }" x.frame (string_of_method_t x.m) x.uri (kvpairs x.query) (Option.fold ~none:"" ~some:Int64.to_string x.content_length) (Option.value ~default:"" x.transfer_encoding) @@ -593,7 +587,6 @@ module Request = struct (Option.value ~default:"" x.content_type) (Option.value ~default:"" x.host) (Option.value ~default:"" x.user_agent) - (Option.value ~default:"" x.traceparent) let to_header_list x = let kvpairs x = @@ -643,11 +636,6 @@ module Request = struct ~some:(fun x -> [Hdr.user_agent ^ ": " ^ x]) x.user_agent in - let traceparent = - Option.fold ~none:[] - ~some:(fun x -> [Hdr.traceparent ^ ": " ^ x]) - x.traceparent - in let close = [(Hdr.connection ^ ": " ^ if x.close then "close" else "keep-alive")] in @@ -665,7 +653,6 @@ module Request = struct @ content_type @ host @ user_agent - @ traceparent @ close @ List.map (fun (k, v) -> k ^ ": " ^ v) x.additional_headers @@ -688,28 +675,34 @@ module Request = struct let frame_header = if x.frame then make_frame_header headers else "" in frame_header ^ headers ^ body - let traceparent_of req = - let open Tracing in - let ( let* ) = Option.bind in - let* traceparent = req.traceparent in - let* span_context = SpanContext.of_traceparent traceparent in - let span = Tracer.span_of_span_context span_context req.uri in - Some span - - let with_tracing ?attributes ~name req f = - let open Tracing in - let parent = traceparent_of req in - with_child_trace ?attributes parent ~name (fun (span : Span.t option) -> - match span with - | Some span -> - let traceparent = - Some (span |> Span.get_context |> SpanContext.to_traceparent) - in - let req = {req with traceparent} in - f req - | None -> - f req - ) + (* let traceparent_of req = *) + (* let open Tracing in *) + (* let ( let* ) = Option.bind in *) + (* let* traceparent = req.traceparent in *) + (* let* span_context = 
SpanContext.of_traceparent traceparent in *) + (* let span = Tracer.span_of_span_context span_context req.uri in *) + (* Some span *) + + (* let with_tracing ?attributes ~name req f = *) + (* let open Tracing in *) + (* let parent = traceparent_of req in *) + (* with_child_trace ?attributes parent ~name (fun (span : Span.t option) -> *) + (* match span with *) + (* | Some span -> *) + (* let traceparent = *) + (* Some (span |> Span.get_context |> SpanContext.to_traceparent) *) + (* in *) + (* let req = {req with traceparent} in *) + (* f req *) + (* | None -> *) + (* f req *) + (* ) *) + + let traceparent_of _ = None + + let with_tracing ?attributes ~name = + ignore (attributes, name) ; + Fun.flip ( @@ ) end module Response = struct diff --git a/ocaml/libs/http-lib/http.mli b/ocaml/libs/http-lib/http.mli index 3fbae8e4c6f..66557a76fe9 100644 --- a/ocaml/libs/http-lib/http.mli +++ b/ocaml/libs/http-lib/http.mli @@ -86,7 +86,6 @@ module Request : sig ; mutable close: bool ; additional_headers: (string * string) list ; body: string option - ; traceparent: string option } val rpc_of_t : t -> Rpc.t @@ -109,7 +108,6 @@ module Request : sig -> ?content_type:string -> ?host:string -> ?query:(string * string) list - -> ?traceparent:string -> user_agent:string -> method_t -> string @@ -229,8 +227,6 @@ module Hdr : sig val location : string - val traceparent : string - val hsts : string (** Header used for HTTP Strict Transport Security *) end diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index 54a8b96ba73..2240d811797 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -409,8 +409,6 @@ let read_request_exn ~proxy_seen ~read_timeout ~total_timeout ~max_length fd = {req with host= Some v} | k when k = Http.Hdr.user_agent -> {req with user_agent= Some v} - | k when k = Http.Hdr.traceparent -> - {req with traceparent= Some v} | k when k = Http.Hdr.connection && lowercase v = "close" -> {req with close= true} | k 
diff --git a/ocaml/libs/http-lib/xmlrpc_client.ml b/ocaml/libs/http-lib/xmlrpc_client.ml index 5bf43b0268c..f7390f8fb46 100644 --- a/ocaml/libs/http-lib/xmlrpc_client.ml +++ b/ocaml/libs/http-lib/xmlrpc_client.ml @@ -50,15 +50,16 @@ let connect ?session_id ?task_id ?subtask_of path = let xmlrpc ?frame ?version ?keep_alive ?task_id ?cookie ?length ?auth ?subtask_of ?query ?body ?(tracing = None) path = - let traceparent = - let open Tracing in - Option.map - (fun span -> Span.get_context span |> SpanContext.to_traceparent) - tracing - in + (* let traceparent = *) + (* let open Tracing in *) + (* Option.map *) + (* (fun span -> Span.get_context span |> SpanContext.to_traceparent) *) + (* tracing *) + (* in *) + ignore tracing ; let headers = Option.map (fun x -> [(Http.Hdr.task_id, x)]) task_id in Http.Request.make ~user_agent ?frame ?version ?keep_alive ?cookie ?headers - ?length ?auth ?subtask_of ?query ?body ?traceparent Http.Post path + ?length ?auth ?subtask_of ?query ?body Http.Post path (** Thrown when ECONNRESET is caught which suggests the remote crashed or restarted *) exception Connection_reset From fe66bc4f8e5f207c05603b5a711866361db76468 Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 16 Oct 2024 14:13:32 +0100 Subject: [PATCH 079/121] CP-51772: Remove external usage of traceparent Temporarily remove parts of the code that worked with requests directly, manipulating (or using) traceparents. 
Signed-off-by: Colin James --- ocaml/xapi-cli-server/xapi_cli.ml | 13 +++++++------ ocaml/xapi/context.ml | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/ocaml/xapi-cli-server/xapi_cli.ml b/ocaml/xapi-cli-server/xapi_cli.ml index bc2389d4c44..57f09e4aafc 100644 --- a/ocaml/xapi-cli-server/xapi_cli.ml +++ b/ocaml/xapi-cli-server/xapi_cli.ml @@ -190,12 +190,13 @@ let uninteresting_cmd_postfixes = ["help"; "-get"; "-list"] let exec_command req cmd s session args = let params = get_params cmd in let tracing = - Option.bind - Http.Request.(req.traceparent) - Tracing.SpanContext.of_traceparent - |> Option.map (fun span_context -> - Tracing.Tracer.span_of_span_context span_context (get_cmdname cmd) - ) + (* Option.bind *) + (* Http.Request.(req.traceparent) *) + (* Tracing.SpanContext.of_traceparent *) + (* |> Option.map (fun span_context -> *) + (* Tracing.Tracer.span_of_span_context span_context (get_cmdname cmd) *) + (* ) *) + None in let minimal = if List.mem_assoc "minimal" params then diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index 41faa238bd5..f2cb485e770 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -221,8 +221,8 @@ let parent_of_origin (origin : origin) span_name = let open Tracing in let ( let* ) = Option.bind in match origin with - | Http (req, _) -> - let* traceparent = req.Http.Request.traceparent in + | Http (_req, _) -> + let* traceparent = (* req.Http.Request.traceparent *) None in let* span_context = SpanContext.of_traceparent traceparent in let span = Tracer.span_of_span_context span_context span_name in Some span From 7b95bd6725a267833e142a528c3401db8e6ad88b Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 16 Oct 2024 15:56:41 +0100 Subject: [PATCH 080/121] CP-51772: Add TraceContext to Tracing Introduces a more general trace context record that will encapsulate the metadata of tracing that can be propagated across service boundaries. 
Signed-off-by: Colin James --- ocaml/libs/tracing/tracing.ml | 25 ++++++++++++++++++++++--- ocaml/libs/tracing/tracing.mli | 18 ++++++++++++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index 3f521f6f29c..cad2a8b2069 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -209,15 +209,34 @@ end = struct let compare = Int64.compare end +(* The context of a trace that can be propagated across service boundaries. *) +module TraceContext = struct + type traceparent = string + + type baggage = (string * string) list + + type t = {traceparent: traceparent option; baggage: baggage option} + + let empty = {traceparent= None; baggage= None} + + let with_traceparent traceparent ctx = {ctx with traceparent} + + let with_baggage baggage ctx = {ctx with baggage} + + let traceparent_of ctx = ctx.traceparent + + let baggage_of ctx = ctx.baggage +end + module SpanContext = struct type t = {trace_id: Trace_id.t; span_id: Span_id.t} [@@deriving rpcty] let context trace_id span_id = {trace_id; span_id} let to_traceparent t = - Printf.sprintf "00-%s-%s-01" - (Trace_id.to_string t.trace_id) - (Span_id.to_string t.span_id) + let tid = Trace_id.to_string t.trace_id in + let sid = Span_id.to_string t.span_id in + Printf.sprintf "00-%s-%s-01" tid sid let of_traceparent traceparent = let elements = String.split_on_char '-' traceparent in diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index 18b248cc881..5163a166002 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -78,6 +78,24 @@ module Trace_id : sig val to_string : t -> string end +module TraceContext : sig + type t + + val empty : t + + type traceparent = string + + type baggage = (string * string) list + + val with_traceparent : traceparent option -> t -> t + + val with_baggage : baggage option -> t -> t + + val traceparent_of : t -> traceparent option + + val 
baggage_of : t -> baggage option +end + module SpanContext : sig type t From e14981737948416c872caf6c99abf0959881f1cb Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 16 Oct 2024 15:58:10 +0100 Subject: [PATCH 081/121] CP-51772: Add Http Request Propagator In a new library, tracing_propagator, simple injection and extraction routines are provided for rewriting HTTP requests (Http.Request) to have trace-related information. It must be a new library as a cycle would be introduced if we attempted to make tracing depend on http-lib (as http-lib depends on tracing). Signed-off-by: Colin James --- ocaml/libs/tracing/dune | 5 ++ ocaml/libs/tracing/propagator.ml | 108 ++++++++++++++++++++++++++++++ ocaml/libs/tracing/propagator.mli | 23 +++++++ 3 files changed, 136 insertions(+) create mode 100644 ocaml/libs/tracing/propagator.ml create mode 100644 ocaml/libs/tracing/propagator.mli diff --git a/ocaml/libs/tracing/dune b/ocaml/libs/tracing/dune index 8c53962c579..71e5c7b7473 100644 --- a/ocaml/libs/tracing/dune +++ b/ocaml/libs/tracing/dune @@ -28,6 +28,11 @@ (preprocess (pps ppx_deriving_rpc))) +(library + (name tracing_propagator) + (modules propagator) + (libraries astring http-lib tracing)) + (test (name test_tracing) (modules test_tracing) diff --git a/ocaml/libs/tracing/propagator.ml b/ocaml/libs/tracing/propagator.ml new file mode 100644 index 00000000000..13c48bafce3 --- /dev/null +++ b/ocaml/libs/tracing/propagator.ml @@ -0,0 +1,108 @@ +(* + * Copyright (c) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + *) + +module type S = sig + type carrier + + val inject_into : Tracing.TraceContext.t -> carrier -> carrier + + val extract_from : carrier -> Tracing.TraceContext.t +end + +let ( let* ) = Option.bind + +let ( >> ) f g x = g (f x) + +let maybe f = function Some _ as o -> f o | _ -> Fun.id + +let[@tail_mod_cons] rec filter_append p xs ys = + match xs with + | [] -> + ys + | x :: xs when p x -> + x :: filter_append p xs ys + | _ :: xs -> + filter_append p xs ys + +module Http = struct + type carrier = Http.Request.t + + open struct + let hdr_traceparent = "traceparent" + + let hdr_baggage = "baggage" + end + + let alloc_assoc k kvs = + List.filter_map + (fun (key, value) -> if key = k then Some value else None) + kvs + |> function + | [] -> + None + | xs -> + Some xs + + let parse = + let open Astring.String in + let trim_pair (key, value) = (trim key, trim value) in + cuts ~sep:";" + >> List.map (cut ~sep:"=" >> Option.map trim_pair) + >> List.filter_map Fun.id + + let inject_into ctx req = + let open Tracing in + let traceparent = (hdr_traceparent, TraceContext.traceparent_of ctx) in + let baggage = + let encoded = + let encode = + List.map (fun (k, v) -> Printf.sprintf "%s=%s" k v) + >> String.concat ";" + in + TraceContext.baggage_of ctx |> Option.map encode + in + (hdr_baggage, encoded) + in + let entries = [traceparent; baggage] in + let filter_entries entries = + let tbl = Hashtbl.create 47 in + let record (k, v) = + match v with + | Some v -> + Hashtbl.replace tbl k () ; + Some (k, v) + | _ -> + None + in + let entries = List.filter_map record entries in + (entries, fst >> Hashtbl.mem tbl) + in + let entries, to_replace = filter_entries entries in + let headers = req.Http.Request.additional_headers in + let additional_headers = + filter_append (Fun.negate to_replace) headers entries + in + {req with additional_headers} + + let extract_from req = + let open Tracing in + let headers = 
req.Http.Request.additional_headers in + let traceparent = List.assoc_opt hdr_traceparent headers in + let baggage = + let* all = alloc_assoc hdr_baggage headers in + Some (List.concat_map parse all) + in + let open TraceContext in + empty |> maybe with_traceparent traceparent |> maybe with_baggage baggage +end diff --git a/ocaml/libs/tracing/propagator.mli b/ocaml/libs/tracing/propagator.mli new file mode 100644 index 00000000000..36780d14c86 --- /dev/null +++ b/ocaml/libs/tracing/propagator.mli @@ -0,0 +1,23 @@ +(* + * Copyright (c) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module type S = sig + type carrier + + val inject_into : Tracing.TraceContext.t -> carrier -> carrier + + val extract_from : carrier -> Tracing.TraceContext.t +end + +module Http : S with type carrier = Http.Request.t From 0d996b3e079d7affc9875ae352816e11e22309dd Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 16 Oct 2024 16:24:10 +0100 Subject: [PATCH 082/121] CP-51772: Extract traceparent back out Restore code that was previously disabled. 
Signed-off-by: Colin James --- ocaml/libs/tracing/dune | 1 + ocaml/xapi-cli-server/dune | 1 + ocaml/xapi-cli-server/xapi_cli.ml | 20 +++++++++----------- ocaml/xapi/context.ml | 5 +++-- ocaml/xapi/dune | 1 + 5 files changed, 15 insertions(+), 13 deletions(-) diff --git a/ocaml/libs/tracing/dune b/ocaml/libs/tracing/dune index 71e5c7b7473..cf28881793a 100644 --- a/ocaml/libs/tracing/dune +++ b/ocaml/libs/tracing/dune @@ -31,6 +31,7 @@ (library (name tracing_propagator) (modules propagator) + (wrapped false) (libraries astring http-lib tracing)) (test diff --git a/ocaml/xapi-cli-server/dune b/ocaml/xapi-cli-server/dune index c1a8269dbb6..2c297d1da9f 100644 --- a/ocaml/xapi-cli-server/dune +++ b/ocaml/xapi-cli-server/dune @@ -42,6 +42,7 @@ xapi-stdext-threads xapi-stdext-unix xapi-tracing + tracing_propagator xmlm xml-light2 ) diff --git a/ocaml/xapi-cli-server/xapi_cli.ml b/ocaml/xapi-cli-server/xapi_cli.ml index 57f09e4aafc..5ea0f949210 100644 --- a/ocaml/xapi-cli-server/xapi_cli.ml +++ b/ocaml/xapi-cli-server/xapi_cli.ml @@ -190,19 +190,17 @@ let uninteresting_cmd_postfixes = ["help"; "-get"; "-list"] let exec_command req cmd s session args = let params = get_params cmd in let tracing = - (* Option.bind *) - (* Http.Request.(req.traceparent) *) - (* Tracing.SpanContext.of_traceparent *) - (* |> Option.map (fun span_context -> *) - (* Tracing.Tracer.span_of_span_context span_context (get_cmdname cmd) *) - (* ) *) - None + let open Tracing in + let ( let* ) = Option.bind in + let context = Propagator.Http.extract_from req in + let* traceparent = TraceContext.traceparent_of context in + let* span_context = SpanContext.of_traceparent traceparent in + let span = Tracer.span_of_span_context span_context (get_cmdname cmd) in + Some span in let minimal = - if List.mem_assoc "minimal" params then - bool_of_string (List.assoc "minimal" params) - else - false + List.assoc_opt "minimal" params + |> Option.fold ~none:false ~some:bool_of_string in let u = try List.assoc 
"username" params with _ -> "" in let p = try List.assoc "password" params with _ -> "" in diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index f2cb485e770..efb6ee61318 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -221,8 +221,9 @@ let parent_of_origin (origin : origin) span_name = let open Tracing in let ( let* ) = Option.bind in match origin with - | Http (_req, _) -> - let* traceparent = (* req.Http.Request.traceparent *) None in + | Http (req, _) -> + let context = Propagator.Http.extract_from req in + let* traceparent = TraceContext.traceparent_of context in let* span_context = SpanContext.of_traceparent traceparent in let span = Tracer.span_of_span_context span_context span_name in Some span diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index 9f3e5f825fa..dbcb9eb284f 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -68,6 +68,7 @@ xapi_database mtime tracing + tracing_propagator uuid rpclib.core threads.posix From 069ca95e9ee84031666c2d9c679e80553e4b7ad1 Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 16 Oct 2024 17:34:43 +0100 Subject: [PATCH 083/121] CP-51772: Remove tracing dependency from http-lib The tracing library is removed as a dependency from http-lib. It is still a dependency of http-svr. This is currently a breaking change. The plan is to: - Factor out - and generalise - the helpers defined in place of removing the functions Http.Request.traceparent_of and Http.Request.with_tracing. The comment describes how this should be done: consolidate the impl into the tracing library itself and then provide inject and extract routines to do it for arbitrary carriers. - Rewrite xmlrpc to accept an arbitrary request rewriter function such as (Http.Request.t -> Http.Request.t) before it dispatches. Then, tracing can be injected in by the user. 
Signed-off-by: Colin James --- ocaml/libs/http-lib/dune | 2 +- ocaml/libs/http-lib/http.ml | 29 ---------- ocaml/libs/http-lib/http.mli | 5 -- ocaml/libs/http-lib/http_svr.ml | 81 +++++++++++++++++++++++++-- ocaml/libs/http-lib/xmlrpc_client.ml | 9 +-- ocaml/libs/http-lib/xmlrpc_client.mli | 1 - 6 files changed, 79 insertions(+), 48 deletions(-) diff --git a/ocaml/libs/http-lib/dune b/ocaml/libs/http-lib/dune index 2990fda2453..42286576aa4 100644 --- a/ocaml/libs/http-lib/dune +++ b/ocaml/libs/http-lib/dune @@ -30,7 +30,6 @@ xapi-stdext-threads xapi-stdext-unix xml-light2 - tracing ) ) @@ -46,6 +45,7 @@ polly threads.posix tracing + tracing_propagator uri xapi-log xapi-stdext-pervasives diff --git a/ocaml/libs/http-lib/http.ml b/ocaml/libs/http-lib/http.ml index 75f8a5880b7..554f3ed6217 100644 --- a/ocaml/libs/http-lib/http.ml +++ b/ocaml/libs/http-lib/http.ml @@ -674,35 +674,6 @@ module Request = struct let headers, body = to_headers_and_body x in let frame_header = if x.frame then make_frame_header headers else "" in frame_header ^ headers ^ body - - (* let traceparent_of req = *) - (* let open Tracing in *) - (* let ( let* ) = Option.bind in *) - (* let* traceparent = req.traceparent in *) - (* let* span_context = SpanContext.of_traceparent traceparent in *) - (* let span = Tracer.span_of_span_context span_context req.uri in *) - (* Some span *) - - (* let with_tracing ?attributes ~name req f = *) - (* let open Tracing in *) - (* let parent = traceparent_of req in *) - (* with_child_trace ?attributes parent ~name (fun (span : Span.t option) -> *) - (* match span with *) - (* | Some span -> *) - (* let traceparent = *) - (* Some (span |> Span.get_context |> SpanContext.to_traceparent) *) - (* in *) - (* let req = {req with traceparent} in *) - (* f req *) - (* | None -> *) - (* f req *) - (* ) *) - - let traceparent_of _ = None - - let with_tracing ?attributes ~name = - ignore (attributes, name) ; - Fun.flip ( @@ ) end module Response = struct diff --git 
a/ocaml/libs/http-lib/http.mli b/ocaml/libs/http-lib/http.mli index 66557a76fe9..13b8bcaa4fa 100644 --- a/ocaml/libs/http-lib/http.mli +++ b/ocaml/libs/http-lib/http.mli @@ -126,11 +126,6 @@ module Request : sig val to_wire_string : t -> string (** [to_wire_string t] returns a string which could be sent to a server *) - - val traceparent_of : t -> Tracing.Span.t option - - val with_tracing : - ?attributes:(string * string) list -> name:string -> t -> (t -> 'a) -> 'a end (** Parsed form of the HTTP response *) diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index 2240d811797..68ef0197d54 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -99,9 +99,82 @@ let response_of_request req hdrs = ~headers:(connection :: cache :: hdrs) "200" "OK" +(* These functions have been factored out of Http.Request. + The carrier protocol should generally have no knowledge of tracing. *) +module type HelperS = sig + val traceparent_of : Http.Request.t -> Tracing.Span.t option + + val with_tracing : + ?attributes:(string * string) list + -> name:string + -> Http.Request.t + -> (Http.Request.t -> 'a) + -> 'a +end + +module Helper : HelperS = struct + (* This code can probably be relocated into the tracing library + with the following generalisation: + + val with_tracing : + ?attributes:(string * string) list + -> name:string + -> Http.Request.t + -> (Http.Request.t -> 'a) + -> 'a + + can become: + + val with_tracing : + ?attributes:(string * string) list + -> inject: (TraceContext.t -> 'carrier -> 'carrier) + -> extract: ('carrier -> TraceContext.t) + -> name:string + -> 'carrier + -> ('carrier -> 'a) + -> 'a + + Can possibly pass a first-class module, but there may be a dependency cycle, + so functions are a more universal interface. + *) + + let traceparent_of req = + (* TODO: The extracted TraceContext must be propagated through the + spans. 
Simple approach is to add it to the SpanContext, and then + inherit it properly (substituting/creating only identity-related). *) + let open Tracing in + let ( let* ) = Option.bind in + let trace_context = Propagator.Http.extract_from req in + let* parent = TraceContext.traceparent_of trace_context in + let* span_context = SpanContext.of_traceparent parent in + Some (Tracer.span_of_span_context span_context req.uri) + + let with_tracing ?attributes ~name req f = + ignore (attributes, name) ; + let open Tracing in + let trace_context = Propagator.Http.extract_from req in + let parent = traceparent_of req in + let continue_with_child = function + | Some child -> + (* Here, "traceparent" is terminology for the [version-trace_id-span_id-flags] structure. + Therefore, the purpose of the code below is to decorate the request with the derived (child) span's ID. + This function only gets called if parent is not None. *) + let span_context = Span.get_context child in + let traceparent = SpanContext.to_traceparent span_context in + let trace_context' = + TraceContext.with_traceparent (Some traceparent) trace_context + in + let req' = Propagator.Http.inject_into trace_context' req in + f req' + | _ -> + f req + in + with_child_trace ?attributes parent ~name continue_with_child +end + let response_fct req ?(hdrs = []) s (response_length : int64) (write_response_to_fd_fn : Unix.file_descr -> unit) = - let@ req = Http.Request.with_tracing ~name:__FUNCTION__ req in + let@ req = Helper.with_tracing ~name:__FUNCTION__ req in let res = { (response_of_request req hdrs) with @@ -445,7 +518,7 @@ let read_request ?proxy_seen ~read_timeout ~total_timeout ~max_length fd = let r, proxy = read_request_exn ~proxy_seen ~read_timeout ~total_timeout ~max_length fd in - let parent_span = Http.Request.traceparent_of r in + let parent_span = Helper.traceparent_of r in let loop_span = Option.fold ~none:None ~some:(fun span -> @@ -489,8 +562,8 @@ let read_request ?proxy_seen ~read_timeout 
~total_timeout ~max_length fd = (None, None) let handle_one (x : 'a Server.t) ss context req = - let@ req = Http.Request.with_tracing ~name:__FUNCTION__ req in - let span = Http.Request.traceparent_of req in + let@ req = Helper.with_tracing ~name:__FUNCTION__ req in + let span = Helper.traceparent_of req in let finished = ref false in try D.debug "Request %s" (Http.Request.to_string req) ; diff --git a/ocaml/libs/http-lib/xmlrpc_client.ml b/ocaml/libs/http-lib/xmlrpc_client.ml index f7390f8fb46..e23ccd69f73 100644 --- a/ocaml/libs/http-lib/xmlrpc_client.ml +++ b/ocaml/libs/http-lib/xmlrpc_client.ml @@ -49,14 +49,7 @@ let connect ?session_id ?task_id ?subtask_of path = ?subtask_of Http.Connect path let xmlrpc ?frame ?version ?keep_alive ?task_id ?cookie ?length ?auth - ?subtask_of ?query ?body ?(tracing = None) path = - (* let traceparent = *) - (* let open Tracing in *) - (* Option.map *) - (* (fun span -> Span.get_context span |> SpanContext.to_traceparent) *) - (* tracing *) - (* in *) - ignore tracing ; + ?subtask_of ?query ?body path = let headers = Option.map (fun x -> [(Http.Hdr.task_id, x)]) task_id in Http.Request.make ~user_agent ?frame ?version ?keep_alive ?cookie ?headers ?length ?auth ?subtask_of ?query ?body Http.Post path diff --git a/ocaml/libs/http-lib/xmlrpc_client.mli b/ocaml/libs/http-lib/xmlrpc_client.mli index 00d77b45937..52fb074db50 100644 --- a/ocaml/libs/http-lib/xmlrpc_client.mli +++ b/ocaml/libs/http-lib/xmlrpc_client.mli @@ -72,7 +72,6 @@ val xmlrpc : -> ?subtask_of:string -> ?query:(string * string) list -> ?body:string - -> ?tracing:Tracing.Span.t option -> string -> Http.Request.t (** Returns an HTTP.Request.t representing an XMLRPC request *) From c4962a31874b9009c57e3344354e31339aa078ee Mon Sep 17 00:00:00 2001 From: Colin James Date: Thu, 17 Oct 2024 09:20:51 +0100 Subject: [PATCH 084/121] CP-51772: Consolidate propagation into tracing lib Generalise the pattern used in http_svr.ml into a functor in the tracing library. 
In particular, so long as you can provide a way to name a carrier (to endow its child trace with), inject tracing context into a carrier, and extract trace context into a carrier, you can use this generic pattern to propagate tracing across arbitrary carriers (where the derived span is exported, but the incoming one is not). To this end, we factor it out of http_svr.ml and redefine the helper module used there in terms of one constructed from the new tracing propagator functor. The tracing_propagator library is used to provide definitions in the input module for defining trace propagation across HTTP headers. Signed-off-by: Colin James --- ocaml/libs/http-lib/http_svr.ml | 75 +++------------------------------ ocaml/libs/tracing/tracing.ml | 60 ++++++++++++++++++++++++++ ocaml/libs/tracing/tracing.mli | 30 +++++++++++++ 3 files changed, 95 insertions(+), 70 deletions(-) diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index 68ef0197d54..c4f7be5460c 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -99,77 +99,12 @@ let response_of_request req hdrs = ~headers:(connection :: cache :: hdrs) "200" "OK" -(* These functions have been factored out of Http.Request. - The carrier protocol should generally have no knowledge of tracing. 
*) -module type HelperS = sig - val traceparent_of : Http.Request.t -> Tracing.Span.t option - - val with_tracing : - ?attributes:(string * string) list - -> name:string - -> Http.Request.t - -> (Http.Request.t -> 'a) - -> 'a -end +module Helper = struct + include Tracing.Propagator.Make (struct + include Propagator.Http -module Helper : HelperS = struct - (* This code can probably be relocated into the tracing library - with the following generalisation: - - val with_tracing : - ?attributes:(string * string) list - -> name:string - -> Http.Request.t - -> (Http.Request.t -> 'a) - -> 'a - - can become: - - val with_tracing : - ?attributes:(string * string) list - -> inject: (TraceContext.t -> 'carrier -> 'carrier) - -> extract: ('carrier -> TraceContext.t) - -> name:string - -> 'carrier - -> ('carrier -> 'a) - -> 'a - - Can possibly pass a first-class module, but there may be a dependency cycle, - so functions are a more universal interface. - *) - - let traceparent_of req = - (* TODO: The extracted TraceContext must be propagated through the - spans. Simple approach is to add it to the SpanContext, and then - inherit it properly (substituting/creating only identity-related). *) - let open Tracing in - let ( let* ) = Option.bind in - let trace_context = Propagator.Http.extract_from req in - let* parent = TraceContext.traceparent_of trace_context in - let* span_context = SpanContext.of_traceparent parent in - Some (Tracer.span_of_span_context span_context req.uri) - - let with_tracing ?attributes ~name req f = - ignore (attributes, name) ; - let open Tracing in - let trace_context = Propagator.Http.extract_from req in - let parent = traceparent_of req in - let continue_with_child = function - | Some child -> - (* Here, "traceparent" is terminology for the [version-trace_id-span_id-flags] structure. - Therefore, the purpose of the code below is to decorate the request with the derived (child) span's ID. - This function only gets called if parent is not None. 
*) - let span_context = Span.get_context child in - let traceparent = SpanContext.to_traceparent span_context in - let trace_context' = - TraceContext.with_traceparent (Some traceparent) trace_context - in - let req' = Propagator.Http.inject_into trace_context' req in - f req' - | _ -> - f req - in - with_child_trace ?attributes parent ~name continue_with_child + let name_span req = req.Http.Request.uri + end) end let response_fct req ?(hdrs = []) s (response_length : int64) diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index cad2a8b2069..d0adde3e776 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -788,3 +788,63 @@ module EnvHelpers = struct Some (span |> Span.get_context |> SpanContext.to_traceparent) |> of_traceparent end + +module Propagator = struct + module type S = sig + type carrier + + val traceparent_of : carrier -> Span.t option + + val with_tracing : + ?attributes:(string * string) list + -> name:string + -> carrier + -> (carrier -> 'a) + -> 'a + end + + module type PropS = sig + type carrier + + val inject_into : TraceContext.t -> carrier -> carrier + + val extract_from : carrier -> TraceContext.t + + val name_span : carrier -> string + end + + module Make (P : PropS) : S with type carrier = P.carrier = struct + type carrier = P.carrier + + let traceparent_of carrier = + (* TODO: The extracted TraceContext must be propagated through the + spans. Simple approach is to add it to the SpanContext, and then + inherit it properly (substituting/creating only identity-related). 
*) + let ( let* ) = Option.bind in + let trace_context = P.extract_from carrier in + let* parent = TraceContext.traceparent_of trace_context in + let* span_context = SpanContext.of_traceparent parent in + let name = P.name_span carrier in + Some (Tracer.span_of_span_context span_context name) + + let with_tracing ?attributes ~name carrier f = + let trace_context = P.extract_from carrier in + let parent = traceparent_of carrier in + let continue_with_child = function + | Some child -> + (* Here, "traceparent" is terminology for the [version-trace_id-span_id-flags] structure. + Therefore, the purpose of the code below is to decorate the request with the derived (child) span's ID. + This function only gets called if parent is not None. *) + let span_context = Span.get_context child in + let traceparent = SpanContext.to_traceparent span_context in + let trace_context' = + TraceContext.with_traceparent (Some traceparent) trace_context + in + let carrier' = P.inject_into trace_context' carrier in + f carrier' + | _ -> + f carrier + in + with_child_trace ?attributes parent ~name continue_with_child + end +end diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index 5163a166002..e2d8c8d947d 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -297,3 +297,33 @@ module EnvHelpers : sig If [span] is [None], it returns an empty list. *) end + +(** [Propagator] is a utility module for creating trace propagators over arbitrary carriers. *) +module Propagator : sig + module type S = sig + type carrier + + val traceparent_of : carrier -> Span.t option + (** [traceparent_of carrier] creates a span whose context is that encoded within the [carrier] input. + If there is no traceparent encoded within the carrier, the function returns [None]. 
*) + + val with_tracing : + ?attributes:(string * string) list + -> name:string + -> carrier + -> (carrier -> 'a) + -> 'a + end + + module type PropS = sig + type carrier + + val inject_into : TraceContext.t -> carrier -> carrier + + val extract_from : carrier -> TraceContext.t + + val name_span : carrier -> string + end + + module Make : functor (P : PropS) -> S with type carrier = P.carrier +end From 6dad697a21691e258815f86840609b7a4f672b58 Mon Sep 17 00:00:00 2001 From: Colin James Date: Thu, 17 Oct 2024 09:47:38 +0100 Subject: [PATCH 085/121] CP-51772: Repair xapi-cli-server's tracing Use trace propagators to endow the Http.Request.t used for XML-RPC requests with in-service information, in order to propagate it. As before, the parent span is named "xe " and its span-id is what makes its way into the request. If the endpoint receiving the request wishes to, they can derive subsequent in-service tracing, rooted from this, by using with_tracing as define in Tracing's Propagator. Signed-off-by: Colin James --- ocaml/xapi-cli-server/xapi_cli.ml | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi-cli-server/xapi_cli.ml b/ocaml/xapi-cli-server/xapi_cli.ml index 5ea0f949210..21950bd2618 100644 --- a/ocaml/xapi-cli-server/xapi_cli.ml +++ b/ocaml/xapi-cli-server/xapi_cli.ml @@ -121,6 +121,21 @@ let with_session ~local rpc u p session f = (fun () -> f session) (fun () -> do_logout ()) +module TraceHelper = struct + include Tracing.Propagator.Make (struct + include Propagator.Http + + let name_span req = req.Http.Request.uri + end) + + let inject_span_into_req (span : Tracing.Span.t option) = + let module T = Tracing in + let span_context = Option.map T.Span.get_context span in + let traceparent = Option.map T.SpanContext.to_traceparent span_context in + let trace_context = T.TraceContext.(with_traceparent traceparent empty) in + Propagator.Http.inject_into trace_context +end + let do_rpcs _req s username password minimal 
cmd session args tracing = let cmdname = get_cmdname cmd in let cspec = @@ -137,9 +152,9 @@ let do_rpcs _req s username password minimal cmd session args tracing = try let generic_rpc = get_rpc () in (* NB the request we've received is for the /cli. We need an XMLRPC request for the API *) - Tracing.with_tracing ~parent:tracing ~name:("xe " ^ cmdname) - @@ fun tracing -> - let req = Xmlrpc_client.xmlrpc ~version:"1.1" ~tracing "/" in + Tracing.with_tracing ~parent:tracing ~name:("xe " ^ cmdname) @@ fun span -> + let req = Xmlrpc_client.xmlrpc ~version:"1.1" "/" in + let req = TraceHelper.inject_span_into_req span req in let rpc = generic_rpc req s in if do_forward then with_session ~local:false rpc username password session (fun sess -> @@ -190,9 +205,9 @@ let uninteresting_cmd_postfixes = ["help"; "-get"; "-list"] let exec_command req cmd s session args = let params = get_params cmd in let tracing = - let open Tracing in let ( let* ) = Option.bind in let context = Propagator.Http.extract_from req in + let open Tracing in let* traceparent = TraceContext.traceparent_of context in let* span_context = SpanContext.of_traceparent traceparent in let span = Tracer.span_of_span_context span_context (get_cmdname cmd) in From 673525e2945416c4da10279b18bd0fa3be1e2546 Mon Sep 17 00:00:00 2001 From: Colin James Date: Thu, 17 Oct 2024 12:34:36 +0100 Subject: [PATCH 086/121] CP-51772: Repair tracing in xapi Signed-off-by: Colin James --- ocaml/xapi/api_server.ml | 18 +++++++++++++----- ocaml/xapi/context.ml | 2 +- ocaml/xapi/dune | 2 ++ ocaml/xapi/helpers.ml | 18 +++++++++++++++--- ocaml/xapi/message_forwarding.ml | 10 ++++------ ocaml/xapi/server_helpers.ml | 10 +++++++++- ocaml/xapi/system_domains.ml | 3 ++- ocaml/xapi/xapi_pool.ml | 3 ++- 8 files changed, 48 insertions(+), 18 deletions(-) diff --git a/ocaml/xapi/api_server.ml b/ocaml/xapi/api_server.ml index 35cb14103e3..d79d2f659e7 100644 --- a/ocaml/xapi/api_server.ml +++ b/ocaml/xapi/api_server.ml @@ -3,9 +3,17 @@ module 
Server = Server.Make (Actions) (Forwarder) let ( let@ ) f x = f x +module Helper = struct + include Tracing.Propagator.Make (struct + include Propagator.Http + + let name_span req = req.Http.Request.uri + end) +end + (* This bit is called directly by the fake_rpc callback *) let callback1 ?(json_rpc_version = Jsonrpc.V1) is_json req fd call = - let@ req = Http.Request.with_tracing ~name:__FUNCTION__ req in + let@ req = Helper.with_tracing ~name:__FUNCTION__ req in (* We now have the body string, the xml and the call name, and can also tell *) (* if we're a master or slave and whether the call came in on the unix domain socket or the tcp socket *) (* If we're a slave, and the call is from the unix domain socket or from the HIMN, and the call *isn't* *) @@ -24,7 +32,7 @@ let callback1 ?(json_rpc_version = Jsonrpc.V1) is_json req fd call = forward req call is_json else let response = - let@ req = Http.Request.with_tracing ~name:"Server.dispatch_call" req in + let@ req = Helper.with_tracing ~name:"Server.dispatch_call" req in Server.dispatch_call req fd call in let translated = @@ -91,8 +99,8 @@ let create_thumbprint_header req response = (** HTML callback that dispatches an RPC and returns the response. *) let callback is_json req fd _ = - let@ req = Http.Request.with_tracing ~name:__FUNCTION__ req in - let span = Http.Request.traceparent_of req in + let@ req = Helper.with_tracing ~name:__FUNCTION__ req in + let span = Helper.traceparent_of req in (* fd only used for writing *) let body = Http_svr.read_body ~limit:Constants.http_limit_max_rpc_size req fd @@ -145,7 +153,7 @@ let callback is_json req fd _ = (** HTML callback that dispatches an RPC and returns the response. 
*) let jsoncallback req fd _ = - let@ req = Http.Request.with_tracing ~name:__FUNCTION__ req in + let@ req = Helper.with_tracing ~name:__FUNCTION__ req in (* fd only used for writing *) let body = Http_svr.read_body ~limit:Xapi_database.Db_globs.http_limit_max_rpc_size req diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index efb6ee61318..5df47bd2a57 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -218,11 +218,11 @@ let span_kind_of_parent parent = Option.fold ~none:SpanKind.Internal ~some:(fun _ -> SpanKind.Server) parent let parent_of_origin (origin : origin) span_name = - let open Tracing in let ( let* ) = Option.bind in match origin with | Http (req, _) -> let context = Propagator.Http.extract_from req in + let open Tracing in let* traceparent = TraceContext.traceparent_of context in let* span_context = SpanContext.of_traceparent traceparent in let span = Tracer.span_of_span_context span_context span_name in diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index dbcb9eb284f..5602e62d152 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -154,6 +154,7 @@ tar-unix threads.posix tracing + tracing_propagator unixpwd uri uuid @@ -240,6 +241,7 @@ stunnel threads.posix tracing + tracing_propagator xapi-backtrace xapi-client xapi-consts diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 30965068f3f..f2f4f9747a1 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -387,6 +387,15 @@ let update_pif_addresses ~__context = Option.iter (fun (pif, bridge) -> set_DNS ~__context ~pif ~bridge) dns_if ; List.iter (fun self -> update_pif_address ~__context ~self) pifs +module TraceHelper = struct + let inject_span_into_req (span : Tracing.Span.t option) = + let module T = Tracing in + let span_context = Option.map T.Span.get_context span in + let traceparent = Option.map T.SpanContext.to_traceparent span_context in + let trace_context = T.TraceContext.(with_traceparent traceparent empty) in + Propagator.Http.inject_into 
trace_context +end + (* Note that both this and `make_timeboxed_rpc` are almost always * partially applied, returning a function of type 'Rpc.request -> Rpc.response'. * The body is therefore not evaluated until the RPC call is actually being @@ -395,7 +404,8 @@ let make_rpc ~__context rpc : Rpc.response = let subtask_of = Ref.string_of (Context.get_task_id __context) in let open Xmlrpc_client in let tracing = Context.set_client_span __context in - let http = xmlrpc ~subtask_of ~version:"1.1" "/" ~tracing in + let http = xmlrpc ~subtask_of ~version:"1.1" "/" in + let http = TraceHelper.inject_span_into_req tracing http in let transport = if Pool_role.is_master () then Unix Xapi_globs.unix_domain_socket @@ -418,7 +428,8 @@ let make_timeboxed_rpc ~__context timeout rpc : Rpc.response = * the task has acquired we make a new one specifically for the stunnel pid *) let open Xmlrpc_client in let tracing = Context.set_client_span __context in - let http = xmlrpc ~subtask_of ~version:"1.1" ~tracing "/" in + let http = xmlrpc ~subtask_of ~version:"1.1" "/" in + let http = TraceHelper.inject_span_into_req tracing http in let task_id = Context.get_task_id __context in let cancel () = let resources = @@ -486,7 +497,8 @@ let make_remote_rpc ?(verify_cert = Stunnel_client.pool ()) ~__context SSL (SSL.make ~verify_cert (), remote_address, !Constants.https_port) in let tracing = Context.tracing_of __context in - let http = xmlrpc ~version:"1.0" ~tracing "/" in + let http = xmlrpc ~version:"1.0" "/" in + let http = TraceHelper.inject_span_into_req tracing http in XMLRPC_protocol.rpc ~srcstr:"xapi" ~dststr:"remote_xapi" ~transport ~http xml (* Helper type for an object which may or may not be in the local database. 
*) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index cb0b82aa7fd..6423e8d7be3 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -60,9 +60,8 @@ let remote_rpc_no_retry _context hostname (task_opt : API.ref_task option) xml = in let tracing = Context.set_client_span _context in let http = - xmlrpc - ?task_id:(Option.map Ref.string_of task_opt) - ~version:"1.0" ~tracing "/" + xmlrpc ?task_id:(Option.map Ref.string_of task_opt) ~version:"1.0" "/" + |> Helpers.TraceHelper.inject_span_into_req tracing in XMLRPC_protocol.rpc ~srcstr:"xapi" ~dststr:"dst_xapi" ~transport ~http xml @@ -80,9 +79,8 @@ let remote_rpc_retry _context hostname (task_opt : API.ref_task option) xml = in let tracing = Context.set_client_span _context in let http = - xmlrpc - ?task_id:(Option.map Ref.string_of task_opt) - ~version:"1.1" ~tracing "/" + xmlrpc ?task_id:(Option.map Ref.string_of task_opt) ~version:"1.1" "/" + |> Helpers.TraceHelper.inject_span_into_req tracing in XMLRPC_protocol.rpc ~srcstr:"xapi" ~dststr:"dst_xapi" ~transport ~http xml diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index e4952769c2f..ef50491c518 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -119,10 +119,18 @@ let dispatch_exn_wrapper f = let code, params = ExnHelper.error_of_exn exn in API.response_of_failure code params +module Helper = struct + include Tracing.Propagator.Make (struct + include Propagator.Http + + let name_span req = req.Http.Request.uri + end) +end + let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name op_fn marshaller fd http_req label sync_ty generate_task_for = (* if the call has been forwarded to us, then they are responsible for completing the task, so we don't need to complete it *) - let@ http_req = Http.Request.with_tracing ~name:__FUNCTION__ http_req in + let@ http_req = Helper.with_tracing ~name:__FUNCTION__ http_req in let 
called_async = sync_ty <> `Sync in if called_async && not supports_async then API.response_of_fault diff --git a/ocaml/xapi/system_domains.ml b/ocaml/xapi/system_domains.ml index 5fb394605b1..0453c205566 100644 --- a/ocaml/xapi/system_domains.ml +++ b/ocaml/xapi/system_domains.ml @@ -181,7 +181,8 @@ let pingable ip () = let queryable ~__context transport () = let open Xmlrpc_client in let tracing = Context.set_client_span __context in - let http = xmlrpc ~version:"1.0" ~tracing "/" in + let http = xmlrpc ~version:"1.0" "/" in + let http = Helpers.TraceHelper.inject_span_into_req tracing http in let rpc = XMLRPC_protocol.rpc ~srcstr:"xapi" ~dststr:"remote_smapiv2" ~transport ~http in diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 3a7dee78735..dd4bca70e26 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -3406,7 +3406,8 @@ let perform ~local_fn ~__context ~host op = let verify_cert = Some Stunnel.pool (* verify! *) in let task_id = Option.map Ref.string_of task_opt in let tracing = Context.set_client_span __context in - let http = xmlrpc ?task_id ~version:"1.0" ~tracing "/" in + let http = xmlrpc ?task_id ~version:"1.0" "/" in + let http = Helpers.TraceHelper.inject_span_into_req tracing http in let port = !Constants.https_port in let transport = SSL (SSL.make ~verify_cert ?task_id (), hostname, port) in XMLRPC_protocol.rpc ~srcstr:"xapi" ~dststr:"dst_xapi" ~transport ~http xml From 8e20e3e68f19e806e82e5b40872eceb8e1a6e886 Mon Sep 17 00:00:00 2001 From: Colin James Date: Mon, 21 Oct 2024 08:37:11 +0100 Subject: [PATCH 087/121] Restructuring - Wrap tracing_propagator library - Drop point-free style in parse Signed-off-by: Colin James --- ocaml/libs/http-lib/http_svr.ml | 2 +- ocaml/libs/tracing/dune | 1 - ocaml/libs/tracing/propagator.ml | 9 +++++---- ocaml/xapi-cli-server/xapi_cli.ml | 14 +++++++------- ocaml/xapi/api_server.ml | 2 +- ocaml/xapi/context.ml | 2 +- ocaml/xapi/helpers.ml | 10 +++++----- 
ocaml/xapi/server_helpers.ml | 2 +- 8 files changed, 21 insertions(+), 21 deletions(-) diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index c4f7be5460c..a7d52b23a31 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -101,7 +101,7 @@ let response_of_request req hdrs = module Helper = struct include Tracing.Propagator.Make (struct - include Propagator.Http + include Tracing_propagator.Propagator.Http let name_span req = req.Http.Request.uri end) diff --git a/ocaml/libs/tracing/dune b/ocaml/libs/tracing/dune index cf28881793a..71e5c7b7473 100644 --- a/ocaml/libs/tracing/dune +++ b/ocaml/libs/tracing/dune @@ -31,7 +31,6 @@ (library (name tracing_propagator) (modules propagator) - (wrapped false) (libraries astring http-lib tracing)) (test diff --git a/ocaml/libs/tracing/propagator.ml b/ocaml/libs/tracing/propagator.ml index 13c48bafce3..babd0c90476 100644 --- a/ocaml/libs/tracing/propagator.ml +++ b/ocaml/libs/tracing/propagator.ml @@ -54,12 +54,13 @@ module Http = struct | xs -> Some xs - let parse = + let parse input = let open Astring.String in let trim_pair (key, value) = (trim key, trim value) in - cuts ~sep:";" - >> List.map (cut ~sep:"=" >> Option.map trim_pair) - >> List.filter_map Fun.id + input + |> cuts ~sep:";" + |> List.map (cut ~sep:"=" >> Option.map trim_pair) + |> List.filter_map Fun.id let inject_into ctx req = let open Tracing in diff --git a/ocaml/xapi-cli-server/xapi_cli.ml b/ocaml/xapi-cli-server/xapi_cli.ml index 21950bd2618..a38115fd831 100644 --- a/ocaml/xapi-cli-server/xapi_cli.ml +++ b/ocaml/xapi-cli-server/xapi_cli.ml @@ -123,17 +123,17 @@ let with_session ~local rpc u p session f = module TraceHelper = struct include Tracing.Propagator.Make (struct - include Propagator.Http + include Tracing_propagator.Propagator.Http let name_span req = req.Http.Request.uri end) let inject_span_into_req (span : Tracing.Span.t option) = - let module T = Tracing in - let span_context = 
Option.map T.Span.get_context span in - let traceparent = Option.map T.SpanContext.to_traceparent span_context in - let trace_context = T.TraceContext.(with_traceparent traceparent empty) in - Propagator.Http.inject_into trace_context + let open Tracing in + let span_context = Option.map Span.get_context span in + let traceparent = Option.map SpanContext.to_traceparent span_context in + let trace_context = TraceContext.(with_traceparent traceparent empty) in + Tracing_propagator.Propagator.Http.inject_into trace_context end let do_rpcs _req s username password minimal cmd session args tracing = @@ -206,8 +206,8 @@ let exec_command req cmd s session args = let params = get_params cmd in let tracing = let ( let* ) = Option.bind in - let context = Propagator.Http.extract_from req in let open Tracing in + let context = Tracing_propagator.Propagator.Http.extract_from req in let* traceparent = TraceContext.traceparent_of context in let* span_context = SpanContext.of_traceparent traceparent in let span = Tracer.span_of_span_context span_context (get_cmdname cmd) in diff --git a/ocaml/xapi/api_server.ml b/ocaml/xapi/api_server.ml index d79d2f659e7..e6864bd80e1 100644 --- a/ocaml/xapi/api_server.ml +++ b/ocaml/xapi/api_server.ml @@ -5,7 +5,7 @@ let ( let@ ) f x = f x module Helper = struct include Tracing.Propagator.Make (struct - include Propagator.Http + include Tracing_propagator.Propagator.Http let name_span req = req.Http.Request.uri end) diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index 5df47bd2a57..56829d37d75 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -221,7 +221,7 @@ let parent_of_origin (origin : origin) span_name = let ( let* ) = Option.bind in match origin with | Http (req, _) -> - let context = Propagator.Http.extract_from req in + let context = Tracing_propagator.Propagator.Http.extract_from req in let open Tracing in let* traceparent = TraceContext.traceparent_of context in let* span_context = SpanContext.of_traceparent 
traceparent in diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index f2f4f9747a1..d75c4dce1c9 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -389,11 +389,11 @@ let update_pif_addresses ~__context = module TraceHelper = struct let inject_span_into_req (span : Tracing.Span.t option) = - let module T = Tracing in - let span_context = Option.map T.Span.get_context span in - let traceparent = Option.map T.SpanContext.to_traceparent span_context in - let trace_context = T.TraceContext.(with_traceparent traceparent empty) in - Propagator.Http.inject_into trace_context + let open Tracing in + let span_context = Option.map Span.get_context span in + let traceparent = Option.map SpanContext.to_traceparent span_context in + let trace_context = TraceContext.(with_traceparent traceparent empty) in + Tracing_propagator.Propagator.Http.inject_into trace_context end (* Note that both this and `make_timeboxed_rpc` are almost always diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index ef50491c518..1e8261b38f1 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -121,7 +121,7 @@ let dispatch_exn_wrapper f = module Helper = struct include Tracing.Propagator.Make (struct - include Propagator.Http + include Tracing_propagator.Propagator.Http let name_span req = req.Http.Request.uri end) From 4eb7185c0150010888f4135c3cfb7bfab06f2381 Mon Sep 17 00:00:00 2001 From: Colin James Date: Tue, 22 Oct 2024 09:05:39 +0100 Subject: [PATCH 088/121] CP-51772: Forward baggage from xe-cli If baggage is present in the environment, it will be sent to xapi-cli-server. 
Signed-off-by: Colin James --- ocaml/xe-cli/newcli.ml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ocaml/xe-cli/newcli.ml b/ocaml/xe-cli/newcli.ml index 56279d6a324..bb3a40d74de 100644 --- a/ocaml/xe-cli/newcli.ml +++ b/ocaml/xe-cli/newcli.ml @@ -817,6 +817,9 @@ let main () = let args = String.concat "\n" args in Printf.fprintf oc "User-agent: xe-cli/Unix/%d.%d\r\n" major minor ; Option.iter (Printf.fprintf oc "traceparent: %s\r\n") traceparent ; + Option.iter + (Printf.fprintf oc "baggage: %s\r\n") + (Sys.getenv_opt "BAGGAGE") ; Printf.fprintf oc "content-length: %d\r\n\r\n" (String.length args) ; Printf.fprintf oc "%s" args ; flush_all () ; From c5fe9baa5ad91974928639ca2454d46c215198f7 Mon Sep 17 00:00:00 2001 From: Colin James Date: Tue, 22 Oct 2024 14:34:06 +0100 Subject: [PATCH 089/121] CP-51772: Propagate trace context through spans Adds TraceContext to the SpanContext data structure and attempts to ensure its inheritance through each part of the code base. The API exposed by the tracing library is a bit problematic, it ought to be simplified and adapted to various use cases. Signed-off-by: Colin James --- ocaml/libs/http-lib/http_svr.ml | 15 +++++-- ocaml/libs/tracing/tracing.ml | 67 +++++++++++++++++++++------- ocaml/libs/tracing/tracing.mli | 7 +++ ocaml/libs/tracing/tracing_export.ml | 20 +++++++-- ocaml/xapi-cli-server/xapi_cli.ml | 36 +++++++++------ ocaml/xapi/context.ml | 3 +- ocaml/xapi/helpers.ml | 8 +++- 7 files changed, 116 insertions(+), 40 deletions(-) diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index a7d52b23a31..017587f3737 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -442,17 +442,24 @@ let read_request_exn ~proxy_seen ~read_timeout ~total_timeout ~max_length fd = already sent back a suitable error code and response to the client. 
*) let read_request ?proxy_seen ~read_timeout ~total_timeout ~max_length fd = try + (* TODO: Restore functionality of tracing this function. We rely on the request + to contain information we want spans to inherit. However, it is the reading of the + request that we intend to trace. *) + let r, proxy = + read_request_exn ~proxy_seen ~read_timeout ~total_timeout ~max_length fd + in + let trace_context = Tracing_propagator.Propagator.Http.extract_from r in let tracer = Tracing.Tracer.get_tracer ~name:"http_tracer" in let loop_span = - match Tracing.Tracer.start ~tracer ~name:__FUNCTION__ ~parent:None () with + match + Tracing.Tracer.start ~tracer ~trace_context ~name:__FUNCTION__ + ~parent:None () + with | Ok span -> span | Error _ -> None in - let r, proxy = - read_request_exn ~proxy_seen ~read_timeout ~total_timeout ~max_length fd - in let parent_span = Helper.traceparent_of r in let loop_span = Option.fold ~none:None diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index d0adde3e776..8beff835cec 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -95,7 +95,7 @@ let validate_attribute (key, value) = && W3CBaggage.Key.is_valid_key key module SpanKind = struct - type t = Server | Consumer | Client | Producer | Internal [@@deriving rpcty] + type t = Server | Consumer | Client | Producer | Internal let to_string = function | Server -> @@ -127,7 +127,7 @@ let endpoint_to_string = function let ok_none = Ok None module Status = struct - type status_code = Unset | Ok | Error [@@deriving rpcty] + type status_code = Unset | Ok | Error type t = {status_code: status_code; _description: string option} end @@ -229,9 +229,14 @@ module TraceContext = struct end module SpanContext = struct - type t = {trace_id: Trace_id.t; span_id: Span_id.t} [@@deriving rpcty] + type t = { + trace_id: Trace_id.t + ; span_id: Span_id.t + ; trace_context: TraceContext.t + } - let context trace_id span_id = {trace_id; span_id} + let context 
trace_id span_id = + {trace_id; span_id; trace_context= TraceContext.empty} let to_traceparent t = let tid = Trace_id.to_string t.trace_id in @@ -246,6 +251,7 @@ module SpanContext = struct { trace_id= Trace_id.of_string trace_id ; span_id= Span_id.of_string span_id + ; trace_context= TraceContext.empty } | _ -> None @@ -253,6 +259,15 @@ module SpanContext = struct let trace_id_of_span_context t = t.trace_id let span_id_of_span_context t = t.span_id + + let context_of_span_context t = t.trace_context + + let with_trace_context trace_context t = {t with trace_context} + + let of_trace_context trace_context = + let traceparent = TraceContext.traceparent_of trace_context in + let span_context = Option.(join (map of_traceparent traceparent)) in + Option.map (with_trace_context trace_context) span_context end module SpanLink = struct @@ -282,16 +297,25 @@ module Span = struct let get_context t = t.context - let start ?(attributes = Attributes.empty) ~name ~parent ~span_kind () = - let trace_id = + let start ?(attributes = Attributes.empty) + ?(trace_context : TraceContext.t option) ~name ~parent ~span_kind () = + let trace_id, extra_context = match parent with | None -> - Trace_id.make () + (Trace_id.make (), TraceContext.empty) | Some span_parent -> - span_parent.context.trace_id + (span_parent.context.trace_id, span_parent.context.trace_context) in let span_id = Span_id.make () in - let context : SpanContext.t = {trace_id; span_id} in + let context : SpanContext.t = + {trace_id; span_id; trace_context= extra_context} + in + let context = + (* If trace_context is provided to the call, override any inherited trace context. 
*) + Option.fold ~none:context + ~some:(Fun.flip SpanContext.with_trace_context context) + trace_context + in (* Using gettimeofday over Mtime as it is better for sharing timestamps between the systems *) let begin_time = Unix.gettimeofday () in let end_time = None in @@ -669,15 +693,18 @@ module Tracer = struct ; attributes= Attributes.empty } - let start ~tracer:t ?(attributes = []) ?(span_kind = SpanKind.Internal) ~name - ~parent () : (Span.t option, exn) result = + let start ~tracer:t ?(attributes = []) ?trace_context + ?(span_kind = SpanKind.Internal) ~name ~parent () : + (Span.t option, exn) result = let open TracerProvider in (* Do not start span if the TracerProvider is disabled*) if not t.enabled then ok_none else let attributes = Attributes.merge_into t.attributes attributes in - let span = Span.start ~attributes ~name ~parent ~span_kind () in + let span = + Span.start ~attributes ?trace_context ~name ~parent ~span_kind () + in Spans.add_to_spans ~span ; Ok (Some span) let update_span_with_parent span (parent : Span.t option) = @@ -691,9 +718,11 @@ module Tracer = struct |> Option.map (fun existing_span -> let old_context = Span.get_context existing_span in let new_context : SpanContext.t = + let trace_context = span.Span.context.trace_context in SpanContext.context (SpanContext.trace_id_of_span_context parent.context) old_context.span_id + |> SpanContext.with_trace_context trace_context in let updated_span = {existing_span with parent= Some parent} in let updated_span = {updated_span with context= new_context} in @@ -730,10 +759,10 @@ end let enable_span_garbage_collector ?(timeout = 86400.) 
() = Spans.GC.initialise_thread ~timeout -let with_tracing ?(attributes = []) ?(parent = None) ~name f = +let with_tracing ?(attributes = []) ?(parent = None) ?trace_context ~name f = let tracer = Tracer.get_tracer ~name in if tracer.enabled then ( - match Tracer.start ~tracer ~attributes ~name ~parent () with + match Tracer.start ~tracer ?trace_context ~attributes ~name ~parent () with | Ok span -> ( try let result = f span in @@ -751,12 +780,12 @@ let with_tracing ?(attributes = []) ?(parent = None) ~name f = ) else f None -let with_child_trace ?attributes parent ~name f = +let with_child_trace ?attributes ?trace_context parent ~name f = match parent with | None -> f None | Some _ as parent -> - with_tracing ?attributes ~parent ~name f + with_tracing ?attributes ?trace_context ~parent ~name f module EnvHelpers = struct let traceparent_key = "TRACEPARENT" @@ -824,6 +853,9 @@ module Propagator = struct let trace_context = P.extract_from carrier in let* parent = TraceContext.traceparent_of trace_context in let* span_context = SpanContext.of_traceparent parent in + let span_context = + SpanContext.with_trace_context trace_context span_context + in let name = P.name_span carrier in Some (Tracer.span_of_span_context span_context name) @@ -845,6 +877,7 @@ module Propagator = struct | _ -> f carrier in - with_child_trace ?attributes parent ~name continue_with_child + with_child_trace ?attributes ~trace_context parent ~name + continue_with_child end end diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index e2d8c8d947d..d20fda8c2e1 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -103,9 +103,13 @@ module SpanContext : sig val of_traceparent : string -> t option + val of_trace_context : TraceContext.t -> t option + val trace_id_of_span_context : t -> Trace_id.t val span_id_of_span_context : t -> Span_id.t + + val context_of_span_context : t -> TraceContext.t end module Span : sig @@ -164,6 +168,7 @@ module 
Tracer : sig val start : tracer:t -> ?attributes:(string * string) list + -> ?trace_context:TraceContext.t -> ?span_kind:SpanKind.t -> name:string -> parent:Span.t option @@ -250,12 +255,14 @@ val enable_span_garbage_collector : ?timeout:float -> unit -> unit val with_tracing : ?attributes:(string * string) list -> ?parent:Span.t option + -> ?trace_context:TraceContext.t -> name:string -> (Span.t option -> 'a) -> 'a val with_child_trace : ?attributes:(string * string) list + -> ?trace_context:TraceContext.t -> Span.t option -> name:string -> (Span.t option -> 'a) diff --git a/ocaml/libs/tracing/tracing_export.ml b/ocaml/libs/tracing/tracing_export.ml index 43761cdde1c..592a12bbb26 100644 --- a/ocaml/libs/tracing/tracing_export.ml +++ b/ocaml/libs/tracing/tracing_export.ml @@ -82,6 +82,16 @@ module Content = struct {timestamp; value} ) in + let tags = + let span_context = Span.get_context s in + let trace_context = + SpanContext.context_of_span_context span_context + in + let baggage = + TraceContext.baggage_of trace_context |> Option.value ~default:[] + in + Span.get_attributes s @ baggage + in { id= s @@ -117,7 +127,7 @@ module Content = struct |> Option.map SpanKind.to_string ; localEndpoint= {serviceName} ; annotations - ; tags= Span.get_attributes s + ; tags } let content_of (spans : Span.t list) = @@ -270,7 +280,10 @@ module Destination = struct ; ("xs.tracing.finished_spans_table.count", string_of_int count) ] in - let@ _ = with_tracing ~parent ~attributes ~name in + let@ _ = + with_tracing ~trace_context:TraceContext.empty ~parent ~attributes + ~name + in all_spans |> Content.Json.ZipkinV2.content_of |> export @@ -283,7 +296,8 @@ module Destination = struct let ((_span_list, span_count) as span_info) = Spans.since () in let attributes = [("export.traces.count", string_of_int span_count)] in let@ parent = - with_tracing ~parent:None ~attributes ~name:"Tracing.flush_spans" + with_tracing ~trace_context:TraceContext.empty ~parent:None ~attributes + 
~name:"Tracing.flush_spans" in TracerProvider.get_tracer_providers () |> List.filter TracerProvider.get_enabled diff --git a/ocaml/xapi-cli-server/xapi_cli.ml b/ocaml/xapi-cli-server/xapi_cli.ml index a38115fd831..72057550ffd 100644 --- a/ocaml/xapi-cli-server/xapi_cli.ml +++ b/ocaml/xapi-cli-server/xapi_cli.ml @@ -132,11 +132,17 @@ module TraceHelper = struct let open Tracing in let span_context = Option.map Span.get_context span in let traceparent = Option.map SpanContext.to_traceparent span_context in - let trace_context = TraceContext.(with_traceparent traceparent empty) in + let trace_context = + Option.map SpanContext.context_of_span_context span_context + in + let trace_context = + Option.value ~default:TraceContext.empty trace_context + |> TraceContext.with_traceparent traceparent + in Tracing_propagator.Propagator.Http.inject_into trace_context end -let do_rpcs _req s username password minimal cmd session args tracing = +let do_rpcs req s username password minimal cmd session args = let cmdname = get_cmdname cmd in let cspec = try Hashtbl.find cmdtable cmdname @@ -151,8 +157,21 @@ let do_rpcs _req s username password minimal cmd session args tracing = let _ = check_required_keys cmd cspec.reqd in try let generic_rpc = get_rpc () in + let trace_context = Tracing_propagator.Propagator.Http.extract_from req in + let parent = + (* This is a "faux" span in the sense that it's not exported by the program. It exists + so that the derived child span can refer to its span-id as its parent during exportation + (along with inheriting the trace-id). *) + let open Tracing in + let ( let* ) = Option.bind in + let* traceparent = TraceContext.traceparent_of trace_context in + let* span_context = SpanContext.of_traceparent traceparent in + let span = Tracer.span_of_span_context span_context (get_cmdname cmd) in + Some span + in (* NB the request we've received is for the /cli. 
We need an XMLRPC request for the API *) - Tracing.with_tracing ~parent:tracing ~name:("xe " ^ cmdname) @@ fun span -> + Tracing.with_tracing ~trace_context ~parent ~name:("xe " ^ cmdname) + @@ fun span -> let req = Xmlrpc_client.xmlrpc ~version:"1.1" "/" in let req = TraceHelper.inject_span_into_req span req in let rpc = generic_rpc req s in @@ -204,15 +223,6 @@ let uninteresting_cmd_postfixes = ["help"; "-get"; "-list"] let exec_command req cmd s session args = let params = get_params cmd in - let tracing = - let ( let* ) = Option.bind in - let open Tracing in - let context = Tracing_propagator.Propagator.Http.extract_from req in - let* traceparent = TraceContext.traceparent_of context in - let* span_context = SpanContext.of_traceparent traceparent in - let span = Tracer.span_of_span_context span_context (get_cmdname cmd) in - Some span - in let minimal = List.assoc_opt "minimal" params |> Option.fold ~none:false ~some:bool_of_string @@ -271,7 +281,7 @@ let exec_command req cmd s session args = params ) ) ; - do_rpcs req s u p minimal cmd session args tracing + do_rpcs req s u p minimal cmd session args let get_line str i = try diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index 56829d37d75..5f357e110af 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -223,8 +223,7 @@ let parent_of_origin (origin : origin) span_name = | Http (req, _) -> let context = Tracing_propagator.Propagator.Http.extract_from req in let open Tracing in - let* traceparent = TraceContext.traceparent_of context in - let* span_context = SpanContext.of_traceparent traceparent in + let* span_context = SpanContext.of_trace_context context in let span = Tracer.span_of_span_context span_context span_name in Some span | _ -> diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index d75c4dce1c9..8c2f91fc2a3 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -392,7 +392,13 @@ module TraceHelper = struct let open Tracing in let span_context = Option.map 
Span.get_context span in let traceparent = Option.map SpanContext.to_traceparent span_context in - let trace_context = TraceContext.(with_traceparent traceparent empty) in + let trace_context = + Option.map SpanContext.context_of_span_context span_context + in + let trace_context = + Option.value ~default:TraceContext.empty trace_context + |> TraceContext.with_traceparent traceparent + in Tracing_propagator.Propagator.Http.inject_into trace_context end From 252c05ebe580b30dc611e589fc190fdcd34bfff8 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Tue, 26 Nov 2024 12:17:19 +0100 Subject: [PATCH 090/121] Apply fix by psafont: [Xenopsd] chooses NUMA nodes purely based on amount of free memory on the NUMA nodes of the host Co-authored-by: Pau Ruiz Safont Signed-off-by: Bernhard Kaindl --- doc/content/toolstack/features/NUMA/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/content/toolstack/features/NUMA/index.md b/doc/content/toolstack/features/NUMA/index.md index 5f4d3fe41e8..8fa637be2ca 100644 --- a/doc/content/toolstack/features/NUMA/index.md +++ b/doc/content/toolstack/features/NUMA/index.md @@ -130,7 +130,7 @@ See page 13 in [^AMD_numa] for a diagram of an AMD Opteron 6272 system. * Booting multiple VMs in parallel will result in potentially allocating both on the same NUMA node (race condition) * When we're about to run out of host memory we'll fall back to striping memory again, but the soft affinity mask won't reflect that (this needs an API to query Xen on where it has actually placed the VM, so we can fix up the mask accordingly) -* XAPI is not aware of NUMA balancing across a pool, and chooses NUMA nodes purely based on amount of free memory on the NUMA nodes of the host, even if a better NUMA placement could be found on another host +* XAPI is not aware of NUMA balancing across a pool. 
Xenopsd chooses NUMA nodes purely based on amount of free memory on the NUMA nodes of the host, even if a better NUMA placement could be found on another host * Very large (>16 NUMA nodes) systems may only explore a limited number of choices (fit into a single node vs fallback to full interleaving) * The exact VM placement is not yet controllable * Microbenchmarks with a single VM on a host show both performance improvements and regressions on memory bandwidth usage: previously a single VM may have been able to take advantage of the bandwidth of both NUMA nodes if it happened to allocate memory from the right places, whereas now it'll be forced to use just a single node. From aa542372139053e75e0cc05d36fe236ab3d6d551 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Tue, 26 Nov 2024 12:18:35 +0100 Subject: [PATCH 091/121] Apply fix by psafont: "Future XAPI versions may change `default_policy` to mean `best_effort`." Co-authored-by: Pau Ruiz Safont Signed-off-by: Bernhard Kaindl --- doc/content/toolstack/features/NUMA/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/content/toolstack/features/NUMA/index.md b/doc/content/toolstack/features/NUMA/index.md index 8fa637be2ca..5f89a5eaa93 100644 --- a/doc/content/toolstack/features/NUMA/index.md +++ b/doc/content/toolstack/features/NUMA/index.md @@ -161,7 +161,7 @@ Meaning of the policy: * Currently, `default_policy` is treated as `any`, but the admin can change it, and then the system will remember that change across upgrades. If we didn't have a `default_policy` then changing the "default" policy on an upgrade would be tricky: we either risk overriding an explicit choice of the admin, or existing installs cannot take advantage of the improved performance from `best_effort` -* Future, XAPI versions may change `default_policy` to mean `best_effort`. +* Future XAPI versions may change `default_policy` to mean `best_effort`. 
Admins can still override it to `any` if they wish on a host by host basis. It is not expected that users would have to change `best_effort`, unless they run very specific workloads, so a pool level control is not provided at this moment. From eeec845b39350a997b93eadb58b178d6304b224d Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 28 Nov 2024 12:48:01 +0000 Subject: [PATCH 092/121] xe-cli completion: Use grep -E instead of egrep Otherwise newer packages in XS9 issue "egrep: warning: egrep is obsolescent; using grep -E" warnings all over the place. Signed-off-by: Andrii Sultanov --- ocaml/xe-cli/bash-completion | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xe-cli/bash-completion b/ocaml/xe-cli/bash-completion index b4ba6127138..84fd3656a65 100644 --- a/ocaml/xe-cli/bash-completion +++ b/ocaml/xe-cli/bash-completion @@ -675,7 +675,7 @@ description() __process_params() { - echo "$1" | cut -d: -f2- | egrep -v "^ $" | cut -c 2- | \ + echo "$1" | cut -d: -f2- | grep -Ev "^ $" | cut -c 2- | \ sed -e 's/,/=,/g' -e 's/$/=/g' -e 's/:=/:/g' -e 's/-=/-/g' -e 's/ //g' } From 7ada7349447cd4e46d3a1954356c7241d0716a9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 7 Aug 2024 17:11:32 +0100 Subject: [PATCH 093/121] CA-388210: factor out computing the domain parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are going to change how we compute it, so factor it out into a function. No functional change. 
Signed-off-by: Edwin Török --- ocaml/xapi-storage-script/main.ml | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index c69d28847d1..a3457ae3094 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -66,6 +66,8 @@ let backend_backtrace_error name args backtrace = let missing_uri () = backend_error "MISSING_URI" ["Please include a URI in the device-config"] +let domain_of ~dp:_ ~vm' = Storage_interface.Vm.string_of vm' + (** Functions to wrap calls to the above client modules and convert their exceptions and errors into SMAPIv2 errors of type [Storage_interface.Exception.exnty]. The above client modules should only @@ -1432,9 +1434,9 @@ let bind ~volume_script_dir = |> wrap in S.VDI.introduce vdi_introduce_impl ; - let vdi_attach3_impl dbg _dp sr vdi' vm' _readwrite = + let vdi_attach3_impl dbg dp sr vdi' vm' _readwrite = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = domain_of ~dp ~vm' in vdi_attach_common dbg sr vdi domain >>>= fun response -> let convert_implementation = function | Xapi_storage.Data.XenDisk {params; extra; backend_type} -> @@ -1456,9 +1458,9 @@ let bind ~volume_script_dir = |> wrap in S.VDI.attach3 vdi_attach3_impl ; - let vdi_activate_common dbg sr vdi' vm' readonly = + let vdi_activate_common dbg dp sr vdi' vm' readonly = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = domain_of ~dp ~vm' in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> @@ -1483,17 +1485,17 @@ let bind ~volume_script_dir = ) |> wrap in - let vdi_activate3_impl dbg _dp sr vdi' vm' = - vdi_activate_common dbg sr vdi' vm' false + let vdi_activate3_impl dbg dp sr vdi' vm' = + vdi_activate_common dbg dp sr vdi' vm' 
false in S.VDI.activate3 vdi_activate3_impl ; - let vdi_activate_readonly_impl dbg _dp sr vdi' vm' = - vdi_activate_common dbg sr vdi' vm' true + let vdi_activate_readonly_impl dbg dp sr vdi' vm' = + vdi_activate_common dbg dp sr vdi' vm' true in S.VDI.activate_readonly vdi_activate_readonly_impl ; - let vdi_deactivate_impl dbg _dp sr vdi' vm' = + let vdi_deactivate_impl dbg dp sr vdi' vm' = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = domain_of ~dp ~vm' in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> @@ -1514,9 +1516,9 @@ let bind ~volume_script_dir = |> wrap in S.VDI.deactivate vdi_deactivate_impl ; - let vdi_detach_impl dbg _dp sr vdi' vm' = + let vdi_detach_impl dbg dp sr vdi' vm' = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = domain_of ~dp ~vm' in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> @@ -1627,9 +1629,9 @@ let bind ~volume_script_dir = S.VDI.epoch_end vdi_epoch_end_impl ; let vdi_set_persistent_impl _dbg _sr _vdi _persistent = return () |> wrap in S.VDI.set_persistent vdi_set_persistent_impl ; - let dp_destroy2 dbg _dp sr vdi' vm' _allow_leak = + let dp_destroy2 dbg dp sr vdi' vm' _allow_leak = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = domain_of ~dp ~vm' in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> From 864d73420e654b5395a6e98a60615ed292a6e1dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 7 Aug 2024 17:12:33 +0100 Subject: [PATCH 094/121] CA-388210: SMAPIv3 concurrency safety: send the (unique) datapath argument as domain for Dom0 MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Dom0 allows multiple attaches of RO disks, used e.g. for VDI.copy. Send a unique value to SMAPIv3 plugins to avoid bugs to the lack of reference counting in those plugins. XAPI already sends a unique value here, either vbd/domid/device, or a fresh UUID (for storage migration). Signed-off-by: Edwin Török --- ocaml/xapi-storage-script/main.ml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index a3457ae3094..d76ff039e7d 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -66,7 +66,22 @@ let backend_backtrace_error name args backtrace = let missing_uri () = backend_error "MISSING_URI" ["Please include a URI in the device-config"] -let domain_of ~dp:_ ~vm' = Storage_interface.Vm.string_of vm' +(** return a unique 'domain' string for Dom0, so that we can plug disks + multiple times (e.g. for copy). + + XAPI should give us a unique 'dp' (datapath) string, e.g. a UUID for storage migration, + or vbd/domid/device. + For regular guests keep the domain as passed by XAPI (an integer). 
+ *) +let domain_of ~dp ~vm' = + let vm = Storage_interface.Vm.string_of vm' in + match vm with + | "0" -> + (* SM tries to use this in filesystem paths, so cannot have /, + and systemd might be a bit unhappy with - *) + "u0-" ^ dp |> String.map ~f:(function '/' | '-' -> '_' | c -> c) + | _ -> + vm (** Functions to wrap calls to the above client modules and convert their exceptions and errors into SMAPIv2 errors of type From d5cd034ad38e892831ae103b595455dcee9486af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 7 Aug 2024 14:24:58 +0100 Subject: [PATCH 095/121] CA-388210: SMAPIv3 debugging: log PID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Log PID on successful and failed operations, and log full cmdline for newly spawned processes. This can be used to debug stuck scripts, so that we know which invocation is the one that is stuck. Signed-off-by: Edwin Török --- ocaml/xapi-storage-script/lib.ml | 4 +++- ocaml/xapi-storage-script/lib.mli | 3 ++- ocaml/xapi-storage-script/main.ml | 28 ++++++++++++++++----------- ocaml/xapi-storage-script/test_lib.ml | 25 ++++++++++++++++++++---- 4 files changed, 43 insertions(+), 17 deletions(-) diff --git a/ocaml/xapi-storage-script/lib.ml b/ocaml/xapi-storage-script/lib.ml index 9c9059432bf..a3beb9a8009 100644 --- a/ocaml/xapi-storage-script/lib.ml +++ b/ocaml/xapi-storage-script/lib.ml @@ -131,6 +131,7 @@ module Process = struct type t = { exit_status: (unit, exit_or_signal) Result.t + ; pid: int ; stdout: string ; stderr: string } @@ -176,6 +177,7 @@ module Process = struct let run ~env ~prog ~args ~input = let ( let@ ) f x = f x in let@ p = with_process ~env ~prog ~args in + let pid = p#pid in let sender = send p#stdin input in let receiver_out = receive p#stdout in let receiver_err = receive p#stderr in @@ -185,7 +187,7 @@ module Process = struct Lwt.both sender receiver >>= fun ((), (stdout, stderr)) -> p#status >>= fun status -> let exit_status = 
Output.exit_or_signal_of_unix status in - Lwt.return {Output.exit_status; stdout; stderr} + Lwt.return {Output.exit_status; pid; stdout; stderr} ) (function | Lwt.Canceled as exn -> diff --git a/ocaml/xapi-storage-script/lib.mli b/ocaml/xapi-storage-script/lib.mli index a55c4b81fbc..eae9183a174 100644 --- a/ocaml/xapi-storage-script/lib.mli +++ b/ocaml/xapi-storage-script/lib.mli @@ -65,6 +65,7 @@ module Process : sig type t = { exit_status: (unit, exit_or_signal) result + ; pid: int ; stdout: string ; stderr: string } @@ -78,7 +79,7 @@ module Process : sig -> Output.t Lwt.t (** Runs a cli program, writes [input] into its stdin, then closing the fd, and finally waits for the program to finish and returns the exit status, - its stdout and stderr. *) + the pid, and its stdout and stderr. *) end module DirWatcher : sig diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index d76ff039e7d..a0b99ae91ab 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -79,7 +79,7 @@ let domain_of ~dp ~vm' = | "0" -> (* SM tries to use this in filesystem paths, so cannot have /, and systemd might be a bit unhappy with - *) - "u0-" ^ dp |> String.map ~f:(function '/' | '-' -> '_' | c -> c) + "u0-" ^ dp |> String.map (function '/' | '-' -> '_' | c -> c) | _ -> vm @@ -477,6 +477,8 @@ let fork_exec_rpc : ) >>>= fun input -> let input = compat_in input |> Jsonrpc.to_string in + debug (fun m -> m "Running %s" @@ Filename.quote_command script_name args) + >>= fun () -> Process.run ~env ~prog:script_name ~args ~input >>= fun output -> let fail_because ~cause description = fail @@ -500,12 +502,13 @@ let fork_exec_rpc : with | Error _ -> error (fun m -> - m "%s failed and printed bad error json: %s" script_name - output.Process.Output.stdout + m "%s[%d] failed and printed bad error json: %s" script_name + output.pid output.Process.Output.stdout ) >>= fun () -> error (fun m -> - m "%s failed, stderr: %s" script_name 
output.Process.Output.stderr + m "%s[%d] failed, stderr: %s" script_name output.pid + output.Process.Output.stderr ) >>= fun () -> fail_because "non-zero exit and bad json on stdout" @@ -516,12 +519,12 @@ let fork_exec_rpc : with | Error _ -> error (fun m -> - m "%s failed and printed bad error json: %s" script_name - output.Process.Output.stdout + m "%s[%d] failed and printed bad error json: %s" script_name + output.pid output.Process.Output.stdout ) >>= fun () -> error (fun m -> - m "%s failed, stderr: %s" script_name + m "%s[%d] failed, stderr: %s" script_name output.pid output.Process.Output.stderr ) >>= fun () -> @@ -532,7 +535,9 @@ let fork_exec_rpc : ) ) | Error (Signal signal) -> - error (fun m -> m "%s caught a signal and failed" script_name) + error (fun m -> + m "%s[%d] caught a signal and failed" script_name output.pid + ) >>= fun () -> fail_because "signalled" ~cause:(Signal.to_string signal) | Ok () -> ( (* Parse the json on stdout. We get back a JSON-RPC @@ -544,8 +549,8 @@ let fork_exec_rpc : with | Error _ -> error (fun m -> - m "%s succeeded but printed bad json: %s" script_name - output.Process.Output.stdout + m "%s[%d] succeeded but printed bad json: %s" script_name + output.pid output.Process.Output.stdout ) >>= fun () -> fail @@ -554,7 +559,8 @@ let fork_exec_rpc : ) | Ok response -> info (fun m -> - m "%s succeeded: %s" script_name output.Process.Output.stdout + m "%s[%d] succeeded: %s" script_name output.pid + output.Process.Output.stdout ) >>= fun () -> let response = compat_out response in diff --git a/ocaml/xapi-storage-script/test_lib.ml b/ocaml/xapi-storage-script/test_lib.ml index e016d1368a4..ca1d0a07a1c 100644 --- a/ocaml/xapi-storage-script/test_lib.ml +++ b/ocaml/xapi-storage-script/test_lib.ml @@ -103,12 +103,20 @@ let test_run_status = let module P = Process in let test () = let* output = P.run ~prog:"true" ~args:[] ~input:"" ~env:[] in - let expected = P.Output.{exit_status= Ok (); stdout= ""; stderr= ""} in + let expected = + 
P.Output.{exit_status= Ok (); pid= output.pid; stdout= ""; stderr= ""} + in Alcotest.(check output_c) "Exit status is correct" expected output ; let* output = P.run ~prog:"false" ~args:[] ~input:"" ~env:[] in let expected = - P.Output.{exit_status= Error (Exit_non_zero 1); stdout= ""; stderr= ""} + P.Output. + { + exit_status= Error (Exit_non_zero 1) + ; pid= output.pid + ; stdout= "" + ; stderr= "" + } in Alcotest.(check output_c) "Exit status is correct" expected output ; @@ -121,7 +129,10 @@ let test_run_output = let test () = let content = "@@@@@@" in let* output = P.run ~prog:"cat" ~args:["-"] ~input:content ~env:[] in - let expected = P.Output.{exit_status= Ok (); stdout= content; stderr= ""} in + let expected = + P.Output. + {exit_status= Ok (); pid= output.pid; stdout= content; stderr= ""} + in Alcotest.(check output_c) "Stdout is correct" expected output ; let* output = P.run ~prog:"cat" ~args:[content] ~input:content ~env:[] in @@ -129,7 +140,13 @@ let test_run_output = Printf.sprintf "cat: %s: No such file or directory\n" content in let expected = - P.Output.{exit_status= Error (Exit_non_zero 1); stdout= ""; stderr} + P.Output. + { + exit_status= Error (Exit_non_zero 1) + ; pid= output.pid + ; stdout= "" + ; stderr + } in Alcotest.(check output_c) "Stderr is correct" expected output ; Lwt.return () From ce82302b5244f248a1dd736421b506499a5d402d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 7 Aug 2024 14:24:58 +0100 Subject: [PATCH 096/121] CA-388210: SMAPIv3 concurrency: turn on concurrent operations by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is believed that the deadlocks in SMAPIv3 were caused by VDI.copy operations that attach the same disk RO multiple times in Dom0. With the previous commits we now use a unique identifier and spawn a separate qemu-dp process in the SMAPIv3 plugins, which should prevent the deadlocks and IO errors due to lack of refcounting. 
Signed-off-by: Edwin Török --- ocaml/xapi-storage-script/main.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index a0b99ae91ab..e29f4937ab5 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1796,7 +1796,7 @@ let rec diff a b = (* default false due to bugs in SMAPIv3 plugins, once they are fixed this should be set to true *) -let concurrent = ref false +let concurrent = ref true type reload = All | Files of string list | Nothing From 4c97bfbf386bcf67318578f8434fc5d0729319b9 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Sun, 1 Dec 2024 13:25:19 +0000 Subject: [PATCH 097/121] Improve Delay test Test was pretty minimal. Add test for result from "wait" function. Check that "signal" calls are collapsed as stated. Check using multiple threads. Check timeout not calling "signal". Old "test_wait" test done by "no_signal". Signed-off-by: Frediano Ziglio --- .../xapi-stdext/lib/xapi-stdext-threads/dune | 2 +- .../lib/xapi-stdext-threads/threadext_test.ml | 82 ++++++++++++++----- 2 files changed, 63 insertions(+), 21 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune index f7e9141c3a9..663c24437c7 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune @@ -12,5 +12,5 @@ (name threadext_test) (package xapi-stdext-threads) (modules threadext_test) - (libraries xapi_stdext_threads alcotest mtime.clock.os mtime fmt) + (libraries xapi_stdext_threads alcotest mtime.clock.os mtime fmt threads.posix) ) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml index c21cd62e8c0..0442e302ab7 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml +++ 
b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml @@ -1,5 +1,5 @@ (* - * Copyright (C) 2006-2009 Citrix Systems Inc. + * Copyright (C) 2006-2024 Citrix Systems Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -14,22 +14,64 @@ module Delay = Xapi_stdext_threads.Threadext.Delay -let span_approx ~max_error = - let eq_within a b = - let diff = Mtime.Span.abs_diff a b in - Mtime.Span.compare diff max_error < 0 - in - Alcotest.testable Mtime.Span.pp @@ eq_within - -let test_wait () = - let m = Delay.make () in - let c = Mtime_clock.counter () in - let time = 1 in - let expected = Mtime.Span.(time * s) in - let max_error = Mtime.Span.(10 * ms) in - let _ = Delay.wait m (float_of_int time) in - let wait_time = Mtime_clock.count c in - Alcotest.check' (span_approx ~max_error) ~msg:"diff is smaller than max error" - ~expected ~actual:wait_time - -let () = Alcotest.run "Threadext" [("wait", [("wait", `Quick, test_wait)])] +let delay_wait_check ~min ~max delay timeout expected = + let cnt = Mtime_clock.counter () in + let res = Delay.wait delay timeout in + let elapsed = (Mtime_clock.count cnt |> Mtime.Span.to_float_ns) *. 1e-9 in + Alcotest.(check bool) "expected result" expected res ; + if elapsed < min || elapsed > max then + let msg = Printf.sprintf "%f not in range %f-%f" elapsed min max in + Alcotest.(check bool) msg true false + +(* +Single simple signal stored +- signal +- wait on same thread should succeed quickly +*) +let simple () = + let d = Delay.make () in + Delay.signal d ; + delay_wait_check ~min:0. 
~max:0.01 d 1.0 false + +(* +No signal +- wait on same thread should timeout more or less on delay +*) +let no_signal () = + let d = Delay.make () in + delay_wait_check ~min:0.1 ~max:0.11 d 0.1 true + +(* +Signal twice, collapsed +- signal +- signal +- wait on same thread should succeed quickly +- wait on same thread should timeout +*) +let collapsed () = + let d = Delay.make () in + Delay.signal d ; + Delay.signal d ; + delay_wait_check ~min:0. ~max:0.01 d 0.1 false ; + delay_wait_check ~min:0.1 ~max:0.11 d 0.1 true + +(* +Signal from another thread +- signal on another thread after a while +- wait on same thread should succeed more or less on other thread sleep +*) +let other_thread () = + let d = Delay.make () in + let th = Thread.create (fun d -> Thread.delay 0.1 ; Delay.signal d) d in + delay_wait_check ~min:0.1 ~max:0.11 d 1.0 false ; + Thread.join th + +let tests = + [ + ("simple", `Quick, simple) + ; ("no_signal", `Quick, no_signal) + ; ("collapsed", `Quick, collapsed) + ; ("other_thread", `Quick, other_thread) + ] + +let () = Alcotest.run "Threadext" [("Delay", tests)] From 78d6df36b3fb4e48b6ebf9cab93a3f0bbfa5e1cd Mon Sep 17 00:00:00 2001 From: Mark Syms Date: Tue, 12 Nov 2024 09:27:25 +0000 Subject: [PATCH 098/121] CP-42675: add new SM GC message ID Signed-off-by: Mark Syms --- ocaml/xapi-consts/api_messages.ml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ocaml/xapi-consts/api_messages.ml b/ocaml/xapi-consts/api_messages.ml index ff436199a76..a81eb1a426f 100644 --- a/ocaml/xapi-consts/api_messages.ml +++ b/ocaml/xapi-consts/api_messages.ml @@ -370,3 +370,5 @@ let xapi_startup_blocked_as_version_higher_than_coordinator = let all_running_vms_in_anti_affinity_grp_on_single_host = addMessage "ALL_RUNNING_VMS_IN_ANTI_AFFINITY_GRP_ON_SINGLE_HOST" 3L + +let sm_gc_no_space = addMessage "SM_GC_NO_SPACE" 3L From 21d115613f06461d14c6b58b8a07d5de24258117 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Fri, 29 Nov 2024 11:48:14 +0800 Subject: [PATCH 099/121] 
CA-403101: Keep host.last_update_hash for host joined a pool Signed-off-by: Gang Ji --- ocaml/idl/datamodel_host.ml | 9 +++++++++ ocaml/tests/common/test_common.ml | 3 ++- ocaml/tests/test_host.ml | 2 +- ocaml/xapi/dbsync_slave.ml | 2 +- ocaml/xapi/xapi_host.ml | 8 ++++---- ocaml/xapi/xapi_host.mli | 1 + ocaml/xapi/xapi_pool.ml | 1 + 7 files changed, 19 insertions(+), 7 deletions(-) diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index b0fb9a6aace..78b68a35722 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -1291,6 +1291,15 @@ let create_params = ; param_release= dundee_release ; param_default= Some (VDateTime Date.epoch) } + ; { + param_type= String + ; param_name= "last_update_hash" + ; param_doc= + "The SHA256 checksum of updateinfo of the most recently applied update \ + on the host" + ; param_release= numbered_release "24.39.0-next" + ; param_default= Some (VString "") + } ] let create = diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index 297a68398ca..7ac0868c84b 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -170,12 +170,13 @@ let make_host ~__context ?(uuid = make_uuid ()) ?(name_label = "host") ?(external_auth_service_name = "") ?(external_auth_configuration = []) ?(license_params = []) ?(edition = "free") ?(license_server = []) ?(local_cache_sr = Ref.null) ?(chipset_info = []) ?(ssl_legacy = false) - ?(last_software_update = Date.epoch) () = + ?(last_software_update = Date.epoch) ?(last_update_hash = "") () = let host = Xapi_host.create ~__context ~uuid ~name_label ~name_description ~hostname ~address ~external_auth_type ~external_auth_service_name ~external_auth_configuration ~license_params ~edition ~license_server ~local_cache_sr ~chipset_info ~ssl_legacy ~last_software_update + ~last_update_hash in Db.Host.set_cpu_info ~__context ~self:host ~value:default_cpu_info ; host diff --git a/ocaml/tests/test_host.ml 
b/ocaml/tests/test_host.ml index 80e72f4f113..60c735e2aff 100644 --- a/ocaml/tests/test_host.ml +++ b/ocaml/tests/test_host.ml @@ -23,7 +23,7 @@ let add_host __context name = ~external_auth_service_name:"" ~external_auth_configuration:[] ~license_params:[] ~edition:"" ~license_server:[] ~local_cache_sr:Ref.null ~chipset_info:[] ~ssl_legacy:false - ~last_software_update:Xapi_stdext_date.Date.epoch + ~last_software_update:Xapi_stdext_date.Date.epoch ~last_update_hash:"" ) (* Creates an unlicensed pool with the maximum number of hosts *) diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 942d3081071..10678562e79 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -59,7 +59,7 @@ let create_localhost ~__context info = ~external_auth_configuration:[] ~license_params:[] ~edition:"" ~license_server:[("address", "localhost"); ("port", "27000")] ~local_cache_sr:Ref.null ~chipset_info:[] ~ssl_legacy:false - ~last_software_update:Date.epoch + ~last_software_update:Date.epoch ~last_update_hash:"" in () diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 9f84923fe2e..1b22a037b30 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -991,7 +991,7 @@ let is_host_alive ~__context ~host = let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~external_auth_type ~external_auth_service_name ~external_auth_configuration ~license_params ~edition ~license_server ~local_cache_sr ~chipset_info - ~ssl_legacy:_ ~last_software_update = + ~ssl_legacy:_ ~last_software_update ~last_update_hash = (* fail-safe. We already test this on the joining host, but it's racy, so multiple concurrent pool-join might succeed. 
Note: we do it in this order to avoid a problem checking restrictions during the initial setup of the database *) @@ -1053,9 +1053,9 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ) ~control_domain:Ref.null ~updates_requiring_reboot:[] ~iscsi_iqn:"" ~multipathing:false ~uefi_certificates:"" ~editions:[] ~pending_guidances:[] - ~tls_verification_enabled ~last_software_update ~recommended_guidances:[] - ~latest_synced_updates_applied:`unknown ~pending_guidances_recommended:[] - ~pending_guidances_full:[] ~last_update_hash:"" ; + ~tls_verification_enabled ~last_software_update ~last_update_hash + ~recommended_guidances:[] ~latest_synced_updates_applied:`unknown + ~pending_guidances_recommended:[] ~pending_guidances_full:[] ; (* If the host we're creating is us, make sure its set to live *) Db.Host_metrics.set_last_updated ~__context ~self:metrics ~value:(Date.now ()) ; Db.Host_metrics.set_live ~__context ~self:metrics ~value:host_is_us ; diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index c303ee69597..bafc47f6de0 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -129,6 +129,7 @@ val create : -> chipset_info:(string * string) list -> ssl_legacy:bool -> last_software_update:API.datetime + -> last_update_hash:string -> [`host] Ref.t val destroy : __context:Context.t -> self:API.ref_host -> unit diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index dd4bca70e26..93e03cd15a0 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -968,6 +968,7 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : ~local_cache_sr ~chipset_info:host.API.host_chipset_info ~ssl_legacy:false ~last_software_update:host.API.host_last_software_update + ~last_update_hash:host.API.host_last_update_hash in (* Copy other-config into newly created host record: *) no_exn From 07724733e6de2a805fa5b22064b8fdf83c749da8 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: 
Tue, 3 Dec 2024 08:41:57 +0000 Subject: [PATCH 100/121] xapi_message: Fix incorrect slow path invocation (and its logs) get_since_for_events checks if the cache contains entries older than the 'since' timepoint requested. If so, this means the cache contains all entries strictly after (>) 'since'. If not, this means the cache needs to be reconstructed, since some entries strictly after (>) 'since' might be just out of cache. Previous code would reconstruct the cache when the oldest entry in the cache was equal to the 'since' timepoint requested, which is incorrect and caused a deluge of "x is older than x itself" in the logs. So, change the check for 'entries older than the since timestamp' to be less-or-equal (as the opposite of 'strictly after'), and clarify the debug message. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_message.ml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi/xapi_message.ml b/ocaml/xapi/xapi_message.ml index 8bc43cc48e8..2bc570925b9 100644 --- a/ocaml/xapi/xapi_message.ml +++ b/ocaml/xapi/xapi_message.ml @@ -647,7 +647,7 @@ let get_since_for_events ~__context since = let cached_result = with_lock in_memory_cache_mutex (fun () -> match List.rev !in_memory_cache with - | (last_in_memory, _, _) :: _ when last_in_memory < since -> + | (oldest_in_memory, _, _) :: _ when oldest_in_memory <= since -> Some (List.filter_map (fun (gen, _ref, msg) -> @@ -658,11 +658,11 @@ let get_since_for_events ~__context since = ) !in_memory_cache ) - | (last_in_memory, _, _) :: _ -> + | (oldest_in_memory, _, _) :: _ -> debug - "%s: cache (%Ld) is older than requested time (%Ld): Using slow \ - message lookup" - __FUNCTION__ last_in_memory since ; + "%s: cache (%Ld) might not contain all messages since the \ + requested time (%Ld): Using slow message lookup" + __FUNCTION__ oldest_in_memory since ; None | _ -> debug "%s: empty cache; Using slow message lookup" __FUNCTION__ ; From 9e3ad1c11e466d02e9663ccebbdde5dbb85a73ec 
Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 27 Nov 2024 16:16:28 +0000 Subject: [PATCH 101/121] xapi: move the 'periodic' scheduler to xapi-stdext-threads This allows the scheduler based on threads to be used by components that aren't xapi, like forkexecd. Signed-off-by: Pau Ruiz Safont --- .../xapi-stdext/lib/xapi-stdext-threads/dune | 10 +++- .../lib/xapi-stdext-threads}/ipq.ml | 0 .../lib/xapi-stdext-threads/scheduler.ml} | 22 +++++---- .../lib/xapi-stdext-threads/scheduler.mli} | 0 ocaml/tests/common/dune | 1 + ocaml/tests/common/test_event_common.ml | 8 ++-- ocaml/xapi/dune | 2 + ocaml/xapi/extauth_plugin_ADwinbind.ml | 16 +++---- ocaml/xapi/helpers.ml | 7 +-- ocaml/xapi/pool_periodic_update_sync.ml | 6 +-- ocaml/xapi/xapi.ml | 2 +- ocaml/xapi/xapi_event.ml | 6 +-- ocaml/xapi/xapi_host.ml | 14 +++--- ocaml/xapi/xapi_periodic_scheduler_init.ml | 47 +++++++++++-------- 14 files changed, 81 insertions(+), 60 deletions(-) rename ocaml/{xapi => libs/xapi-stdext/lib/xapi-stdext-threads}/ipq.ml (100%) rename ocaml/{xapi/xapi_periodic_scheduler.ml => libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml} (85%) rename ocaml/{xapi/xapi_periodic_scheduler.mli => libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.mli} (100%) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune index 663c24437c7..81bf6e67af2 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune @@ -1,13 +1,21 @@ (library (public_name xapi-stdext-threads) (name xapi_stdext_threads) - (modules :standard \ threadext_test) + (modules :standard \ ipq scheduler threadext_test) (libraries threads.posix unix xapi-stdext-unix xapi-stdext-pervasives) ) + +(library + (public_name xapi-stdext-threads.scheduler) + (name xapi_stdext_threads_scheduler) + (modules ipq scheduler) + (libraries mtime mtime.clock threads.posix unix xapi-log xapi-stdext-threads) +) + (test (name 
threadext_test) (package xapi-stdext-threads) diff --git a/ocaml/xapi/ipq.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml similarity index 100% rename from ocaml/xapi/ipq.ml rename to ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml diff --git a/ocaml/xapi/xapi_periodic_scheduler.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml similarity index 85% rename from ocaml/xapi/xapi_periodic_scheduler.ml rename to ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml index 1edcb938857..b3d44caa62e 100644 --- a/ocaml/xapi/xapi_periodic_scheduler.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml @@ -12,7 +12,7 @@ * GNU Lesser General Public License for more details. *) -module D = Debug.Make (struct let name = "backgroundscheduler" end) +module D = Debug.Make (struct let name = __MODULE__ end) open D module Delay = Xapi_stdext_threads.Threadext.Delay @@ -30,16 +30,20 @@ let (queue : t Ipq.t) = Ipq.create 50 let lock = Mutex.create () module Clock = struct - (** time span of s seconds *) let span s = Mtime.Span.of_uint64_ns (Int64.of_float (s *. 1e9)) + let span_to_s span = + Mtime.Span.to_uint64_ns span |> Int64.to_float |> fun ns -> ns /. 1e9 + let add_span clock secs = + (* return mix or max available value if the add overflows *) match Mtime.add_span clock (span secs) with | Some t -> t + | None when secs > 0. -> + Mtime.max_stamp | None -> - raise - Api_errors.(Server_error (internal_error, ["clock overflow"; __LOC__])) + Mtime.min_stamp end let add_to_queue ?(signal = true) name ty start newfunc = @@ -59,7 +63,7 @@ let remove_from_queue name = Ipq.remove queue index let loop () = - debug "Periodic scheduler started" ; + debug "%s started" __MODULE__ ; try while true do let empty = with_lock lock (fun () -> Ipq.is_empty queue) in @@ -82,8 +86,8 @@ let loop () = ) else (* Sleep until next event. 
*) let sleep = Mtime.(span next.Ipq.time now) - |> Mtime.Span.add (Clock.span 0.001) - |> Scheduler.span_to_s + |> Mtime.Span.(add ms) + |> Clock.span_to_s in try ignore (Delay.wait delay sleep) with e -> @@ -102,5 +106,5 @@ let loop () = done with _ -> error - "Periodic scheduler died! Xapi will no longer function well and should \ - be restarted." + "Scheduler thread died! This daemon will no longer function well and \ + should be restarted." diff --git a/ocaml/xapi/xapi_periodic_scheduler.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.mli similarity index 100% rename from ocaml/xapi/xapi_periodic_scheduler.mli rename to ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.mli diff --git a/ocaml/tests/common/dune b/ocaml/tests/common/dune index c578f5f9785..29acca3d2cb 100644 --- a/ocaml/tests/common/dune +++ b/ocaml/tests/common/dune @@ -26,6 +26,7 @@ xapi-test-utils xapi-types xapi-stdext-date + xapi-stdext-threads.scheduler xapi-stdext-unix ) ) diff --git a/ocaml/tests/common/test_event_common.ml b/ocaml/tests/common/test_event_common.ml index 149a27d5ea8..9d37c038ab4 100644 --- a/ocaml/tests/common/test_event_common.ml +++ b/ocaml/tests/common/test_event_common.ml @@ -2,16 +2,16 @@ let ps_start = ref false let scheduler_mutex = Mutex.create () +module Scheduler = Xapi_stdext_threads_scheduler.Scheduler + let start_periodic_scheduler () = Mutex.lock scheduler_mutex ; if !ps_start then () else ( - Xapi_periodic_scheduler.add_to_queue "dummy" - (Xapi_periodic_scheduler.Periodic 60.0) 0.0 (fun () -> () - ) ; + Scheduler.add_to_queue "dummy" (Scheduler.Periodic 60.0) 0.0 (fun () -> ()) ; Xapi_event.register_hooks () ; - ignore (Thread.create Xapi_periodic_scheduler.loop ()) ; + ignore (Thread.create Scheduler.loop ()) ; ps_start := true ) ; Mutex.unlock scheduler_mutex diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index 5602e62d152..d2e2fb17de8 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -196,6 +196,7 @@ xapi-stdext-pervasives 
xapi-stdext-std xapi-stdext-threads + xapi-stdext-threads.scheduler xapi-stdext-unix xapi-stdext-zerocheck xapi-tracing @@ -256,6 +257,7 @@ xapi-stdext-pervasives xapi-stdext-std xapi-stdext-threads + xapi-stdext-threads.scheduler xapi-stdext-unix xapi-types xapi_aux diff --git a/ocaml/xapi/extauth_plugin_ADwinbind.ml b/ocaml/xapi/extauth_plugin_ADwinbind.ml index fc0aa01ad0b..6f51eea9cc5 100644 --- a/ocaml/xapi/extauth_plugin_ADwinbind.ml +++ b/ocaml/xapi/extauth_plugin_ADwinbind.ml @@ -22,6 +22,7 @@ end) open D open Xapi_stdext_std.Xstringext open Auth_signature +module Scheduler = Xapi_stdext_threads_scheduler.Scheduler let finally = Xapi_stdext_pervasives.Pervasiveext.finally @@ -1172,16 +1173,14 @@ module ClosestKdc = struct let trigger_update ~start = if Pool_role.is_master () then ( debug "Trigger task: %s" periodic_update_task_name ; - Xapi_periodic_scheduler.add_to_queue periodic_update_task_name - (Xapi_periodic_scheduler.Periodic - !Xapi_globs.winbind_update_closest_kdc_interval - ) + Scheduler.add_to_queue periodic_update_task_name + (Scheduler.Periodic !Xapi_globs.winbind_update_closest_kdc_interval) start update ) let stop_update () = if Pool_role.is_master () then - Xapi_periodic_scheduler.remove_from_queue periodic_update_task_name + Scheduler.remove_from_queue periodic_update_task_name end module RotateMachinePassword = struct @@ -1302,11 +1301,10 @@ module RotateMachinePassword = struct let trigger_rotate ~start = debug "Trigger task: %s" task_name ; - Xapi_periodic_scheduler.add_to_queue task_name - (Xapi_periodic_scheduler.Periodic !Xapi_globs.winbind_machine_pwd_timeout) - start rotate + Scheduler.add_to_queue task_name + (Scheduler.Periodic !Xapi_globs.winbind_machine_pwd_timeout) start rotate - let stop_rotate () = Xapi_periodic_scheduler.remove_from_queue task_name + let stop_rotate () = Scheduler.remove_from_queue task_name end let build_netbios_name ~config_params = diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 
8c2f91fc2a3..68368754e72 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -443,8 +443,9 @@ let make_timeboxed_rpc ~__context timeout rpc : Rpc.response = in List.iter Locking_helpers.kill_resource resources in - Xapi_periodic_scheduler.add_to_queue (Ref.string_of task_id) - Xapi_periodic_scheduler.OneShot timeout cancel ; + let module Scheduler = Xapi_stdext_threads_scheduler.Scheduler in + Scheduler.add_to_queue (Ref.string_of task_id) Scheduler.OneShot timeout + cancel ; let transport = if Pool_role.is_master () then Unix Xapi_globs.unix_domain_socket @@ -459,7 +460,7 @@ let make_timeboxed_rpc ~__context timeout rpc : Rpc.response = let result = XMLRPC_protocol.rpc ~srcstr:"xapi" ~dststr:"xapi" ~transport ~http rpc in - Xapi_periodic_scheduler.remove_from_queue (Ref.string_of task_id) ; + Scheduler.remove_from_queue (Ref.string_of task_id) ; result ) diff --git a/ocaml/xapi/pool_periodic_update_sync.ml b/ocaml/xapi/pool_periodic_update_sync.ml index 45aacf82a9c..a9755d0cf1e 100644 --- a/ocaml/xapi/pool_periodic_update_sync.ml +++ b/ocaml/xapi/pool_periodic_update_sync.ml @@ -16,6 +16,7 @@ module D = Debug.Make (struct let name = __MODULE__ end) open D open Client +module Scheduler = Xapi_stdext_threads_scheduler.Scheduler type frequency = Daily | Weekly of int @@ -162,12 +163,11 @@ let rec update_sync () = ) and add_to_queue ~__context () = - let open Xapi_periodic_scheduler in - add_to_queue periodic_update_sync_task_name OneShot + Scheduler.add_to_queue periodic_update_sync_task_name Scheduler.OneShot (seconds_until_next_schedule ~__context) update_sync let set_enabled ~__context ~value = - Xapi_periodic_scheduler.remove_from_queue periodic_update_sync_task_name ; + Scheduler.remove_from_queue periodic_update_sync_task_name ; if value then add_to_queue ~__context () diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index ca87e740efb..fd5c0650266 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -1114,7 +1114,7 @@ let server_init () 
= ) ; ( "Starting periodic scheduler" , [Startup.OnThread] - , Xapi_periodic_scheduler.loop + , Xapi_stdext_threads_scheduler.Scheduler.loop ) ; ( "Synchronising host configuration files" , [] diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index 8c7432106ab..600d2859dd3 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -425,12 +425,12 @@ module From = struct && (not (session_is_invalid call)) && Unix.gettimeofday () < deadline do - Xapi_periodic_scheduler.add_to_queue timeoutname - Xapi_periodic_scheduler.OneShot + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue timeoutname + Xapi_stdext_threads_scheduler.Scheduler.OneShot (deadline -. Unix.gettimeofday () +. 0.5) (fun () -> Condition.broadcast c) ; Condition.wait c m ; - Xapi_periodic_scheduler.remove_from_queue timeoutname + Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue timeoutname done ) ; if session_is_invalid call then ( diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 46e06a9b4d7..cd6ae3a7d35 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -938,12 +938,12 @@ let ask_host_if_it_is_a_slave ~__context ~host = "ask_host_if_it_is_a_slave: host taking a long time to respond - IP: \ %s; uuid: %s" ip uuid ; - Xapi_periodic_scheduler.add_to_queue task_name - Xapi_periodic_scheduler.OneShot timeout + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue task_name + Xapi_stdext_threads_scheduler.Scheduler.OneShot timeout (log_host_slow_to_respond (min (2. *. 
timeout) 300.)) in - Xapi_periodic_scheduler.add_to_queue task_name - Xapi_periodic_scheduler.OneShot timeout + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue task_name + Xapi_stdext_threads_scheduler.Scheduler.OneShot timeout (log_host_slow_to_respond timeout) ; let res = Message_forwarding.do_op_on_localsession_nolivecheck ~local_fn ~__context @@ -951,7 +951,7 @@ let ask_host_if_it_is_a_slave ~__context ~host = Client.Client.Pool.is_slave ~rpc ~session_id ~host ) in - Xapi_periodic_scheduler.remove_from_queue task_name ; + Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue task_name ; res in Server_helpers.exec_with_subtask ~__context "host.ask_host_if_it_is_a_slave" @@ -1497,8 +1497,8 @@ let sync_data ~__context ~host = Xapi_sync.sync_host ~__context host (* Nb, no attempt to wrap exceptions yet *) let backup_rrds ~__context ~host:_ ~delay = - Xapi_periodic_scheduler.add_to_queue "RRD backup" - Xapi_periodic_scheduler.OneShot delay (fun _ -> + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue "RRD backup" + Xapi_stdext_threads_scheduler.Scheduler.OneShot delay (fun _ -> let master_address = Pool_role.get_master_address_opt () in log_and_ignore_exn (Rrdd.backup_rrds master_address) ; log_and_ignore_exn (fun () -> diff --git a/ocaml/xapi/xapi_periodic_scheduler_init.ml b/ocaml/xapi/xapi_periodic_scheduler_init.ml index d74b349e240..39866292460 100644 --- a/ocaml/xapi/xapi_periodic_scheduler_init.ml +++ b/ocaml/xapi/xapi_periodic_scheduler_init.ml @@ -76,46 +76,53 @@ let register ~__context = let update_all_subjects_delay = 10.0 in (* initial delay = 10 seconds *) if master then - Xapi_periodic_scheduler.add_to_queue "Synchronising RRDs/messages" - (Xapi_periodic_scheduler.Periodic sync_timer) sync_delay sync_func ; + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue + "Synchronising RRDs/messages" + (Xapi_stdext_threads_scheduler.Scheduler.Periodic sync_timer) sync_delay + sync_func ; if master then - Xapi_periodic_scheduler.add_to_queue 
"Backing up RRDs" - (Xapi_periodic_scheduler.Periodic rrdbackup_timer) rrdbackup_delay - rrdbackup_func ; + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue "Backing up RRDs" + (Xapi_stdext_threads_scheduler.Scheduler.Periodic rrdbackup_timer) + rrdbackup_delay rrdbackup_func ; if master then - Xapi_periodic_scheduler.add_to_queue + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue "Revalidating externally-authenticated sessions" - (Xapi_periodic_scheduler.Periodic + (Xapi_stdext_threads_scheduler.Scheduler.Periodic !Xapi_globs.session_revalidation_interval - ) session_revalidation_delay session_revalidation_func ; + ) + session_revalidation_delay session_revalidation_func ; if master then - Xapi_periodic_scheduler.add_to_queue + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue "Trying to update subjects' info using external directory service (if \ any)" - (Xapi_periodic_scheduler.Periodic !Xapi_globs.update_all_subjects_interval) + (Xapi_stdext_threads_scheduler.Scheduler.Periodic + !Xapi_globs.update_all_subjects_interval + ) update_all_subjects_delay update_all_subjects_func ; - Xapi_periodic_scheduler.add_to_queue "Periodic scheduler heartbeat" - (Xapi_periodic_scheduler.Periodic hb_timer) 240.0 hb_func ; - Xapi_periodic_scheduler.add_to_queue "Update monitor configuration" - (Xapi_periodic_scheduler.Periodic 3600.0) 3600.0 + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue + "Periodic scheduler heartbeat" + (Xapi_stdext_threads_scheduler.Scheduler.Periodic hb_timer) 240.0 hb_func ; + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue + "Update monitor configuration" + (Xapi_stdext_threads_scheduler.Scheduler.Periodic 3600.0) 3600.0 Monitor_master.update_configuration_from_master ; ( if master then let freq = !Xapi_globs.failed_login_alert_freq |> float_of_int in - Xapi_periodic_scheduler.add_to_queue + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue "Periodic alert failed login attempts" - (Xapi_periodic_scheduler.Periodic freq) freq + 
(Xapi_stdext_threads_scheduler.Scheduler.Periodic freq) freq Xapi_pool.alert_failed_login_attempts ) ; - Xapi_periodic_scheduler.add_to_queue "broken_kernel" - (Xapi_periodic_scheduler.Periodic 600.) 600. (fun () -> + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue "broken_kernel" + (Xapi_stdext_threads_scheduler.Scheduler.Periodic 600.) 600. (fun () -> Server_helpers.exec_with_new_task "Periodic alert if the running kernel is broken in some serious way." (fun __context -> Xapi_host.alert_if_kernel_broken ~__context ) ) ; - Xapi_periodic_scheduler.add_to_queue + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue "Period alert if TLS verification emergency disabled" - (Xapi_periodic_scheduler.Periodic 600.) 600. (fun () -> + (Xapi_stdext_threads_scheduler.Scheduler.Periodic 600.) 600. (fun () -> Server_helpers.exec_with_new_task "Period alert if TLS verification emergency disabled" (fun __context -> Xapi_host.alert_if_tls_verification_was_emergency_disabled ~__context From cd98e9db047f67cd07579038060dc7c1cc930eb7 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Tue, 26 Nov 2024 16:44:18 +0000 Subject: [PATCH 102/121] Check index before using it removing an element from Imperative priority queue The queue uses an Array larger than the current size. It's possible that code check the index much later returning error but also setting the queue in an invalid state. So to avoid structure corruption check the index as soon as possible. 
Signed-off-by: Frediano Ziglio --- .../xapi-stdext/lib/xapi-stdext-threads/dune | 10 ++--- .../lib/xapi-stdext-threads/ipq.ml | 2 + .../lib/xapi-stdext-threads/ipq_test.ml | 40 +++++++++++++++++++ .../lib/xapi-stdext-threads/ipq_test.mli | 0 4 files changed, 47 insertions(+), 5 deletions(-) create mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml create mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.mli diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune index 81bf6e67af2..cdaf50ee02f 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune @@ -1,7 +1,7 @@ (library (public_name xapi-stdext-threads) (name xapi_stdext_threads) - (modules :standard \ ipq scheduler threadext_test) + (modules :standard \ ipq scheduler threadext_test ipq_test) (libraries threads.posix unix @@ -16,9 +16,9 @@ (libraries mtime mtime.clock threads.posix unix xapi-log xapi-stdext-threads) ) -(test - (name threadext_test) +(tests + (names threadext_test ipq_test) (package xapi-stdext-threads) - (modules threadext_test) - (libraries xapi_stdext_threads alcotest mtime.clock.os mtime fmt threads.posix) + (modules threadext_test ipq_test) + (libraries xapi_stdext_threads alcotest mtime.clock.os mtime fmt threads.posix xapi_stdext_threads_scheduler) ) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml index ba56825ebe0..cba404293e7 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml @@ -64,6 +64,8 @@ let maximum h = let remove h s = if h.size <= 0 then raise EmptyHeap ; + if s < 0 || s >= h.size then + invalid_arg (Printf.sprintf "%s: index %d out of bounds" __FUNCTION__ s) ; let n = h.size - 1 in h.size <- n ; let d = h.data in diff --git 
a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml new file mode 100644 index 00000000000..b6a614b302c --- /dev/null +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml @@ -0,0 +1,40 @@ +(* + * Copyright (C) 2024 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module Ipq = Xapi_stdext_threads_scheduler.Ipq + +(* test we get "out of bound" exception calling Ipq.remove *) +let test_out_of_index () = + let q = Ipq.create 10 in + Ipq.add q {Ipq.ev= 123; Ipq.time= Mtime_clock.now ()} ; + let is_oob = function + | Invalid_argument s when String.ends_with ~suffix:" out of bounds" s -> + true + | _ -> + false + in + let oob_check n = + (Alcotest.match_raises "out of bound" is_oob @@ fun () -> Ipq.remove q n) ; + Alcotest.(check bool) "same value" false (Ipq.is_empty q) + in + oob_check 10 ; + oob_check (-1) ; + oob_check 9 ; + oob_check 1 ; + (* this should succeed *) + Ipq.remove q 0 + +let tests = [("test_out_of_index", `Quick, test_out_of_index)] + +let () = Alcotest.run "Ipq" [("generic", tests)] diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.mli new file mode 100644 index 00000000000..e69de29bb2d From 414deefc1996df1bc4dbbd151e10c02dd7c80bbb Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Wed, 27 Nov 2024 18:45:44 +0000 Subject: [PATCH 103/121] Fix removing elements from Imperative 
priority queue The queue is implemented using heap tree saved as an array. This is pretty standard. To do a removal you replace the element to remove with the last element and adjust the heap order. This can result in having to move the moved element up or down base its the value. Code only moved down, so also move it up if needed. Signed-off-by: Frediano Ziglio --- .../xapi-stdext/lib/xapi-stdext-threads/ipq.ml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml index cba404293e7..9fd99c59892 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml @@ -67,9 +67,17 @@ let remove h s = if s < 0 || s >= h.size then invalid_arg (Printf.sprintf "%s: index %d out of bounds" __FUNCTION__ s) ; let n = h.size - 1 in - h.size <- n ; let d = h.data in let x = d.(n) in + (* moving [x] up in the heap *) + let rec moveup i = + let fi = (i - 1) / 2 in + if i > 0 && Mtime.is_later d.(fi).time ~than:x.time then ( + d.(i) <- d.(fi) ; + moveup fi + ) else + d.(i) <- x + in (* moving [x] down in the heap *) let rec movedown i = let j = (2 * i) + 1 in @@ -86,7 +94,13 @@ let remove h s = else d.(i) <- x in - movedown s + if s = n then + () + else if Mtime.is_later d.(s).time ~than:x.time then + moveup s + else + movedown s ; + h.size <- n let find h ev = let rec iter n = From dab868476a00efe77e391ca8c4b13f9fc5323100 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Tue, 26 Nov 2024 17:38:56 +0000 Subject: [PATCH 104/121] Remove possible systematic leak in Imperative priority queue The queue is implemented with an Array larger than the content. Instead of copying an initial value taken from first insert or potentially later when the Array is expanded provide a "default" value to use as a filler. 
This allows to provide a value not having references to external object so not extending their lifetime in an unpredictable way. Signed-off-by: Frediano Ziglio --- .../lib/xapi-stdext-threads/ipq.ml | 14 +++-- .../lib/xapi-stdext-threads/ipq.mli | 55 +++++++++++++++++++ .../lib/xapi-stdext-threads/ipq_test.ml | 23 +++++++- .../lib/xapi-stdext-threads/scheduler.ml | 4 +- quality-gate.sh | 2 +- 5 files changed, 89 insertions(+), 9 deletions(-) create mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml index 9fd99c59892..17c1a6e9792 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml @@ -15,15 +15,16 @@ type 'a event = {ev: 'a; time: Mtime.t} -type 'a t = {mutable size: int; mutable data: 'a event array} +type 'a t = {default: 'a event; mutable size: int; mutable data: 'a event array} exception EmptyHeap -let create n = +let create n default = if n <= 0 then invalid_arg "create" else - {size= -n; data= [||]} + let default = {ev= default; time= Mtime_clock.now ()} in + {default; size= -n; data= [||]} let is_empty h = h.size <= 0 @@ -32,14 +33,14 @@ let resize h = assert (n > 0) ; let n' = 2 * n in let d = h.data in - let d' = Array.make n' d.(0) in + let d' = Array.make n' h.default in Array.blit d 0 d' 0 n ; h.data <- d' let add h x = (* first addition: we allocate the array *) if h.size < 0 then ( - h.data <- Array.make (-h.size) x ; + h.data <- Array.make (-h.size) h.default ; h.size <- 0 ) ; let n = h.size in @@ -69,6 +70,7 @@ let remove h s = let n = h.size - 1 in let d = h.data in let x = d.(n) in + d.(n) <- h.default ; (* moving [x] up in the heap *) let rec moveup i = let fi = (i - 1) / 2 in @@ -134,11 +136,13 @@ let iter f h = f d.(i) done +(* let fold f h x0 = let n = h.size in let d = h.data in let rec foldrec x i = if i >= n then x else foldrec (f 
d.(i) x) (succ i) in foldrec x0 0 +*) (* let _ = diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli new file mode 100644 index 00000000000..f470bcc8d32 --- /dev/null +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli @@ -0,0 +1,55 @@ +(* + * Copyright (C) 2024 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type 'a event = {ev: 'a; time: Mtime.t} + +type 'a t + +exception EmptyHeap + +val create : int -> 'a -> 'a t +(** [create n default] creates an empty Imperative priority queue. + The queue initially is initialized to store [n] elements. + The queue will expand beyond [n] automatically if needed. + [default] value will the used to fill unused data. *) + +val is_empty : 'a t -> bool +(** Check if the queue is empty *) + +val add : 'a t -> 'a event -> unit +(** Add an event to the queue *) + +val remove : 'a t -> int -> unit +(** Remove an event from the queue passing the index. + @raise EmptyHeap if the queue is empty. + @raise Invalid_argument if the index is invalid. *) + +val find_p : 'a t -> ('a -> bool) -> int +(** Find the index of an event which matches a given condition + or -1 if not found *) + +val find : 'a t -> 'a -> int +(** Find the index of an event which matches a given event + or -1 if not found *) + +val maximum : 'a t -> 'a event +(** Return a copy of the event with the next time. + @raise EmptyHeap if the queue is empty. 
*) + +val pop_maximum : 'a t -> 'a event +(** Return and remove the event with the next time. + @raise EmptyHeap if the queue is empty. *) + +val iter : ('a event -> unit) -> 'a t -> unit +(** Iterate given function on the list of events in the queue *) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml index b6a614b302c..51fb39367d9 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml @@ -16,7 +16,7 @@ module Ipq = Xapi_stdext_threads_scheduler.Ipq (* test we get "out of bound" exception calling Ipq.remove *) let test_out_of_index () = - let q = Ipq.create 10 in + let q = Ipq.create 10 0 in Ipq.add q {Ipq.ev= 123; Ipq.time= Mtime_clock.now ()} ; let is_oob = function | Invalid_argument s when String.ends_with ~suffix:" out of bounds" s -> @@ -35,6 +35,25 @@ let test_out_of_index () = (* this should succeed *) Ipq.remove q 0 -let tests = [("test_out_of_index", `Quick, test_out_of_index)] +(* check queue does not retain some data after being removed *) +let test_leak () = + let default () = () in + let q = Ipq.create 10 default in + let array = Array.make 1024 'x' in + let use_array () = array.(0) <- 'a' in + let allocated = Atomic.make true in + Gc.finalise (fun _ -> Atomic.set allocated false) array ; + Ipq.add q {Ipq.ev= use_array; Ipq.time= Mtime_clock.now ()} ; + Ipq.remove q 0 ; + Gc.full_major () ; + Gc.full_major () ; + Alcotest.(check bool) "allocated" false (Atomic.get allocated) ; + Ipq.add q {Ipq.ev= default; Ipq.time= Mtime_clock.now ()} + +let tests = + [ + ("test_out_of_index", `Quick, test_out_of_index) + ; ("test_leak", `Quick, test_leak) + ] let () = Alcotest.run "Ipq" [("generic", tests)] diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml index b3d44caa62e..3e8543ec04d 100644 --- 
a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml @@ -25,7 +25,9 @@ type t = {func: unit -> unit; ty: func_ty; name: string} let delay = Delay.make () -let (queue : t Ipq.t) = Ipq.create 50 +let queue_default = {func= (fun () -> ()); ty= OneShot; name= ""} + +let (queue : t Ipq.t) = Ipq.create 50 queue_default let lock = Mutex.create () diff --git a/quality-gate.sh b/quality-gate.sh index b1d170041f1..b72ca099aa7 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -25,7 +25,7 @@ verify-cert () { } mli-files () { - N=498 + N=497 # do not count ml files from the tests in ocaml/{tests/perftest/quicktest} MLIS=$(git ls-files -- '**/*.mli' | grep -vE "ocaml/tests|ocaml/perftest|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) MLS=$(git ls-files -- '**/*.ml' | grep -vE "ocaml/tests|ocaml/perftest|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) From 4f5bb1a2aba55696aeb215fb87621dee05aede66 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Tue, 26 Nov 2024 22:30:31 +0000 Subject: [PATCH 105/121] Add test for is_empty for Imperative priority queue Signed-off-by: Frediano Ziglio --- .../xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml index 51fb39367d9..e08c1902fd0 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml @@ -50,10 +50,20 @@ let test_leak () = Alcotest.(check bool) "allocated" false (Atomic.get allocated) ; Ipq.add q {Ipq.ev= default; Ipq.time= Mtime_clock.now ()} +(* test Ipq.is_empty call *) +let test_empty () = + let q = Ipq.create 10 0 in + Alcotest.(check bool) "same value" true (Ipq.is_empty q) ; + Ipq.add q 
{Ipq.ev= 123; Ipq.time= Mtime_clock.now ()} ; + Alcotest.(check bool) "same value" false (Ipq.is_empty q) ; + Ipq.remove q 0 ; + Alcotest.(check bool) "same value" true (Ipq.is_empty q) + let tests = [ ("test_out_of_index", `Quick, test_out_of_index) ; ("test_leak", `Quick, test_leak) + ; ("test_empty", `Quick, test_empty) ] let () = Alcotest.run "Ipq" [("generic", tests)] From 1a8a1f3dff351342ada0e5a9ee8382a56dc4f3a9 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Wed, 27 Nov 2024 21:58:45 +0000 Subject: [PATCH 106/121] Move and improve old test for Imperative priority queue Add 100 elements with random time. Remove first 50 inserted. Remove last 50 in the order of time. While doing that check the content of the structure and the structure itself. Signed-off-by: Frediano Ziglio --- .../lib/xapi-stdext-threads/ipq.ml | 26 ++----- .../lib/xapi-stdext-threads/ipq.mli | 3 + .../lib/xapi-stdext-threads/ipq_test.ml | 74 +++++++++++++++++++ 3 files changed, 85 insertions(+), 18 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml index 17c1a6e9792..0c0a458e2ce 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml @@ -130,6 +130,14 @@ let pop_maximum h = let m = maximum h in remove h 0 ; m +let check h = + let d = h.data in + for i = 1 to h.size - 1 do + let fi = (i - 1) / 2 in + let ordered = Mtime.is_later d.(i).time ~than:d.(fi).time in + assert ordered + done + let iter f h = let d = h.data in for i = 0 to h.size - 1 do @@ -143,21 +151,3 @@ let fold f h x0 = let rec foldrec x i = if i >= n then x else foldrec (f d.(i) x) (succ i) in foldrec x0 0 *) - -(* -let _ = - let test : int t = create 100 in - for i=0 to 99 do - let e = {time=Random.float 10.0; ev=i} in - add test e - done; - for i=0 to 49 do - let xx=find test i in - remove test xx - done; -(* remove test xx;*) - for i=0 to 49 do - let e=pop_maximum 
test in - Printf.printf "time: %f, site: %d\n" e.time e.ev - done -*) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli index f470bcc8d32..b7c4974e642 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli @@ -53,3 +53,6 @@ val pop_maximum : 'a t -> 'a event val iter : ('a event -> unit) -> 'a t -> unit (** Iterate given function on the list of events in the queue *) + +val check : 'a t -> unit +(** Check internal queue state, used for debugging *) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml index e08c1902fd0..e8e64093e16 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml @@ -59,11 +59,85 @@ let test_empty () = Ipq.remove q 0 ; Alcotest.(check bool) "same value" true (Ipq.is_empty q) +module Int64Set = Set.Make (Int64) + +let check = Ipq.check + +(* get size of the queue *) +let size queue = + let l = ref 0 in + Ipq.iter (fun _ -> l := !l + 1) queue ; + !l + +(* get a set of times from the queue *) +let set queue = + let s = ref Int64Set.empty in + Ipq.iter + (fun d -> + let t = d.time in + let t = Mtime.to_uint64_ns t in + s := Int64Set.add t !s + ) + queue ; + !s + +let test_old () = + let test : int Ipq.t = Ipq.create 100 0 in + let s = ref Int64Set.empty in + let add i = + let ti = Random.int64 1000000L in + let t = Mtime.of_uint64_ns ti in + let e = {Ipq.time= t; Ipq.ev= i} in + Ipq.add test e ; + s := Int64Set.add ti !s + in + for i = 0 to 49 do + add i + done ; + let first_half = set test in + for i = 50 to 99 do + add i + done ; + check test ; + (* we should have all elements *) + Alcotest.(check int) "100 elements" 100 (size test) ; + + let all = set test in + Alcotest.(check int) "same list" 0 (Int64Set.compare !s all) ; + + (* remove 
half of the elements *) + for i = 0 to 49 do + let xx = Ipq.find test i in + Printf.printf "Removing element %d position %d\n%!" i xx ; + Ipq.remove test xx ; + check test + done ; + Alcotest.(check int) "50 elements" 50 (size test) ; + + (* make sure we have the right elements in the list *) + let s = set test in + let second_half = Int64Set.diff all first_half in + Alcotest.(check int) "same list" 0 (Int64Set.compare s second_half) ; + + (* remove test *) + let prev = ref 0L in + for _ = 0 to 49 do + let e = Ipq.pop_maximum test in + let t = Mtime.to_uint64_ns e.time in + Alcotest.(check bool) + (Printf.sprintf "%Ld bigger than %Ld" t !prev) + true (t >= !prev) ; + Printf.printf "time: %Ld, site: %d\n" t e.ev ; + prev := t ; + check test + done + let tests = [ ("test_out_of_index", `Quick, test_out_of_index) ; ("test_leak", `Quick, test_leak) ; ("test_empty", `Quick, test_empty) + ; ("test_old", `Quick, test_old) ] let () = Alcotest.run "Ipq" [("generic", tests)] From 32020588430afea87f78c8da1d5058d98535519e Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Thu, 28 Nov 2024 09:12:18 +0000 Subject: [PATCH 107/121] Initialise Imperative priority queue array on creation No need to wait. 
Signed-off-by: Frediano Ziglio --- ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml index 0c0a458e2ce..4cf29ed3d9b 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml @@ -24,7 +24,7 @@ let create n default = invalid_arg "create" else let default = {ev= default; time= Mtime_clock.now ()} in - {default; size= -n; data= [||]} + {default; size= 0; data= Array.make n default} let is_empty h = h.size <= 0 @@ -38,11 +38,6 @@ let resize h = h.data <- d' let add h x = - (* first addition: we allocate the array *) - if h.size < 0 then ( - h.data <- Array.make (-h.size) h.default ; - h.size <- 0 - ) ; let n = h.size in (* resizing if needed *) if n = Array.length h.data then resize h ; From 87ca90ec087f58ea34a94999006f8e324b4d2605 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 1 Oct 2024 17:18:49 +0100 Subject: [PATCH 108/121] CA-399757: Add CAS style check for SR scan SR.scan is currently not an atomic operation, and this has caused problems as during the scan itself, there might be other calls changing the state of the database, such as VDI.db_introduce called by SM, if using SMAPIv1. This will confuse SR.scan as it sees an outdated snapshot. The proposed workaround would be add a CAS style check for SR.scan, which will refuse to update the db if it detects changes. This is still subject to the TOCTOU problem, but should reduce the racing window. 
Signed-off-by: Vincent Liu --- ocaml/xapi/xapi_sr.ml | 63 ++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/ocaml/xapi/xapi_sr.ml b/ocaml/xapi/xapi_sr.ml index 7a83493b2de..8af5bd6e62f 100644 --- a/ocaml/xapi/xapi_sr.ml +++ b/ocaml/xapi/xapi_sr.ml @@ -786,26 +786,51 @@ let scan ~__context ~sr = SRScanThrottle.execute (fun () -> transform_storage_exn (fun () -> let sr_uuid = Db.SR.get_uuid ~__context ~self:sr in - let vs, sr_info = - C.SR.scan2 (Ref.string_of task) - (Storage_interface.Sr.of_string sr_uuid) - in - let db_vdis = - Db.VDI.get_records_where ~__context - ~expr:(Eq (Field "SR", Literal sr')) - in - update_vdis ~__context ~sr db_vdis vs ; - let virtual_allocation = - List.fold_left Int64.add 0L - (List.map (fun v -> v.Storage_interface.virtual_size) vs) + (* CA-399757: Do not update_vdis unless we are sure that the db was not + changed during the scan. If it was, retry the scan operation. This + change might be a result of the SMAPIv1 call back into xapi with + the db_introduce call, for example. 
+ + Note this still suffers TOCTOU problem, but a complete operation is not easily + implementable without rearchitecting the storage apis *) + let rec scan_rec limit = + let find_vdis () = + Db.VDI.get_records_where ~__context + ~expr:(Eq (Field "SR", Literal sr')) + in + let db_vdis_before = find_vdis () in + let vs, sr_info = + C.SR.scan2 (Ref.string_of task) + (Storage_interface.Sr.of_string sr_uuid) + in + let db_vdis_after = find_vdis () in + if limit > 0 && db_vdis_after <> db_vdis_before then + (scan_rec [@tailcall]) (limit - 1) + else if limit = 0 then + raise + (Api_errors.Server_error + (Api_errors.internal_error, ["SR.scan retry limit exceeded"]) + ) + else ( + update_vdis ~__context ~sr db_vdis_after vs ; + let virtual_allocation = + List.fold_left + (fun acc v -> Int64.add v.Storage_interface.virtual_size acc) + 0L vs + in + Db.SR.set_virtual_allocation ~__context ~self:sr + ~value:virtual_allocation ; + Db.SR.set_physical_size ~__context ~self:sr + ~value:sr_info.total_space ; + Db.SR.set_physical_utilisation ~__context ~self:sr + ~value:(Int64.sub sr_info.total_space sr_info.free_space) ; + Db.SR.remove_from_other_config ~__context ~self:sr ~key:"dirty" ; + Db.SR.set_clustered ~__context ~self:sr ~value:sr_info.clustered + ) in - Db.SR.set_virtual_allocation ~__context ~self:sr - ~value:virtual_allocation ; - Db.SR.set_physical_size ~__context ~self:sr ~value:sr_info.total_space ; - Db.SR.set_physical_utilisation ~__context ~self:sr - ~value:(Int64.sub sr_info.total_space sr_info.free_space) ; - Db.SR.remove_from_other_config ~__context ~self:sr ~key:"dirty" ; - Db.SR.set_clustered ~__context ~self:sr ~value:sr_info.clustered + (* XXX Retry 10 times, and then give up. 
We should really expect to + reach this retry limit though, unless something really bad has happened.*) + scan_rec 10 ) ) From 6d275ae0bb674b61bca6028a023ca9f47d4b15d4 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 3 Dec 2024 16:56:59 +0000 Subject: [PATCH 109/121] xapi-stdext-threads: use mtime.clock.os mtime.clock is not yet available on the xs-opam available when building xenserver Signed-off-by: Pau Ruiz Safont --- ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune index cdaf50ee02f..e0437e881ab 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune @@ -13,7 +13,7 @@ (public_name xapi-stdext-threads.scheduler) (name xapi_stdext_threads_scheduler) (modules ipq scheduler) - (libraries mtime mtime.clock threads.posix unix xapi-log xapi-stdext-threads) + (libraries mtime mtime.clock.os threads.posix unix xapi-log xapi-stdext-threads) ) (tests From c85270a2223c74057595df5c43e9a36828e2ac1f Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 4 Dec 2024 11:28:19 +0000 Subject: [PATCH 110/121] Remove unused ocaml/perftest Signed-off-by: Rob Hoes --- ocaml/perftest/createVM.ml | 171 ------- ocaml/perftest/createpool.ml | 751 ------------------------------ ocaml/perftest/cumulative_time.ml | 145 ------ ocaml/perftest/dune | 24 - ocaml/perftest/gnuplot.ml | 165 ------- ocaml/perftest/graphutil.ml | 134 ------ ocaml/perftest/histogram.ml | 230 --------- ocaml/perftest/perfdebug.ml | 24 - ocaml/perftest/perftest.ml | 195 -------- ocaml/perftest/perfutil.ml | 101 ---- ocaml/perftest/scenario.ml | 157 ------- ocaml/perftest/statistics.ml | 155 ------ ocaml/perftest/tests.ml | 493 -------------------- ocaml/perftest/testtypes.ml | 49 -- 14 files changed, 2794 deletions(-) delete mode 100644 ocaml/perftest/createVM.ml delete mode 
100644 ocaml/perftest/createpool.ml delete mode 100644 ocaml/perftest/cumulative_time.ml delete mode 100644 ocaml/perftest/dune delete mode 100644 ocaml/perftest/gnuplot.ml delete mode 100644 ocaml/perftest/graphutil.ml delete mode 100644 ocaml/perftest/histogram.ml delete mode 100644 ocaml/perftest/perfdebug.ml delete mode 100644 ocaml/perftest/perftest.ml delete mode 100644 ocaml/perftest/perfutil.ml delete mode 100644 ocaml/perftest/scenario.ml delete mode 100644 ocaml/perftest/statistics.ml delete mode 100644 ocaml/perftest/tests.ml delete mode 100644 ocaml/perftest/testtypes.ml diff --git a/ocaml/perftest/createVM.ml b/ocaml/perftest/createVM.ml deleted file mode 100644 index e3496223488..00000000000 --- a/ocaml/perftest/createVM.ml +++ /dev/null @@ -1,171 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- *) -open Scenario -open Perfutil -open Client -open Perfdebug - -let iscsi_vm_iso = "xenserver-iscsi-target.iso" - -let iscsi_vm_template = "Other install media" - -let innertemplate = "Windows XP SP3" - -let make_iscsi_ip pool = Printf.sprintf "192.168.%d.200" (pool.ipbase + 2) - -let find_iscsi_iso session_id = - let vdis = Client.VDI.get_all ~rpc ~session_id in - try - Some - (List.find - (fun vdi -> - Client.VDI.get_name_label ~rpc ~session_id ~self:vdi = iscsi_vm_iso - ) - vdis - ) - with _ -> None - -(** Create the VM with the iscsi iso attached *) -let make_iscsi session_id pool network = - try - let iscsi_iso = - match find_iscsi_iso session_id with - | Some vdi -> - vdi - | None -> - failwith "iSCSI VM iso not found" - in - let template = - List.hd - (Client.VM.get_by_name_label ~rpc ~session_id ~label:iscsi_vm_template) - in - let newvm = - Client.VM.clone ~rpc ~session_id ~vm:template - ~new_name:"ISCSI target server" - in - Client.VM.provision ~rpc ~session_id ~vm:newvm ; - let _ (* isovbd *) = - Client.VBD.create ~rpc ~session_id ~vM:newvm ~vDI:iscsi_iso ~device:"" - ~userdevice:"0" ~bootable:true ~mode:`RO ~_type:`CD ~unpluggable:false - ~empty:false ~other_config:[] ~currently_attached:false - ~qos_algorithm_type:"" ~qos_algorithm_params:[] - in - let realpool = List.hd (Client.Pool.get_all ~rpc ~session_id) in - let defaultsr = - Client.Pool.get_default_SR ~rpc ~session_id ~self:realpool - in - for i = 0 to pool.iscsi_luns - 1 do - let storage_vdi_label = Printf.sprintf "SCSI VDI %d" i in - let storage_vdi = - Client.VDI.create ~rpc ~session_id ~name_label:storage_vdi_label - ~name_description:"" ~sR:defaultsr ~virtual_size:sr_disk_size - ~_type:`user ~sharable:false ~read_only:false - ~other_config:[(oc_key, pool.key)] - ~xenstore_data:[] ~sm_config:[] ~tags:[] - in - let userdevice = Printf.sprintf "%d" (i + 1) in - ignore - (Client.VBD.create ~rpc ~session_id ~vM:newvm ~vDI:storage_vdi - ~device:"" ~userdevice ~bootable:false ~mode:`RW 
~_type:`Disk - ~unpluggable:false ~empty:false ~other_config:[] - ~currently_attached:false ~qos_algorithm_type:"" - ~qos_algorithm_params:[] - ) - done ; - Client.VM.set_PV_bootloader ~rpc ~session_id ~self:newvm ~value:"pygrub" ; - Client.VM.set_PV_args ~rpc ~session_id ~self:newvm - ~value: - (Printf.sprintf "net_ip=%s net_mask=255.255.255.0" (make_iscsi_ip pool)) ; - Client.VM.set_HVM_boot_policy ~rpc ~session_id ~self:newvm ~value:"" ; - let (_ : API.ref_VIF) = - Client.VIF.create ~rpc ~session_id ~device:"0" ~network ~vM:newvm ~mAC:"" - ~mTU:1500L - ~other_config:[(oc_key, pool.key)] - ~currently_attached:false ~qos_algorithm_type:"" - ~qos_algorithm_params:[] ~locking_mode:`network_default ~ipv4_allowed:[] - ~ipv6_allowed:[] - in - Client.VM.add_to_other_config ~rpc ~session_id ~self:newvm ~key:oc_key - ~value:pool.key ; - let uuid = Inventory.lookup "INSTALLATION_UUID" in - let host = Client.Host.get_by_uuid ~rpc ~session_id ~uuid in - Client.VM.start_on ~rpc ~session_id ~vm:newvm ~host ~start_paused:false - ~force:false ; - Some newvm - with e -> - debug "Caught exception with iscsi VM: %s" (Printexc.to_string e) ; - None - -let make ~rpc ~session_id ~pool:_ ~vm ~networks ~storages = - let wintemplate = - List.hd (Client.VM.get_by_name_label ~rpc ~session_id ~label:innertemplate) - in - let host_refs = Array.of_list (Client.Host.get_all ~rpc ~session_id) in - for i = 0 to Array.length storages - 1 do - Printf.printf "Creating %d VMs in SR %d\n%!" 
vm.num i ; - for j = 0 to vm.num - 1 do - let newname = - Printf.sprintf "VM %d%s%s" j - ( if Array.length storages > 1 then - Printf.sprintf " in SR %d" i - else - "" - ) - (if vm.tag <> "" then " - " ^ vm.tag else "") - in - let clone = - Client.VM.clone ~rpc ~session_id ~vm:wintemplate ~new_name:newname - in - Client.VM.add_tags ~rpc ~session_id ~self:clone ~value:vm.tag ; - Client.VM.remove_from_other_config ~rpc ~session_id ~self:clone - ~key:"disks" ; - for userdevice = 0 to vm.vbds - 1 do - Printf.printf " - creating VDI %d for VM %d on SR %d of %d\n%!" - userdevice j i (Array.length storages) ; - let newdisk = - Client.VDI.create ~rpc ~session_id ~name_label:"Guest disk" - ~name_description:"" ~sR:storages.(i) ~virtual_size:4194304L - ~_type:`user ~sharable:false ~read_only:false ~xenstore_data:[] - ~other_config:[] ~sm_config:[] ~tags:[] - in - ignore - (Client.VBD.create ~rpc ~session_id ~vM:clone ~vDI:newdisk - ~userdevice:(string_of_int userdevice) ~bootable:false ~mode:`RW - ~_type:`Disk ~unpluggable:true ~empty:false ~qos_algorithm_type:"" - ~qos_algorithm_params:[] ~other_config:[] ~device:"" - ~currently_attached:false - ) - done ; - Client.VM.provision ~rpc ~session_id ~vm:clone ; - for device = 0 to min vm.vifs (Array.length networks) - 1 do - ignore - (Client.VIF.create ~rpc ~session_id ~device:(string_of_int device) - ~network:networks.(device) ~vM:clone ~mAC:"" ~mTU:1500L - ~other_config:[] ~qos_algorithm_type:"" ~qos_algorithm_params:[] - ~locking_mode:`network_default ~ipv4_allowed:[] ~ipv6_allowed:[] - ~currently_attached:false - ) - done ; - Client.VM.set_memory_static_min ~rpc ~session_id ~self:clone - ~value:16777216L ; - Client.VM.set_memory_dynamic_min ~rpc ~session_id ~self:clone - ~value:16777216L ; - Client.VM.set_memory_dynamic_max ~rpc ~session_id ~self:clone - ~value:16777216L ; - Client.VM.set_memory_static_max ~rpc ~session_id ~self:clone - ~value:16777216L ; - if vm.has_affinity && Array.length storages = Array.length host_refs 
then - Client.VM.set_affinity ~rpc ~session_id ~self:clone ~value:host_refs.(i) - done - done diff --git a/ocaml/perftest/createpool.ml b/ocaml/perftest/createpool.ml deleted file mode 100644 index bf96cfb7c36..00000000000 --- a/ocaml/perftest/createpool.ml +++ /dev/null @@ -1,751 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -(* Create a pool of SDKs *) - -open Client -open Perfutil -open Xapi_stdext_std -open Scenario -open Perfdebug - -let master_of_pool = "master_of_pool" - -let management_ip = "management_ip" - -let get_network_num_from_interface pool i = - if i < pool.bonds * 2 then - i / 2 - else - i - pool.bonds - -(** Only storage types supporting active thin-provisioned disks allow us to - create a 2TiB disk, clone it and attach it to a bunch of VMs without - running out of space. In particular the hybrid thin/thick behaviour of - LVHD won't work so we can't use LVM over iSCSI or FC. It's probably easiest - to include a whitelist here rather than find an EQL array to test this. 
*) -let sr_is_suitable session_id sr = - let t = - String.lowercase_ascii (Client.SR.get_type ~rpc ~session_id ~self:sr) - in - t = "ext" || t = "nfs" - -let default_sr_must_be_suitable session_id = - let realpool = List.hd (Client.Pool.get_all ~rpc ~session_id) in - let defaultsr = Client.Pool.get_default_SR ~rpc ~session_id ~self:realpool in - if not (sr_is_suitable session_id defaultsr) then - failwith - "Pool's default SR is unsuitable for the local storage on the template" - -let initialise session_id template pool = - (* First, create the networks the hosts will have their interfaces on *) - let networks_to_create = pool.interfaces_per_host - pool.bonds in - debug "Creating %d networks..." networks_to_create ; - let networks = - Array.init networks_to_create (fun i -> - Client.Network.create ~rpc ~session_id - ~name_label:(Printf.sprintf "perftestnet%d" i) - ~name_description:"" ~mTU:1500L - ~other_config:[(oc_key, pool.key)] - ~bridge:"" ~managed:true ~tags:[] - ) - in - (* Set up the template - create the VIFs *) - debug "Setting up the template. 
Creating VIFs on networks" ; - let interfaces = - Array.init pool.interfaces_per_host (fun i -> - let net = networks.(get_network_num_from_interface pool i) in - Client.VIF.create ~rpc ~session_id ~device:(string_of_int i) - ~network:net ~vM:template ~mAC:"" ~mTU:1500L - ~other_config:[(oc_key, pool.key)] - ~qos_algorithm_type:"" ~qos_algorithm_params:[] - ~locking_mode:`network_default ~ipv4_allowed:[] ~ipv6_allowed:[] - ~currently_attached:false - ) - in - (* Create a disk for local storage *) - debug "Creating a disk for local storage on the template" ; - default_sr_must_be_suitable session_id ; - let realpool = List.hd (Client.Pool.get_all ~rpc ~session_id) in - let defaultsr = Client.Pool.get_default_SR ~rpc ~session_id ~self:realpool in - let newdisk = - Client.VDI.create ~rpc ~session_id ~name_label:"SDK storage" - ~name_description:"" ~sR:defaultsr ~virtual_size:sr_disk_size ~_type:`user - ~sharable:false ~read_only:false ~xenstore_data:[] - ~other_config:[(oc_key, pool.key)] - ~sm_config:[] ~tags:[] - in - let (_ : API.ref_VBD) = - Client.VBD.create ~rpc ~session_id ~vM:template ~vDI:newdisk - ~userdevice:sr_disk_device ~bootable:false ~mode:`RW ~_type:`Disk - ~unpluggable:true ~empty:false ~qos_algorithm_type:"" - ~qos_algorithm_params:[] - ~other_config:[(oc_key, pool.key)] - ~device:"" ~currently_attached:false - in - debug "Setting up xenstore keys" ; - (* Set up the various xenstore keys *) - Client.VM.set_PV_args ~rpc ~session_id ~self:template ~value:"noninteractive" ; - (* no password setting step *) - Client.VM.add_to_xenstore_data ~rpc ~session_id ~self:template - ~key:"vm-data/provision/interfaces/0/admin" ~value:"true" ; - Array.iteri - (fun i _ -> - Client.VM.add_to_xenstore_data ~rpc ~session_id ~self:template - ~key:(Printf.sprintf "vm-data/provision/interfaces/%d/mode" i) - ~value:"static" ; - Client.VM.add_to_xenstore_data ~rpc ~session_id ~self:template - ~key:(Printf.sprintf "vm-data/provision/interfaces/%d/netmask" i) - 
~value:"255.255.255.0" - ) - interfaces ; - debug "Setting memory to 128 Megs" ; - Client.VM.set_memory_static_min ~rpc ~session_id ~self:template - ~value:(Int64.mul 128L 1048576L) ; - Client.VM.set_memory_dynamic_min ~rpc ~session_id ~self:template - ~value:(Int64.mul 128L 1048576L) ; - Client.VM.set_memory_dynamic_max ~rpc ~session_id ~self:template - ~value:(Int64.mul 128L 1048576L) ; - Client.VM.set_memory_static_max ~rpc ~session_id ~self:template - ~value:(Int64.mul 128L 1048576L) ; - Client.VM.remove_from_other_config ~rpc ~session_id ~self:template ~key:oc_key ; - Client.VM.add_to_other_config ~rpc ~session_id ~self:template ~key:oc_key - ~value:pool.key ; - interfaces - -let reset_template session_id template = - (* Destroy template's VIFs *) - debug "Resetting template to factory settings" ; - let vifs = Client.VM.get_VIFs ~rpc ~session_id ~self:template in - List.iter - (fun vif -> - try - if - List.mem_assoc oc_key - (Client.VIF.get_other_config ~rpc ~session_id ~self:vif) - then - Client.VIF.destroy ~rpc ~session_id ~self:vif - with _ -> () - ) - vifs ; - (* Destroy template's sr disk *) - let vbds = Client.VM.get_VBDs ~rpc ~session_id ~self:template in - List.iter - (fun vbd -> - if - List.mem_assoc oc_key - (Client.VBD.get_other_config ~rpc ~session_id ~self:vbd) - then ( - let vdi = Client.VBD.get_VDI ~rpc ~session_id ~self:vbd in - assert ( - List.mem_assoc oc_key - (Client.VDI.get_other_config ~rpc ~session_id ~self:vdi) - ) ; - Client.VDI.destroy ~rpc ~session_id ~self:vdi ; - try Client.VBD.destroy ~rpc ~session_id ~self:vbd with _ -> () - ) - ) - vbds ; - (* Remove xenstore keys *) - Client.VM.set_xenstore_data ~rpc ~session_id ~self:template ~value:[] ; - Client.VM.set_PV_args ~rpc ~session_id ~self:template ~value:"" ; - try - Client.VM.remove_from_other_config ~rpc ~session_id ~self:template - ~key:oc_key - with _ -> () - -let uninitialise session_id _template key = - (* Shut down and uninstall any VMs *) - debug "Shutting down and 
uninstalling any VMs" ; - let vms = Client.VM.get_all ~rpc ~session_id in - List.iter - (fun vm -> - let is_a_template = - Client.VM.get_is_a_template ~rpc ~session_id ~self:vm - in - let is_control_domain = - Client.VM.get_is_control_domain ~rpc ~session_id ~self:vm - in - let is_managed = - try - List.assoc oc_key - (Client.VM.get_other_config ~rpc ~session_id ~self:vm) - = key - with _ -> false - in - let running = - Client.VM.get_power_state ~rpc ~session_id ~self:vm = `Running - in - if (not is_a_template) && (not is_control_domain) && is_managed then ( - if running then Client.VM.hard_shutdown ~rpc ~session_id ~vm ; - let vbds = Client.VM.get_VBDs ~rpc ~session_id ~self:vm in - let vdis = - List.map - (fun vbd -> Client.VBD.get_VDI ~rpc ~session_id ~self:vbd) - vbds - in - List.iter - (fun vdi -> - try Client.VDI.destroy ~rpc ~session_id ~self:vdi with _ -> () - ) - vdis ; - List.iter - (fun vbd -> - try Client.VBD.destroy ~rpc ~session_id ~self:vbd with _ -> () - ) - vbds ; - List.iter - (fun vif -> - try Client.VIF.destroy ~rpc ~session_id ~self:vif with _ -> () - ) - (Client.VM.get_VIFs ~rpc ~session_id ~self:vm) ; - Client.VM.destroy ~rpc ~session_id ~self:vm - ) - ) - vms ; - (* Destroy networks *) - debug "Destroying networks" ; - let nets = Client.Network.get_all_records ~rpc ~session_id in - let mynets = - List.filter - (fun (_, r) -> - List.mem_assoc oc_key r.API.network_other_config - && List.assoc oc_key r.API.network_other_config = key - ) - nets - in - List.iter - (fun (net, _) -> Client.Network.destroy ~rpc ~session_id ~self:net) - mynets ; - let nets = Client.Network.get_all_records ~rpc ~session_id in - debug "Destroying any bridges" ; - let ic = - Unix.open_process_in "ifconfig -a | grep \"^xapi\" | awk '{print $1}'" - in - let netdevs = - let rec doline () = - try - let x = input_line ic in - x :: doline () - with _ -> [] - in - doline () - in - List.iter - (fun netdev -> - if not (List.exists (fun (_, net) -> net.API.network_bridge = 
netdev) nets) - then ( - ignore - (Sys.command (Printf.sprintf "ifconfig %s down 2>/dev/null" netdev)) ; - ignore (Sys.command (Printf.sprintf "brctl delbr %s 2>/dev/null" netdev)) - ) - ) - netdevs - -let destroy_sdk_pool session_id sdkname key = - let template = - List.hd (Client.VM.get_by_name_label ~rpc ~session_id ~label:sdkname) - in - uninitialise session_id template key - -let describe_pool template_name pool_name key = - let pool = Scenario.get pool_name in - let pool = {pool with key} in - Printf.sprintf "Base template: %s" template_name :: description_of_pool pool - -let iscsi_vm_iso_must_exist session_id = - (* The iSCSI VM iso must exist *) - if CreateVM.find_iscsi_iso session_id = None then - failwith - (Printf.sprintf "The iSCSI target VM iso could not be found (%s)" - CreateVM.iscsi_vm_iso - ) - -let create_sdk_pool session_id sdkname pool_name key ipbase = - iscsi_vm_iso_must_exist session_id ; - default_sr_must_be_suitable session_id ; - let pool = List.find (fun p -> p.id = pool_name) pools in - let pool = {pool with key; ipbase} in - let template = - try List.hd (Client.VM.get_by_name_label ~rpc ~session_id ~label:sdkname) - with _ -> - debug ~out:stderr "template '%s' not found" sdkname ; - exit 1 - in - let uuid = Client.VM.get_uuid ~rpc ~session_id ~self:template in - debug "Creating test pool '%s' using SDK template uuid=%s" pool.id uuid ; - (* Clear up any leftover state on the template *) - reset_template session_id template ; - let interfaces = initialise session_id template pool in - Printf.printf "Creating iSCSI target VM serving %d LUNs\n%!" 
pool.iscsi_luns ; - let (_ : API.ref_VM option) = - CreateVM.make_iscsi session_id pool - (Client.VIF.get_network ~rpc ~session_id ~self:interfaces.(2)) - in - debug "Creating %d SDK VMs" pool.hosts ; - let hosts = - Array.init pool.hosts (fun i -> - let n = i + 1 in - let vm = - Client.VM.clone ~rpc ~session_id ~vm:template - ~new_name:(Printf.sprintf "perftestpool%d" n) - in - Client.VM.provision ~rpc ~session_id ~vm ; - Array.iteri - (fun i _ -> - ignore - (Client.VM.add_to_xenstore_data ~rpc ~session_id ~self:vm - ~key:(Printf.sprintf "vm-data/provision/interfaces/%d/ip" i) - ~value:(Printf.sprintf "192.168.%d.%d" (i + pool.ipbase) n) - ) - ) - interfaces ; - vm - ) - in - debug "Setting memory on master to be 256 Megs" ; - Client.VM.set_memory_static_max ~rpc ~session_id ~self:hosts.(0) - ~value:(Int64.mul 256L 1048576L) ; - Client.VM.set_memory_static_min ~rpc ~session_id ~self:hosts.(0) - ~value:(Int64.mul 256L 1048576L) ; - Client.VM.set_memory_dynamic_max ~rpc ~session_id ~self:hosts.(0) - ~value:(Int64.mul 256L 1048576L) ; - Client.VM.set_memory_dynamic_min ~rpc ~session_id ~self:hosts.(0) - ~value:(Int64.mul 256L 1048576L) ; - Client.VM.add_to_other_config ~rpc ~session_id ~self:hosts.(0) - ~key:master_of_pool ~value:pool.key ; - Client.VM.add_to_other_config ~rpc ~session_id ~self:hosts.(0) - ~key:management_ip - ~value:(Printf.sprintf "192.168.%d.1" pool.ipbase) ; - let localhost_uuid = Inventory.lookup "INSTALLATION_UUID" in - Array.iteri - (fun i host -> - debug "Starting VM %d" i ; - Client.VM.start_on ~rpc ~session_id ~vm:host - ~host:(Client.Host.get_by_uuid ~rpc ~session_id ~uuid:localhost_uuid) - ~start_paused:false ~force:false - ) - hosts ; - ignore - (Sys.command - (Printf.sprintf "ifconfig %s 192.168.%d.200 up" - (Client.Network.get_bridge ~rpc ~session_id - ~self:(Client.VIF.get_network ~rpc ~session_id ~self:interfaces.(0)) - ) - pool.ipbase - ) - ) ; - reset_template session_id template ; - debug "Guests are now booting..." 
; - let pingable = Array.make (Array.length hosts) false in - let firstboot = Array.make (Array.length hosts) false in - let string_of_status () = - Array.to_seq pingable - |> Seq.mapi (fun i ping -> - let boot = firstboot.(i) in - match (ping, boot) with - | false, false -> - '.' - | true, false -> - 'P' - | true, true -> - 'B' - | _, _ -> - '?' - ) - |> String.of_seq - in - let has_guest_booted i _vm = - let ip = Printf.sprintf "192.168.%d.%d" pool.ipbase (i + 1) in - let is_pingable () = - if pingable.(i) then - true - else if - Sys.command - (Printf.sprintf "ping -W 1 -c 1 %s 2>/dev/null >/dev/null" ip) - = 0 - then ( - pingable.(i) <- true ; - debug "Individual host status: %s" (string_of_status ()) ; - true - ) else - false - in - let firstbooted () = - if firstboot.(i) then - true - else - let rpc = remoterpc ip in - try - let session_id = - Client.Session.login_with_password ~rpc ~uname:"root" - ~pwd:"xensource" ~version:"1.1" ~originator:"perftest" - in - Xapi_stdext_pervasives.Pervasiveext.finally - (fun () -> - let host = List.hd (Client.Host.get_all ~rpc ~session_id) in - (* only one host because it hasn't joined the pool yet *) - let other_config = - Client.Host.get_other_config ~rpc ~session_id ~self:host - in - let key = "firstboot-complete" in - (* Since these are 'fresh' hosts which have never booted, the key goes from missing -> present *) - if List.mem_assoc key other_config then ( - firstboot.(i) <- true ; - debug "Individual host status: %s" (string_of_status ()) ; - true - ) else - false - ) - (fun () -> Client.Session.logout ~rpc ~session_id) - with _ -> false - in - is_pingable () && firstbooted () - in - let wait_until_guests_have_booted () = - for i = 0 to Array.length pingable - 1 do - pingable.(i) <- false - done ; - let finished = ref false in - while not !finished do - finished := - List.fold_left ( && ) true - (Array.to_list (Array.mapi has_guest_booted hosts)) ; - Unix.sleep 20 - done - in - wait_until_guests_have_booted () ; - debug 
"Guests have booted; issuing Pool.joins." ; - let host_uuids = - Array.mapi - (fun i _ -> - let n = i + 1 in - let rpc = remoterpc (Printf.sprintf "192.168.%d.%d" pool.ipbase n) in - let session_id = - Client.Session.login_with_password ~rpc ~uname:"root" ~pwd:"xensource" - ~version:"1.1" ~originator:"perftest" - in - let h = List.hd (Client.Host.get_all ~rpc ~session_id) in - let u = Client.Host.get_uuid ~rpc ~session_id ~self:h in - debug "Setting name of host %d" n ; - Client.Host.set_name_label ~rpc ~session_id ~self:h - ~value:(Printf.sprintf "perftest host %d" i) ; - if i <> 0 then ( - debug "Joining to pool" ; - Client.Pool.join ~rpc ~session_id - ~master_address:(Printf.sprintf "192.168.%d.1" pool.ipbase) - ~master_username:"root" ~master_password:"xensource" - ) ; - u - ) - hosts - in - let poolrpc = remoterpc (Printf.sprintf "192.168.%d.1" pool.ipbase) in - let poolses = - Client.Session.login_with_password ~rpc:poolrpc ~uname:"root" - ~pwd:"xensource" ~version:"1.1" ~originator:"perftest" - in - let vpool = List.hd (Client.Pool.get_all ~rpc:poolrpc ~session_id:poolses) in - Client.Pool.add_to_other_config ~rpc:poolrpc ~session_id:poolses ~self:vpool - ~key:"scenario" ~value:pool_name ; - debug "Waiting for all hosts to become live and enabled" ; - let hosts = - Array.of_list (Client.Host.get_all ~rpc:poolrpc ~session_id:poolses) - in - let live = Array.make (Array.length hosts) false in - let enabled = Array.make (Array.length hosts) false in - let string_of_status () = - Array.to_seq live - |> Seq.mapi (fun i live -> - let enabled = enabled.(i) in - match (live, enabled) with - | false, false -> - '.' - | true, false -> - 'L' - | true, true -> - 'E' - | _, _ -> - '?' 
- ) - |> String.of_seq - in - let has_host_booted rpc session_id i host = - try - if live.(i) && enabled.(i) then - true - else - let metrics = Client.Host.get_metrics ~rpc ~session_id ~self:host in - let live' = - Client.Host_metrics.get_live ~rpc ~session_id ~self:metrics - in - let enabled' = Client.Host.get_enabled ~rpc ~session_id ~self:host in - if live.(i) <> live' || enabled.(i) <> enabled' then - debug "Individual host status: %s" (string_of_status ()) ; - live.(i) <- live' ; - enabled.(i) <- enabled' ; - live' && enabled' - with _ -> false - in - let finished = ref false in - while not !finished do - Unix.sleep 20 ; - finished := - List.fold_left ( && ) true - (Array.to_list (Array.mapi (has_host_booted poolrpc poolses) hosts)) - done ; - debug "All hosts are ready." ; - let mypool = List.hd (Client.Pool.get_all ~rpc:poolrpc ~session_id:poolses) in - let master = - Client.Pool.get_master ~rpc:poolrpc ~session_id:poolses ~self:mypool - in - let iscsi_vm_ip = CreateVM.make_iscsi_ip pool in - let xml = - try - Client.SR.probe ~rpc:poolrpc ~session_id:poolses ~host:master - ~device_config:[("target", iscsi_vm_ip)] - ~sm_config:[] ~_type:"lvmoiscsi" - with Api_errors.Server_error ("SR_BACKEND_FAILURE_96", [xml; _]) -> xml - in - let iqns = parse_sr_probe_for_iqn xml in - if iqns = [] then - failwith "iSCSI target VM failed again - maybe you should fix it this time?" ; - let iqn = List.hd iqns in - let xml = - try - Client.SR.probe ~rpc:poolrpc ~session_id:poolses ~host:master - ~device_config:[("target", iscsi_vm_ip); ("targetIQN", iqn)] - ~sm_config:[] ~_type:"lvmoiscsi" - with Api_errors.Server_error ("SR_BACKEND_FAILURE_107", [xml; _]) -> xml - in - (* Create an SR for each LUN found *) - Printf.printf "Creating LVMoISCSI SRs (one for each of %d LUNs)\n%!" 
- pool.iscsi_luns ; - let scsiids = Array.of_list (parse_sr_probe_for_scsiids xml) in - if Array.length scsiids <> pool.iscsi_luns then - failwith - (Printf.sprintf - "We created %d VDIs on the iSCSI target VM but found %d LUNs" - pool.iscsi_luns (Array.length scsiids) - ) ; - let lun_srs = - Array.init pool.iscsi_luns (fun i -> - Printf.printf " - Creating shared LVMoISCSI SR %d...\n%!" i ; - let name_label = Printf.sprintf "LVMoISCSI-%d" i in - Client.SR.create ~rpc:poolrpc ~session_id:poolses ~host:master - ~device_config: - [ - ("target", iscsi_vm_ip) - ; ("targetIQN", iqn) - ; ("SCSIid", scsiids.(i)) - ] - ~physical_size:0L ~name_label ~name_description:"" ~_type:"lvmoiscsi" - ~content_type:"" ~shared:true ~sm_config:[] - ) - in - let local_srs = - Array.mapi - (fun i host_uuid -> - let h = - Client.Host.get_by_uuid ~rpc:poolrpc ~session_id:poolses - ~uuid:host_uuid - in - let name_label = Printf.sprintf "Local LVM on host %d" i in - Client.SR.create ~rpc:poolrpc ~session_id:poolses ~host:h - ~device_config:[("device", "/dev/" ^ sr_disk_device)] - ~physical_size:0L ~name_label ~name_description:"" ~_type:"lvm" - ~content_type:"" ~shared:false ~sm_config:[] - ) - host_uuids - in - let pifs = Client.PIF.get_all ~rpc:poolrpc ~session_id:poolses in - let bondednets = - Array.init pool.bonds (fun i -> - Client.Network.create ~rpc:poolrpc ~session_id:poolses - ~name_label:(Printf.sprintf "Network associated with bond%d" i) - ~name_description:"" ~mTU:1500L ~other_config:[] ~bridge:"" - ~managed:true ~tags:[] - ) - in - let unused_nets = - ref - (Listext.List.setify - (List.map - (fun pif -> - Client.PIF.get_network ~rpc:poolrpc ~session_id:poolses ~self:pif - ) - pifs - ) - ) - in - (* Reconfigure the master's networking last as this will be the most destructive *) - let master_uuid = - Client.Host.get_uuid ~rpc:poolrpc ~session_id:poolses ~self:master - in - let slave_uuids = - List.filter (fun x -> x <> master_uuid) (Array.to_list host_uuids) - in - let host_uuids 
= Array.of_list (slave_uuids @ [master_uuid]) in - let (_ : API.ref_Bond array array) = - Array.map - (fun host_uuid -> - let host_ref = - Client.Host.get_by_uuid ~rpc:poolrpc ~session_id:poolses - ~uuid:host_uuid - in - let pifs = - List.filter - (fun pif -> - Client.PIF.get_host ~rpc:poolrpc ~session_id:poolses ~self:pif - = host_ref - ) - pifs - in - Array.init pool.bonds (fun bnum -> - let device = Printf.sprintf "eth%d" (bnum * 2) in - let device2 = Printf.sprintf "eth%d" ((bnum * 2) + 1) in - let master = - List.find - (fun pif -> - Client.PIF.get_device ~rpc:poolrpc ~session_id:poolses - ~self:pif - = device - ) - pifs - in - let pifs = - List.filter - (fun pif -> - let d = - Client.PIF.get_device ~rpc:poolrpc ~session_id:poolses - ~self:pif - in - d = device || d = device2 - ) - pifs - in - let nets = - List.map - (fun pif -> - Client.PIF.get_network ~rpc:poolrpc ~session_id:poolses - ~self:pif - ) - pifs - in - unused_nets := - List.filter (fun net -> not (List.mem net nets)) !unused_nets ; - let mac = - Client.PIF.get_MAC ~rpc:poolrpc ~session_id:poolses ~self:master - in - let bond = - Client.Bond.create ~rpc:poolrpc ~session_id:poolses - ~network:bondednets.(bnum) ~members:pifs ~mAC:mac - ~mode:`balanceslb ~properties:[] - in - let bondpif = - Client.Bond.get_master ~rpc:poolrpc ~session_id:poolses ~self:bond - in - Client.PIF.reconfigure_ip ~rpc:poolrpc ~session_id:poolses - ~self:bondpif ~mode:`Static - ~iP: - (Client.PIF.get_IP ~rpc:poolrpc ~session_id:poolses ~self:master) - ~netmask:"255.255.255.0" ~gateway:"" ~dNS:"" ; - if - Client.PIF.get_management ~rpc:poolrpc ~session_id:poolses - ~self:master - then ( - ( try - Client.Host.management_reconfigure ~rpc:poolrpc - ~session_id:poolses ~pif:bondpif - with _ -> () - ) ; - debug "Reconfigured management interface to be on the bond." 
; - (* In case we've lost our network connection *) - wait_until_guests_have_booted () - ) ; - bond - ) - ) - host_uuids - in - debug "Waiting for all guests to be pingable again." ; - wait_until_guests_have_booted () ; - debug "Successfully pinged all virtual hosts." ; - (* We'll use the Windows XP SP3 template to create the VMs required *) - let nets_for_vms = !unused_nets @ Array.to_list bondednets in - debug "Nets for VMs: %s" - (String.concat "," - (List.map - (fun net -> - Client.Network.get_name_label ~rpc:poolrpc ~session_id:poolses - ~self:net - ) - nets_for_vms - ) - ) ; - let networks = Array.of_list nets_for_vms in - Printf.printf "Creating VMs (%s)\n%!" - (if pool.use_shared_storage then "on shared storage" else "on local storage") ; - let storages = if pool.use_shared_storage then lun_srs else local_srs in - List.iter - (fun vm -> - CreateVM.make ~rpc:poolrpc ~session_id:poolses ~networks ~storages ~pool - ~vm - ) - pool.vms - -let create_pool session_id _ pool_name key _ = - iscsi_vm_iso_must_exist session_id ; - default_sr_must_be_suitable session_id ; - let pool = Scenario.get pool_name in - let pool = {pool with key} in - if pool.Scenario.hosts <> 1 then ( - debug ~out:stderr - "At the moment, multiple host pool is supported only for SDK pool" ; - exit 1 - ) ; - let host = List.hd (Client.Host.get_all ~rpc ~session_id) in - (* 1/ forget the local lvm storages *) - List.iter - (fun lvm_sr -> - List.iter - (fun pbd -> Client.PBD.unplug ~rpc ~session_id ~self:pbd) - (Client.SR.get_PBDs ~rpc ~session_id ~self:lvm_sr) ; - Client.SR.forget ~rpc ~session_id ~sr:lvm_sr - ) - (Client.SR.get_by_name_label ~rpc ~session_id ~label:"Local storage") ; - (* 2/ create an default ext storage *) - let storages = - match Client.SR.get_by_name_label ~rpc ~session_id ~label:"Local vhd" with - | [] -> - [| - Client.SR.create ~rpc ~session_id ~_type:"ext" - ~name_label:"Local vhd" ~name_description:"" - ~device_config:[("device", "/dev/sda3")] - ~host 
~physical_size:Scenario.sr_disk_size ~shared:true - ~sm_config:[] ~content_type:"" - |] - | l -> - Array.of_list l - in - let pool_ref = List.hd (Client.Pool.get_all ~rpc ~session_id) in - Client.Pool.set_default_SR ~rpc ~session_id ~self:pool_ref ~value:storages.(0) ; - Client.Pool.set_crash_dump_SR ~rpc ~session_id ~self:pool_ref - ~value:storages.(0) ; - Client.Pool.set_suspend_image_SR ~rpc ~session_id ~self:pool_ref - ~value:storages.(0) ; - (* 3/ building the VMs *) - let networks = Array.of_list (Client.Network.get_all ~rpc ~session_id) in - List.iter - (fun vm -> CreateVM.make ~rpc ~session_id ~networks ~storages ~pool ~vm) - pool.vms diff --git a/ocaml/perftest/cumulative_time.ml b/ocaml/perftest/cumulative_time.ml deleted file mode 100644 index 5c7ff17d4e9..00000000000 --- a/ocaml/perftest/cumulative_time.ml +++ /dev/null @@ -1,145 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -open Graphutil - -let _ = - let inputs = ref [] in - let format = ref `X11 in - let separate_graphs = ref false in - let graphic_filename = ref "" in - Arg.parse - [ - ( "-format" - , Arg.Symbol - ( ["eps"; "gif"; "x11"] - , function - | "eps" -> - format := `Eps - | "gif" -> - format := `Gif - | "x11" -> - format := `X11 - | _ -> - failwith "huh ?" 
- ) - , " Set output format (default: X11)" - ) - ; ( "-output" - , Arg.Set_string graphic_filename - , " Set default output file (for non-X11 modes)" - ) - ; ( "-separate" - , Arg.Set separate_graphs - , " Plot each data series on separate axes" - ) - ] - (fun x -> inputs := x :: !inputs) - "Generate a histogram by convolving sample points with a gaussian.\nusage:" ; - if !inputs = [] then failwith "Needs at least one input filename" ; - if !format <> `X11 && !graphic_filename = "" then - failwith "This format needs an -output" ; - let inputs = get_info ~separate:!separate_graphs !inputs in - let output_files = - List.map (fun _ -> Filename.temp_file "cumulative" "dat") inputs - in - let all = List.combine inputs output_files in - Xapi_stdext_pervasives.Pervasiveext.finally - (fun () -> - let max_readings = ref 0 in - List.iter - (fun ((info, points), output_file) -> - let (_ : string) = get_result info in - let num_points = List.length points in - max_readings := max num_points !max_readings ; - let open Xapi_stdext_unix in - Unixext.with_file output_file - [Unix.O_WRONLY; Unix.O_TRUNC; Unix.O_CREAT] 0o644 (fun fd -> - let points_array = Array.of_list (List.rev points) in - let cumulative = ref 0. in - for i = 0 to num_points - 1 do - cumulative := points_array.(i) +. !cumulative ; - Unixext.really_write_string fd - (Printf.sprintf "%d %f %f\n" (i + 1) !cumulative - points_array.(i) - ) - done - ) - ) - all ; - (* Plot a line for (a) elapsed time and (b) this particular duration *) - let ls = - List.concat - (List.mapi - (fun i ((info, _floats), output) -> - let graph_one_label = - Printf.sprintf "Cumulative time, SR %d (left axis)" (i + 1) - in - let graph_two_label = - Printf.sprintf "Time per VM, SR %d (right axis)" (i + 1) - in - [ - { - Gnuplot.filename= output - ; title= graph_one_label - ; graphname= get_result info - ; field= 2 - ; yaxis= 1 - ; scale= 1. /. 3600. 
- ; style= "lines" - } - ; { - Gnuplot.filename= output - ; title= graph_two_label - ; graphname= get_result info - ; field= 3 - ; yaxis= 2 - ; scale= 1. - ; style= "lines" - } - ] - ) - all - ) - in - List.iter - (fun result -> - let g = - { - Gnuplot.xlabel= - Printf.sprintf "Number of %s" (string_of_result result) - ; ylabel= "Elapsed time (h)" - ; y2label= Some "Duration (s)" - ; lines= List.filter (fun l -> l.Gnuplot.graphname = result) ls - ; log_x_axis= false - ; xrange= Some (0., float_of_int !max_readings) - ; normal_probability_y_axis= None - } - in - let output = - match !format with - | `Eps -> - Gnuplot.Ps (Printf.sprintf "%s-%s.eps" !graphic_filename result) - | `Gif -> - Gnuplot.Gif (Printf.sprintf "%s-%s.gif" !graphic_filename result) - | `X11 -> - Gnuplot.X11 - in - ignore (Gnuplot.render g output) - ) - (get_result_types inputs) - ) - (fun () -> - List.iter (fun f -> Xapi_stdext_unix.Unixext.unlink_safe f) output_files - ) diff --git a/ocaml/perftest/dune b/ocaml/perftest/dune deleted file mode 100644 index 38d7a0efd16..00000000000 --- a/ocaml/perftest/dune +++ /dev/null @@ -1,24 +0,0 @@ -(executable - (modes exe) - (name perftest) - (public_name perftest) - (package xapi-debug) - (libraries - - http_lib - rpclib.core - threads.posix - xapi-consts - xapi-cli-protocol - xapi-client - xapi-datamodel - xapi-inventory - xapi-types - xapi-stdext-pervasives - xapi-stdext-std - xapi-stdext-threads - xapi-stdext-unix - xml-light2 - ) -) - diff --git a/ocaml/perftest/gnuplot.ml b/ocaml/perftest/gnuplot.ml deleted file mode 100644 index c39ca01475e..00000000000 --- a/ocaml/perftest/gnuplot.ml +++ /dev/null @@ -1,165 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -(** Module to drive gnuplot *) - -open Xapi_stdext_pervasives.Pervasiveext - -type line = { - graphname: string - ; filename: string - ; title: string - ; field: int - ; yaxis: int - ; (* 1 -> left axis; 2 -> right axis *) - scale: float - ; (* multiply the values by this factor *) - style: string (* 'linespoints', 'lines', etc *) -} - -type t = { - xlabel: string - ; ylabel: string - ; y2label: string option - ; log_x_axis: bool - ; xrange: (float * float) option - ; normal_probability_y_axis: (float * float) option - ; lines: line list -} - -type output = Ps of string | Gif of string | X11 - -let make_normal_probability_tics tics = - Printf.sprintf "set ytics (%s)" - (String.concat ", " - (List.map (fun tic -> Printf.sprintf "\"%.2f\" invnorm(%f)" tic tic) tics) - ) - -let make_log_tics tics = - Printf.sprintf "set xtics (%s)" - (String.concat ", " - (List.map (fun tic -> Printf.sprintf "\"%.2f\" %f" tic tic) tics) - ) - -let invnorm (x : t) (y : string) = - if x.normal_probability_y_axis = None then - y - else - Printf.sprintf "invnorm(%s)" y - -let render (x : t) output = - let line (y : line) = - let field = - if x.normal_probability_y_axis = None then - Printf.sprintf "($%d*%f)" y.field y.scale - else - Printf.sprintf "(invnorm($%d*%f))" y.field y.scale - in - Printf.sprintf "\"%s\" using 1:%s axis x1y%d title \"%s\" with %s" - y.filename field y.yaxis y.title y.style - in - let config = - [ - Printf.sprintf "set terminal %s" - ( match output with - | Ps _ -> - "postscript eps enhanced color" - | Gif _ -> - "gif" - | X11 -> - "wxt 0" - ) - ; Printf.sprintf "set output %s" - ( match output with - | Ps filename -> - "\"" ^ filename ^ "\"" - | Gif filename -> - "\"" ^ filename ^ "\"" - | X11 -> - "" - 
) - ; Printf.sprintf "set xlabel \"%s\"" x.xlabel - ; Printf.sprintf "set ylabel \"%s\"" x.ylabel - ] - @ ( match x.y2label with - | None -> - [] - | Some label -> - [ - Printf.sprintf "set y2label \"%s\"" label - ; "set ytics nomirror" - ; "set y2tics auto" - ; "set y2range [0:]" - ] - ) - @ ( match x.normal_probability_y_axis with - | Some (min, max) -> - [ - make_normal_probability_tics - [ - 0.001 - ; 0.01 - ; 0.05 - ; 0.1 - ; 0.2 - ; 0.3 - ; 0.4 - ; 0.5 - ; 0.6 - ; 0.7 - ; 0.8 - ; 0.9 - ; 0.95 - ; 0.99 - ; 0.999 - ] - ; Printf.sprintf "set yrange [invnorm(%f):invnorm(%f)]" min max - ] - | None -> - [] - ) - @ ( match x.log_x_axis with - | true -> - [ - "set logscale x" - ; "set grid" - ; "set xtics (\"1\" 1, \"2\" 2, \"3\" 3, \"4\" 4, \"5\" 5, \"6\" 6, \ - \"7\" 7, \"8\" 8, \"9\" 9, \"10\" 10, \"11\" 11, \"12\" 12, \ - \"13\" 13, \"14\" 14, \"15\" 15, \"20\" 20, \"30\" 30)" - ] - | false -> - [] - ) - @ [ - (if x.log_x_axis then "set logscale x" else "") - ; ( match x.xrange with - | None -> - "set xrange [*:*]" - | Some (min, max) -> - Printf.sprintf "set xrange [%f:%f]" min max - ) - ; Printf.sprintf "plot %s" (String.concat ", " (List.map line x.lines)) - ] - in - let f = Filename.temp_file "gnuplot" "gnuplot" in - let open Xapi_stdext_unix in - Unixext.write_string_to_file f (String.concat "\n" config) ; - finally - (fun () -> - Unix.system - (Printf.sprintf "gnuplot %s %s" - (if output = X11 then "-persist" else "") - f - ) - ) - (fun () -> Unixext.unlink_safe f) diff --git a/ocaml/perftest/graphutil.ml b/ocaml/perftest/graphutil.ml deleted file mode 100644 index e2b0880ed46..00000000000 --- a/ocaml/perftest/graphutil.ml +++ /dev/null @@ -1,134 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. 
with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -open Xapi_stdext_std -open Xapi_stdext_unix -open Testtypes -open Perfdebug - -type short_info = string * string * string - -type info = short_info * float list - -let merge_infos (infos : info list) = - let names = - Listext.List.setify - (List.map - (fun ((file, result, subtest), _) -> (file, result, subtest)) - infos - ) - in - let floats ((file, result, subtest) as i) = - ( i - , List.concat_map - (fun ((f, r, s), fl) -> - if file = f && result = r && subtest = s then fl else [] - ) - infos - ) - in - let merge_infos = List.map floats names in - debug "Available data:" ; - List.iter - (fun ((f, r, s), fl) -> - debug "\t* in file: %s \t%s \t%s \t-- %i points" f r s (List.length fl) - ) - merge_infos ; - merge_infos - -let clone_cnt = ref 0 - -let info_from_raw_result ?(separate = false) file result : info list = - match result.rawresult with - | StartTest floats | ShutdownTest floats -> - [((file, result.resultname, result.subtest), floats)] - | CloneTest floats -> - (* Pretend that we got the data from separate files, so they are considered as separate data series *) - let file = Printf.sprintf "%s-%d" file !clone_cnt in - (* Make the resultnames distinct to force the lines onto separate graphs *) - let resultname = - if separate then - Printf.sprintf "%s-%d" result.resultname !clone_cnt - else - result.resultname - in - let subtest = result.subtest in - clone_cnt := !clone_cnt + 1 ; - [((file, resultname, subtest), floats)] - | _ -> - [] - -let floats_from_file fname = - let floats = ref [] in - Unixext.readfile_line - (fun line -> floats := float_of_string (String.trim line) :: !floats) - fname ; - !floats - -let get_info 
?(separate = false) files : info list = - let aux f = - match Testtypes.from_string (Unixext.string_of_file f) with - | None -> - [((f, "", ""), floats_from_file f)] - | Some results -> - List.concat_map (info_from_raw_result ~separate f) results - in - merge_infos (List.concat_map aux files) - -let short_info_to_string ((file, result, subtest) : short_info) = - Printf.sprintf "%s.%s.%s" result subtest file - -let short_info_to_title ((_, _, subtest) : short_info) = subtest - -let get_result ((_, result, _) : short_info) = result - -let get_result_types (all_info : info list) = - Listext.List.setify (List.map (fun ((_, result, _), _) -> result) all_info) - -let replace_assoc r n l = - if List.mem_assoc r l then - (r, n) :: List.remove_assoc r l - else - (r, n) :: l - -let get_op op extremum (infos : info list) = - let mem : (string * float) list ref = ref [] in - let aux ((_, result, _), floats) = - if List.mem_assoc result !mem then - mem := - (result, List.fold_left op (List.assoc result !mem) floats) - :: List.remove_assoc result !mem - else - mem := (result, List.fold_left op extremum floats) :: !mem - in - List.iter aux infos ; !mem - -let get_min = get_op min max_float - -let get_max = get_op max min_float - -let string_of_result = function - | "startall" -> - "sequential VM.start" - | "stopall" -> - "sequential VM.stop" - | "parallel_startall" -> - "parallel VM.start" - | "parallel_stopall" -> - "parallel VM.stop" - | "clone" -> - "parallel VM.clone" - | s when Xstringext.String.startswith "clone-" s -> - "parallel VM.clone" - | _ -> - "???" diff --git a/ocaml/perftest/histogram.ml b/ocaml/perftest/histogram.ml deleted file mode 100644 index 19afe0db278..00000000000 --- a/ocaml/perftest/histogram.ml +++ /dev/null @@ -1,230 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -open Perfdebug -open Statistics -open Graphutil - -let _ = - let sigma = ref 0.1 in - let inputs = ref [] in - let format = ref `X11 in - let graphic_filename = ref "" in - let integrate = ref false in - let normal = ref false in - let log_axis = ref false in - let min_percentile = ref 1. in - let max_percentile = ref 95. in - Arg.parse - [ - ( "-format" - , Arg.Symbol - ( ["eps"; "gif"; "x11"] - , function - | "eps" -> - format := `Eps - | "gif" -> - format := `Gif - | "x11" -> - format := `X11 - | _ -> - failwith "huh ?" 
- ) - , " Set output format (default: X11)" - ) - ; ( "-output" - , Arg.Set_string graphic_filename - , " Set default output file (for non-X11 modes)" - ) - ; ( "-sigma" - , Arg.Set_float sigma - , Printf.sprintf " Set sigma for the gaussian (default %f)" !sigma - ) - ; ( "-integrate" - , Arg.Set integrate - , Printf.sprintf - " Integrate the probability density function (default: %b)" !integrate - ) - ; ( "-normal" - , Arg.Set normal - , Printf.sprintf " Use a 'normal probability axis' (default: %b)" !normal - ) - ; ( "-log" - , Arg.Set log_axis - , Printf.sprintf " Use a log x axis (default: %b)" !log_axis - ) - ; ( "-minpercentile" - , Arg.Set_float min_percentile - , Printf.sprintf " Minimum percentile to plot (default: %.2f)" - !min_percentile - ) - ; ( "-maxpercentile" - , Arg.Set_float max_percentile - , Printf.sprintf " Maximum percentile to plot (default: %.2f)" - !max_percentile - ) - ] - (fun x -> inputs := x :: !inputs) - "Generate a histogram by convolving sample points with a gaussian.\nusage:" ; - if !inputs = [] then failwith "Needs at least one input filename" ; - if !format <> `X11 && !graphic_filename = "" then - failwith "This format needs an -output" ; - let sigma = !sigma in - let inputs = get_info !inputs in - let output_files = - List.map (fun _ -> Filename.temp_file "histogram" "dat") inputs - in - let all = List.combine inputs output_files in - Xapi_stdext_pervasives.Pervasiveext.finally - (fun () -> - (* Write some summary statistics on stderr *) - List.iter - (fun (info, points) -> - debug ~out:stderr "%s has lognormal mean %f +/- %f" - (short_info_to_string info) - (LogNormal.mean points) (LogNormal.sigma points) - ) - inputs ; - let min_point = get_min inputs in - let max_point = get_max inputs in - (* To make sure that each added gaussian really adds 1 unit of area, we extend the bins - 3 sigmas to the left and right *) - let min_point = List.map (fun (r, n) -> (r, n -. (3. *. 
sigma))) min_point - and max_point = - List.map (fun (r, n) -> (r, n +. (3. *. sigma))) max_point - in - (* Attempt to zoom the graph in on the 10% to 90% region *) - let xrange_min = ref max_point and xrange_max = ref min_point in - List.iter - (fun ((info, points), output_file) -> - let result = get_result info in - let x = - Hist.make - (List.assoc result min_point) - (List.assoc result max_point) - 1000 - in - - (* -- Apply the Weierstrass transform -- *) - - (* NB Each call to Hist.convolve (i.e. each VM timing measured) increases the total area under the curve by 1. - By dividing through by 'n' (where 'n' is the total number of VMs i.e. points) we make the total area under - the curve equal 1 so we can consider the result as a probability density function. In particular this means - we can directly compare curves for 10, 100, 1000 measurements without worrying about scale factors and - also trade speed for estimation accuracy. *) - let num_points = float_of_int (List.length points) in - List.iter - (fun y -> - Hist.convolve x (fun z -> gaussian y sigma z /. num_points) - ) - points ; - (* Sanity-check: area under histogram should be almost 1.0 *) - let total_area = - Hist.fold x - (fun bin_start bin_end height acc -> - ((bin_end -. bin_start) *. height) +. acc - ) - 0. - in - if abs_float (1. -. 
total_area) > 0.01 then - debug ~out:stderr - "WARNING: area under histogram should be 1.0 but is %f" total_area ; - let cumulative = Hist.integrate x in - let t_10 = Hist.find_x cumulative 0.1 in - let t_80 = Hist.find_x cumulative 0.8 in - let t_90 = Hist.find_x cumulative 0.9 in - let t_95 = Hist.find_x cumulative 0.95 in - debug ~out:stderr "10th percentile: %f" t_10 ; - debug ~out:stderr "80th percentile: %f" t_80 ; - debug ~out:stderr "90th percentile: %f" t_90 ; - debug ~out:stderr "95th percentile: %f" t_95 ; - debug ~out:stderr "Clipping data between %.0f and %.0f percentiles" - !min_percentile !max_percentile ; - xrange_min := - replace_assoc result - (min - (List.assoc result !xrange_min) - (Hist.find_x cumulative (!min_percentile /. 100.)) - ) - !xrange_min ; - xrange_max := - replace_assoc result - (max - (List.assoc result !xrange_max) - (Hist.find_x cumulative (!max_percentile /. 100.)) - ) - !xrange_max ; - let x = if !integrate then Hist.integrate x else x in - Xapi_stdext_unix.Unixext.with_file output_file - [Unix.O_WRONLY; Unix.O_TRUNC; Unix.O_CREAT] - 0o644 (Hist.to_gnuplot x) - ) - all ; - let ls = - List.map - (fun ((info, _floats), output) -> - { - Gnuplot.filename= output - ; title= short_info_to_title info - ; graphname= get_result info - ; field= 2 - ; yaxis= 1 - ; scale= 1. - ; style= "linespoints" - } - ) - all - in - let ylabel = - if !integrate then - "Cumulative probability" - else - "Estimate of the probability density function" - in - List.iter - (fun result -> - let g = - { - Gnuplot.xlabel= - Printf.sprintf "Time for %s XenAPI calls to complete / seconds" - (string_of_result result) - ; ylabel - ; y2label= None - ; lines= List.filter (fun l -> l.Gnuplot.graphname = result) ls - ; log_x_axis= !log_axis - ; xrange= - Some - (List.assoc result !xrange_min, List.assoc result !xrange_max) - ; normal_probability_y_axis= - ( if !normal then - Some (!min_percentile /. 100., !max_percentile /. 100.) 
- else - None - ) - } - in - let output = - match !format with - | `Eps -> - Gnuplot.Ps (Printf.sprintf "%s-%s.eps" !graphic_filename result) - | `Gif -> - Gnuplot.Gif (Printf.sprintf "%s-%s.gif" !graphic_filename result) - | `X11 -> - Gnuplot.X11 - in - ignore (Gnuplot.render g output) - ) - (get_result_types inputs) - ) - (fun () -> List.iter Xapi_stdext_unix.Unixext.unlink_safe output_files) diff --git a/ocaml/perftest/perfdebug.ml b/ocaml/perftest/perfdebug.ml deleted file mode 100644 index 4c71c8e8ce1..00000000000 --- a/ocaml/perftest/perfdebug.ml +++ /dev/null @@ -1,24 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -let stdout_m = Mutex.create () - -let debug ?(out = stdout) (fmt : ('a, unit, string, unit) format4) = - Xapi_stdext_threads.Threadext.Mutex.execute stdout_m (fun () -> - Printf.ksprintf - (fun s -> - Printf.fprintf out "%s\n" s ; - flush stdout - ) - fmt - ) diff --git a/ocaml/perftest/perftest.ml b/ocaml/perftest/perftest.ml deleted file mode 100644 index c9b744676be..00000000000 --- a/ocaml/perftest/perftest.ml +++ /dev/null @@ -1,195 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -(* Performance testing *) - -open Client -open Perfutil -open Testtypes -open Perfdebug - -let xenrtfname = ref "perftest-xenrt.log" - -let marshall_xenrt pool metadata results = - let oc = open_out !xenrtfname in - Printf.fprintf oc "\n" ; - Printf.fprintf oc "%s\n" (Scenario.xml_of_scenario pool) ; - Printf.fprintf oc " \n" ; - List.iter - (fun (k, v) -> Printf.fprintf oc " %s%s\n" k v) - metadata ; - Printf.fprintf oc " \n \n" ; - List.iter - (fun r -> - Printf.fprintf oc " %f\n" - r.resultname r.subtest r.xenrtresult - ) - results ; - Printf.fprintf oc " \n" ; - close_out oc - -let rawfname = ref "" - -let marshall_raw (raw_results : Testtypes.result list) = - if !rawfname <> "" then ( - let oc = open_out !rawfname in - Printf.fprintf oc "%s" (Testtypes.to_string raw_results) ; - close_out oc - ) - -let marshall pool metadata results = - marshall_raw results ; - marshall_xenrt pool metadata results - -let string_of_set l = Printf.sprintf "{%s}" (String.concat ", " l) - -let get_metadata rpc session_id = - let pool = List.hd (Client.Pool.get_all ~rpc ~session_id) in - let master = Client.Pool.get_master ~rpc ~session_id ~self:pool in - let sv = Client.Host.get_software_version ~rpc ~session_id ~self:master in - sv - -let _ = - let template_name = ref "sdk-gold" in - let key = ref "" in - let scenario = ref "xendesktop" in - let ipbase = ref 0 in - let mode = ref "" in - let run_all = ref false in - let iter = ref 1 in - let possible_modes = ["initpool"; "destroypool"; "run"; "describe"] in - Arg.parse - (Arg.align - [ - ( "-template" - , Arg.Set_string template_name - , Printf.sprintf " Clone VMs from named base template (default is %s)" - !template_name - ) - ; ( "-scenario" - , Arg.Set_string 
scenario - , Printf.sprintf - " Choose scenario (default is %s; possibilities are %s" !scenario - (string_of_set (Scenario.get_all ())) - ) - ; ("-key", Arg.Set_string key, " Key name to identify the Pool instance") - ; ( "-ipbase" - , Arg.Set_int ipbase - , Printf.sprintf - " Choose base IP address (default is %d for 192.168.%d.1)" !ipbase - !ipbase - ) - ; ( "-xenrtoutput" - , Arg.Set_string xenrtfname - , " Set output filename for xenrt (defaults to perftest-xenrt.log)" - ) - ; ( "-rawoutput" - , Arg.Set_string rawfname - , " Set output filename for raw results (by default, do not output \ - results)" - ) - ; ( "-runall" - , Arg.Set run_all - , Printf.sprintf " Run tests %s (tests run by default are %s)" - (string_of_set Tests.testnames) - (string_of_set Tests.runtestnames) - ) - ; ( "-iter" - , Arg.Set_int iter - , Printf.sprintf " Number of iterations (default is %i)" !iter - ) - ] - ) - (fun x -> - if !mode = "" then - mode := x - else - debug ~out:stderr "Ignoring unexpected argument: %s" x - ) - (Printf.sprintf - "Configure and run a simulated test\nUsage: %s -key %s" - Sys.argv.(0) - (string_of_set possible_modes) - ) ; - if not (List.mem !mode possible_modes) then ( - debug ~out:stderr "Unknown mode: \"%s\" (possibilities are %s)" !mode - (string_of_set possible_modes) ; - exit 1 - ) ; - if not (List.mem !scenario (Scenario.get_all ())) then ( - debug ~out:stderr "Unknown scenario: \"%s\" (possibilities are %s)" - !scenario - (string_of_set (Scenario.get_all ())) ; - exit 1 - ) ; - if !key = "" then ( - debug ~out:stderr "Must set a -key to identify the Pool instance" ; - exit 1 - ) ; - try - match !mode with - | "describe" -> - let lines = Createpool.describe_pool !template_name !scenario !key in - List.iter (fun x -> debug "* %s" x) lines - | _ -> - let session = - Client.Session.login_with_password ~rpc ~uname:"root" ~pwd:"xenroot" - ~version:"1.2" ~originator:"perftest" - in - let (_ : API.string_to_string_map) = get_metadata rpc session in - let open 
Xapi_stdext_pervasives in - Pervasiveext.finally - (fun () -> - let pool = Scenario.get !scenario in - match !mode with - | "initpool" when pool.Scenario.sdk -> - Createpool.create_sdk_pool session !template_name !scenario !key - !ipbase - | "initpool" -> - Createpool.create_pool session !template_name !scenario !key - !ipbase - | "destroypool" when pool.Scenario.sdk -> - Createpool.destroy_sdk_pool session !template_name !key - | "destroypool" -> - debug ~out:stderr "Not yet implemented ... " - | "run" -> - let newrpc = - if pool.Scenario.sdk then - remoterpc (Printf.sprintf "192.168.%d.1" !ipbase) - else - rpc - in - let session = - if pool.Scenario.sdk then - Client.Session.login_with_password ~rpc:newrpc ~uname:"root" - ~pwd:"xensource" ~version:"1.2" ~originator:"perftest" - else - session - in - Pervasiveext.finally - (fun () -> - marshall pool - (get_metadata newrpc session) - (Tests.run newrpc session !key !run_all !iter) - ) - (fun () -> - if pool.Scenario.sdk then - Client.Session.logout ~rpc:newrpc ~session_id:session - ) - | _ -> - failwith (Printf.sprintf "unknown mode: %s" !mode) - ) - (fun () -> Client.Session.logout ~rpc ~session_id:session) - with Api_errors.Server_error (code, params) -> - debug ~out:stderr "Caught API error: %s [ %s ]" code - (String.concat "; " params) diff --git a/ocaml/perftest/perfutil.ml b/ocaml/perftest/perfutil.ml deleted file mode 100644 index f1ebe69c93b..00000000000 --- a/ocaml/perftest/perfutil.ml +++ /dev/null @@ -1,101 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -(* Utilities for performance monitor *) - -open Client - -let rpc xml = - let open Xmlrpc_client in - XMLRPC_protocol.rpc ~srcstr:"perftest" ~dststr:"xapi" - ~transport:(Unix (Filename.concat "/var/lib/xcp" "xapi")) - ~http:(xmlrpc ~version:"1.0" "/") - xml - -let remoterpc host xml = - let open Xmlrpc_client in - XMLRPC_protocol.rpc ~srcstr:"perftest" ~dststr:"remotexapi" - ~transport:(SSL (SSL.make ~verify_cert:None (), host, 443)) - ~http:(xmlrpc ~version:"1.1" "/") - xml - -(* Rewrite the provisioning XML fragment to create all disks on a new, specified SR. This is cut-n-pasted from cli_util.ml *) -let rewrite_provisioning_xml rpc session_id new_vm sr_uuid = - let rewrite_xml xml newsrname = - let rewrite_disk = function - | Xml.Element ("disk", params, []) -> - Xml.Element - ( "disk" - , List.map - (fun (x, y) -> if x <> "sr" then (x, y) else ("sr", newsrname)) - params - , [] - ) - | x -> - x - in - match xml with - | Xml.Element ("provision", [], disks) -> - Xml.Element ("provision", [], List.map rewrite_disk disks) - | x -> - x - in - let other_config = Client.VM.get_other_config ~rpc ~session_id ~self:new_vm in - if List.mem_assoc "disks" other_config then ( - let xml = Xml.parse_string (List.assoc "disks" other_config) in - Client.VM.remove_from_other_config ~rpc ~session_id ~self:new_vm - ~key:"disks" ; - let newdisks = rewrite_xml xml sr_uuid in - Client.VM.add_to_other_config ~rpc ~session_id ~self:new_vm ~key:"disks" - ~value:(Xml.to_string newdisks) - ) - -let parse_sr_probe_for_iqn (xml : string) : string list = - match Xml.parse_string xml with - | Xml.Element ("iscsi-target-iqns", _, children) -> - let parse_tgts = function - | Xml.Element ("TGT", _, children) -> - let parse_kv = 
function - | Xml.Element (key, _, [Xml.PCData v]) -> - (key, String.trim v) - | _ -> - failwith "Malformed key/value pair" - in - let all = List.map parse_kv children in - List.assoc "TargetIQN" all - | _ -> - failwith "Malformed or missing " - in - List.map parse_tgts children - | _ -> - failwith "Missing element" - -let parse_sr_probe_for_scsiids (xml : string) : string list = - match Xml.parse_string xml with - | Xml.Element ("iscsi-target", _, children) -> - let parse_luns = function - | Xml.Element ("LUN", _, children) -> - let parse_kv = function - | Xml.Element (key, _, [Xml.PCData v]) -> - (key, String.trim v) - | _ -> - failwith "Malformed key/value pair" - in - let all = List.map parse_kv children in - List.assoc "SCSIid" all - | _ -> - failwith "Malformed or missing " - in - List.map parse_luns children - | _ -> - failwith "Missing element" diff --git a/ocaml/perftest/scenario.ml b/ocaml/perftest/scenario.ml deleted file mode 100644 index 0db7210a044..00000000000 --- a/ocaml/perftest/scenario.ml +++ /dev/null @@ -1,157 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- *) -(* VMs *) -type vm = {vbds: int; vifs: int; tag: string; num: int; has_affinity: bool} - -let default_vm num = - {vbds= 1; vifs= 4; tag= "everything"; num; has_affinity= true} - -let string_of_vm (x : vm) = - let vbds = - Printf.sprintf "%s VBDs" (if x.vbds = 0 then "no" else string_of_int x.vbds) - in - let vifs = - Printf.sprintf "%s VIFs" (if x.vifs = 0 then "no" else string_of_int x.vifs) - in - Printf.sprintf "%d VMs per host (tag %s) with %s, %s and affinity%s set" x.num - x.tag vbds vifs - (if x.has_affinity then "" else " not") - -(* Pools *) -type pool = { - id: string - ; sdk: bool - ; hosts: int - ; interfaces_per_host: int - ; vms: vm list - ; bonds: int - ; (* Needs to be less than or equal to interfaces_per_host / 2 *) - key: string - ; ipbase: int - ; iscsi_luns: int - ; use_shared_storage: bool -} - -let default = - { - id= "default" - ; sdk= true - ; hosts= 1 - ; interfaces_per_host= 6 - ; vms= - [ - default_vm 20 - ; {(default_vm 20) with vifs= 0; tag= "novifs"} - ; {(default_vm 20) with vbds= 0; tag= "novbds"} - ; {(default_vm 20) with vifs= 0; vbds= 0; tag= "novbdsnovifs"} - ] - ; bonds= 2 - ; key= "" - ; ipbase= 0 - ; iscsi_luns= 1 - ; use_shared_storage= false - } - -let description_of_pool (x : pool) = - [ - Printf.sprintf "Scenario: %s" x.id - ; Printf.sprintf "Key: %s" x.key - ; Printf.sprintf - "%d hosts, each with %d network interfaces, %d of which are paired into \ - %d bonds" - x.hosts x.interfaces_per_host (x.bonds * 2) x.bonds - ] - @ List.map string_of_vm x.vms - -let pools = - [ - {default with id= "pool0"; hosts= 1} - ; {default with id= "pool1"; hosts= 4} - ; {default with id= "pool2"; hosts= 16} - ; {default with id= "pool3"; hosts= 48} - ; { - default with - id= "real1" - ; hosts= 1 - ; sdk= false - ; bonds= 0 - ; interfaces_per_host= 0 - ; vms= [{(default_vm 50) with tag= ""}] - } - ; { - default with - id= "xendesktop" - ; hosts= 8 - ; vms= - [ - { - (default_vm 50) with - vbds= 0 - ; vifs= 1 - ; tag= "xendesktop" - ; 
has_affinity= false - } - ] - } - ; { - default with - id= "empty" - ; hosts= 1 - ; (* we won't be starting VMs in the clone test so we don't need any hosts *) - vms= [{(default_vm 1) with tag= "winxp-gold"; vifs= 1; vbds= 1}] - ; (* 1 per host *) - iscsi_luns= 6 - ; use_shared_storage= true - } - ] - -let get_all () = List.map (fun p -> p.id) pools - -let get name = List.find (fun p -> p.id = name) pools - -let xml_of_scenario s = - String.concat "\n" - ([ - "" - ; Printf.sprintf " %s" s.id - ; Printf.sprintf " %s" s.key - ; Printf.sprintf " %b" s.sdk - ; Printf.sprintf " %d" s.hosts - ; Printf.sprintf " %d" - s.interfaces_per_host - ; Printf.sprintf " " - ] - @ List.map - (fun vm -> - Printf.sprintf - " " - vm.vbds vm.vifs vm.tag vm.num vm.has_affinity - ) - s.vms - @ [ - " " - ; Printf.sprintf " %d" s.bonds - ; Printf.sprintf " %d" s.ipbase - ; "" - ] - ) - -let oc_key = "perftestsetup" - -let sr_disk_size = Int64.mul 1048576L 2093049L - -(* limit of 1 vhd ~2 terabytes (megs, gigs, t.. what?) *) - -let sr_disk_device = "xvde" diff --git a/ocaml/perftest/statistics.ml b/ocaml/perftest/statistics.ml deleted file mode 100644 index 49c5bc29aa8..00000000000 --- a/ocaml/perftest/statistics.ml +++ /dev/null @@ -1,155 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -(** Useful stats-related functions for plotting graphs and analysing the results of perftest *) - -let pi = atan 1. *. 4. - -let gaussian mu sigma x = - 1.0 - /. 
(sigma *. sqrt (2.0 *. pi)) - *. exp (-.((x -. mu) ** 2.0) /. (2.0 *. sigma *. sigma)) - -module Hist = struct - type t = { - bin_start: float array - ; bin_end: float array - ; bin_count: float array - (* height of each bin: multiply by width to get area *) - } - - (** Initialise a histogram covering values from [min:max] in 'n' uniform steps *) - let make (min : float) (max : float) (n : int) = - let range = max -. min in - { - bin_start= - Array.init n (fun i -> - (range /. float_of_int n *. float_of_int i) +. min - ) - ; bin_end= - Array.init n (fun i -> - (range /. float_of_int n *. float_of_int (i + 1)) +. min - ) - ; bin_count= Array.init n (fun _ -> 0.) - } - - let integrate (x : t) = - let n = Array.length x.bin_start in - let result = - make x.bin_start.(0) x.bin_end.(Array.length x.bin_end - 1) n - in - let area = ref 0. in - for i = 0 to Array.length x.bin_start - 1 do - assert (x.bin_start.(i) = result.bin_start.(i)) ; - let width = x.bin_end.(i) -. x.bin_start.(i) in - area := !area +. (x.bin_count.(i) *. width) ; - result.bin_count.(i) <- !area - done ; - result - - (** Call 'f' with the start, end and height of each bin *) - let iter (x : t) (f : float -> float -> float -> unit) = - for i = 0 to Array.length x.bin_start - 1 do - let width = x.bin_end.(i) -. x.bin_start.(i) in - f x.bin_start.(i) x.bin_end.(i) (x.bin_count.(i) /. width) - done - - (** Fold 'f' over the bins calling it with 'bin_start' 'bin_end' 'height' and 'acc' *) - let fold (x : t) (f : float -> float -> float -> 'a -> 'a) (init : 'a) = - let acc = ref init in - iter x (fun bin_start bin_end height -> - acc := f bin_start bin_end height !acc - ) ; - !acc - - (** Write output to a file descriptor in gnuplot format *) - let to_gnuplot (x : t) (fd : Unix.file_descr) = - iter x (fun bin_start bin_end height -> - let center = (bin_start +. bin_end) /. 
2.0 in - let line = Printf.sprintf "%f %f\n" center height |> Bytes.of_string in - let (_ : int) = Unix.write fd line 0 (Bytes.length line) in - () - ) - - exception Stop - - (** Add a sample point *) - let add (x : t) (y : float) = - try - for i = 0 to Array.length x.bin_start - 1 do - if x.bin_start.(i) <= y && y <= x.bin_end.(i + 1) then ( - x.bin_count.(i) <- x.bin_count.(i) +. 1.0 ; - raise Stop - ) - done - with Stop -> () - - (** Evaluate 'f' given the center of each bin and add the result to the bin count *) - let convolve (x : t) (f : float -> float) = - for i = 0 to Array.length x.bin_start - 1 do - let center = (x.bin_start.(i) +. x.bin_end.(i)) /. 2.0 in - let width = x.bin_end.(i) -. x.bin_start.(i) in - let result = f center in - x.bin_count.(i) <- x.bin_count.(i) +. (result *. width) - done - - (** Given a monotonically increasing histogram find the 'x' value given a 'y' *) - let find_x (x : t) (y : float) = - match - fold x - (fun bin_start bin_end height acc -> - match acc with - | Some _ -> - acc (* got it already *) - | None -> - if height > y then - Some ((bin_start +. bin_end) /. 2.) (* no interpolation *) - else - None - ) - None - with - | Some x -> - x - | None -> - raise Not_found -end - -module Normal = struct - let mean (points : float list) = - List.fold_left ( +. ) 0. points /. float_of_int (List.length points) - - let sigma (points : float list) = - let sum_x = List.fold_left ( +. ) 0. points - and sum_xx = List.fold_left ( +. ) 0. (List.map (fun x -> x *. x) points) in - let n = float_of_int (List.length points) in - sqrt ((n *. sum_xx) -. (sum_x *. sum_x)) /. n -end - -module LogNormal = struct - let mean (points : float list) = - let points = List.map log points in - let normal_sigma = Normal.sigma points in - let normal_mean = Normal.mean points in - exp (normal_mean +. (normal_sigma *. normal_sigma /. 
2.)) - - let sigma (points : float list) = - let points = List.map log points in - let normal_sigma = Normal.sigma points in - let normal_mean = Normal.mean points in - let v = - (exp (normal_sigma *. normal_sigma) -. 1.) - *. exp ((2. *. normal_mean) +. (normal_sigma *. normal_sigma)) - in - sqrt v -end diff --git a/ocaml/perftest/tests.ml b/ocaml/perftest/tests.ml deleted file mode 100644 index 731d0fa1200..00000000000 --- a/ocaml/perftest/tests.ml +++ /dev/null @@ -1,493 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -(* Tests *) - -open Client -open Xapi_stdext_std -open Xapi_stdext_pervasives.Pervasiveext -open Testtypes -open Perfdebug - -let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute - -type test = { - run: bool - ; key: string - ; testname: string - ; func: (Rpc.call -> Rpc.response) -> API.ref_session -> test -> result list -} - -let time f = - let now = Unix.gettimeofday () in - f () ; - let elapsed = Unix.gettimeofday () -. 
now in - elapsed - -let subtest_string key tag = - if tag = "" then - key - else - Printf.sprintf "%s (%s)" key tag - -let startall rpc session_id test = - let vms = Client.VM.get_all_records ~rpc ~session_id in - let tags = List.map (fun (_, vmr) -> vmr.API.vM_tags) vms in - let tags = Listext.List.setify (List.concat tags) in - List.map - (fun tag -> - debug "Starting VMs with tag: %s" tag ; - let vms = - List.filter (fun (_, vmr) -> List.mem tag vmr.API.vM_tags) vms - in - let vms = - List.sort - (fun (_, vmr1) (_, vmr2) -> - compare vmr1.API.vM_affinity vmr2.API.vM_affinity - ) - vms - in - let vms_names_uuids = - List.map - (fun (vm, vmr) -> (vm, vmr.API.vM_name_label, vmr.API.vM_uuid)) - vms - in - let times = - List.map - (fun (vm, name_label, uuid) -> - debug "Starting VM uuid '%s' (%s)" uuid name_label ; - let result = - time (fun () -> - Client.VM.start ~rpc ~session_id ~vm ~start_paused:false - ~force:false - ) - in - debug "Elapsed time: %f" result ; - result - ) - vms_names_uuids - in - { - resultname= test.testname - ; subtest= subtest_string test.key tag - ; xenrtresult= List.fold_left ( +. ) 0.0 times - ; rawresult= StartTest times - } - ) - tags - -let parallel_with_vms async_op opname n vms rpc session_id test subtest_name = - (* Not starting in affinity order *) - let vms_names_uuids = - List.map (fun (vm, vmr) -> (vm, vmr.API.vM_name_label, vmr.API.vM_uuid)) vms - in - - (* Manage a set of active tasks using the event system. This could be factored out into a more generic - service if/when necessary *) - - (* Start 'n' at a time *) - let active_tasks = ref [] in - let vms_to_start = ref vms_names_uuids in - let vm_to_start_time = Hashtbl.create 10 in - let tasks_to_vm = Hashtbl.create 10 in - let m = Mutex.create () in - let c = Condition.create () in - let results = ref [] in - (* Take a set of tasks which have finished, update structures and return true if there are no more active tasks - left. 
*) - let process_finished_tasks finished = - let to_delete = ref [] in - let finished = - with_lock m (fun () -> - List.iter - (fun task -> - if List.mem task !active_tasks then ( - ( match Hashtbl.find_opt tasks_to_vm task with - | None -> - debug ~out:stderr - "Ignoring completed task which doesn't correspond to a \ - VM %s" - opname - | Some uuid -> - let started = Hashtbl.find vm_to_start_time uuid in - let time_taken = Unix.gettimeofday () -. started in - results := time_taken :: !results ; - debug "%sing VM uuid '%s'" opname uuid ; - debug "Elapsed time: %f" time_taken ; - Hashtbl.remove vm_to_start_time uuid ; - Hashtbl.remove tasks_to_vm task - ) ; - active_tasks := List.filter (fun x -> x <> task) !active_tasks ; - Condition.signal c ; - to_delete := task :: !to_delete - ) - ) - finished ; - !active_tasks = [] (* true if no active tasks left *) - ) - in - List.iter - (fun task -> Client.Task.destroy ~rpc ~session_id ~self:task) - !to_delete ; - finished - in - (* Run this in a thread body to create a thread which will process each task completion and then terminate when all the - tasks have finished. 
*) - let check_active_tasks () = - let classes = ["task"] in - finally - (fun () -> - let finished = ref false in - while not !finished do - Client.Event.register ~rpc ~session_id ~classes ; - try - (* Need to check once after registering to avoid a race *) - let finished_tasks = - List.filter - (fun task -> - Client.Task.get_status ~rpc ~session_id ~self:task <> `pending - ) - (with_lock m (fun () -> !active_tasks)) - in - finished := process_finished_tasks finished_tasks ; - while not !finished do - (* debug ~out:stderr "Polling for events (%d active tasks)" (with_lock m (fun () -> List.length !active_tasks)); *) - let events = - Event_types.events_of_rpc (Client.Event.next ~rpc ~session_id) - in - let events = List.map Event_helper.record_of_event events in - let finished_tasks = - List.concat_map - (function - | Event_helper.Task (t, Some t_rec) -> - if - t_rec.API.task_status <> `pending - || t_rec.API.task_current_operations <> [] - then - [t] - else - [] - | Event_helper.Task (t, None) -> - [t] - | _ -> - [] - ) - events - in - - finished := process_finished_tasks finished_tasks - done - with - | Api_errors.Server_error (code, _) - when code = Api_errors.events_lost - -> - debug ~out:stderr "Caught EVENTS_LOST; reregistering" ; - Client.Event.unregister ~rpc ~session_id ~classes - done - ) - (fun () -> Client.Event.unregister ~rpc ~session_id ~classes) - in - let control_task = - Client.Task.create ~rpc ~session_id - ~label:("Parallel VM " ^ opname ^ " test") - ~description:"" - in - active_tasks := [control_task] ; - let thread = Thread.create check_active_tasks () in - while !vms_to_start <> [] do - let start_one () = - let vm, _, uuid = List.hd !vms_to_start in - vms_to_start := List.tl !vms_to_start ; - with_lock m (fun () -> - let task = async_op ~rpc ~session_id ~vm in - debug ~out:stderr "Issued VM %s for '%s'" opname uuid ; - Hashtbl.add tasks_to_vm task uuid ; - Hashtbl.add vm_to_start_time uuid (Unix.gettimeofday ()) ; - active_tasks := task :: 
!active_tasks - ) - in - (* Only start at most 'n' at once. Note that the active_task list includes a master control task *) - with_lock m (fun () -> - while List.length !active_tasks > n do - Condition.wait c m - done - ) ; - start_one () - done ; - Client.Task.cancel ~rpc ~session_id ~task:control_task ; - debug ~out:stderr "Finished %sing VMs" opname ; - Thread.join thread ; - { - resultname= test.testname - ; subtest= subtest_name - ; xenrtresult= List.fold_left ( +. ) 0.0 !results - ; rawresult= StartTest !results - } - -(** @param n the maximum number of concurrent invocations of async_op *) -let parallel async_op opname n rpc session_id test = - let vms = Client.VM.get_all_records ~rpc ~session_id in - let tags = List.map (fun (_, vmr) -> vmr.API.vM_tags) vms in - let tags = Listext.List.setify (List.concat tags) in - Printf.printf "Tags are [%s]\n%!" (String.concat "; " tags) ; - List.map - (fun tag -> - let vms = - List.filter (fun (_, vmr) -> List.mem tag vmr.API.vM_tags) vms - in - Printf.printf "%sing %d VMs with tag: %s\n%!" 
opname (List.length vms) tag ; - parallel_with_vms async_op opname n vms rpc session_id test - (subtest_string test.key tag) - ) - tags - -let parallel_startall = - parallel (Client.Async.VM.start ~start_paused:false ~force:false) "start" - -let parallel_stopall = parallel Client.Async.VM.hard_shutdown "stop" - -let stopall rpc session_id test = - let vms = Client.VM.get_all_records ~rpc ~session_id in - let tags = List.map (fun (_, vmr) -> vmr.API.vM_tags) vms in - let tags = Listext.List.setify (List.concat tags) in - List.map - (fun tag -> - debug "Starting VMs with tag: %s" tag ; - let vms = - List.filter (fun (_, vmr) -> List.mem tag vmr.API.vM_tags) vms - in - let vms = - List.sort - (fun (_, vmr1) (_, vmr2) -> - compare vmr1.API.vM_affinity vmr2.API.vM_affinity - ) - vms - in - let vms_names_uuids = - List.map - (fun (vm, vmr) -> (vm, vmr.API.vM_name_label, vmr.API.vM_uuid)) - vms - in - let times = - List.map - (fun (vm, name_label, uuid) -> - debug "Stopping VM uuid '%s' (%s)" uuid name_label ; - let result = - time (fun () -> Client.VM.hard_shutdown ~rpc ~session_id ~vm) - in - debug "Elapsed time: %f" result ; - result - ) - vms_names_uuids - in - { - resultname= test.testname - ; subtest= subtest_string test.key tag - ; xenrtresult= List.fold_left ( +. ) 0.0 times - ; rawresult= ShutdownTest times - } - ) - tags - -let clone num_clones rpc session_id test = - Printf.printf "Doing clone test\n%!" ; - let vms = Client.VM.get_all_records ~rpc ~session_id in - let tags = List.map (fun (_, vmr) -> vmr.API.vM_tags) vms in - let tags = Listext.List.setify (List.concat tags) in - Printf.printf "Tags are [%s]\n%!" (String.concat "; " tags) ; - List.concat_map - (fun tag -> - let vms = - List.filter (fun (_, vmr) -> List.mem tag vmr.API.vM_tags) vms - in - Printf.printf "We've got %d VMs\n%!" 
(List.length vms) ; - (* Start a thread to clone each one n times *) - let body (vm, vmr, res, clone_refs) = - let name_label = vmr.API.vM_name_label in - Printf.printf "Performing %d clones of '%s' within thread...\n%!" - num_clones name_label ; - for j = 0 to num_clones - 1 do - let result = - time (fun () -> - let clone = - Client.VM.clone ~rpc ~session_id ~vm ~new_name:"clone" - in - clone_refs := clone :: !clone_refs - ) - in - Printf.printf "clone %d of '%s' finished: %f\n%!" j name_label result ; - res := result :: !res - done - in - let threads_and_results = - List.map - (fun (vm, vmr) -> - let res : float list ref = ref [] in - let clones : API.ref_VM list ref = ref [] in - let t = Thread.create body (vm, vmr, res, clones) in - (t, (res, clones)) - ) - vms - in - let threads, times_and_clones = List.split threads_and_results in - let times, clones = List.split times_and_clones in - Printf.printf "Waiting for threads to finish...\n%!" ; - List.iter (fun t -> Thread.join t) threads ; - Printf.printf "Threads have finished\n%!" ; - (* times is a list of (list of floats, each being the time to clone a VM), one per SR *) - let times = List.map (fun x -> !x) times in - Printf.printf "Times are: [%s]\n%!" - (String.concat ", " - (List.map - (fun x -> - Printf.sprintf "[%s]" - (String.concat ", " - (List.map (fun x -> Printf.sprintf "%f" x) x) - ) - ) - times - ) - ) ; - let clones = List.map (fun x -> !x) clones in - (* Output the results for cloning each gold VM as a separate record *) - let results = - List.map - (fun x -> - { - resultname= test.testname - ; subtest= subtest_string test.key tag - ; xenrtresult= List.fold_left ( +. ) 0.0 (List.concat times) - ; rawresult= CloneTest x - } - ) - times - in - (* Best-effort clean-up *) - ignore_exn (fun () -> - Printf.printf "Cleaning up...\n%!" 
; - (* Create a thread to clean up each set of clones *) - let threads = - List.mapi - (fun i clones -> - Thread.create - (fun clones -> - List.iteri - (fun j clone -> - Printf.printf "Thread %d destroying VM %d...\n%!" i j ; - let vbds = - Client.VM.get_VBDs ~rpc ~session_id ~self:clone - in - let vdis = - List.map - (fun vbd -> - Client.VBD.get_VDI ~rpc ~session_id ~self:vbd - ) - vbds - in - List.iter - (fun vdi -> - Client.VDI.destroy ~rpc ~session_id ~self:vdi - ) - vdis ; - Client.VM.destroy ~rpc ~session_id ~self:clone - ) - clones - ) - clones - ) - clones - in - Printf.printf "Waiting for clean-up threads to finish...\n%!" ; - List.iter (fun t -> Thread.join t) threads ; - Printf.printf "Clean-up threads have finished\n%!" - ) ; - (* Finally, return the results *) - results - ) - tags - -let recordssize rpc session_id test = - let doxmlrpctest (subtestname, testfn) = - testfn () ; - let res = Int64.to_float !Http_client.last_content_length in - { - resultname= test.testname - ; subtest= subtestname - ; xenrtresult= res - ; rawresult= SizeTest res - } - in - List.map doxmlrpctest - [ - ( "VM records" - , fun () -> ignore (Client.VM.get_all_records ~rpc ~session_id) - ) - ; ( "VBD records" - , fun () -> ignore (Client.VBD.get_all_records ~rpc ~session_id) - ) - ; ( "VIF records" - , fun () -> ignore (Client.VIF.get_all_records ~rpc ~session_id) - ) - ; ( "VDI records" - , fun () -> ignore (Client.VDI.get_all_records ~rpc ~session_id) - ) - ; ( "SR records" - , fun () -> ignore (Client.SR.get_all_records ~rpc ~session_id) - ) - ] - -let tests key = - [ - {run= true; key; testname= "clone"; func= clone 200} - ; {run= true; key; testname= "startall"; func= startall} - ; {run= true; key; testname= "recordssize"; func= recordssize} - ; {run= true; key; testname= "stopall"; func= stopall} - ; {run= false; key; testname= "parallel_startall"; func= parallel_startall 10} - ; {run= false; key; testname= "parallel_stopall"; func= parallel_stopall 10} - ] - -let 
testnames = List.map (fun t -> t.testname) (tests "") - -let runtestnames = - List.map (fun t -> t.testname) (List.filter (fun t -> t.run) (tests "")) - -let runone rpc session_id test = - debug "Running test: %s" test.testname ; - let results = test.func rpc session_id test in - debug "Finished: Results=[%s]" - (String.concat "; " - (List.map - (fun result -> - Printf.sprintf "subtest '%s': %f" result.subtest result.xenrtresult - ) - results - ) - ) ; - results - -let run rpc session_id key run_all iter = - let tests = - if run_all then - tests key - else - List.filter (fun t -> t.run) (tests key) - in - let rec iter_tests n = - if n = 1 then - tests - else - tests @ iter_tests (n - 1) - in - List.fold_left - (fun acc test -> runone rpc session_id test @ acc) - [] (iter_tests iter) diff --git a/ocaml/perftest/testtypes.ml b/ocaml/perftest/testtypes.ml deleted file mode 100644 index 4635c11b898..00000000000 --- a/ocaml/perftest/testtypes.ml +++ /dev/null @@ -1,49 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- *) - -(* Test results *) - -type resultdata = - | StartTest of float list - | SizeTest of float - | ShutdownTest of float list - | CloneTest of float list - -(* one float list per gold VM cloned *) - -type result = { - resultname: string - ; subtest: string - ; xenrtresult: float - ; rawresult: resultdata (* Specific to the actual test *) -} - -let header = "RAW" - -let sep = ':' - -let to_string (results : result list) = - Printf.sprintf "%s%c%s" header sep - (Marshal.to_string results [Marshal.No_sharing]) - -let from_string s : result list option = - let open Xapi_stdext_std.Xstringext.String in - if startswith header s then - match split ~limit:2 sep s with - | [_; r] -> - Some (Marshal.from_string r 0) - | _ -> - None - else - None From 2b02240c32dc9e2723d44b9d58bd8f624c464c52 Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 4 Dec 2024 11:30:23 +0000 Subject: [PATCH 111/121] Remove references to perftest Signed-off-by: Rob Hoes --- ocaml/libs/http-lib/http_client.ml | 6 ------ ocaml/libs/http-lib/http_client.mli | 3 --- quality-gate.sh | 6 +++--- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/ocaml/libs/http-lib/http_client.ml b/ocaml/libs/http-lib/http_client.ml index 5cb67212bcc..fed8f5f7780 100644 --- a/ocaml/libs/http-lib/http_client.ml +++ b/ocaml/libs/http-lib/http_client.ml @@ -200,9 +200,6 @@ let response_of_fd ?(use_fastpath = false) fd = __FUNCTION__ (Printexc.to_string e) ; None -(** See perftest/tests.ml *) -let last_content_length = ref 0L - let http_rpc_recv_response use_fastpath error_msg fd = match response_of_fd ~use_fastpath fd with | None -> @@ -212,9 +209,6 @@ let http_rpc_recv_response use_fastpath error_msg fd = | ("401" | "403" | "500") as http_code -> raise (Http_error (http_code, error_msg)) | "200" -> - Option.iter - (fun x -> last_content_length := x) - response.Http.Response.content_length ; response | code -> raise (Http_request_rejected (Printf.sprintf "%s: %s" code error_msg)) diff --git 
a/ocaml/libs/http-lib/http_client.mli b/ocaml/libs/http-lib/http_client.mli index 68d65649e3c..3d9b6591d5f 100644 --- a/ocaml/libs/http-lib/http_client.mli +++ b/ocaml/libs/http-lib/http_client.mli @@ -40,6 +40,3 @@ val rpc : (** [rpc fd request body f] marshals the HTTP request represented by [request] and [body] through file descriptor [fd] and then applies the response to [f]. On failure an exception is thrown. *) - -val last_content_length : int64 ref -(** See perftest/tests.ml *) diff --git a/quality-gate.sh b/quality-gate.sh index b72ca099aa7..6255ce0575e 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -26,9 +26,9 @@ verify-cert () { mli-files () { N=497 - # do not count ml files from the tests in ocaml/{tests/perftest/quicktest} - MLIS=$(git ls-files -- '**/*.mli' | grep -vE "ocaml/tests|ocaml/perftest|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) - MLS=$(git ls-files -- '**/*.ml' | grep -vE "ocaml/tests|ocaml/perftest|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) + # do not count ml files from the tests in ocaml/{tests/quicktest} + MLIS=$(git ls-files -- '**/*.mli' | grep -vE "ocaml/tests|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) + MLS=$(git ls-files -- '**/*.ml' | grep -vE "ocaml/tests|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) num_mls_without_mlis=$(comm -23 <(sort <<<"$MLS") <(sort <<<"$MLIS") | wc -l) if [ "$num_mls_without_mlis" -eq "$N" ]; then echo "OK counted $num_mls_without_mlis .ml files without an .mli" From fd011dd8f1b3a82bf747cfe9f97f13cb1da414cf Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 4 Dec 2024 11:59:32 +0000 Subject: [PATCH 112/121] Update quality-gate Signed-off-by: Rob Hoes --- quality-gate.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/quality-gate.sh b/quality-gate.sh index 
6255ce0575e..a7ffefea72b 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -3,7 +3,7 @@ set -e list-hd () { - N=293 + N=277 LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$LIST_HD" -eq "$N" ]; then echo "OK counted $LIST_HD List.hd usages" @@ -14,7 +14,7 @@ list-hd () { } verify-cert () { - N=14 + N=13 NONE=$(git grep -r --count 'verify_cert:None' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$NONE" -eq "$N" ]; then echo "OK counted $NONE usages of verify_cert:None" @@ -106,7 +106,7 @@ unixgetenv () { } hashtblfind () { - N=36 + N=35 # Looks for all .ml files except the ones using Core.Hashtbl.find, # which already returns Option HASHTBLFIND=$(git grep -P -r --count 'Hashtbl.find(?!_opt)' -- '**/*.ml' ':!ocaml/xapi-storage-script/main.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) From 03fccc872ff6d894012b1454382dccad44d5e1e6 Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 4 Dec 2024 16:28:34 +0000 Subject: [PATCH 113/121] CA-401075: remove misleading logs from HTTP client Recent changes to Http_client were made to log errors and backtraces in case of unusual errors, in order to better diagnose problems. Unfortunately, this also led to benign exceptions being logged, which caused people to suspect problems where the exception was actually "expected". In particular the following error started appearing in the logs many times: Raised Http_client.Parse_error("Expected initial header []") The case here is that the client has sent an HTTP request, but the server disconnects before sending a response. It turns out that this happens commonly when an external connection is received by xapi and handed over to another process, such as xcp-rrdd. The connection's file descriptor is passed to the other process, and the original HTTP request is forwarded over a local socket. 
The other process continues to handle the request and sends responses to the forwarded socket, but never to the local socket, where xapi is waiting for the response. This is a quick change that makes the caller of the receive function aware that no response was sent, without logging scary errors, to fix the immediate problem. In addition, we should enhance the handover process to actually send responses, although funcionally this is not an issue. Signed-off-by: Rob Hoes --- ocaml/libs/http-lib/http_client.ml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ocaml/libs/http-lib/http_client.ml b/ocaml/libs/http-lib/http_client.ml index 5cb67212bcc..6f1831fb855 100644 --- a/ocaml/libs/http-lib/http_client.ml +++ b/ocaml/libs/http-lib/http_client.ml @@ -119,6 +119,8 @@ let response_of_fd_exn_slow fd = ; additional_headers= !headers ; body= None } + | [] -> + raise End_of_file | _ -> error "Failed to parse HTTP response status line [%s]" line ; raise (Parse_error (Printf.sprintf "Expected initial header [%s]" line)) @@ -192,6 +194,9 @@ let response_of_fd ?(use_fastpath = false) fd = with | Unix.Unix_error (_, _, _) as e -> raise e + | End_of_file -> + info "No response: connection closed by server" ; + None | e -> Backtrace.is_important e ; let bt = Backtrace.get e in From efff0953c92bca3e641607d7ffc6c04788cdd84b Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Wed, 4 Dec 2024 17:10:54 +0000 Subject: [PATCH 114/121] CP-52807: No more cluster stack alert As per the new design, we are no longer telling user that their cluster stack version is out of date, since the corosync upgrade will be part of the compulsory actions enforced by XC upgrade wizard and the host installer. 
Signed-off-by: Vincent Liu --- ocaml/xapi-consts/api_messages.ml | 2 -- ocaml/xapi/xapi_clustering.ml | 59 ++----------------------------- 2 files changed, 3 insertions(+), 58 deletions(-) diff --git a/ocaml/xapi-consts/api_messages.ml b/ocaml/xapi-consts/api_messages.ml index 8e2ed38df7f..812340d1040 100644 --- a/ocaml/xapi-consts/api_messages.ml +++ b/ocaml/xapi-consts/api_messages.ml @@ -311,8 +311,6 @@ let cluster_host_leaving = addMessage "CLUSTER_HOST_LEAVING" 3L let cluster_host_joining = addMessage "CLUSTER_HOST_JOINING" 4L -let cluster_stack_out_of_date = addMessage "CLUSTER_STACK_OUT_OF_DATE" 3L - (* Certificate expiration messages *) let host_server_certificate_expiring = "HOST_SERVER_CERTIFICATE_EXPIRING" diff --git a/ocaml/xapi/xapi_clustering.ml b/ocaml/xapi/xapi_clustering.ml index d2b61be2f55..4bef40e3d4d 100644 --- a/ocaml/xapi/xapi_clustering.ml +++ b/ocaml/xapi/xapi_clustering.ml @@ -562,8 +562,6 @@ module Watcher = struct let finish_watch = Atomic.make false - let cluster_stack_watcher : bool Atomic.t = Atomic.make false - (* This function exists to store the fact that the watcher should be destroyed, to avoid the race that the cluster is destroyed, while the watcher is still waiting/stabilising. 
@@ -632,41 +630,6 @@ module Watcher = struct () done - let watch_cluster_stack_version ~__context ~host = - match find_cluster_host ~__context ~host with - | Some ch -> - let cluster_ref = Db.Cluster_host.get_cluster ~__context ~self:ch in - let cluster_rec = Db.Cluster.get_record ~__context ~self:cluster_ref in - if - Cluster_stack.of_version - ( cluster_rec.API.cluster_cluster_stack - , cluster_rec.API.cluster_cluster_stack_version - ) - = Cluster_stack.Corosync2 - then ( - debug "%s: Detected Corosync 2 running as cluster stack" __FUNCTION__ ; - let body = - "The current cluster stack version of Corosync 2 is out of date, \ - consider updating to Corosync 3" - in - let name, priority = Api_messages.cluster_stack_out_of_date in - let host_uuid = Db.Host.get_uuid ~__context ~self:host in - - Helpers.call_api_functions ~__context (fun rpc session_id -> - let _ : [> `message] Ref.t = - Client.Client.Message.create ~rpc ~session_id ~name ~priority - ~cls:`Host ~obj_uuid:host_uuid ~body - in - () - ) - ) else - debug - "%s: Detected Corosync 3 as cluster stack, not generating a \ - warning messsage" - __FUNCTION__ - | None -> - debug "%s: No cluster host, no need to watch" __FUNCTION__ - (** [create_as_necessary] will create cluster watchers on the coordinator if they are not already created. 
There is no need to destroy them: once the clustering daemon is disabled, @@ -674,7 +637,7 @@ module Watcher = struct let create_as_necessary ~__context ~host = let is_master = Helpers.is_pool_master ~__context ~host in let daemon_enabled = Daemon.is_enabled () in - if is_master && daemon_enabled then ( + if is_master && daemon_enabled then if Atomic.compare_and_set cluster_change_watcher false true then ( debug "%s: create watcher for corosync-notifyd on coordinator" __FUNCTION__ ; @@ -687,24 +650,8 @@ module Watcher = struct (* someone else must have gone into the if branch above and created the thread before us, leave it to them *) debug "%s: not create watcher for corosync-notifyd as it already exists" - __FUNCTION__ ; - - if Xapi_cluster_helpers.corosync3_enabled ~__context then - if Atomic.compare_and_set cluster_stack_watcher false true then ( - debug - "%s: create cluster stack watcher for out-of-date cluster stack \ - (corosync2)" - __FUNCTION__ ; - let _ : Thread.t = - Thread.create - (fun () -> watch_cluster_stack_version ~__context ~host) - () - in - () - ) else - debug "%s: not create watcher for cluster stack as it already exists" - __FUNCTION__ - ) else + __FUNCTION__ + else debug "%s not create watcher because we are %b master and clustering is \ enabled %b " From 92114cd8c51968aeb264da0412fba5929ce1fa37 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Sun, 1 Dec 2024 13:24:56 +0000 Subject: [PATCH 115/121] Rewrite Delay module Old implementation had different issues. Just use mutexes and conditions using C stubs. Current Ocaml Condition module does not have support for timeout waiting for conditions, so use C stubs. This implementation does not require opening a pipe. This reduces the possibilities of errors calling "wait" to zero. Mostly of the times it does not require kernel calls. 
Signed-off-by: Frediano Ziglio --- .../lib/xapi-stdext-threads/delay_stubs.c | 169 ++++++++++++++++++ .../xapi-stdext/lib/xapi-stdext-threads/dune | 5 + .../lib/xapi-stdext-threads/threadext.ml | 76 ++------ .../lib/xapi-stdext-threads/threadext.mli | 6 +- 4 files changed, 195 insertions(+), 61 deletions(-) create mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/delay_stubs.c diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/delay_stubs.c b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/delay_stubs.c new file mode 100644 index 00000000000..05138c263d9 --- /dev/null +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/delay_stubs.c @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2024 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include + +typedef struct delay { + pthread_mutex_t mtx; + pthread_cond_t cond; + bool signaled; +} delay; + +// Initialize delay +// Returns error number or 0 if success +static int delay_init(delay *d) +{ + int err; + pthread_condattr_t cond_attr; + + d->signaled = false; + + err = pthread_condattr_init(&cond_attr); + if (err) + goto err0; + err = pthread_condattr_setclock(&cond_attr, CLOCK_MONOTONIC); + if (!err) + err = pthread_cond_init(&d->cond, &cond_attr); + if (err) + goto err1; + err = pthread_mutex_init(&d->mtx, NULL); + if (err) + goto err2; + pthread_condattr_destroy(&cond_attr); + return 0; + +err2: + pthread_cond_destroy(&d->cond); +err1: + pthread_condattr_destroy(&cond_attr); +err0: + return err; +} + +static void delay_destroy(delay *d) +{ + pthread_cond_destroy(&d->cond); + pthread_mutex_destroy(&d->mtx); +} + +static void delay_signal(delay *d) +{ + // there are quite some chances lock is not held + if (pthread_mutex_trylock(&d->mtx) == 0) { + d->signaled = true; + pthread_cond_signal(&d->cond); + pthread_mutex_unlock(&d->mtx); + return; + } + + // slow way, release engine + caml_release_runtime_system(); + pthread_mutex_lock(&d->mtx); + d->signaled = true; + pthread_cond_signal(&d->cond); + pthread_mutex_unlock(&d->mtx); + caml_acquire_runtime_system(); +} + +// Wait for deadline or signal. +// Returns error number or 0 if success. +// Error can be ETIMEDOUT. 
+int delay_wait(delay *d, const struct timespec *deadline) +{ + int err; + + caml_release_runtime_system(); + pthread_mutex_lock(&d->mtx); + do { + if (d->signaled) { + d->signaled = false; + err = 0; + break; + } + err = pthread_cond_timedwait(&d->cond, &d->mtx, deadline); + } while (err == 0); + pthread_mutex_unlock(&d->mtx); + caml_acquire_runtime_system(); + return err; +} + +#define delay_val(v) (*((delay **)Data_custom_val(v))) + +static void delay_finalize(value v_delay) +{ + delay *d = delay_val(v_delay); + delay_destroy(d); + caml_stat_free(d); +} + +static struct custom_operations delay_ops = { + "xapi.delay", + delay_finalize, + custom_compare_default, + custom_hash_default, + custom_serialize_default, + custom_deserialize_default, + custom_compare_ext_default, + custom_fixed_length_default +}; + +CAMLprim value caml_xapi_delay_create(value v_unit) +{ + CAMLparam1(v_unit); + CAMLlocal1(res); + delay *d; + int err; + + d = caml_stat_alloc(sizeof(*d)); + err = delay_init(d); + if (err) { + caml_stat_free(d); + unix_error(err, "caml_delay_create", Nothing); + } + res = caml_alloc_custom(&delay_ops, sizeof(delay *), 0, 1); + delay_val(res) = d; + CAMLreturn(res); +} + +CAMLprim value caml_xapi_delay_signal(value v_delay) +{ + CAMLparam1(v_delay); + delay *d = delay_val(v_delay); + delay_signal(d); + CAMLreturn(Val_unit); +} + +CAMLprim value caml_xapi_delay_wait(value v_delay, value v_deadline) +{ + CAMLparam2(v_delay, v_deadline); + delay *d = delay_val(v_delay); + uint64_t deadline = (uint64_t) Int64_val(v_deadline); + struct timespec ts = { + deadline / 1000000000u, + deadline % 1000000000u + }; + + int err = delay_wait(d, &ts); + if (err != 0 && err != ETIMEDOUT) + unix_error(err, "caml_delay_wait", Nothing); + + CAMLreturn(err ? 
Val_true : Val_false); +} diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune index e0437e881ab..7fcff9e08c2 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune @@ -3,10 +3,15 @@ (name xapi_stdext_threads) (modules :standard \ ipq scheduler threadext_test ipq_test) (libraries + mtime + mtime.clock.os threads.posix unix xapi-stdext-unix xapi-stdext-pervasives) + (foreign_stubs + (language c) + (names delay_stubs)) ) (library diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml index 311d985ca69..b954a159ddb 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml @@ -55,69 +55,27 @@ let thread_iter f xs = match thread_iter_all_exns f xs with [] -> () | (_, e) :: _ -> raise e module Delay = struct - (* Concrete type is the ends of a pipe *) - type t = { - (* A pipe is used to wake up a thread blocked in wait: *) - mutable pipe_in: Unix.file_descr option - ; (* Indicates that a signal arrived before a wait: *) - mutable signalled: bool - ; m: M.t - } + type t - let make () = {pipe_in= None; signalled= false; m= M.create ()} + external make : unit -> t = "caml_xapi_delay_create" - exception Pre_signalled + external signal : t -> unit = "caml_xapi_delay_signal" - let wait (x : t) (seconds : float) = - let to_close = ref [] in - let close' fd = - if List.mem fd !to_close then Unix.close fd ; - to_close := List.filter (fun x -> fd <> x) !to_close - in - finally - (fun () -> - try - let pipe_out = - Mutex.execute x.m (fun () -> - if x.signalled then ( - x.signalled <- false ; - raise Pre_signalled - ) ; - let pipe_out, pipe_in = Unix.pipe () in - (* these will be unconditionally closed on exit *) - to_close := [pipe_out; pipe_in] ; - x.pipe_in <- Some pipe_in ; - x.signalled 
<- false ; - pipe_out - ) - in - let open Xapi_stdext_unix.Unixext in - (* flush the single byte from the pipe *) - try - let (_ : string) = - time_limited_single_read pipe_out 1 ~max_wait:seconds - in - false - with Timeout -> true - (* return true if we waited the full length of time, false if we were woken *) - with Pre_signalled -> false - ) - (fun () -> - Mutex.execute x.m (fun () -> - x.pipe_in <- None ; - List.iter close' !to_close - ) - ) + external wait : t -> int64 -> bool = "caml_xapi_delay_wait" - let signal (x : t) = - Mutex.execute x.m (fun () -> - match x.pipe_in with - | Some fd -> - ignore (Unix.write fd (Bytes.of_string "X") 0 1) - | None -> - x.signalled <- true - (* If the wait hasn't happened yet then store up the signal *) - ) + let wait d t = + if t <= 0. then + true + else + match Mtime.Span.of_float_ns (t *. 1e9) with + | Some span -> + let now = Mtime_clock.now () in + let deadline = + Mtime.add_span now span |> Option.value ~default:Mtime.max_stamp + in + wait d (Mtime.to_uint64_ns deadline) + | None -> + invalid_arg "Time specified too big" end let wait_timed_read fd timeout = diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli index b5edcff21b8..a1af35ccbeb 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli @@ -31,8 +31,10 @@ module Delay : sig val wait : t -> float -> bool (** Blocks the calling thread for a given period of time with the option of returning early if someone calls 'signal'. Returns true if the full time - period elapsed and false if signalled. Note that multple 'signals' are - coalesced; 'signals' sent before 'wait' is called are not lost. *) + period elapsed and false if signalled. Note that multiple 'signals' are + coalesced; 'signals' sent before 'wait' is called are not lost. 
+ Only one thread should call 'wait' for a given 'Delay', attempts + to call from multiple thread is an undefined behaviour. *) val signal : t -> unit (** Sends a signal to a waiting thread. See 'wait' *) From d3c9a5010fa2a3746b68f6d10681b00965570e65 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 5 Dec 2024 13:57:59 +0000 Subject: [PATCH 116/121] CA-394851: Update allowed operations on the cloned VBD Default empty allowed_operations on a cloned VBD means that XenCenter does not display the DVD option in the console tab for VMs cloned from templates, for example. Follow the practice in xapi_vbd, and update_allowed_operations immediately after Db.VBD.create. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_vbd_helpers.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/xapi/xapi_vbd_helpers.ml b/ocaml/xapi/xapi_vbd_helpers.ml index f6b1cc260e7..a63fa6edf1f 100644 --- a/ocaml/xapi/xapi_vbd_helpers.ml +++ b/ocaml/xapi/xapi_vbd_helpers.ml @@ -443,4 +443,5 @@ let copy ~__context ?vdi ~vm vbd = ~qos_algorithm_type:all.API.vBD_qos_algorithm_type ~qos_algorithm_params:all.API.vBD_qos_algorithm_params ~qos_supported_algorithms:[] ~runtime_properties:[] ~metrics ; + update_allowed_operations ~__context ~self:new_vbd ; new_vbd From 1586c747cab62ce1cfb7796cbc01f02f5a2548f0 Mon Sep 17 00:00:00 2001 From: Lunfan Zhang Date: Mon, 25 Nov 2024 09:20:09 +0000 Subject: [PATCH 117/121] CP-51429 Avoid redundant processing when full metadata already exists during sync_updates Signed-off-by: Lunfan Zhang --- ocaml/xapi/repository.ml | 12 +++++++++++- ocaml/xapi/repository.mli | 2 +- ocaml/xapi/xapi_pool.ml | 6 +++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index dd123557a49..8024818d4d9 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -25,6 +25,8 @@ module Pkgs = (val Pkg_mgr.get_pkg_mgr) let capacity_in_parallel = 16 +let ( // ) = Filename.concat + (* The cache below is 
protected by pool's current_operations locking mechanism *) let updates_in_cache : (API.ref_host, Yojson.Basic.t) Hashtbl.t = Hashtbl.create 64 @@ -201,7 +203,15 @@ let sync ~__context ~self ~token ~token_id = * I.E. proxy username/password and temporary token file path. *) write_initial_yum_config () - ) + ) ; + (* The custom yum-utils will fully download repository metadata.*) + let repodata_dir = + !Xapi_globs.local_pool_repo_dir + // repo_name + // "repodata" + // "repomd.xml.asc" + in + Sys.file_exists repodata_dir with e -> error "Failed to sync with remote YUM repository: %s" (ExnHelper.string_of_exn e) ; diff --git a/ocaml/xapi/repository.mli b/ocaml/xapi/repository.mli index e7bddad8bad..81e95730ac9 100644 --- a/ocaml/xapi/repository.mli +++ b/ocaml/xapi/repository.mli @@ -40,7 +40,7 @@ val sync : -> self:[`Repository] API.Ref.t -> token:string -> token_id:string - -> unit + -> bool val create_pool_repository : __context:Context.t -> self:[`Repository] API.Ref.t -> unit diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 3a7dee78735..cfe1f5e3234 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -3534,10 +3534,10 @@ let sync_repos ~__context ~self ~repos ~force ~token ~token_id = repos |> List.iter (fun repo -> if force then cleanup_pool_repo ~__context ~self:repo ; - sync ~__context ~self:repo ~token ~token_id ; - (* Dnf sync all the metadata including updateinfo, + let complete = sync ~__context ~self:repo ~token ~token_id in + (* Dnf and custom yum-utils sync all the metadata including updateinfo, * Thus no need to re-create pool repository *) - if Pkgs.manager = Yum then + if Pkgs.manager = Yum && complete = false then create_pool_repository ~__context ~self:repo ) ; let checksum = set_available_updates ~__context in From f936acbd933749ebf9d5a1244e01e6350d694253 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Fri, 6 Dec 2024 14:32:35 +0000 Subject: [PATCH 118/121] Delay: wait a bit more testing the module In 
some environments the time ranges checked are too strict causing test to fail. Previously the maximum error accepted was 10 ms, increase to 50 ms. Also increase timeouts to reduce error/value ratio. Signed-off-by: Frediano Ziglio --- .../lib/xapi-stdext-threads/threadext_test.ml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml index 0442e302ab7..b93df9f47a8 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml @@ -31,7 +31,7 @@ Single simple signal stored let simple () = let d = Delay.make () in Delay.signal d ; - delay_wait_check ~min:0. ~max:0.01 d 1.0 false + delay_wait_check ~min:0. ~max:0.05 d 1.0 false (* No signal @@ -39,7 +39,7 @@ No signal *) let no_signal () = let d = Delay.make () in - delay_wait_check ~min:0.1 ~max:0.11 d 0.1 true + delay_wait_check ~min:0.2 ~max:0.25 d 0.2 true (* Signal twice, collapsed @@ -52,8 +52,8 @@ let collapsed () = let d = Delay.make () in Delay.signal d ; Delay.signal d ; - delay_wait_check ~min:0. ~max:0.01 d 0.1 false ; - delay_wait_check ~min:0.1 ~max:0.11 d 0.1 true + delay_wait_check ~min:0. 
~max:0.05 d 0.2 false ; + delay_wait_check ~min:0.2 ~max:0.25 d 0.2 true (* Signal from another thread @@ -62,8 +62,8 @@ Signal from another thread *) let other_thread () = let d = Delay.make () in - let th = Thread.create (fun d -> Thread.delay 0.1 ; Delay.signal d) d in - delay_wait_check ~min:0.1 ~max:0.11 d 1.0 false ; + let th = Thread.create (fun d -> Thread.delay 0.2 ; Delay.signal d) d in + delay_wait_check ~min:0.2 ~max:0.25 d 1.0 false ; Thread.join th let tests = From 21b56b4159eacb3468334757dab9d70e5a1d2043 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 5 Dec 2024 15:05:17 +0000 Subject: [PATCH 119/121] xapi_sr: remove commented code from 2009 Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/xapi_sr.ml | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/ocaml/xapi/xapi_sr.ml b/ocaml/xapi/xapi_sr.ml index 8af5bd6e62f..0c4c88aa5d0 100644 --- a/ocaml/xapi/xapi_sr.ml +++ b/ocaml/xapi/xapi_sr.ml @@ -360,23 +360,6 @@ let create ~__context ~host ~device_config ~(physical_size : int64) ~name_label Helpers.assert_rolling_upgrade_not_in_progress ~__context ; debug "SR.create name_label=%s sm_config=[ %s ]" name_label (String.concat "; " (List.map (fun (k, v) -> k ^ " = " ^ v) sm_config)) ; - (* This breaks the udev SR which doesn't support sr_probe *) - (* - let probe_result = probe ~__context ~host ~device_config ~_type ~sm_config in - begin - match Xml.parse_string probe_result with - | Xml.Element("SRlist", _, children) -> () - | _ -> - (* Figure out what was missing, then throw the appropriate error *) - match String.lowercase_ascii _type with - | "lvmoiscsi" -> - if not (List.exists (fun (s,_) -> "targetiqn" = String.lowercase_ascii s) device_config) - then raise (Api_errors.Server_error ("SR_BACKEND_FAILURE_96",["";"";probe_result])) - else if not (List.exists (fun (s,_) -> "scsiid" = String.lowercase_ascii s) device_config) - then raise (Api_errors.Server_error ("SR_BACKEND_FAILURE_107",["";"";probe_result])) - | _ -> () - end; -*) 
let sr_uuid = Uuidx.make () in let sr_uuid_str = Uuidx.to_string sr_uuid in (* Create the SR in the database before creating on disk, so the backends can read the sm_config field. If an error happens here From 098546a8d36ec4956ae3877d68847d079ab26729 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 5 Dec 2024 15:06:36 +0000 Subject: [PATCH 120/121] CA-390025: do not override SR's client-set metadata on update Some plugins may not store the client-set metadata, and return a static value when replying to the update. This would override the values that a client used when the SR was created, or set afterwards, which is unexpected. Now name_label and name_description fields returned by the plugins are ignored on update. Current set_name_label and set_name_description rely on the update mechanism to work. Instead add database call at the end of the methods to ensure both xapi and the SR backend are synchronized, even when the latter fails to update the values. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/xapi_sr.ml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi/xapi_sr.ml b/ocaml/xapi/xapi_sr.ml index 0c4c88aa5d0..12ab2bef924 100644 --- a/ocaml/xapi/xapi_sr.ml +++ b/ocaml/xapi/xapi_sr.ml @@ -575,9 +575,6 @@ let update ~__context ~sr = Db.SR.get_uuid ~__context ~self:sr |> Storage_interface.Sr.of_string in let sr_info = C.SR.stat (Ref.string_of task) sr' in - Db.SR.set_name_label ~__context ~self:sr ~value:sr_info.name_label ; - Db.SR.set_name_description ~__context ~self:sr - ~value:sr_info.name_description ; Db.SR.set_physical_size ~__context ~self:sr ~value:sr_info.total_space ; Db.SR.set_physical_utilisation ~__context ~self:sr ~value:(Int64.sub sr_info.total_space sr_info.free_space) ; @@ -846,7 +843,7 @@ let set_name_label ~__context ~sr ~value = (Storage_interface.Sr.of_string sr') value ) ; - update ~__context ~sr + Db.SR.set_name_label ~__context ~self:sr ~value let set_name_description ~__context ~sr ~value = let 
open Storage_access in @@ -860,7 +857,7 @@ let set_name_description ~__context ~sr ~value = (Storage_interface.Sr.of_string sr') value ) ; - update ~__context ~sr + Db.SR.set_name_description ~__context ~self:sr ~value let set_virtual_allocation ~__context ~self ~value = Db.SR.set_virtual_allocation ~__context ~self ~value From ea46f81b3e8da3cb9bff4da42db46b934c57148f Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 10 Dec 2024 09:21:09 +0000 Subject: [PATCH 121/121] xe-cli completion: Hide COMPREPLY manipulation behind functions Though the majority of completions were already using set_completions and the like to add completion suggestions, there were two leftovers still needlessly changing COMPREPLY themselves. This caused bugs, as in the case of xe vm-import filename= autocompleting all of the filenames into the prompt instead of presenting the choice. Let only these functions operate on COMPREPLY directly. Signed-off-by: Andrii Sultanov --- ocaml/xe-cli/bash-completion | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ocaml/xe-cli/bash-completion b/ocaml/xe-cli/bash-completion index 84fd3656a65..aae832f4d67 100644 --- a/ocaml/xe-cli/bash-completion +++ b/ocaml/xe-cli/bash-completion @@ -143,9 +143,9 @@ _xe() IFS=$'\n,' # Here we actually WANT file name completion, so using compgen is OK. local comp_files=$(compgen -f "$value") - COMPREPLY=( "$comp_files" ) __xe_debug "triggering filename completion for the value:" __xe_debug $(__tab_delimited_newline_array "$comp_files") + set_completions "$comp_files" "$value" return 0 ;; @@ -156,7 +156,6 @@ _xe() if [ "${OLDSTYLE_WORDS[1]}" == "pif-reconfigure-ip" ]; then IFS=$'\n,' suggested_modes="dhcp,static,none" - COMPREPLY=( $(compgen -W "dhcp ,static ,none" -- "$value") ) elif [ "${COMP_WORDS[1]}" == "pif-reconfigure-ipv6" ]; then IFS=$'\n,' suggested_modes="dhcp,static,none,autoconf"