From 30501ada3cab4f464f4889948d7247035b5fb48b Mon Sep 17 00:00:00 2001 From: Danilo Del Busso Date: Mon, 30 Sep 2024 14:13:09 +0100 Subject: [PATCH 01/86] CP-51694: Add testing of C# date converter Signed-off-by: Danilo Del Busso --- .github/workflows/generate-and-build-sdks.yml | 7 + .../autogen/XenServerTest/DateTimeTests.cs | 139 ++++++++++++++++++ .../XenServerTest/XenServerTest.csproj | 27 ++++ .../sdk-gen/csharp/autogen/src/Converters.cs | 15 +- 4 files changed, 183 insertions(+), 5 deletions(-) create mode 100644 ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs create mode 100644 ocaml/sdk-gen/csharp/autogen/XenServerTest/XenServerTest.csproj diff --git a/.github/workflows/generate-and-build-sdks.yml b/.github/workflows/generate-and-build-sdks.yml index a439c969b50..8187c391508 100644 --- a/.github/workflows/generate-and-build-sdks.yml +++ b/.github/workflows/generate-and-build-sdks.yml @@ -138,6 +138,13 @@ jobs: name: SDK_Source_CSharp path: source/ + - name: Test C# SDK + shell: pwsh + run: | + dotnet test source/XenServerTest ` + --disable-build-servers ` + --verbosity=normal + - name: Build C# SDK shell: pwsh run: | diff --git a/ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs b/ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs new file mode 100644 index 00000000000..0bda9474eb0 --- /dev/null +++ b/ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs @@ -0,0 +1,139 @@ +/* + * Copyright (c) Cloud Software Group, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1) Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * 2) Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +using System.Reflection; +using Newtonsoft.Json; +using XenAPI; +using Console = System.Console; + +namespace XenServerTest; + +internal class DateTimeObject +{ + [JsonConverter(typeof(XenDateTimeConverter))] + public DateTime Date { get; set; } +} + +[TestClass] +public class DateTimeTests +{ + private readonly JsonSerializerSettings _settings = new() + { + Converters = new List { new XenDateTimeConverter() } + }; + + [TestMethod] + [DynamicData(nameof(GetTestData), DynamicDataSourceType.Method, + DynamicDataDisplayName = nameof(GetCustomDynamicDataDisplayName))] + public void TestXenDateTimeConverter(string dateString, DateTime expectedDateTime) + { + try + { + var jsonDateString = "{ \"Date\" : \"" + dateString + "\" }"; + var actualDateTime = JsonConvert.DeserializeObject(jsonDateString, _settings); + + Assert.IsNotNull(actualDateTime, $"Failed to convert '{dateString}'"); + Assert.IsTrue(expectedDateTime.Equals(actualDateTime.Date), + $"Conversion of '{dateString}' resulted in an incorrect DateTime value"); + } + catch (Exception ex) + { + // Log the error or mark this specific data entry as failed + Console.WriteLine($@"Error processing dateString '{dateString}': {ex.Message}"); + Assert.Fail($"An error occurred while processing '{dateString}'"); + } + } + + public static string GetCustomDynamicDataDisplayName(MethodInfo methodInfo, object[] data) + { + return $"{methodInfo.Name}: '{data[0] as string}'"; + } + + public static IEnumerable GetTestData() + { + // no dashes, no colons + yield return new object[] { "20220101T123045", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Unspecified) }; + yield return new object[] { "20220101T123045Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc) }; + yield return new object[] { "20220101T123045+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + yield return new object[] { "20220101T123045+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + yield return new object[] { 
"20220101T123045+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + + yield return new object[] + { "20220101T123045.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Unspecified) }; + yield return new object[] + { "20220101T123045.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc) }; + yield return new object[] + { "20220101T123045.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + yield return new object[] + { "20220101T123045.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + yield return new object[] + { "20220101T123045.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + + // no dashes, with colons + yield return new object[] + { "20220101T12:30:45", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Unspecified) }; + yield return new object[] { "20220101T12:30:45Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc) }; + yield return new object[] { "20220101T12:30:45+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + yield return new object[] { "20220101T12:30:45+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + yield return new object[] + { "20220101T12:30:45+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + + yield return new object[] + { "20220101T12:30:45.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Unspecified) }; + yield return new object[] + { "20220101T12:30:45.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc) }; + yield return new object[] + { "20220101T12:30:45.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + yield return new object[] + { "20220101T12:30:45.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + yield return new object[] + { "20220101T12:30:45.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + + // dashes and colons + yield return new object[] + { 
"2022-01-01T12:30:45", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Unspecified) }; + yield return new object[] { "2022-01-01T12:30:45Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc) }; + yield return new object[] { "2022-01-01T12:30:45+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + yield return new object[] + { "2022-01-01T12:30:45+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + yield return new object[] + { "2022-01-01T12:30:45+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + + yield return new object[] + { "2022-01-01T12:30:45.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Unspecified) }; + yield return new object[] + { "2022-01-01T12:30:45.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc) }; + yield return new object[] + { "2022-01-01T12:30:45.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + yield return new object[] + { "2022-01-01T12:30:45.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + yield return new object[] + { "2022-01-01T12:30:45.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + } +} diff --git a/ocaml/sdk-gen/csharp/autogen/XenServerTest/XenServerTest.csproj b/ocaml/sdk-gen/csharp/autogen/XenServerTest/XenServerTest.csproj new file mode 100644 index 00000000000..8300b4b7edb --- /dev/null +++ b/ocaml/sdk-gen/csharp/autogen/XenServerTest/XenServerTest.csproj @@ -0,0 +1,27 @@ + + + + net6.0 + enable + enable + + false + true + + + + + + + + + + + + + + + + + + diff --git a/ocaml/sdk-gen/csharp/autogen/src/Converters.cs b/ocaml/sdk-gen/csharp/autogen/src/Converters.cs index 32b02d987a6..6f828fdc0a6 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/Converters.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/Converters.cs @@ -31,10 +31,12 @@ using System.Collections.Generic; using System.Globalization; using System.Linq; +using System.Runtime.CompilerServices; using Newtonsoft.Json; 
using Newtonsoft.Json.Converters; using Newtonsoft.Json.Linq; +[assembly: InternalsVisibleTo("XenServerTest")] namespace XenAPI { @@ -437,12 +439,16 @@ internal class XenDateTimeConverter : IsoDateTimeConverter public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer) { - string str = JToken.Load(reader).ToString(); + // JsonReader may have already parsed the date for us + if (reader.ValueType != null && reader.ValueType == typeof(DateTime)) + { + return reader.Value; + } - DateTime result; + var str = JToken.Load(reader).ToString(); if (DateTime.TryParseExact(str, DateFormatsUtc, CultureInfo.InvariantCulture, - DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out result)) + DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal, out var result)) return result; if (DateTime.TryParseExact(str, DateFormatsLocal, CultureInfo.InvariantCulture, @@ -454,9 +460,8 @@ public override object ReadJson(JsonReader reader, Type objectType, object exist public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer) { - if (value is DateTime) + if (value is DateTime dateTime) { - var dateTime = (DateTime)value; dateTime = dateTime.ToUniversalTime(); var text = dateTime.ToString(DateFormatsUtc[0], CultureInfo.InvariantCulture); writer.WriteValue(text); From dc1ef200a16e9d2a0597705157a5201a1221fad6 Mon Sep 17 00:00:00 2001 From: Danilo Del Busso Date: Mon, 30 Sep 2024 14:51:59 +0100 Subject: [PATCH 02/86] CP-51694: Add testing of Java date deserializer Signed-off-by: Danilo Del Busso --- ocaml/sdk-gen/java/autogen/xen-api/pom.xml | 12 ++ .../test/java/CustomDateDeserializerTest.java | 123 ++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 ocaml/sdk-gen/java/autogen/xen-api/src/test/java/CustomDateDeserializerTest.java diff --git a/ocaml/sdk-gen/java/autogen/xen-api/pom.xml b/ocaml/sdk-gen/java/autogen/xen-api/pom.xml index 66e1b633db2..c3a6cabdfda 
100644 --- a/ocaml/sdk-gen/java/autogen/xen-api/pom.xml +++ b/ocaml/sdk-gen/java/autogen/xen-api/pom.xml @@ -62,6 +62,13 @@ httpclient5 5.3 + + + org.junit.jupiter + junit-jupiter + 5.11.1 + test + @@ -119,6 +126,11 @@ + + org.apache.maven.plugins + maven-surefire-plugin + 3.5.0 + diff --git a/ocaml/sdk-gen/java/autogen/xen-api/src/test/java/CustomDateDeserializerTest.java b/ocaml/sdk-gen/java/autogen/xen-api/src/test/java/CustomDateDeserializerTest.java new file mode 100644 index 00000000000..f125e1d1174 --- /dev/null +++ b/ocaml/sdk-gen/java/autogen/xen-api/src/test/java/CustomDateDeserializerTest.java @@ -0,0 +1,123 @@ +/* + * Copyright (c) Cloud Software Group, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1) Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2) Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.module.SimpleModule; +import com.xensource.xenapi.CustomDateDeserializer; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.text.SimpleDateFormat; +import java.util.*; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class CustomDateDeserializerTest { + + private static Stream provideDateStringsAndExpectedDates() { + Hashtable dates = new Hashtable<>(); + + // no dashes, no colons + dates.put("20220101T123045", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T123045Z", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T123045+03", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T123045+0300", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T123045+03:00", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + + dates.put("20220101T123045.123", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T123045.123Z", createDate(2022, 
Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T123045.123+03", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T123045.123+0300", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T123045.123+03:00", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + + // no dashes, with colons + dates.put("20220101T12:30:45", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T12:30:45Z", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T12:30:45+03", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T12:30:45+0300", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T12:30:45+03:00", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + + dates.put("20220101T12:30:45.123", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T12:30:45.123Z", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("UTC"))); + dates.put("20220101T12:30:45.123+03", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T12:30:45.123+0300", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + dates.put("20220101T12:30:45.123+03:00", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + + // dashes and colons + dates.put("2022-01-01T12:30:45", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("UTC"))); + dates.put("2022-01-01T12:30:45Z", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("UTC"))); 
+ dates.put("2022-01-01T12:30:45+03", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + dates.put("2022-01-01T12:30:45+0300", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + dates.put("2022-01-01T12:30:45+03:00", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 0, TimeZone.getTimeZone("GMT+03"))); + + dates.put("2022-01-01T12:30:45.123", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("UTC"))); + dates.put("2022-01-01T12:30:45.123Z", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("UTC"))); + dates.put("2022-01-01T12:30:45.123+03", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + dates.put("2022-01-01T12:30:45.123+0300", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + dates.put("2022-01-01T12:30:45.123+03:00", createDate(2022, Calendar.JANUARY, 1, 12, 30, 45, 123, TimeZone.getTimeZone("GMT+03"))); + + + return dates.entrySet().stream() + .map(entry -> Arguments.of(entry.getKey(), entry.getValue())); + } + + private static Date createDate(int year, int month, int day, int hour, int minute, int seconds, int milliseconds, TimeZone timeZone) { + Calendar calendar = new GregorianCalendar(timeZone); + calendar.set(year, month, day, hour, minute, seconds); + calendar.set(Calendar.MILLISECOND, milliseconds); + return calendar.getTime(); + } + + private static ObjectMapper createObjectMapperWithCustomDeserializer() { + ObjectMapper mapper = new ObjectMapper(); + SimpleModule module = new SimpleModule(); + module.addDeserializer(Date.class, new CustomDateDeserializer()); + mapper.registerModule(module); + return mapper; + } + + @ParameterizedTest + @MethodSource("provideDateStringsAndExpectedDates") + public void shouldParseDateStringsCorrectlyWithCustomDeserializer(String dateString, Date expectedDate) throws Exception { + ObjectMapper mapper = 
createObjectMapperWithCustomDeserializer(); + + Date parsedDate = mapper.readValue("\"" + dateString + "\"", Date.class); + + SimpleDateFormat outputFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS Z"); + String parsedDateString = outputFormat.format(parsedDate); + String expectedDateString = outputFormat.format(expectedDate); + + assertEquals(expectedDate, parsedDate, + () -> "Failed to parse datetime value: " + dateString + + ". Parsed date: " + parsedDateString + + ", expected: " + expectedDateString); + } +} From f0b632236e5589a59e02def75531e51d5c8e32fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Fri, 26 Apr 2024 15:14:27 +0100 Subject: [PATCH 03/86] rrdd: avoid constructing intermediate lists, use Seq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This enables extending the type without causing performance issues, and should reduce the work for the garbage collector. Signed-off-by: Edwin Török --- ocaml/libs/xapi-rrd/lib/rrd.ml | 2 +- ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 14 ++++---------- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 15 ++++++++------- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli | 2 +- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 8 ++++++-- ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml | 2 +- 6 files changed, 21 insertions(+), 22 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 0b67cc9efc5..ade27f677fd 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -459,7 +459,7 @@ let ds_update rrd timestamp values transforms new_domid = (** Update the rrd with named values rather than just an ordered array *) let ds_update_named rrd timestamp ~new_domid valuesandtransforms = let valuesandtransforms = - valuesandtransforms |> List.to_seq |> StringMap.of_seq + valuesandtransforms |> StringMap.of_seq in let get_value_and_transform {ds_name; _} = Option.value ~default:(VT_Unknown, Identity) diff --git 
a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index 34a44e92dfe..89ccd03f943 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -97,7 +97,7 @@ let update_rrds timestamp dss uuid_domids paused_vms = in OwnerMap.update owner merge all in - let dss = List.fold_left consolidate OwnerMap.empty dss in + let dss = Seq.fold_left consolidate OwnerMap.empty dss in (* the first parameter and ds.ds_name are equivalent *) let to_named_updates (_, ds) = @@ -123,9 +123,7 @@ let update_rrds timestamp dss uuid_domids paused_vms = unreliable" by_how_much ; let process_vm vm_uuid dss = - let named_updates = - StringMap.to_seq dss |> Seq.map to_named_updates |> List.of_seq - in + let named_updates = StringMap.to_seq dss |> Seq.map to_named_updates in let dss = StringMap.to_seq dss |> Seq.map snd |> List.of_seq in match StringMap.find_opt vm_uuid uuid_domids with @@ -155,9 +153,7 @@ let update_rrds timestamp dss uuid_domids paused_vms = __FUNCTION__ vm_uuid in let process_sr sr_uuid dss = - let named_updates = - StringMap.to_seq dss |> Seq.map to_named_updates |> List.of_seq - in + let named_updates = StringMap.to_seq dss |> Seq.map to_named_updates in let dss = StringMap.to_seq dss |> Seq.map snd |> List.of_seq in try (* First, potentially update the rrd with any new default dss *) @@ -175,9 +171,7 @@ let update_rrds timestamp dss uuid_domids paused_vms = with _ -> log_backtrace () in let process_host dss = - let named_updates = - StringMap.to_seq dss |> Seq.map to_named_updates |> List.of_seq - in + let named_updates = StringMap.to_seq dss |> Seq.map to_named_updates in let dss = StringMap.to_seq dss |> Seq.map snd |> List.of_seq in match !host_rrd with diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index 9662af66611..bd625bdae30 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -764,22 +764,23 @@ 
module Plugin = struct ) (* Read, parse, and combine metrics from all registered plugins. *) - let read_stats () : (Rrd.ds_owner * Ds.ds) list = + let read_stats () : (Rrd.ds_owner * Ds.ds) Seq.t = let plugins = with_lock registered_m (fun _ -> List.of_seq (Hashtbl.to_seq registered) ) in - let process_plugin acc (uid, plugin) = + let process_plugin (uid, plugin) = try let payload = get_payload ~uid plugin in - List.rev_append payload.Rrd_protocol.datasources acc - with _ -> acc + List.to_seq payload.Rrd_protocol.datasources + with _ -> Seq.empty in List.iter decr_skip_count plugins ; plugins - |> List.filter (Fun.negate skip) - |> List.fold_left process_plugin [] + |> List.to_seq + |> Seq.filter (Fun.negate skip) + |> Seq.flat_map process_plugin end module Local = Make (struct @@ -805,7 +806,7 @@ module Plugin = struct let deregister = Local.deregister (* Read, parse, and combine metrics from all registered plugins. *) - let read_stats () : (Rrd.ds_owner * Ds.ds) list = Local.read_stats () + let read_stats () : (Rrd.ds_owner * Ds.ds) Seq.t = Local.read_stats () end module HA = struct diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli index 8fbe6f41992..2911895a4a3 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli @@ -69,7 +69,7 @@ module Plugin : sig val next_reading : string -> float - val read_stats : unit -> (Rrd.ds_owner * Ds.ds) list + val read_stats : unit -> (Rrd.ds_owner * Ds.ds) Seq.t module Local : sig val register : diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index 48da4c60ae7..f48a7a62171 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -508,9 +508,13 @@ let do_monitor_write xc writers = let timestamp, domains, my_paused_vms = domain_snapshot xc in let tagged_dom0_stats = generate_all_dom0_stats xc timestamp domains in write_dom0_stats writers (Int64.of_float timestamp) 
tagged_dom0_stats ; - let dom0_stats = List.concat_map snd tagged_dom0_stats in + let dom0_stats = + tagged_dom0_stats + |> List.to_seq + |> Seq.flat_map (fun l -> l |> snd |> List.to_seq) + in let plugins_stats = Rrdd_server.Plugin.read_stats () in - let stats = List.rev_append plugins_stats dom0_stats in + let stats = Seq.append plugins_stats dom0_stats in Rrdd_stats.print_snapshot () ; let uuid_domids = List.map (fun (_, u, i) -> (u, i)) domains in Rrdd_monitor.update_rrds timestamp stats uuid_domids my_paused_vms ; diff --git a/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml b/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml index 8fe6a1c551c..27d64870fe4 100644 --- a/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml @@ -53,7 +53,7 @@ let update_rrds_test ~dss ~uuid_domids ~paused_vms ~expected_vm_rrds ~expected_sr_rrds ~expected_host_dss = let test () = reset_rrdd_shared_state () ; - Rrdd_monitor.update_rrds 12345.0 dss uuid_domids paused_vms ; + Rrdd_monitor.update_rrds 12345.0 (List.to_seq dss) uuid_domids paused_vms ; check_datasources "VM" (Some Rrdd_shared.vm_rrds) expected_vm_rrds ; check_datasources "SR" (Some Rrdd_shared.sr_rrds) expected_sr_rrds ; check_datasources "Host" (host_rrds !Rrdd_shared.host_rrd) expected_host_dss From 8b2b2d249c9f8b8a399b8c067fbcd13a6a037668 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 11 Oct 2024 13:41:51 +0100 Subject: [PATCH 04/86] CA-391651 - rrd: Remove deprecated member of rra struct Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index ade27f677fd..7189b48753f 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -161,8 +161,6 @@ type rra = { ; rra_data: Fring.t array (** Stored data, one ring per datasource *) ; rra_cdps: cdp_prep array (** scratch area for consolidated datapoint preparation *) - ; 
mutable rra_updatehook: (rrd -> int -> unit) option - (** Hook that gets called when an update happens *) } (** The container for the DSs and RRAs. Also specifies the period between pdps *) @@ -185,7 +183,6 @@ let copy_rra x = ; rra_xff= x.rra_xff ; rra_data= Array.map Fring.copy x.rra_data ; rra_cdps= Array.map copy_cdp_prep x.rra_cdps - ; rra_updatehook= x.rra_updatehook } let copy_ds x = @@ -322,7 +319,6 @@ let rra_update rrd proc_pdp_st elapsed_pdp_st pdps = ) rra.rra_cdps ; do_cfs rra new_start_pdp_offset pdps ; - match rra.rra_updatehook with None -> () | Some f -> f rrd rra_step_cnt ) in Array.iter updatefn rrd.rrd_rras @@ -486,7 +482,6 @@ let rra_create cf row_cnt pdp_cnt xff = ; rra_cdps= [||] (* defer creation of the data until we know how many dss we're storing *) - ; rra_updatehook= None (* DEPRECATED *) } let ds_create name ty ?(min = neg_infinity) ?(max = infinity) ?(mrhb = infinity) @@ -784,7 +779,6 @@ let from_xml input = ; rra_xff= float_of_string xff ; rra_data= database ; rra_cdps= Array.of_list cdps - ; rra_updatehook= None } ) i From cacf52a5101ddae7d9940e8385ae415b8327a4b0 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 18 Oct 2024 11:25:32 +0100 Subject: [PATCH 05/86] CA-391651: Make timestamps of data collectors in xcp-rrdd independent Instead of getting one timestamp for all collectors, get them closer to the actual measurement time. 
Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index f48a7a62171..c0ed92d05fc 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -466,7 +466,6 @@ let domain_snapshot xc = let domains = Xenctrl.domain_getinfolist xc 0 |> List.filter_map metadata_of_domain in - let timestamp = Unix.gettimeofday () in let domain_paused (d, uuid, _) = if d.Xenctrl.paused then Some uuid else None in @@ -474,7 +473,7 @@ let domain_snapshot xc = let domids = List.map (fun (_, _, i) -> i) domains |> IntSet.of_list in let domains_only k v = Option.map (Fun.const v) (IntSet.find_opt k domids) in Hashtbl.filter_map_inplace domains_only Rrdd_shared.memory_targets ; - (timestamp, domains, paused_uuids) + (domains, paused_uuids) let dom0_stat_generators = [ @@ -484,13 +483,16 @@ let dom0_stat_generators = ; ("cache", fun _ timestamp _ -> dss_cache timestamp) ] -let generate_all_dom0_stats xc timestamp domains = +let generate_all_dom0_stats xc domains = let handle_generator (name, generator) = - (name, handle_exn name (fun _ -> generator xc timestamp domains) []) + let timestamp = Unix.gettimeofday () in + ( name + , (timestamp, handle_exn name (fun _ -> generator xc timestamp domains) []) + ) in List.map handle_generator dom0_stat_generators -let write_dom0_stats writers timestamp tagged_dss = +let write_dom0_stats writers tagged_dss = let write_dss (name, writer) = match List.assoc_opt name tagged_dss with | None -> @@ -498,20 +500,23 @@ let write_dom0_stats writers timestamp tagged_dss = "Could not write stats for \"%s\": no stats were associated with \ this name" name - | Some dss -> + | Some (timestamp, dss) -> + let timestamp = Int64.of_float timestamp in writer.Rrd_writer.write_payload {timestamp; datasources= dss} in List.iter write_dss writers let 
do_monitor_write xc writers = Rrdd_libs.Stats.time_this "monitor" (fun _ -> - let timestamp, domains, my_paused_vms = domain_snapshot xc in - let tagged_dom0_stats = generate_all_dom0_stats xc timestamp domains in - write_dom0_stats writers (Int64.of_float timestamp) tagged_dom0_stats ; + let domains, my_paused_vms = domain_snapshot xc in + let tagged_dom0_stats = generate_all_dom0_stats xc domains in + write_dom0_stats writers tagged_dom0_stats ; let dom0_stats = tagged_dom0_stats |> List.to_seq - |> Seq.flat_map (fun l -> l |> snd |> List.to_seq) + |> Seq.flat_map (fun (name, (timestamp, dss)) -> + (timestamp, List.to_seq dss) + ) in let plugins_stats = Rrdd_server.Plugin.read_stats () in let stats = Seq.append plugins_stats dom0_stats in From a7bc62d321492ef8ff326196907c3eed5bde467f Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 18 Oct 2024 11:44:21 +0100 Subject: [PATCH 06/86] CA-391651: rrdd_server - read plugins' timestamps, don't just ignore them Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 8 +++++--- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index bd625bdae30..2b2a6cca004 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -764,7 +764,7 @@ module Plugin = struct ) (* Read, parse, and combine metrics from all registered plugins. 
*) - let read_stats () : (Rrd.ds_owner * Ds.ds) Seq.t = + let read_stats () = let plugins = with_lock registered_m (fun _ -> List.of_seq (Hashtbl.to_seq registered) @@ -773,7 +773,9 @@ module Plugin = struct let process_plugin (uid, plugin) = try let payload = get_payload ~uid plugin in - List.to_seq payload.Rrd_protocol.datasources + let timestamp = payload.Rrd_protocol.timestamp |> Int64.to_float in + let dss = List.to_seq payload.Rrd_protocol.datasources in + (timestamp, dss) with _ -> Seq.empty in List.iter decr_skip_count plugins ; @@ -806,7 +808,7 @@ module Plugin = struct let deregister = Local.deregister (* Read, parse, and combine metrics from all registered plugins. *) - let read_stats () : (Rrd.ds_owner * Ds.ds) Seq.t = Local.read_stats () + let read_stats () = Local.read_stats () end module HA = struct diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli index 2911895a4a3..d32540390c0 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli @@ -69,7 +69,7 @@ module Plugin : sig val next_reading : string -> float - val read_stats : unit -> (Rrd.ds_owner * Ds.ds) Seq.t + val read_stats : unit -> float * (Rrd.ds_owner * Ds.ds) Seq.t module Local : sig val register : From fdcb38651fa35706c24c11f7eda408a6184f492e Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 18 Oct 2024 15:12:51 +0100 Subject: [PATCH 07/86] CA-391651: Propagate the timestamp inside RRD. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of timestamps taken at read time in xcp-rrdd, propagate timestamps taken by plugins (or collectors in xcp-rrdd itself) and use these when updating values. Also process datasources without flattening them all into one list. This allows to process datasources with the same timestamp (provided by the plugin or dom0 collector in xcp_rrdd) at once. 
Co-authored-by: Edwin Török Signed-off-by: Edwin Török Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 49 ++++---- ocaml/xcp-rrdd/bin/rrdd/rrdd_http_handler.ml | 4 +- ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 111 ++++++++++--------- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 58 +++++----- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli | 2 +- ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml | 9 +- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 13 ++- 7 files changed, 130 insertions(+), 116 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 7189b48753f..845f49aebc7 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -318,7 +318,7 @@ let rra_update rrd proc_pdp_st elapsed_pdp_st pdps = cdp.cdp_value <- cdp_init ) rra.rra_cdps ; - do_cfs rra new_start_pdp_offset pdps ; + do_cfs rra new_start_pdp_offset pdps ) in Array.iter updatefn rrd.rrd_rras @@ -363,7 +363,12 @@ let process_ds_value ds value interval new_domid = rate let ds_update rrd timestamp values transforms new_domid = - (* Interval is the time between this and the last update *) + (* Interval is the time between this and the last update + + Currently ds_update is called with datasources that belong to a single + plugin, correspondingly they all have the same timestamp. + Further refactoring is needed if timestamps per measurement are to be + introduced. *) let interval = timestamp -. rrd.last_updated in (* Work around the clock going backwards *) let interval = if interval < 0. then 5. 
else interval in @@ -452,11 +457,10 @@ let ds_update rrd timestamp values transforms new_domid = v2s ) -(** Update the rrd with named values rather than just an ordered array *) -let ds_update_named rrd timestamp ~new_domid valuesandtransforms = - let valuesandtransforms = - valuesandtransforms |> StringMap.of_seq - in +(** Update the rrd with named values rather than just an ordered array + Must be called with datasources coming from a single plugin, with + [timestamp] and [uid] representing it *) +let ds_update_named rrd ~new_domid timestamp valuesandtransforms = let get_value_and_transform {ds_name; _} = Option.value ~default:(VT_Unknown, Identity) (StringMap.find_opt ds_name valuesandtransforms) @@ -497,8 +501,7 @@ let ds_create name ty ?(min = neg_infinity) ?(max = infinity) ?(mrhb = infinity) ; ds_unknown_sec= 0.0 } -let rrd_create dss rras timestep inittime = - (* Use the standard update routines to initialise everything to correct values *) +let rrd_create dss rras timestep timestamp = let rrd = { last_updated= 0.0 @@ -510,16 +513,16 @@ let rrd_create dss rras timestep inittime = { rra with rra_data= - Array.init (Array.length dss) (fun i -> - let ds = dss.(i) in - Fring.make rra.rra_row_cnt nan ds.ds_min ds.ds_max - ) + Array.map + (fun ds -> Fring.make rra.rra_row_cnt nan ds.ds_min ds.ds_max) + dss ; rra_cdps= - Array.init (Array.length dss) (fun i -> - let ds = dss.(i) in + Array.map + (fun ds -> let cdp_init = cf_init_value rra.rra_cf ds in {cdp_value= cdp_init; cdp_unknown_pdps= 0} - ) + ) + dss } ) rras @@ -527,21 +530,20 @@ let rrd_create dss rras timestep inittime = in let values = Array.map (fun ds -> ds.ds_last) dss in let transforms = Array.make (Array.length values) Identity in - ds_update rrd inittime values transforms true ; + (* Use the standard update routines to initialise everything to correct values *) + ds_update rrd timestamp values transforms true ; rrd (** Add in a new DS into a pre-existing RRD. 
Preserves data of all the other archives and fills the new one full of NaNs. Note that this doesn't fill in the CDP values correctly at the moment! - - @param now = Unix.gettimeofday () *) -let rrd_add_ds rrd now newds = +let rrd_add_ds rrd timestamp newds = if List.mem newds.ds_name (ds_names rrd) then rrd else - let npdps = Int64.of_float now /// rrd.timestep in + let npdps = Int64.of_float timestamp /// rrd.timestep in { rrd with rrd_dss= Array.append rrd.rrd_dss [|newds|] @@ -631,15 +633,14 @@ let find_best_rras rrd pdp_interval cf start = in List.filter (contains_time newstarttime) rras -(* now = Unix.gettimeofday () *) -let query_named_ds rrd now ds_name cf = +let query_named_ds rrd as_of_time ds_name cf = let n = Utils.array_index ds_name (Array.map (fun ds -> ds.ds_name) rrd.rrd_dss) in if n = -1 then raise (Invalid_data_source ds_name) else - let rras = find_best_rras rrd 0 (Some cf) (Int64.of_float now) in + let rras = find_best_rras rrd 0 (Some cf) (Int64.of_float as_of_time) in match rras with | [] -> raise No_RRA_Available diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_http_handler.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_http_handler.ml index 4cf580ed590..60f4c75dac0 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_http_handler.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_http_handler.ml @@ -228,5 +228,7 @@ let put_rrd_handler (req : Http.Request.t) (s : Unix.file_descr) _ = ) else ( debug "Receiving RRD for resident VM uuid=%s. Replacing in hashtable." 
uuid ; let domid = int_of_string (List.assoc "domid" query) in - with_lock mutex (fun _ -> Hashtbl.replace vm_rrds uuid {rrd; dss= []; domid}) + with_lock mutex (fun _ -> + Hashtbl.replace vm_rrds uuid {rrd; dss= Rrd.StringMap.empty; domid} + ) ) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index 89ccd03f943..2a91ddafdad 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -26,7 +26,7 @@ let create_rras use_min_max = let step = 5L (** Create a rrd *) -let create_fresh_rrd use_min_max dss = +let create_fresh_rrd use_min_max dss timestamp = let rras = create_rras use_min_max in let dss = Array.of_list @@ -43,24 +43,36 @@ let create_fresh_rrd use_min_max dss = dss ) in - Rrd.rrd_create dss rras step (Unix.gettimeofday ()) + Rrd.rrd_create dss rras step timestamp -let merge_new_dss rrd dss = - let should_enable_ds ds = !Rrdd_shared.enable_all_dss || ds.ds_default in - let enabled_dss = List.filter should_enable_ds dss in - let current_dss = Rrd.ds_names rrd |> StringSet.of_list in +(* Check if new (enabled) datasources appeared, and add them to the RRD *) +let merge_new_dss rrdi dss = + let should_enable_ds _ (_, ds) = + !Rrdd_shared.enable_all_dss || ds.ds_default + in + let default_dss = StringMap.filter should_enable_ds dss in + (* NOTE: It's enough to check if all the default datasources have been added + to the RRD_INFO, because if a non-default one has been enabled at runtime, + it's added to the RRD immediately and we don't need to bother *) let new_dss = - List.filter - (fun ds -> not (StringSet.mem ds.ds_name current_dss)) - enabled_dss + StringMap.filter + (fun ds_name _ -> not (StringMap.mem ds_name rrdi.dss)) + default_dss in - let now = Unix.gettimeofday () in - List.fold_left - (fun rrd ds -> - rrd_add_ds rrd now - (Rrd.ds_create ds.ds_name ds.Ds.ds_type ~mrhb:300.0 Rrd.VT_Unknown) - ) - rrd new_dss + (* fold on Map is not tail-recursive, but the depth of 
the stack should be + log of the number of entries at worst, so this should be alright. + Previous conversions to List are also not tail-recursive with identical + stack depth *) + let merge_keys _key a _b = Some a in + let updated_dss = StringMap.union merge_keys dss rrdi.dss in + ( updated_dss + , StringMap.fold + (fun _key (timestamp, ds) rrd -> + rrd_add_ds rrd timestamp + (Rrd.ds_create ds.ds_name ds.Ds.ds_type ~mrhb:300.0 Rrd.VT_Unknown) + ) + new_dss rrdi.rrd + ) module OwnerMap = Map.Make (struct type t = ds_owner @@ -84,11 +96,11 @@ end) update, we assume that the domain has gone and we stream the RRD to the master. We also have a list of the currently rebooting VMs to ensure we don't accidentally archive the RRD. *) -let update_rrds timestamp dss uuid_domids paused_vms = +let update_rrds uuid_domids paused_vms (timestamp, dss) = let uuid_domids = List.to_seq uuid_domids |> StringMap.of_seq in let paused_vms = List.to_seq paused_vms |> StringSet.of_seq in let consolidate all (owner, ds) = - let add_ds_to = StringMap.add ds.ds_name ds in + let add_ds_to = StringMap.add ds.ds_name (timestamp, ds) in let merge = function | None -> Some (add_ds_to StringMap.empty) @@ -98,10 +110,9 @@ let update_rrds timestamp dss uuid_domids paused_vms = OwnerMap.update owner merge all in let dss = Seq.fold_left consolidate OwnerMap.empty dss in - - (* the first parameter and ds.ds_name are equivalent *) - let to_named_updates (_, ds) = - (ds.ds_name, (ds.ds_value, ds.ds_pdp_transform_function)) + let to_named_updates (_, ds) = (ds.ds_value, ds.ds_pdp_transform_function) in + let map_keys_to_list dss = + StringMap.bindings dss |> List.map snd |> List.map snd in (* Here we do the synchronising between the dom0 view of the world and our @@ -109,12 +120,13 @@ let update_rrds timestamp dss uuid_domids paused_vms = the world *) Xapi_stdext_threads.Threadext.Mutex.execute mutex (fun _ -> let out_of_date, by_how_much = + let reading_timestamp = Unix.gettimeofday () in match !host_rrd 
with | None -> (false, 0.) | Some rrdi -> - ( rrdi.rrd.Rrd.last_updated > timestamp - , abs_float (timestamp -. rrdi.rrd.Rrd.last_updated) + ( rrdi.rrd.Rrd.last_updated > reading_timestamp + , abs_float (reading_timestamp -. rrdi.rrd.Rrd.last_updated) ) in if out_of_date then @@ -122,30 +134,26 @@ let update_rrds timestamp dss uuid_domids paused_vms = "Clock just went backwards by %.0f seconds: RRD data may now be \ unreliable" by_how_much ; - let process_vm vm_uuid dss = - let named_updates = StringMap.to_seq dss |> Seq.map to_named_updates in - let dss = StringMap.to_seq dss |> Seq.map snd |> List.of_seq in - + let process_vm vm_uuid (dss : (float * Ds.ds) Rrd.StringMap.t) = match StringMap.find_opt vm_uuid uuid_domids with | Some domid -> ( (* First, potentially update the rrd with any new default dss *) match Hashtbl.find_opt vm_rrds vm_uuid with | Some rrdi -> - let rrd = merge_new_dss rrdi.rrd dss in - Hashtbl.replace vm_rrds vm_uuid {rrd; dss; domid} ; + let updated_dss, rrd = merge_new_dss rrdi dss in + Hashtbl.replace vm_rrds vm_uuid {rrd; dss= updated_dss; domid} ; (* CA-34383: Memory updates from paused domains serve no useful purpose. During a migrate such updates can also cause undesirable discontinuities in the observed value of memory_actual. 
Hence, we ignore changes from paused domains: *) - if not (StringSet.mem vm_uuid paused_vms) then ( - Rrd.ds_update_named rrd timestamp - ~new_domid:(domid <> rrdi.domid) named_updates ; - rrdi.dss <- dss ; - rrdi.domid <- domid - ) + let named_updates = StringMap.map to_named_updates dss in + if not (StringSet.mem vm_uuid paused_vms) then + Rrd.ds_update_named rrd ~new_domid:(domid <> rrdi.domid) + timestamp named_updates | None -> debug "%s: Creating fresh RRD for VM uuid=%s" __FUNCTION__ vm_uuid ; - let rrd = create_fresh_rrd !use_min_max dss in + let dss_list = map_keys_to_list dss in + let rrd = create_fresh_rrd !use_min_max dss_list timestamp in Hashtbl.replace vm_rrds vm_uuid {rrd; dss; domid} ) | None -> @@ -153,39 +161,36 @@ let update_rrds timestamp dss uuid_domids paused_vms = __FUNCTION__ vm_uuid in let process_sr sr_uuid dss = - let named_updates = StringMap.to_seq dss |> Seq.map to_named_updates in - let dss = StringMap.to_seq dss |> Seq.map snd |> List.of_seq in try (* First, potentially update the rrd with any new default dss *) match Hashtbl.find_opt sr_rrds sr_uuid with | Some rrdi -> - let rrd = merge_new_dss rrdi.rrd dss in - Hashtbl.replace sr_rrds sr_uuid {rrd; dss; domid= 0} ; - Rrd.ds_update_named rrd timestamp ~new_domid:false named_updates ; - rrdi.dss <- dss ; - rrdi.domid <- 0 + let updated_dss, rrd = merge_new_dss rrdi dss in + Hashtbl.replace sr_rrds sr_uuid {rrd; dss= updated_dss; domid= 0} ; + let named_updates = StringMap.map to_named_updates dss in + Rrd.ds_update_named rrd ~new_domid:false timestamp named_updates | None -> debug "%s: Creating fresh RRD for SR uuid=%s" __FUNCTION__ sr_uuid ; - let rrd = create_fresh_rrd !use_min_max dss in + let dss_list = map_keys_to_list dss in + let rrd = create_fresh_rrd !use_min_max dss_list timestamp in Hashtbl.replace sr_rrds sr_uuid {rrd; dss; domid= 0} with _ -> log_backtrace () in let process_host dss = - let named_updates = StringMap.to_seq dss |> Seq.map to_named_updates in - let dss = 
StringMap.to_seq dss |> Seq.map snd |> List.of_seq in - match !host_rrd with | None -> debug "%s: Creating fresh RRD for localhost" __FUNCTION__ ; - let rrd = create_fresh_rrd true dss in + let dss_list = map_keys_to_list dss in + let rrd = create_fresh_rrd true dss_list timestamp in (* Always always create localhost rrds with min/max enabled *) host_rrd := Some {rrd; dss; domid= 0} | Some rrdi -> - rrdi.dss <- dss ; - let rrd = merge_new_dss rrdi.rrd dss in - host_rrd := Some {rrd; dss; domid= 0} ; - Rrd.ds_update_named rrd timestamp ~new_domid:false named_updates + let updated_dss, rrd = merge_new_dss rrdi dss in + host_rrd := Some {rrd; dss= updated_dss; domid= 0} ; + let named_updates = StringMap.map to_named_updates dss in + Rrd.ds_update_named rrd ~new_domid:false timestamp named_updates in + let process_dss ds_owner dss = match ds_owner with | Host -> diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index 2b2a6cca004..c0adc41dfc4 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -59,7 +59,8 @@ let push_sr_rrd (sr_uuid : string) (path : string) : unit = | Some rrd -> debug "Pushing RRD for SR uuid=%s locally" sr_uuid ; with_lock mutex (fun _ -> - Hashtbl.replace sr_rrds sr_uuid {rrd; dss= []; domid= 0} + Hashtbl.replace sr_rrds sr_uuid + {rrd; dss= Rrd.StringMap.empty; domid= 0} ) | None -> () @@ -256,7 +257,9 @@ module Deprecated = struct ) ) in - with_lock mutex (fun () -> host_rrd := Some {rrd; dss= []; domid= 0}) + with_lock mutex (fun () -> + host_rrd := Some {rrd; dss= Rrd.StringMap.empty; domid= 0} + ) with _ -> () end @@ -264,7 +267,9 @@ let push_rrd_local uuid domid : unit = try let rrd = get_rrd ~uuid in debug "Pushing RRD for VM uuid=%s locally" uuid ; - with_lock mutex (fun _ -> Hashtbl.replace vm_rrds uuid {rrd; dss= []; domid}) + with_lock mutex (fun _ -> + Hashtbl.replace vm_rrds uuid {rrd; dss= Rrd.StringMap.empty; domid} + ) with _ -> () let 
push_rrd_remote uuid member_address : unit = @@ -345,12 +350,11 @@ let fail_missing name = raise (Rrdd_error (Datasource_missing name)) name {ds_name}. The operation fails if rrdi does not contain any live datasource with the name {ds_name} *) let add_ds ~rrdi ~ds_name = - match List.find_opt (fun ds -> ds.Ds.ds_name = ds_name) rrdi.dss with + match Rrd.StringMap.find_opt ds_name rrdi.dss with | None -> fail_missing ds_name - | Some ds -> - let now = Unix.gettimeofday () in - Rrd.rrd_add_ds rrdi.rrd now + | Some (timestamp, ds) -> + Rrd.rrd_add_ds rrdi.rrd timestamp (Rrd.ds_create ds.ds_name ds.ds_type ~mrhb:300.0 Rrd.VT_Unknown) let add rrds uuid domid ds_name rrdi = @@ -391,7 +395,6 @@ let query_possible_dss rrdi = 'live' ds, then it is enabled if it exists in the set rrdi.rrd. If we have an 'archival' ds, then it is enabled if it is also an enabled 'live' ds, otherwise it is disabled. *) - let module SMap = Map.Make (String) in let module SSet = Set.Make (String) in let open Ds in let open Data_source in @@ -401,26 +404,22 @@ let query_possible_dss rrdi = let enabled_names = Rrd.ds_names rrdi.rrd |> SSet.of_list in let is_live_ds_enabled ds = SSet.mem ds.ds_name enabled_names in live_sources - |> List.to_seq - |> Seq.map (fun ds -> - ( ds.ds_name - , { - name= ds.ds_name - ; description= ds.ds_description - ; enabled= is_live_ds_enabled ds - ; standard= ds.ds_default - ; min= ds.ds_min - ; max= ds.ds_max - ; units= ds.ds_units - } - ) + |> Rrd.StringMap.map (fun (_timestamp, ds) -> + { + name= ds.ds_name + ; description= ds.ds_description + ; enabled= is_live_ds_enabled ds + ; standard= ds.ds_default + ; min= ds.ds_min + ; max= ds.ds_max + ; units= ds.ds_units + } ) - |> SMap.of_seq in let name_to_disabled_dss = archival_sources |> Seq.filter_map (fun ds -> - if SMap.mem ds.Rrd.ds_name name_to_live_dss then + if Rrd.StringMap.mem ds.Rrd.ds_name name_to_live_dss then None else Some @@ -437,10 +436,9 @@ let query_possible_dss rrdi = ) ) in - SMap.add_seq 
name_to_disabled_dss name_to_live_dss - |> SMap.to_seq - |> Seq.map snd - |> List.of_seq + Rrd.StringMap.add_seq name_to_disabled_dss name_to_live_dss + |> Rrd.StringMap.bindings + |> List.map snd let query_possible_host_dss () : Data_source.t list = with_lock mutex (fun () -> @@ -775,14 +773,14 @@ module Plugin = struct let payload = get_payload ~uid plugin in let timestamp = payload.Rrd_protocol.timestamp |> Int64.to_float in let dss = List.to_seq payload.Rrd_protocol.datasources in - (timestamp, dss) - with _ -> Seq.empty + Some (timestamp, dss) + with _ -> None in List.iter decr_skip_count plugins ; plugins |> List.to_seq |> Seq.filter (Fun.negate skip) - |> Seq.flat_map process_plugin + |> Seq.filter_map process_plugin end module Local = Make (struct diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli index d32540390c0..971cdc29860 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli @@ -69,7 +69,7 @@ module Plugin : sig val next_reading : string -> float - val read_stats : unit -> float * (Rrd.ds_owner * Ds.ds) Seq.t + val read_stats : unit -> (float * (Rrd.ds_owner * Ds.ds) Seq.t) Seq.t module Local : sig val register : diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml index 0dc1a82ce2f..4c1f85b140f 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml @@ -75,7 +75,14 @@ let use_min_max = ref false let mutex = Mutex.create () -type rrd_info = {rrd: Rrd.rrd; mutable dss: Ds.ds list; mutable domid: int} +type rrd_info = { + rrd: Rrd.rrd + ; mutable dss: (float * Ds.ds) Rrd.StringMap.t + (* Important: this must contain the entire list of datasources associated + with the RRD, even the ones disabled by default, as rrd_add_ds calls + can enable DSs at runtime *) + ; mutable domid: int +} (* RRDs *) let vm_rrds : (string, rrd_info) Hashtbl.t = Hashtbl.create 32 diff --git 
a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index c0ed92d05fc..94ed27765bd 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -514,15 +514,15 @@ let do_monitor_write xc writers = let dom0_stats = tagged_dom0_stats |> List.to_seq - |> Seq.flat_map (fun (name, (timestamp, dss)) -> - (timestamp, List.to_seq dss) - ) + |> Seq.map (fun (name, (timestamp, dss)) -> (timestamp, List.to_seq dss)) in let plugins_stats = Rrdd_server.Plugin.read_stats () in let stats = Seq.append plugins_stats dom0_stats in Rrdd_stats.print_snapshot () ; let uuid_domids = List.map (fun (_, u, i) -> (u, i)) domains in - Rrdd_monitor.update_rrds timestamp stats uuid_domids my_paused_vms ; + + (* stats are grouped per plugin, which provides its timestamp *) + Seq.iter (Rrdd_monitor.update_rrds uuid_domids my_paused_vms) stats ; Rrdd_libs.Constants.datasource_dump_file |> Rrdd_server.dump_host_dss_to_file ; @@ -541,10 +541,11 @@ let monitor_write_loop writers = Rrdd_shared.last_loop_end_time := Unix.gettimeofday () ) ; Thread.delay !Rrdd_shared.timeslice - with _ -> + with e -> debug "Monitor/write thread caught an exception. Pausing for 10s, \ - then restarting." ; + then restarting: %s" + (Printexc.to_string e) ; log_backtrace () ; Thread.delay 10. done From 0a6daf2d3cd74ddd8b0db9be8e4262a26279e329 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 18 Oct 2024 13:32:06 +0100 Subject: [PATCH 08/86] CA-391651: Rename 'new_domid' parameter to 'new_rrd' RRDs can have different owners, and new ones can show up without new domids being involved (SRs, VMs are also owner types). 
Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 12 ++++++------ ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 845f49aebc7..0ff4ac59f82 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -327,7 +327,7 @@ let rra_update rrd proc_pdp_st elapsed_pdp_st pdps = it's dependent on the time interval between updates. To be able to deal with gauge DSs, we multiply by the interval so that it cancels the subsequent divide by interval later on *) -let process_ds_value ds value interval new_domid = +let process_ds_value ds value interval new_rrd = if interval > ds.ds_mrhb then nan else @@ -342,7 +342,7 @@ let process_ds_value ds value interval new_domid = in let rate = - match (ds.ds_ty, new_domid) with + match (ds.ds_ty, new_rrd) with | Absolute, _ | Derive, true -> value_raw | Gauge, _ -> @@ -362,7 +362,7 @@ let process_ds_value ds value interval new_domid = ds.ds_last <- value ; rate -let ds_update rrd timestamp values transforms new_domid = +let ds_update rrd timestamp values transforms new_rrd = (* Interval is the time between this and the last update Currently ds_update is called with datasources that belong to a single @@ -400,7 +400,7 @@ let ds_update rrd timestamp values transforms new_domid = (* Calculate the values we're going to store based on the input data and the type of the DS *) let v2s = Array.mapi - (fun i value -> process_ds_value rrd.rrd_dss.(i) value interval new_domid) + (fun i value -> process_ds_value rrd.rrd_dss.(i) value interval new_rrd) values in (* Update the PDP accumulators up until the most recent PDP *) @@ -460,7 +460,7 @@ let ds_update rrd timestamp values transforms new_domid = (** Update the rrd with named values rather than just an ordered array Must be called with datasources coming from a single plugin, with [timestamp] and [uid] representing it *) 
-let ds_update_named rrd ~new_domid timestamp valuesandtransforms = +let ds_update_named rrd ~new_rrd timestamp valuesandtransforms = let get_value_and_transform {ds_name; _} = Option.value ~default:(VT_Unknown, Identity) (StringMap.find_opt ds_name valuesandtransforms) @@ -468,7 +468,7 @@ let ds_update_named rrd ~new_domid timestamp valuesandtransforms = let ds_values, ds_transforms = Array.split (Array.map get_value_and_transform rrd.rrd_dss) in - ds_update rrd timestamp ds_values ds_transforms new_domid + ds_update rrd timestamp ds_values ds_transforms new_rrd (** Get registered DS names *) let ds_names rrd = Array.to_list (Array.map (fun ds -> ds.ds_name) rrd.rrd_dss) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index 2a91ddafdad..ea2101c1c5e 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -148,8 +148,8 @@ let update_rrds uuid_domids paused_vms (timestamp, dss) = ignore changes from paused domains: *) let named_updates = StringMap.map to_named_updates dss in if not (StringSet.mem vm_uuid paused_vms) then - Rrd.ds_update_named rrd ~new_domid:(domid <> rrdi.domid) - timestamp named_updates + Rrd.ds_update_named rrd ~new_rrd:(domid <> rrdi.domid) timestamp + named_updates | None -> debug "%s: Creating fresh RRD for VM uuid=%s" __FUNCTION__ vm_uuid ; let dss_list = map_keys_to_list dss in @@ -168,7 +168,7 @@ let update_rrds uuid_domids paused_vms (timestamp, dss) = let updated_dss, rrd = merge_new_dss rrdi dss in Hashtbl.replace sr_rrds sr_uuid {rrd; dss= updated_dss; domid= 0} ; let named_updates = StringMap.map to_named_updates dss in - Rrd.ds_update_named rrd ~new_domid:false timestamp named_updates + Rrd.ds_update_named rrd ~new_rrd:false timestamp named_updates | None -> debug "%s: Creating fresh RRD for SR uuid=%s" __FUNCTION__ sr_uuid ; let dss_list = map_keys_to_list dss in @@ -188,7 +188,7 @@ let update_rrds uuid_domids paused_vms (timestamp, dss) = let 
updated_dss, rrd = merge_new_dss rrdi dss in host_rrd := Some {rrd; dss= updated_dss; domid= 0} ; let named_updates = StringMap.map to_named_updates dss in - Rrd.ds_update_named rrd ~new_domid:false timestamp named_updates + Rrd.ds_update_named rrd ~new_rrd:false timestamp named_updates in let process_dss ds_owner dss = From 8aa596d708bb9d8b21122c4d579832e2bf702012 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 18 Oct 2024 15:25:59 +0100 Subject: [PATCH 09/86] CA-391651 - rrd: Carry indices with datasources ds_update* functions previously relied on being called for the entirety of datasources of a certain RRD. Instead, operate on chunks of datasources provided by the same plugin. These chunks are not contiguous (more detailed explanation in the comments in the code), so indices to rrd_dss and the associated structures need to be carried with each datasource. Also turns the value*transform tuple into a type for more explicit accesses. Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 182 +++++++++++++++--------- ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 4 +- 2 files changed, 119 insertions(+), 67 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 0ff4ac59f82..176cd36289a 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -172,6 +172,13 @@ and rrd = { ; rrd_rras: rra array } +(** Parts of the datasources used in updating RRDs to minimize transferred data *) + +and ds_value_and_transform = { + value: ds_value_type + ; transform: ds_transform_function +} + let copy_cdp_prep x = {cdp_value= x.cdp_value; cdp_unknown_pdps= x.cdp_unknown_pdps} @@ -228,41 +235,42 @@ let get_times time timestep = (** Update the CDP value with a number (start_pdp_offset) of PDPs. *) let do_cfs rra start_pdp_offset pdps = - for i = 0 to Array.length pdps - 1 do - let cdp = rra.rra_cdps.(i) in - if Utils.isnan pdps.(i) then ( - (* CDP is an accumulator for the average. 
If we've got some unknowns, we need to - renormalize. ie, CDP contains \sum_{i=0}^j{ (1/n) x_i} where n is the number of - values we expect to have. If we have unknowns, we need to multiply the whole - thing by \frac{n_{old}}{n_{new}} *) - let olddiv = rra.rra_pdp_cnt - cdp.cdp_unknown_pdps in - let newdiv = olddiv - start_pdp_offset in - if newdiv > 0 then ( - cdp.cdp_value <- - cdp.cdp_value *. float_of_int olddiv /. float_of_int newdiv ; - cdp.cdp_unknown_pdps <- cdp.cdp_unknown_pdps + start_pdp_offset - ) - ) else - let cdpv = cdp.cdp_value in - cdp.cdp_value <- - ( match rra.rra_cf with - | CF_Average -> - cdpv - +. pdps.(i) - *. float_of_int start_pdp_offset - /. float_of_int rra.rra_pdp_cnt - | CF_Min -> - min cdpv pdps.(i) - | CF_Max -> - max cdpv pdps.(i) - | CF_Last -> - pdps.(i) + Array.iter + (fun (i, pdp) -> + let cdp = rra.rra_cdps.(i) in + if Utils.isnan pdp then ( + (* CDP is an accumulator for the average. If we've got some unknowns, we need to + renormalize. ie, CDP contains \sum_{i=0}^j{ (1/n) x_i} where n is the number of + values we expect to have. If we have unknowns, we need to multiply the whole + thing by \frac{n_{old}}{n_{new}} *) + let olddiv = rra.rra_pdp_cnt - cdp.cdp_unknown_pdps in + let newdiv = olddiv - start_pdp_offset in + if newdiv > 0 then ( + cdp.cdp_value <- + cdp.cdp_value *. float_of_int olddiv /. float_of_int newdiv ; + cdp.cdp_unknown_pdps <- cdp.cdp_unknown_pdps + start_pdp_offset ) - done + ) else + let cdpv = cdp.cdp_value in + cdp.cdp_value <- + ( match rra.rra_cf with + | CF_Average -> + cdpv + +. pdp + *. float_of_int start_pdp_offset + /. float_of_int rra.rra_pdp_cnt + | CF_Min -> + min cdpv pdp + | CF_Max -> + max cdpv pdp + | CF_Last -> + pdp + ) + ) + pdps (** Update the RRAs with a number of PDPs. 
*) let rra_update rrd proc_pdp_st elapsed_pdp_st pdps = - (* debug "rra_update";*) let updatefn rra = let start_pdp_offset = rra.rra_pdp_cnt @@ -287,37 +295,39 @@ let rra_update rrd proc_pdp_st elapsed_pdp_st pdps = repeated values is simply the value itself. *) let primaries = Array.map - (fun cdp -> + (fun (i, _) -> + let cdp = rra.rra_cdps.(i) in if cdp.cdp_unknown_pdps <= int_of_float (rra.rra_xff *. float_of_int rra.rra_pdp_cnt) then - cdp.cdp_value + (i, cdp.cdp_value) else - nan + (i, nan) ) - rra.rra_cdps + pdps in let secondaries = pdps in - let push i value = Fring.push rra.rra_data.(i) value in - Array.iteri push primaries ; + let push (i, value) = Fring.push rra.rra_data.(i) value in + Array.iter push primaries ; for _ = 1 to min (rra_step_cnt - 1) rra.rra_row_cnt do - Array.iteri push secondaries + Array.iter push secondaries done ; (* Reinitialise the CDP preparation area *) let new_start_pdp_offset = (elapsed_pdp_st - start_pdp_offset) mod rra.rra_pdp_cnt in - Array.iteri - (fun i cdp -> + Array.iter + (fun (i, _) -> + let cdp = rra.rra_cdps.(i) in let ds = rrd.rrd_dss.(i) in let cdp_init = cf_init_value rra.rra_cf ds in cdp.cdp_unknown_pdps <- 0 ; cdp.cdp_value <- cdp_init ) - rra.rra_cdps ; + pdps ; do_cfs rra new_start_pdp_offset pdps ) in @@ -362,7 +372,7 @@ let process_ds_value ds value interval new_rrd = ds.ds_last <- value ; rate -let ds_update rrd timestamp values transforms new_rrd = +let ds_update rrd timestamp valuesandtransforms new_rrd = (* Interval is the time between this and the last update Currently ds_update is called with datasources that belong to a single @@ -399,13 +409,16 @@ let ds_update rrd timestamp values transforms new_rrd = (* Calculate the values we're going to store based on the input data and the type of the DS *) let v2s = - Array.mapi - (fun i value -> process_ds_value rrd.rrd_dss.(i) value interval new_rrd) - values + Array.map + (fun (i, {value; _}) -> + let v = process_ds_value rrd.rrd_dss.(i) value interval 
new_rrd in + (i, v) + ) + valuesandtransforms in (* Update the PDP accumulators up until the most recent PDP *) - Array.iteri - (fun i value -> + Array.iter + (fun (i, value) -> let ds = rrd.rrd_dss.(i) in if Utils.isnan value then ds.ds_unknown_sec <- pre_int @@ -418,10 +431,11 @@ let ds_update rrd timestamp values transforms new_rrd = if elapsed_pdp_st > 0 then ( (* Calculate the PDPs for each DS *) let pdps = - Array.mapi - (fun i ds -> + Array.map + (fun (i, {transform; _}) -> + let ds = rrd.rrd_dss.(i) in if interval > ds.ds_mrhb then - nan + (i, nan) else let raw = ds.ds_value @@ -430,21 +444,21 @@ let ds_update rrd timestamp values transforms new_rrd = ) in (* Apply the transform after the raw value has been calculated *) - let raw = apply_transform_function transforms.(i) raw in + let raw = apply_transform_function transform raw in (* Make sure the values are not out of bounds after all the processing *) if raw < ds.ds_min || raw > ds.ds_max then - nan + (i, nan) else - raw + (i, raw) ) - rrd.rrd_dss + valuesandtransforms in rra_update rrd proc_pdp_st elapsed_pdp_st pdps ; (* Reset the PDP accumulators *) - Array.iteri - (fun i value -> + Array.iter + (fun (i, value) -> let ds = rrd.rrd_dss.(i) in if Utils.isnan value then ( ds.ds_value <- 0.0 ; @@ -461,14 +475,49 @@ let ds_update rrd timestamp values transforms new_rrd = Must be called with datasources coming from a single plugin, with [timestamp] and [uid] representing it *) let ds_update_named rrd ~new_rrd timestamp valuesandtransforms = - let get_value_and_transform {ds_name; _} = - Option.value ~default:(VT_Unknown, Identity) - (StringMap.find_opt ds_name valuesandtransforms) - in - let ds_values, ds_transforms = - Array.split (Array.map get_value_and_transform rrd.rrd_dss) + (* NOTE: + RRD data is stored in several arrays, with the same index pointing to the + same datasource's data in different arrays. This dependency is not always + obvious and doesn't apply to everything, i.e. 
'rrd_dss' stores datasources + one after another, but the 'rrd_rras' are actually sideways matrices, + with rrd_rras.(i).rra_data containing Frings for _all_ datasources, not + just the i-th datasource. So if one datasource is removed or adjusted, + one needs to update RRAs by iterating over all 'rrd_rras', not just + changing the i-th array. + + rrdd_monitor processes datasources per plugin (and then per owner), so the + list of 'valuesandtransforms' all come with a single timestamp. But these + datasources can be located all over the 'rrd_dss' array, not necessarily + consecutively. Non-exhaustive examples of why that can happen: + 1) initially disabled datasources can be enabled at runtime behind our + back, which adds them to the end of the rrd_dss array + 2) on toolstack restart, RRDs are restored from the filesystem, but the + new order of registration of plugins might not necessarily be the same + as the one before the restart (so they might be consecutive, but static + chunk indexes can't be assumed) + 3) rrd_monitor iterates over the hash table of registered plugins, which + means that plugins registered later can end up earlier in its ordering + + All this means that plugin's datasources can not be assumed to be + consecutive and each datasource should carry its index in rrd's arrays + with itself, they can't just be processed in chunks. 
+ + (This is due to how this used to be organized historically, with all of + the RRD's datasources processed at once with the server's timestamp, even + though they could have come from different plugins originally) + *) + let arr, _ = + Array.fold_left + (fun (arr, i) {ds_name; _} -> + match StringMap.find_opt ds_name valuesandtransforms with + | Some ds -> + (Array.append arr [|(i, ds)|], i + 1) + | None -> + (arr, i + 1) + ) + ([||], 0) rrd.rrd_dss in - ds_update rrd timestamp ds_values ds_transforms new_rrd + ds_update rrd timestamp arr new_rrd (** Get registered DS names *) let ds_names rrd = Array.to_list (Array.map (fun ds -> ds.ds_name) rrd.rrd_dss) @@ -528,10 +577,11 @@ let rrd_create dss rras timestep timestamp = rras } in - let values = Array.map (fun ds -> ds.ds_last) dss in - let transforms = Array.make (Array.length values) Identity in + let valuesandtransforms = + Array.mapi (fun i ds -> (i, {value= ds.ds_last; transform= Identity})) dss + in (* Use the standard update routines to initialise everything to correct values *) - ds_update rrd timestamp values transforms true ; + ds_update rrd timestamp valuesandtransforms true ; rrd (** Add in a new DS into a pre-existing RRD. 
Preserves data of all the other archives diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index ea2101c1c5e..2b7c6eda420 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -110,7 +110,9 @@ let update_rrds uuid_domids paused_vms (timestamp, dss) = OwnerMap.update owner merge all in let dss = Seq.fold_left consolidate OwnerMap.empty dss in - let to_named_updates (_, ds) = (ds.ds_value, ds.ds_pdp_transform_function) in + let to_named_updates (_, ds) = + {value= ds.ds_value; transform= ds.ds_pdp_transform_function} + in let map_keys_to_list dss = StringMap.bindings dss |> List.map snd |> List.map snd in From a897b53abca2c0dc1f71967a914bd10a60a6299e Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 25 Oct 2024 15:10:38 +0100 Subject: [PATCH 10/86] CA-391651: Use per-datasource last_updated timestamp during updating and archiving This ensures that the interval between the current timestamp and previous update is calculated correctly. Keep the same external interface through rrdd_http_handler by adding an 'internal' parameter to xml exporting functions. 
Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 40 +++++++++++++++++--------- ocaml/libs/xapi-rrd/unix/rrd_unix.ml | 7 +++-- ocaml/libs/xapi-rrd/unix/rrd_unix.mli | 13 ++++++--- ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml | 5 ++-- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 4 ++- ocaml/xcp-rrdd/bin/rrddump/rrddump.ml | 5 ++-- 6 files changed, 48 insertions(+), 26 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 176cd36289a..962353971ac 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -129,6 +129,7 @@ type ds = { ; mutable ds_value: float (** Current calculated rate of the PDP *) ; mutable ds_unknown_sec: float (** Number of seconds that are unknown in the current PDP *) + ; mutable ds_last_updated: float (** Last time this datasource was updated *) } [@@deriving rpc] @@ -202,6 +203,7 @@ let copy_ds x = ; ds_last= x.ds_last ; ds_value= x.ds_value ; ds_unknown_sec= x.ds_unknown_sec + ; ds_last_updated= x.ds_last_updated } let copy_rrd x = @@ -379,12 +381,14 @@ let ds_update rrd timestamp valuesandtransforms new_rrd = plugin, correspondingly they all have the same timestamp. Further refactoring is needed if timestamps per measurement are to be introduced. *) - let interval = timestamp -. rrd.last_updated in + let first_ds_index, _ = valuesandtransforms.(0) in + let last_updated = rrd.rrd_dss.(first_ds_index).ds_last_updated in + let interval = timestamp -. last_updated in (* Work around the clock going backwards *) let interval = if interval < 0. then 5. 
else interval in (* start time (st) and age of the last processed pdp and the currently occupied one *) - let proc_pdp_st, _proc_pdp_age = get_times rrd.last_updated rrd.timestep in + let proc_pdp_st, _proc_pdp_age = get_times last_updated rrd.timestep in let occu_pdp_st, occu_pdp_age = get_times timestamp rrd.timestep in (* The number of pdps that should result from this update *) @@ -412,6 +416,7 @@ let ds_update rrd timestamp valuesandtransforms new_rrd = Array.map (fun (i, {value; _}) -> let v = process_ds_value rrd.rrd_dss.(i) value interval new_rrd in + rrd.rrd_dss.(i).ds_last_updated <- timestamp ; (i, v) ) valuesandtransforms @@ -548,6 +553,7 @@ let ds_create name ty ?(min = neg_infinity) ?(max = infinity) ?(mrhb = infinity) ; ds_last= init ; ds_value= 0.0 ; ds_unknown_sec= 0.0 + ; ds_last_updated= 0.0 } let rrd_create dss rras timestep timestamp = @@ -706,11 +712,11 @@ let from_xml input = let read_header i = ignore (get_el "version" i) ; let step = get_el "step" i in - let last_update = get_el "lastupdate" i in + let last_update = float_of_string (get_el "lastupdate" i) in (step, last_update) in - let read_dss i = + let read_dss i rrd_last_update = let read_ds i = read_block "ds" (fun i -> @@ -722,6 +728,10 @@ let from_xml input = ignore (get_el "last_ds" i) ; let value = get_el "value" i in let unknown_sec = get_el "unknown_sec" i in + let last_updated = + try float_of_string (get_el "last_updated" i) + with _ -> rrd_last_update + in { ds_name= name ; ds_ty= @@ -742,11 +752,12 @@ let from_xml input = ; (* float_of_string "last_ds"; *) ds_value= float_of_string value ; ds_unknown_sec= float_of_string unknown_sec + ; ds_last_updated= last_updated } ) i in - let dss = read_all "ds" read_ds i [] in + let dss = Array.of_list (read_all "ds" read_ds i []) in dss in @@ -791,7 +802,7 @@ let from_xml input = let cols = try Array.length data.(0) with _ -> -1 in let db = Array.init cols (fun i -> - let ds = List.nth dss i in + let ds = dss.(i) in Fring.make rows nan 
ds.ds_min ds.ds_max ) in @@ -844,13 +855,13 @@ let from_xml input = read_block "rrd" (fun i -> let step, last_update = read_header i in - let dss = read_dss i in + let dss = read_dss i last_update in let rras = read_rras dss i in let rrd = { - last_updated= float_of_string last_update + last_updated= last_update ; timestep= Int64.of_string step - ; rrd_dss= Array.of_list dss + ; rrd_dss= dss ; rrd_rras= Array.of_list rras } in @@ -884,7 +895,7 @@ let from_xml input = ) input -let xml_to_output rrd output = +let xml_to_output internal rrd output = (* We use an output channel for Xmlm-compat buffered output. Provided we flush at the end we should be safe. *) let tag n fn output = @@ -906,7 +917,9 @@ let xml_to_output rrd output = tag "value" (data (Utils.f_to_s ds.ds_value)) output ; tag "unknown_sec" (data (Printf.sprintf "%d" (int_of_float ds.ds_unknown_sec))) - output + output ; + if internal then + tag "last_updated" (data (Utils.f_to_s ds.ds_last_updated)) output ) output in @@ -968,9 +981,7 @@ let xml_to_output rrd output = (fun output -> tag "version" (data "0003") output ; tag "step" (data (Int64.to_string rrd.timestep)) output ; - tag "lastupdate" - (data (Printf.sprintf "%Ld" (Int64.of_float rrd.last_updated))) - output ; + tag "lastupdate" (data (Utils.f_to_s rrd.last_updated)) output ; do_dss rrd.rrd_dss output ; do_rras rrd.rrd_rras output ) @@ -1002,6 +1013,7 @@ module Json = struct ; ("last_ds", string (ds_value_to_string ds.ds_last)) ; ("value", float ds.ds_value) ; ("unknown_sec", float ds.ds_unknown_sec) + ; ("last_updated", float ds.ds_last_updated) ] let cdp x = diff --git a/ocaml/libs/xapi-rrd/unix/rrd_unix.ml b/ocaml/libs/xapi-rrd/unix/rrd_unix.ml index da91c99fd65..745361fb31d 100644 --- a/ocaml/libs/xapi-rrd/unix/rrd_unix.ml +++ b/ocaml/libs/xapi-rrd/unix/rrd_unix.ml @@ -30,12 +30,13 @@ let with_out_channel_output fd f = ) (fun () -> Out_channel.close_noerr oc) -let xml_to_fd rrd fd = with_out_channel_output fd (Rrd.xml_to_output rrd) +let 
xml_to_fd internal rrd fd = + with_out_channel_output fd (Rrd.xml_to_output internal rrd) let json_to_fd rrd fd = let payload = Rrd.json_to_string rrd |> Bytes.unsafe_of_string in let len = Bytes.length payload in Unix.write fd payload 0 len |> ignore -let to_fd ?(json = false) rrd fd = - (if json then json_to_fd else xml_to_fd) rrd fd +let to_fd ?(json = false) ?(internal = false) rrd fd = + (if json then json_to_fd else xml_to_fd internal) rrd fd diff --git a/ocaml/libs/xapi-rrd/unix/rrd_unix.mli b/ocaml/libs/xapi-rrd/unix/rrd_unix.mli index bddb4553413..eb06cde2119 100644 --- a/ocaml/libs/xapi-rrd/unix/rrd_unix.mli +++ b/ocaml/libs/xapi-rrd/unix/rrd_unix.mli @@ -1,11 +1,11 @@ (* Copyright (C) Citrix Systems Inc. - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; version 2.1 only. with the special exception on linking described in file LICENSE. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -15,5 +15,10 @@ This module provides Unix tools for dealing with RRDs *) -val to_fd : ?json:bool -> Rrd.rrd -> Unix.file_descr -> unit -(** Serialize the rrd to xml / json and offer it through a file descriptor *) +val to_fd : ?json:bool -> ?internal:bool -> Rrd.rrd -> Unix.file_descr -> unit +(** Serialize the rrd to xml / json and offer it through a file descriptor. + + If [internal] is true (false is the default), then the output is not + guaranteed to be compatible with external tools, and can only be parsed + by xcp-rrdd. 
+ *) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml index 4c1f85b140f..08807e39b74 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml @@ -137,7 +137,7 @@ let send_rrd ?(session_id : string option) let open Xmlrpc_client in with_transport transport (with_http request (fun (_response, fd) -> - try Rrd_unix.to_fd rrd fd with _ -> log_backtrace () + try Rrd_unix.to_fd ~internal:true rrd fd with _ -> log_backtrace () ) ) ; debug "Sending RRD complete." @@ -161,7 +161,8 @@ let archive_rrd_internal ?(transport = None) ~uuid ~rrd () = 0o755 ; let base_filename = Rrdd_libs.Constants.rrd_location ^ "/" ^ uuid in Xapi_stdext_unix.Unixext.atomic_write_to_file (base_filename ^ ".gz") - 0o644 (fun fd -> Gzip.Default.compress fd (Rrd_unix.to_fd rrd) + 0o644 (fun fd -> + Gzip.Default.compress fd (Rrd_unix.to_fd ~internal:true rrd) ) ; (* If there's an uncompressed one hanging around, remove it. *) Xapi_stdext_unix.Unixext.unlink_safe base_filename diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index 94ed27765bd..50000b2bee0 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -514,7 +514,9 @@ let do_monitor_write xc writers = let dom0_stats = tagged_dom0_stats |> List.to_seq - |> Seq.map (fun (name, (timestamp, dss)) -> (timestamp, List.to_seq dss)) + |> Seq.map (fun (_name, (timestamp, dss)) -> + (timestamp, List.to_seq dss) + ) in let plugins_stats = Rrdd_server.Plugin.read_stats () in let stats = Seq.append plugins_stats dom0_stats in diff --git a/ocaml/xcp-rrdd/bin/rrddump/rrddump.ml b/ocaml/xcp-rrdd/bin/rrddump/rrddump.ml index cd0f1675f0d..8d759fed20b 100644 --- a/ocaml/xcp-rrdd/bin/rrddump/rrddump.ml +++ b/ocaml/xcp-rrdd/bin/rrddump/rrddump.ml @@ -32,10 +32,11 @@ let text_export rrd = Int64.sub last_cdp_time (Int64.mul (Int64.of_int i) rra_timestep) in for j = 0 to Array.length rrd.rrd_dss - 1 do - 
Printf.printf "Doing ds: %s\n" rrd.rrd_dss.(j).ds_name ; + let ds = rrd.rrd_dss.(j) in + Printf.printf "Doing ds: %s\n" ds.ds_name ; let oc = open_out - (Printf.sprintf "rrd_data_%s_%s_%Ld.dat" rrd.rrd_dss.(j).ds_name + (Printf.sprintf "rrd_data_%s_%s_%Ld.dat" ds.ds_name (cf_type_to_string rra.rra_cf) (Int64.mul (Int64.of_int (rra.rra_pdp_cnt * rra.rra_row_cnt)) From 7052ddd926b00c2299fe5fd8e0ab3104bc54cc89 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Mon, 21 Oct 2024 08:59:27 +0100 Subject: [PATCH 11/86] CA-391651: rrd - don't iterate over lists needlessly Add an "unsafe" version of rrd_add_ds to avoid checking if the datasource already exists twice. Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 52 +++++++++++++------------ ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 4 +- 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 962353971ac..6f264627066 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -590,6 +590,33 @@ let rrd_create dss rras timestep timestamp = ds_update rrd timestamp valuesandtransforms true ; rrd +(** Add the datasource even if it exists in the RRD already. *) +let rrd_add_ds_unsafe rrd timestamp newds = + let npdps = Int64.of_float timestamp /// rrd.timestep in + { + rrd with + rrd_dss= Array.append rrd.rrd_dss [|newds|] + ; rrd_rras= + Array.map + (fun rra -> + let cdp_init = cf_init_value rra.rra_cf newds in + let fring = + Fring.make rra.rra_row_cnt nan newds.ds_min newds.ds_max + in + let nunknowns = + Int64.to_int (Int64.rem npdps (Int64.of_int rra.rra_pdp_cnt)) + in + { + rra with + rra_data= Array.append rra.rra_data [|fring|] + ; rra_cdps= + Array.append rra.rra_cdps + [|{cdp_value= cdp_init; cdp_unknown_pdps= nunknowns}|] + } + ) + rrd.rrd_rras + } + (** Add in a new DS into a pre-existing RRD. Preserves data of all the other archives and fills the new one full of NaNs. 
Note that this doesn't fill in the CDP values correctly at the moment! @@ -599,30 +626,7 @@ let rrd_add_ds rrd timestamp newds = if List.mem newds.ds_name (ds_names rrd) then rrd else - let npdps = Int64.of_float timestamp /// rrd.timestep in - { - rrd with - rrd_dss= Array.append rrd.rrd_dss [|newds|] - ; rrd_rras= - Array.map - (fun rra -> - let cdp_init = cf_init_value rra.rra_cf newds in - let fring = - Fring.make rra.rra_row_cnt nan newds.ds_min newds.ds_max - in - let nunknowns = - Int64.to_int (Int64.rem npdps (Int64.of_int rra.rra_pdp_cnt)) - in - { - rra with - rra_data= Array.append rra.rra_data [|fring|] - ; rra_cdps= - Array.append rra.rra_cdps - [|{cdp_value= cdp_init; cdp_unknown_pdps= nunknowns}|] - } - ) - rrd.rrd_rras - } + rrd_add_ds_unsafe rrd timestamp newds (** Remove the named DS from an RRD. Removes all of the data associated with it, too *) let rrd_remove_ds rrd ds_name = diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index 2b7c6eda420..df4fa798785 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -68,7 +68,9 @@ let merge_new_dss rrdi dss = ( updated_dss , StringMap.fold (fun _key (timestamp, ds) rrd -> - rrd_add_ds rrd timestamp + (* SAFETY: verified that these datasources aren't enabled above + already, in a more efficient way than RRD does it *) + rrd_add_ds_unsafe rrd timestamp (Rrd.ds_create ds.ds_name ds.Ds.ds_type ~mrhb:300.0 Rrd.VT_Unknown) ) new_dss rrdi.rrd From 65d16f5d4dc77e3a03748a077476f4cc56259800 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 23 Oct 2024 13:59:16 +0100 Subject: [PATCH 12/86] CA-391651: rrdd_monitor - Handle missing datasources by resetting them explicitly If RRD's datasource disappears and is not reported by the plugin (or collected by xcp-rrdd), then it's not going to be refreshed, keeping its old value and timestamp. 
Previously, this was handled nicely because the whole RRD was updated at once, and updates were defaulted to VT_Unknown, Identity before being assigned actually collected values. After RRD collection was changed to process datasources in chunks per source, we need to explicitly find the ones that weren't updated on this iteration, and explicitly reset them. (This is not just a theoretical exercise, it can happen when a VIF gets unplugged or destroyed, for example, with its stats not being reset and continuing to display old values) Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 267 ++++++++++++++++++------ ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 2 +- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli | 2 +- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 6 +- 4 files changed, 209 insertions(+), 68 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index df4fa798785..c46a33d6f96 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -91,27 +91,67 @@ module OwnerMap = Map.Make (struct String.compare a b end) +(** Converts all the updates collected from various sources in the form of + (uid * timestamp * (ds_owner * ds) Seq.t) Seq.t + into two OwnerMaps, one mapping an owner to a (flattened) Set of its + datasources (used to determine missing datasources), and another mapping + the owner to a Map of datasources grouped by plugin (used during updates) + *) +let convert_to_owner_map dss = + let consolidate (per_owner_map, per_plugin_map) (source_uid, timestamp, dss) = + let add_to_plugin (per_owner_map, per_plugin_map) (owner, ds) = + let add_dsts_to = StringMap.add ds.ds_name (timestamp, ds) in + let add_ds_to = StringSet.add ds.ds_name in + let merge = function + | None -> + Some (add_ds_to StringSet.empty) + | Some dss -> + Some (add_ds_to dss) + in + let per_owner_map = OwnerMap.update owner merge per_owner_map in + let add_plugin_ds_to = + 
StringMap.update source_uid (function + | None -> + Some (timestamp, add_dsts_to StringMap.empty) + | Some (timestamp, dss) -> + Some (timestamp, add_dsts_to dss) + ) + in + let plugin_merge = function + | None -> + Some (add_plugin_ds_to StringMap.empty) + | Some plugins_dss -> + Some (add_plugin_ds_to plugins_dss) + in + let per_plugin_map : + (float * (float * ds) StringMap.t) StringMap.t OwnerMap.t = + OwnerMap.update owner plugin_merge per_plugin_map + in + (per_owner_map, per_plugin_map) + in + Seq.fold_left add_to_plugin (per_owner_map, per_plugin_map) dss + in + let per_owner_map, per_plugin_map = + Seq.fold_left consolidate (OwnerMap.empty, OwnerMap.empty) dss + in + (per_owner_map, per_plugin_map) + (** Updates all of the hosts rrds. We are passed a list of uuids that is used as the primary source for which VMs are resident on us. When a new uuid turns up that we haven't got an RRD for in our hashtbl, we create a new one. When a uuid for which we have an RRD for doesn't appear to have any stats this update, we assume that the domain has gone and we stream the RRD to the master. We also have a list of the currently rebooting VMs to ensure we - don't accidentally archive the RRD. *) -let update_rrds uuid_domids paused_vms (timestamp, dss) = + don't accidentally archive the RRD. + Also resets the value of datasources that are enabled in the RRD, but + weren't updated on this refresh cycle. 
+ *) +let update_rrds uuid_domids paused_vms plugins_dss = let uuid_domids = List.to_seq uuid_domids |> StringMap.of_seq in let paused_vms = List.to_seq paused_vms |> StringSet.of_seq in - let consolidate all (owner, ds) = - let add_ds_to = StringMap.add ds.ds_name (timestamp, ds) in - let merge = function - | None -> - Some (add_ds_to StringMap.empty) - | Some dss -> - Some (add_ds_to dss) - in - OwnerMap.update owner merge all + let per_owner_flattened_map, per_plugin_map = + convert_to_owner_map plugins_dss in - let dss = Seq.fold_left consolidate OwnerMap.empty dss in let to_named_updates (_, ds) = {value= ds.ds_value; transform= ds.ds_pdp_transform_function} in @@ -119,6 +159,37 @@ let update_rrds uuid_domids paused_vms (timestamp, dss) = StringMap.bindings dss |> List.map snd |> List.map snd in + (* Determine datasources missing from this batch for this RRD, reset + them to default Unknown values *) + let handle_missing_stats rrd dss = + let named_update = {value= VT_Unknown; transform= Identity} in + (* Check which of the enabled data sources are missing from the update batch *) + let missing_dss = + Array.fold_left + (fun missing (ds : Rrd.ds) -> + if StringSet.mem ds.ds_name dss then + missing + else + StringMap.add ds.ds_name named_update missing + ) + StringMap.empty rrd.rrd_dss + in + missing_dss + in + let reset_missing_data = + (* NOTE: This processes already added and enabled datasources that have + not been provided a value on this refresh cycle, so no data sources need + to be added to RRDs *) + (* NOTE: new_rrd is always false, since it's only 'true' currently if a VM's + domid does not correspond to rrdi.domid, which would already have been + fixed by replacing rrdi.domid with the current domid when updating with + provided datasources before this function is called *) + let missing_data_timestamp = Unix.gettimeofday () in + fun rrd dss -> + if not (StringMap.is_empty dss) then + Rrd.ds_update_named rrd ~new_rrd:false missing_data_timestamp dss + 
in + (* Here we do the synchronising between the dom0 view of the world and our Hashtbl. By the end of this execute block, the Hashtbl correctly represents the world *) @@ -138,71 +209,141 @@ let update_rrds uuid_domids paused_vms (timestamp, dss) = "Clock just went backwards by %.0f seconds: RRD data may now be \ unreliable" by_how_much ; - let process_vm vm_uuid (dss : (float * Ds.ds) Rrd.StringMap.t) = + let process_vm vm_uuid + (plugins_dss : (float * (float * Ds.ds) Rrd.StringMap.t) StringMap.t) + available_dss = match StringMap.find_opt vm_uuid uuid_domids with - | Some domid -> ( - (* First, potentially update the rrd with any new default dss *) - match Hashtbl.find_opt vm_rrds vm_uuid with - | Some rrdi -> - let updated_dss, rrd = merge_new_dss rrdi dss in - Hashtbl.replace vm_rrds vm_uuid {rrd; dss= updated_dss; domid} ; - (* CA-34383: Memory updates from paused domains serve no useful - purpose. During a migrate such updates can also cause undesirable - discontinuities in the observed value of memory_actual. Hence, we - ignore changes from paused domains: *) - let named_updates = StringMap.map to_named_updates dss in - if not (StringSet.mem vm_uuid paused_vms) then - Rrd.ds_update_named rrd ~new_rrd:(domid <> rrdi.domid) timestamp - named_updates - | None -> - debug "%s: Creating fresh RRD for VM uuid=%s" __FUNCTION__ vm_uuid ; - let dss_list = map_keys_to_list dss in - let rrd = create_fresh_rrd !use_min_max dss_list timestamp in - Hashtbl.replace vm_rrds vm_uuid {rrd; dss; domid} - ) + | Some domid -> + (* Deal with datasources per plugin *) + let vm_rrdi = Hashtbl.find_opt vm_rrds vm_uuid in + let vm_rrdi = + (* SAFETY: Entries in String/OwnerMap are only present if + they contain a list of datasources, and thus the rrd is + definitely Some after .fold above. 
+ This applies to all such constructs in process_* functions *) + Option.get + (StringMap.fold + (fun _uid (timestamp, dss) vm_rrd -> + (* First, potentially update the rrd with any new default dss *) + match vm_rrd with + | Some rrdi -> + let updated_dss, rrd = merge_new_dss rrdi dss in + (* CA-34383: Memory updates from paused domains serve no useful + purpose. During a migrate such updates can also cause undesirable + discontinuities in the observed value of memory_actual. Hence, we + ignore changes from paused domains: *) + ( if not (StringSet.mem vm_uuid paused_vms) then + let named_updates = + StringMap.map to_named_updates dss + in + Rrd.ds_update_named rrd + ~new_rrd:(domid <> rrdi.domid) timestamp + named_updates + ) ; + Some {rrd; dss= updated_dss; domid} + | None -> + debug "%s: Creating fresh RRD for VM uuid=%s" + __FUNCTION__ vm_uuid ; + let dss_list = map_keys_to_list dss in + let rrd = + create_fresh_rrd !use_min_max dss_list timestamp + in + Some {rrd; dss; domid} + ) + plugins_dss vm_rrdi + ) + in + let missing_updates = + handle_missing_stats vm_rrdi.rrd available_dss + in + reset_missing_data vm_rrdi.rrd missing_updates ; + + Hashtbl.replace vm_rrds vm_uuid vm_rrdi | None -> info "%s: VM uuid=%s is not resident in this host, ignoring rrds" __FUNCTION__ vm_uuid in - let process_sr sr_uuid dss = + let process_sr sr_uuid plugins_dss available_dss = try - (* First, potentially update the rrd with any new default dss *) - match Hashtbl.find_opt sr_rrds sr_uuid with - | Some rrdi -> - let updated_dss, rrd = merge_new_dss rrdi dss in - Hashtbl.replace sr_rrds sr_uuid {rrd; dss= updated_dss; domid= 0} ; - let named_updates = StringMap.map to_named_updates dss in - Rrd.ds_update_named rrd ~new_rrd:false timestamp named_updates - | None -> - debug "%s: Creating fresh RRD for SR uuid=%s" __FUNCTION__ sr_uuid ; - let dss_list = map_keys_to_list dss in - let rrd = create_fresh_rrd !use_min_max dss_list timestamp in - Hashtbl.replace sr_rrds sr_uuid {rrd; dss; 
domid= 0} + let sr_rrdi = Hashtbl.find_opt sr_rrds sr_uuid in + (* Deal with datasources per plugin *) + let sr_rrdi = + Option.get + (StringMap.fold + (fun _uid (timestamp, dss) sr_rrdi -> + (* First, potentially update the rrd with any new default dss *) + match sr_rrdi with + | Some rrdi -> + let updated_dss, rrd = merge_new_dss rrdi dss in + let named_updates = StringMap.map to_named_updates dss in + Rrd.ds_update_named rrd ~new_rrd:false timestamp + named_updates ; + Some {rrd; dss= updated_dss; domid= 0} + | None -> + debug "%s: Creating fresh RRD for SR uuid=%s" + __FUNCTION__ sr_uuid ; + let dss_list = map_keys_to_list dss in + let rrd = + create_fresh_rrd !use_min_max dss_list timestamp + in + Some {rrd; dss; domid= 0} + ) + plugins_dss sr_rrdi + ) + in + let missing_updates = + handle_missing_stats sr_rrdi.rrd available_dss + in + reset_missing_data sr_rrdi.rrd missing_updates ; + + Hashtbl.replace sr_rrds sr_uuid sr_rrdi with _ -> log_backtrace () in - let process_host dss = - match !host_rrd with - | None -> - debug "%s: Creating fresh RRD for localhost" __FUNCTION__ ; - let dss_list = map_keys_to_list dss in - let rrd = create_fresh_rrd true dss_list timestamp in - (* Always always create localhost rrds with min/max enabled *) - host_rrd := Some {rrd; dss; domid= 0} - | Some rrdi -> - let updated_dss, rrd = merge_new_dss rrdi dss in - host_rrd := Some {rrd; dss= updated_dss; domid= 0} ; - let named_updates = StringMap.map to_named_updates dss in - Rrd.ds_update_named rrd ~new_rrd:false timestamp named_updates + let process_host plugins_dss available_dss = + let host_rrdi = !host_rrd in + (* Deal with datasources per plugin *) + let host_rrdi = + Option.get + (StringMap.fold + (fun _uid (timestamp, dss) host_rrdi -> + match host_rrdi with + | None -> + debug "%s: Creating fresh RRD for localhost" __FUNCTION__ ; + let dss_list = map_keys_to_list dss in + let rrd = create_fresh_rrd true dss_list timestamp in + (* Always always create localhost rrds with 
min/max enabled *) + Some {rrd; dss; domid= 0} + | Some rrdi -> + let updated_dss, rrd = merge_new_dss rrdi dss in + let named_updates = StringMap.map to_named_updates dss in + Rrd.ds_update_named rrd ~new_rrd:false timestamp + named_updates ; + Some {rrd; dss= updated_dss; domid= 0} + ) + plugins_dss host_rrdi + ) + in + let missing_updates = + handle_missing_stats host_rrdi.rrd available_dss + in + reset_missing_data host_rrdi.rrd missing_updates ; + + host_rrd := Some host_rrdi in let process_dss ds_owner dss = + (* Flattened list of all datasources for this RRD owner, used to + determine which datasources have gone missing. Not to be used in + actual update process, since these mix up datasources with different + timestamps *) + let available_dss = OwnerMap.find ds_owner per_owner_flattened_map in match ds_owner with | Host -> - process_host dss + process_host dss available_dss | VM uuid -> - process_vm uuid dss + process_vm uuid dss available_dss | SR uuid -> - process_sr uuid dss + process_sr uuid dss available_dss in - OwnerMap.iter process_dss dss + OwnerMap.iter process_dss per_plugin_map ) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index c0adc41dfc4..eb206455684 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -773,7 +773,7 @@ module Plugin = struct let payload = get_payload ~uid plugin in let timestamp = payload.Rrd_protocol.timestamp |> Int64.to_float in let dss = List.to_seq payload.Rrd_protocol.datasources in - Some (timestamp, dss) + Some (P.string_of_uid ~uid, timestamp, dss) with _ -> None in List.iter decr_skip_count plugins ; diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli index 971cdc29860..000c53de121 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli @@ -69,7 +69,7 @@ module Plugin : sig val next_reading : string -> float - val read_stats : unit -> 
(float * (Rrd.ds_owner * Ds.ds) Seq.t) Seq.t + val read_stats : unit -> (string * float * (Rrd.ds_owner * Ds.ds) Seq.t) Seq.t module Local : sig val register : diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index 50000b2bee0..cd19875ee42 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -514,8 +514,8 @@ let do_monitor_write xc writers = let dom0_stats = tagged_dom0_stats |> List.to_seq - |> Seq.map (fun (_name, (timestamp, dss)) -> - (timestamp, List.to_seq dss) + |> Seq.map (fun (name, (timestamp, dss)) -> + (name, timestamp, List.to_seq dss) ) in let plugins_stats = Rrdd_server.Plugin.read_stats () in @@ -524,7 +524,7 @@ let do_monitor_write xc writers = let uuid_domids = List.map (fun (_, u, i) -> (u, i)) domains in (* stats are grouped per plugin, which provides its timestamp *) - Seq.iter (Rrdd_monitor.update_rrds uuid_domids my_paused_vms) stats ; + Rrdd_monitor.update_rrds uuid_domids my_paused_vms stats ; Rrdd_libs.Constants.datasource_dump_file |> Rrdd_server.dump_host_dss_to_file ; From 8d7c057b730e591a2fd54a5e96ded14a814c04b5 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 25 Oct 2024 12:47:37 +0100 Subject: [PATCH 13/86] CA-391651 - rrd protocol: Stop truncating timestamps to seconds Instead of writing Int64 (truncated from timestamp floats) into the memory-mapped files, keep the precision of the timestamp. 
Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 2 +- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 1 - ocaml/xcp-rrdd/bin/transport-rw/reader_commands.ml | 2 +- ocaml/xcp-rrdd/bin/transport-rw/writer_commands.ml | 4 ++-- ocaml/xcp-rrdd/lib/plugin/rrdd_plugin.mli | 2 +- ocaml/xcp-rrdd/lib/plugin/utils.ml | 2 +- ocaml/xcp-rrdd/lib/plugin/utils.mli | 2 +- ocaml/xcp-rrdd/lib/transport/base/rrd_json.ml | 2 +- ocaml/xcp-rrdd/lib/transport/base/rrd_json.mli | 2 +- ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.ml | 2 +- ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.mli | 2 +- ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v1.ml | 4 +--- ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v2.ml | 6 ++++-- ocaml/xcp-rrdd/test/transport/test_common.ml | 5 ++--- ocaml/xcp-rrdd/test/transport/test_scale.ml | 2 +- ocaml/xcp-rrdd/test/transport/test_unit.ml | 2 +- 16 files changed, 20 insertions(+), 22 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index eb206455684..f8f3c99bf8b 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -771,7 +771,7 @@ module Plugin = struct let process_plugin (uid, plugin) = try let payload = get_payload ~uid plugin in - let timestamp = payload.Rrd_protocol.timestamp |> Int64.to_float in + let timestamp = payload.Rrd_protocol.timestamp in let dss = List.to_seq payload.Rrd_protocol.datasources in Some (P.string_of_uid ~uid, timestamp, dss) with _ -> None diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index cd19875ee42..455723633bb 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -501,7 +501,6 @@ let write_dom0_stats writers tagged_dss = this name" name | Some (timestamp, dss) -> - let timestamp = Int64.of_float timestamp in writer.Rrd_writer.write_payload {timestamp; datasources= dss} in List.iter write_dss writers diff --git 
a/ocaml/xcp-rrdd/bin/transport-rw/reader_commands.ml b/ocaml/xcp-rrdd/bin/transport-rw/reader_commands.ml index 8736bca234b..c15bb594231 100644 --- a/ocaml/xcp-rrdd/bin/transport-rw/reader_commands.ml +++ b/ocaml/xcp-rrdd/bin/transport-rw/reader_commands.ml @@ -47,7 +47,7 @@ let string_of_data_source owner ds = let interpret_payload payload = print_endline "------------ Metadata ------------" ; - Printf.printf "timestamp = %Ld\n%!" payload.timestamp ; + Printf.printf "timestamp = %f\n%!" payload.timestamp ; print_endline "---------- Data sources ----------" ; List.iter (fun (owner, ds) -> diff --git a/ocaml/xcp-rrdd/bin/transport-rw/writer_commands.ml b/ocaml/xcp-rrdd/bin/transport-rw/writer_commands.ml index c3061349ccf..4e3ac899e1f 100644 --- a/ocaml/xcp-rrdd/bin/transport-rw/writer_commands.ml +++ b/ocaml/xcp-rrdd/bin/transport-rw/writer_commands.ml @@ -14,7 +14,7 @@ open Rrd_protocol -let now () = Int64.of_float (Unix.gettimeofday ()) +let now () = Unix.gettimeofday () let get_extra_data_sources_flag = let counter = ref 0 in @@ -27,7 +27,7 @@ let generate_time_data_source () = let current_time = now () in ( Rrd.Host , Ds.ds_make ~name:"current_time" ~description:"The current time" - ~value:(Rrd.VT_Int64 current_time) ~ty:Rrd.Gauge ~default:true + ~value:(Rrd.VT_Float current_time) ~ty:Rrd.Gauge ~default:true ~units:"seconds" () ) diff --git a/ocaml/xcp-rrdd/lib/plugin/rrdd_plugin.mli b/ocaml/xcp-rrdd/lib/plugin/rrdd_plugin.mli index e4eaaeecd2c..a237868c873 100644 --- a/ocaml/xcp-rrdd/lib/plugin/rrdd_plugin.mli +++ b/ocaml/xcp-rrdd/lib/plugin/rrdd_plugin.mli @@ -16,7 +16,7 @@ (** Utility functions useful for rrdd plugins. *) module Utils : sig - val now : unit -> int64 + val now : unit -> float (** Return the current unix epoch as an int64. 
*) val cut : string -> string list diff --git a/ocaml/xcp-rrdd/lib/plugin/utils.ml b/ocaml/xcp-rrdd/lib/plugin/utils.ml index 5744fa5578b..d647c25fd67 100644 --- a/ocaml/xcp-rrdd/lib/plugin/utils.ml +++ b/ocaml/xcp-rrdd/lib/plugin/utils.ml @@ -12,7 +12,7 @@ * GNU Lesser General Public License for more details. *) -let now () = Int64.of_float (Unix.gettimeofday ()) +let now () = Unix.gettimeofday () let cut str = Astring.String.fields ~empty:false ~is_sep:(fun c -> c = ' ' || c = '\t') str diff --git a/ocaml/xcp-rrdd/lib/plugin/utils.mli b/ocaml/xcp-rrdd/lib/plugin/utils.mli index 7f797b2232c..c13901ff5fe 100644 --- a/ocaml/xcp-rrdd/lib/plugin/utils.mli +++ b/ocaml/xcp-rrdd/lib/plugin/utils.mli @@ -13,7 +13,7 @@ *) (** Utility functions useful for rrdd plugins. *) -val now : unit -> int64 +val now : unit -> float (** Return the current unix epoch as an int64. *) val cut : string -> string list diff --git a/ocaml/xcp-rrdd/lib/transport/base/rrd_json.ml b/ocaml/xcp-rrdd/lib/transport/base/rrd_json.ml index 15f95e3de46..f34bad05747 100644 --- a/ocaml/xcp-rrdd/lib/transport/base/rrd_json.ml +++ b/ocaml/xcp-rrdd/lib/transport/base/rrd_json.ml @@ -88,7 +88,7 @@ let dss_to_json ~header timestamp dss = let payload = record [ - ("timestamp", `Float (Int64.to_float timestamp)) + ("timestamp", `Float timestamp) ; ("datasources", record @@ List.map ds_to_json dss) ] in diff --git a/ocaml/xcp-rrdd/lib/transport/base/rrd_json.mli b/ocaml/xcp-rrdd/lib/transport/base/rrd_json.mli index 16559121168..27d0e3b4aba 100644 --- a/ocaml/xcp-rrdd/lib/transport/base/rrd_json.mli +++ b/ocaml/xcp-rrdd/lib/transport/base/rrd_json.mli @@ -13,6 +13,6 @@ *) val json_of_dss : - header:string -> int64 -> (Rrd.ds_owner * Ds.ds) list -> string + header:string -> float -> (Rrd.ds_owner * Ds.ds) list -> string val json_metadata_of_dss : (Rrd.ds_owner * Ds.ds) list -> string diff --git a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.ml b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.ml index 
310a9442392..247f0691e2f 100644 --- a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.ml +++ b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.ml @@ -26,7 +26,7 @@ exception Payload_too_large exception Read_error -type payload = {timestamp: int64; datasources: (Rrd.ds_owner * Ds.ds) list} +type payload = {timestamp: float; datasources: (Rrd.ds_owner * Ds.ds) list} type protocol = { make_payload_reader: unit -> Cstruct.t -> payload diff --git a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.mli b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.mli index 310a9442392..247f0691e2f 100644 --- a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.mli +++ b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol.mli @@ -26,7 +26,7 @@ exception Payload_too_large exception Read_error -type payload = {timestamp: int64; datasources: (Rrd.ds_owner * Ds.ds) list} +type payload = {timestamp: float; datasources: (Rrd.ds_owner * Ds.ds) list} type protocol = { make_payload_reader: unit -> Cstruct.t -> payload diff --git a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v1.ml b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v1.ml index 834ddd3106b..af187fa8ae7 100644 --- a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v1.ml +++ b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v1.ml @@ -94,9 +94,7 @@ let parse_payload ~(json : string) : payload = try let rpc = Jsonrpc.of_string json in let kvs = Rrd_rpc.dict_of_rpc ~rpc in - let timestamp = - Rpc.float_of_rpc (List.assoc "timestamp" kvs) |> Int64.of_float - in + let timestamp = Rpc.float_of_rpc (List.assoc "timestamp" kvs) in let datasource_rpcs = Rrd_rpc.dict_of_rpc ~rpc:(List.assoc "datasources" kvs) in diff --git a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v2.ml b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v2.ml index 1c6774d525a..3c8cafbd8a7 100644 --- a/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v2.ml +++ b/ocaml/xcp-rrdd/lib/transport/base/rrd_protocol_v2.ml @@ -75,7 +75,8 @@ module Read = struct let datasource_count cs = 
Int32.to_int (Cstruct.BE.get_uint32 cs datasource_count_start) - let timestamp cs = Cstruct.BE.get_uint64 cs timestamp_start + let timestamp cs = + Int64.float_of_bits (Cstruct.BE.get_uint64 cs timestamp_start) let datasource_values cs cached_datasources = let rec aux start acc = function @@ -125,7 +126,8 @@ module Write = struct let datasource_count cs value = Cstruct.BE.set_uint32 cs datasource_count_start (Int32.of_int value) - let timestamp cs value = Cstruct.BE.set_uint64 cs timestamp_start value + let timestamp cs value = + Cstruct.BE.set_uint64 cs timestamp_start (Int64.bits_of_float value) let datasource_values cs values = let rec aux start = function diff --git a/ocaml/xcp-rrdd/test/transport/test_common.ml b/ocaml/xcp-rrdd/test/transport/test_common.ml index de083183f1e..bd877281946 100644 --- a/ocaml/xcp-rrdd/test/transport/test_common.ml +++ b/ocaml/xcp-rrdd/test/transport/test_common.ml @@ -1,7 +1,7 @@ let test_payload = Rrd_protocol. { - timestamp= 1387867223L + timestamp= 1387867223.2 ; datasources= [ ( Rrd.Host @@ -133,8 +133,7 @@ let assert_ds_equal (owner1, ds1) (owner2, ds2) = let assert_payloads_equal payload1 payload2 = let open Rrd_protocol in - Alcotest.(check int64) - "Timestamps match" payload1.timestamp payload2.timestamp ; + compare_float "Timestamps match" payload1.timestamp payload2.timestamp ; Alcotest.(check int) "Number of datasources read matches written ones" (List.length payload1.datasources) diff --git a/ocaml/xcp-rrdd/test/transport/test_scale.ml b/ocaml/xcp-rrdd/test/transport/test_scale.ml index ddfe2d02a30..35ab60f600a 100644 --- a/ocaml/xcp-rrdd/test/transport/test_scale.ml +++ b/ocaml/xcp-rrdd/test/transport/test_scale.ml @@ -79,7 +79,7 @@ let write_payloads deliveries protocol sock = let run_tests shared_file_count protocol = Random.self_init () ; - let timestamp = Int64.of_float (Unix.gettimeofday ()) in + let timestamp = Unix.gettimeofday () in let deliveries = List.init shared_file_count (fun k -> { diff --git 
a/ocaml/xcp-rrdd/test/transport/test_unit.ml b/ocaml/xcp-rrdd/test/transport/test_unit.ml index 050eaccedcf..784fb356b7e 100644 --- a/ocaml/xcp-rrdd/test/transport/test_unit.ml +++ b/ocaml/xcp-rrdd/test/transport/test_unit.ml @@ -114,7 +114,7 @@ let test_reader_state protocol = payload again. *) let open Rrd_protocol in writer.Rrd_writer.write_payload - {test_payload with timestamp= Int64.add test_payload.timestamp 5L} ; + {test_payload with timestamp= test_payload.timestamp +. 5.} ; let (_ : Rrd_protocol.payload) = reader.Rrd_reader.read_payload () in () ) From f0d252c0c492700cfb5bb835d497bb5b65957015 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 29 Oct 2024 14:15:03 +0000 Subject: [PATCH 14/86] CA-391651 - rrdd.py: Stop truncating timestamps to seconds RRDD, aside from the OCaml library, exposes two interfaces. This changes the Python one. The C one lives in rrd-client-lib and needs to be changed at the same time and coordinated during updating. Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/scripts/rrdd/rrdd.py | 6 +++--- .../scripts/rrdd/test_api_wait_until_next_reading.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ocaml/xcp-rrdd/scripts/rrdd/rrdd.py b/ocaml/xcp-rrdd/scripts/rrdd/rrdd.py index 1132fa92b53..76dc4fd7974 100644 --- a/ocaml/xcp-rrdd/scripts/rrdd/rrdd.py +++ b/ocaml/xcp-rrdd/scripts/rrdd/rrdd.py @@ -296,10 +296,10 @@ def update(self): """Write all datasources specified (via set_datasource) since the last call to this function. 
The datasources are written together with the relevant metadata into the file agreed with rrdd.""" - timestamp = int(time.time()) + timestamp = time.time() data_values = [] combined = dict() - data_checksum = crc32(pack(">Q", timestamp)) & 0xffffffff + data_checksum = crc32(pack(">d", timestamp)) & 0xffffffff for ds in sorted(self.datasources, key=lambda source: source.name): value = self.pack_data(ds) @@ -326,7 +326,7 @@ def update(self): # Now write the updated header self.dest.seek(0) self.dest.write(encoded_datasource_header) - self.dest.write(pack(">LLLQ", + self.dest.write(pack(">LLLd", data_checksum, metadata_checksum, len(self.datasources), diff --git a/ocaml/xcp-rrdd/scripts/rrdd/test_api_wait_until_next_reading.py b/ocaml/xcp-rrdd/scripts/rrdd/test_api_wait_until_next_reading.py index a038513e230..be946674618 100644 --- a/ocaml/xcp-rrdd/scripts/rrdd/test_api_wait_until_next_reading.py +++ b/ocaml/xcp-rrdd/scripts/rrdd/test_api_wait_until_next_reading.py @@ -160,7 +160,7 @@ def pack_data(self, ds: MockDataSource): unpacked_metadata_checksum, unpacked_num_datasources, unpacked_timestamp, - ) = unpack(">LLLQ", header[11:]) + ) = unpack(">LLLd", header[11:]) # Assert the expected unpacked header value assert header.startswith(b"DATASOURCES") @@ -172,7 +172,7 @@ def pack_data(self, ds: MockDataSource): # # Initialize the expected checksum with the fixed time - expected_checksum = checksum(pack(">Q", fixed_time)) + expected_checksum = checksum(pack(">d", fixed_time)) # Loop over the datasources and assert the packed data testee.dest.seek(header_len) # sourcery skip: no-loop-in-tests From b3da1c9fcf94c4b47a6a740e9142f949b4e9249e Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 25 Oct 2024 14:05:11 +0100 Subject: [PATCH 15/86] CA-391651 rrd: Don't truncate timestamps when calculating values Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib/rrd.ml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git 
a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 6f264627066..c9d646345cd 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -235,6 +235,11 @@ let get_times time timestep = let age = time -. Int64.to_float starttime in (starttime, age) +let get_float_time time timestep = + let timestep = Int64.to_float timestep in + let starttime = timestep *. (time /. timestep) in + starttime + (** Update the CDP value with a number (start_pdp_offset) of PDPs. *) let do_cfs rra start_pdp_offset pdps = Array.iter @@ -443,10 +448,9 @@ let ds_update rrd timestamp valuesandtransforms new_rrd = (i, nan) else let raw = - ds.ds_value - /. (Int64.to_float (occu_pdp_st --- proc_pdp_st) - -. ds.ds_unknown_sec - ) + let proc_pdp_st = get_float_time last_updated rrd.timestep in + let occu_pdp_st = get_float_time timestamp rrd.timestep in + ds.ds_value /. (occu_pdp_st -. proc_pdp_st -. ds.ds_unknown_sec) in (* Apply the transform after the raw value has been calculated *) let raw = apply_transform_function transform raw in From 76e317c797dc6b8f4c64b931b48c67069ba8d81f Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 18 Oct 2024 13:18:48 +0100 Subject: [PATCH 16/86] CA-391651: Update RRD tests to the new interfaces Signed-off-by: Andrii Sultanov --- ocaml/libs/xapi-rrd/lib_test/crowbar_tests.ml | 9 +- .../xapi-rrd/lib_test/test_data/flip_flop.xml | 2 +- ocaml/libs/xapi-rrd/lib_test/unit_tests.ml | 92 ++++++++----------- ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml | 38 +++++--- 4 files changed, 69 insertions(+), 72 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib_test/crowbar_tests.ml b/ocaml/libs/xapi-rrd/lib_test/crowbar_tests.ml index 6ff917eccfc..243b4d6a4e4 100644 --- a/ocaml/libs/xapi-rrd/lib_test/crowbar_tests.ml +++ b/ocaml/libs/xapi-rrd/lib_test/crowbar_tests.ml @@ -74,14 +74,13 @@ let ds = let rrd = Cb.(map [list1 int64; rra; ds]) (fun values rra ds -> let open Rrd in - let init_time = 0. 
in - - let rrd = rrd_create [|ds|] [|rra|] 5L init_time in + let rrd = rrd_create [|ds|] [|rra|] 5L 0. in List.iteri (fun i v -> - let t = 5. *. (init_time +. float_of_int i) in - ds_update rrd t [|VT_Int64 v|] [|Identity|] (i = 0) + let timestamp = 5. *. float_of_int i in + let arr = [|(0, {value= VT_Int64 v; transform= Identity})|] in + ds_update rrd timestamp arr (i = 0) ) values ; rrd diff --git a/ocaml/libs/xapi-rrd/lib_test/test_data/flip_flop.xml b/ocaml/libs/xapi-rrd/lib_test/test_data/flip_flop.xml index 8e368ed41b7..77e42106881 100644 --- a/ocaml/libs/xapi-rrd/lib_test/test_data/flip_flop.xml +++ b/ocaml/libs/xapi-rrd/lib_test/test_data/flip_flop.xml @@ -1,2 +1,2 @@ -00035100flip_flopDERIVEInfinity0Infinity00.00AVERAGE10.50000.00.00.00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.01.0000-3.00003.0000-5.00005.0000-7.00007.0000-9.00009.0000-11.000011.0000-13.000013.0000-15.000015.0000-17.000017.0000-19.000019.0000MIN10.50000.00.019.00000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.01.0000-3.0000-3.0000-5.0000-5.0000-7.0000-7.0000-9.0000-9.0000-11.0000-11.0000-13.0000-13.0000-15.0000-15.0000-17.0000-17.0000-19.0000-19.0000MAX10.50000.00.019.00000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.01.00001.00003.00003.00005.00005.00007.00007.00009.00009.000011.000011.000013.000013.000015.000015.000017.000017.000019.0000 
+00035100flip_flopDERIVEInfinity0Infinity00.000.0AVERAGE10.50000.00.00.00NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.01.0000-3.00003.0000-5.00005.0000-7.00007.0000-9.00009.0000-11.000011.0000-13.000013.0000-15.000015.0000-17.000017.0000-19.000019.0000MIN10.50000.00.019.00000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.01.0000-3.0000-3.0000-5.0000-5.0000-7.0000-7.0000-9.0000-9.0000-11.0000-11.0000-13.0000-13.0000-15.0000-15.0000-17.0000-17.0000-19.0000-19.0000MAX10.50000.00.019.00000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.01.00001.00003.00003.00005.00005.00007.00007.00009.00009.000011.000011.000013.000013.000015.000015.000017.000017.000019.0000 diff --git a/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml b/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml index 089d8047468..f9cb5765b9f 100644 --- a/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml +++ b/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml @@ -15,10 +15,7 @@ let assert_ds_equal d1 d2 = compare_float __LOC__ d1.ds_value d2.ds_value ; compare_float __LOC__ d1.ds_unknown_sec d2.ds_unknown_sec -let assert_dss_equal d1s d2s = - let d1s = Array.to_list d1s in - let d2s = Array.to_list d2s in - List.iter2 assert_ds_equal d1s d2s +let assert_dss_equal d1s d2s = Array.iter2 assert_ds_equal d1s d2s let assert_cdp_prep_equal c1 c2 = compare_float __LOC__ c1.cdp_value c2.cdp_value ; @@ -37,15 +34,10 @@ let assert_rra_equal a1 a2 = Alcotest.(check int) 
__LOC__ a1.rra_row_cnt a2.rra_row_cnt ; Alcotest.(check int) __LOC__ a1.rra_pdp_cnt a2.rra_pdp_cnt ; compare_float __LOC__ a1.rra_xff a2.rra_xff ; - List.iter2 assert_cdp_prep_equal - (Array.to_list a1.rra_cdps) - (Array.to_list a2.rra_cdps) ; - List.iter2 assert_fring_equal - (Array.to_list a1.rra_data) - (Array.to_list a2.rra_data) + Array.iter2 assert_cdp_prep_equal a1.rra_cdps a2.rra_cdps ; + Array.iter2 assert_fring_equal a1.rra_data a2.rra_data -let assert_rras_equal a1s a2s = - List.iter2 assert_rra_equal (Array.to_list a1s) (Array.to_list a2s) +let assert_rras_equal a1s a2s = Array.iter2 assert_rra_equal a1s a2s let assert_rrds_equal r1 r2 = compare_float __LOC__ r1.last_updated r2.last_updated ; @@ -74,9 +66,7 @@ let test_ranges rrd () = let in_range_fring ds fring = in_range ds.ds_min ds.ds_max (fring_to_list fring) in - let in_range_rra dss rra = - List.iter2 in_range_fring dss (Array.to_list rra.rra_data) - in + let in_range_rra dss rra = Array.iter2 in_range_fring dss rra.rra_data in let range_is_not_empty ds = Alcotest.(check bool) (Printf.sprintf "min (%f) < max (%f); " ds.ds_min ds.ds_max) @@ -84,9 +74,7 @@ let test_ranges rrd () = in Array.iter range_is_not_empty rrd.rrd_dss ; - List.iter - (in_range_rra @@ Array.to_list rrd.rrd_dss) - (Array.to_list rrd.rrd_rras) + Array.iter (in_range_rra @@ rrd.rrd_dss) rrd.rrd_rras let test_marshall rrd ~json () = ignore @@ -94,13 +82,13 @@ let test_marshall rrd ~json () = Rrd.json_to_string rrd else let out = Buffer.create 2048 in - Rrd.xml_to_output rrd (Xmlm.make_output (`Buffer out)) ; + Rrd.xml_to_output true rrd (Xmlm.make_output (`Buffer out)) ; Buffer.contents out ) let test_marshall_unmarshall rrd () = let out = Buffer.create 2048 in - Rrd.xml_to_output rrd (Xmlm.make_output (`Buffer out)) ; + Rrd.xml_to_output true rrd (Xmlm.make_output (`Buffer out)) ; let contents = Buffer.contents out in let xml = Xmlm.make_input (`String (0, contents)) in let rrd' = Rrd.from_xml xml in @@ -124,21 +112,28 @@ let 
gauge_rrd = let rra2 = rra_create CF_Average 100 10 0.5 in let rra3 = rra_create CF_Average 100 100 0.5 in let rra4 = rra_create CF_Average 100 1000 0.5 in + let ts = 1000000000.0 in let ds = ds_create "foo" Gauge ~mrhb:10.0 (VT_Float 0.0) in let ds2 = ds_create "bar" Gauge ~mrhb:10.0 (VT_Float 0.0) in let ds3 = ds_create "baz" Gauge ~mrhb:10.0 (VT_Float 0.0) in let ds4 = ds_create "boo" Gauge ~mrhb:10.0 (VT_Float 0.0) in - let rrd = - rrd_create [|ds; ds2; ds3; ds4|] [|rra; rra2; rra3; rra4|] 1L 1000000000.0 - in + let rrd = rrd_create [|ds; ds2; ds3; ds4|] [|rra; rra2; rra3; rra4|] 1L ts in let id = Identity in for i = 1 to 100000 do - let t = 1000000000.0 +. (0.7 *. float_of_int i) in - let v1 = VT_Float (0.5 +. (0.5 *. sin (t /. 10.0))) in - let v2 = VT_Float (1.5 +. (0.5 *. cos (t /. 80.0))) in - let v3 = VT_Float (3.5 +. (0.5 *. sin (t /. 700.0))) in - let v4 = VT_Float (6.5 +. (0.5 *. cos (t /. 5000.0))) in - ds_update rrd t [|v1; v2; v3; v4|] [|id; id; id; id|] false + let t = 1000000.0 +. (0.7 *. float_of_int i) in + let v1 = + (0, {value= VT_Float (0.5 +. (0.5 *. sin (t /. 10.0))); transform= id}) + in + let v2 = + (1, {value= VT_Float (1.5 +. (0.5 *. cos (t /. 80.0))); transform= id}) + in + let v3 = + (2, {value= VT_Float (3.5 +. (0.5 *. sin (t /. 700.0))); transform= id}) + in + let v4 = + (3, {value= VT_Float (6.5 +. (0.5 *. cos (t /. 5000.0))); transform= id}) + in + ds_update rrd t [|v1; v2; v3; v4|] false done ; rrd @@ -150,66 +145,60 @@ let of_file filename = (* Used to generate flip_flop.xml for test_ca_325844, then gets edited manually to set min to 0 *) let _deserialize_verify_rrd = - let init_time = 0. in - let rra1 = rra_create CF_Average 100 1 0.5 in let rra2 = rra_create CF_Min 100 1 0.5 in let rra3 = rra_create CF_Max 100 1 0.5 in let ds = ds_create "flip_flop" Derive (VT_Int64 0L) in - let rrd = rrd_create [|ds|] [|rra1; rra2; rra3|] 5L init_time in + let rrd = rrd_create [|ds|] [|rra1; rra2; rra3|] 5L 0. 
in let id = Identity in for i = 1 to 100 do - let t = init_time +. float_of_int i in + let t = float_of_int i in let t64 = Int64.of_float t in - let v = VT_Int64 Int64.(mul t64 (mul (-1L) (rem t64 2L))) in - ds_update rrd t [|v|] [|id|] false + let value = VT_Int64 Int64.(mul t64 (mul (-1L) (rem t64 2L))) in + ds_update rrd t [|(0, {value; transform= id})|] false done ; rrd let ca_322008_rrd = - let init_time = 0. in - let rra1 = rra_create CF_Average 100 1 0.5 in let rra2 = rra_create CF_Min 100 1 0.5 in let rra3 = rra_create CF_Max 100 1 0.5 in let ds = ds_create "even or zero" Derive ~min:0. (VT_Int64 0L) in - let rrd = rrd_create [|ds|] [|rra1; rra2; rra3|] 5L init_time in + let rrd = rrd_create [|ds|] [|rra1; rra2; rra3|] 5L 0. in let id = Identity in for i = 1 to 100000 do - let t = init_time +. float_of_int i in + let t = float_of_int i in let t64 = Int64.of_float t in - let v = VT_Int64 (Int64.mul t64 (Int64.rem t64 2L)) in - ds_update rrd t [|v|] [|id|] false + let value = VT_Int64 (Int64.mul t64 (Int64.rem t64 2L)) in + ds_update rrd t [|(0, {value; transform= id})|] false done ; rrd let ca_329043_rrd_1 = - let init_time = 0. in - let rra1 = rra_create CF_Average 3 1 0.5 in let rra2 = rra_create CF_Min 3 1 0.5 in let rra3 = rra_create CF_Max 3 1 0.5 in let ds = ds_create "derive_with_min" ~min:0. ~max:1. Derive VT_Unknown in - let rrd = rrd_create [|ds|] [|rra1; rra2; rra3|] 5L init_time in + let rrd = rrd_create [|ds|] [|rra1; rra2; rra3|] 5L 0. in let id = Identity in let time_value_of_i i = - let t = 5. *. (init_time +. float_of_int i) in + let t = 5. *. float_of_int i in if i = 1 then (t, VT_Int64 0L) else (t, VT_Int64 Int64.(of_float t)) in for i = 0 to 4 do - let t, v = time_value_of_i i in - ds_update rrd t [|v|] [|id|] (i = 0) + let t, value = time_value_of_i i in + ds_update rrd t [|(0, {value; transform= id})|] (i = 0) done ; rrd @@ -233,7 +222,7 @@ let create_rrd ?(rows = 2) values min max = List.iteri (fun i v -> let t = 5. *. (init_time +. 
float_of_int i) in - ds_update rrd t [|VT_Int64 v|] [|id; id; id; id|] (i = 0) + ds_update rrd t [|(0, {value= VT_Int64 v; transform= id})|] (i = 0) ) values ; rrd @@ -258,11 +247,8 @@ let test_ca_322008 () = let in_range_fring ds fring = in_range ds.ds_min rrd.last_updated (fring_to_list fring) in - let in_range_rra dss rra = - List.iter2 in_range_fring dss (Array.to_list rra.rra_data) - in - List.iter (in_range_rra @@ Array.to_list rrd.rrd_dss) - @@ Array.to_list rrd.rrd_rras + let in_range_rra dss rra = Array.iter2 in_range_fring dss rra.rra_data in + Array.iter (in_range_rra @@ rrd.rrd_dss) @@ rrd.rrd_rras let test_ca_325844 () = let rrd = of_file (Filename.concat "test_data" "flip_flop.xml") in diff --git a/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml b/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml index 27d64870fe4..bb0f726b5eb 100644 --- a/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml @@ -36,7 +36,18 @@ let check_datasources kind rdds expected_dss = | None -> () | Some actual_rdds -> - let actual_dss = dss_of_rrds actual_rdds in + let actual_dss = + dss_of_rrds actual_rdds + |> List.map (fun (name, dss) -> + ( name + , Rrd.StringMap.( + map (fun (_timestamp, ds) -> ds) dss + |> bindings + |> List.map snd + ) + ) + ) + in let expected_dss = List.fast_sort Stdlib.compare expected_dss in Alcotest.(check @@ list @@ pair string (list ds)) (Printf.sprintf "%s rrds are not expected" kind) @@ -45,15 +56,16 @@ let check_datasources kind rdds expected_dss = let host_rrds rrd_info = Option.bind rrd_info @@ fun rrd_info -> let h = Hashtbl.create 1 in - if rrd_info.Rrdd_shared.dss <> [] then + if rrd_info.Rrdd_shared.dss <> Rrd.StringMap.empty then Hashtbl.add h "host" rrd_info ; Some h -let update_rrds_test ~dss ~uuid_domids ~paused_vms ~expected_vm_rrds +let update_rrds_test ~timestamp ~dss ~uuid_domids ~paused_vms ~expected_vm_rrds ~expected_sr_rrds ~expected_host_dss = let test () = reset_rrdd_shared_state () ; - 
Rrdd_monitor.update_rrds 12345.0 (List.to_seq dss) uuid_domids paused_vms ; + Rrdd_monitor.update_rrds uuid_domids paused_vms + (List.to_seq [("update_rrds_test", timestamp, List.to_seq dss)]) ; check_datasources "VM" (Some Rrdd_shared.vm_rrds) expected_vm_rrds ; check_datasources "SR" (Some Rrdd_shared.sr_rrds) expected_sr_rrds ; check_datasources "Host" (host_rrds !Rrdd_shared.host_rrd) expected_host_dss @@ -64,35 +76,35 @@ let update_rrds = let open Rrd in [ ( "Null update" - , update_rrds_test ~dss:[] ~uuid_domids:[] ~paused_vms:[] + , update_rrds_test ~timestamp:0. ~dss:[] ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Single host update" - , update_rrds_test + , update_rrds_test ~timestamp:0. ~dss:[(Host, ds_a)] ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[("host", [ds_a])] ) ; ( "Multiple host updates" - , update_rrds_test + , update_rrds_test ~timestamp:0. ~dss:[(Host, ds_a); (Host, ds_b)] ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[("host", [ds_a; ds_b])] ) ; ( "Single non-resident VM update" - , update_rrds_test + , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a)] ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple non-resident VM updates" - , update_rrds_test + , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a); (VM "b", ds_a)] ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Single resident VM update" - , update_rrds_test + , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a)] ~uuid_domids:[("a", 1)] ~paused_vms:[] @@ -100,7 +112,7 @@ let update_rrds = ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple resident VM updates" - , update_rrds_test + , update_rrds_test ~timestamp:0. 
~dss:[(VM "a", ds_a); (VM "b", ds_a); (VM "b", ds_b)] ~uuid_domids:[("a", 1); ("b", 1)] ~paused_vms:[] @@ -108,7 +120,7 @@ let update_rrds = ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple resident and non-resident VM updates" - , update_rrds_test + , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a); (VM "b", ds_a); (VM "c", ds_a)] ~uuid_domids:[("a", 1); ("b", 1)] ~paused_vms:[] @@ -116,7 +128,7 @@ let update_rrds = ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple SR updates" - , update_rrds_test + , update_rrds_test ~timestamp:0. ~dss:[(SR "a", ds_a); (SR "b", ds_a); (SR "b", ds_b)] ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[("a", [ds_a]); ("b", [ds_a; ds_b])] From 1d7fe35eddcacf071e2690cf5e3cc3381f27b342 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 29 Oct 2024 15:05:21 +0000 Subject: [PATCH 17/86] CA-391651 - docs: Update RRD design pages Signed-off-by: Andrii Sultanov --- doc/content/design/plugin-protocol-v2.md | 9 +-- .../xcp-rrdd/design/plugin-protocol-v2.md | 57 +++++++++++++++---- 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/doc/content/design/plugin-protocol-v2.md b/doc/content/design/plugin-protocol-v2.md index 8c02b85c61f..e27f3bec887 100644 --- a/doc/content/design/plugin-protocol-v2.md +++ b/doc/content/design/plugin-protocol-v2.md @@ -20,7 +20,7 @@ DATASOURCES 000001e4 dba4bf7a84b6d11d565d19ef91f7906e { - "timestamp": 1339685573, + "timestamp": 1339685573.245, "data_sources": { "cpu-temp-cpu0": { "description": "Temperature of CPU 0", @@ -62,7 +62,7 @@ reported datasources. 
### Example ``` { - "timestamp": 1339685573, + "timestamp": 1339685573.245, "data_sources": { "cpu-temp-cpu0": { "description": "Temperature of CPU 0", @@ -96,7 +96,7 @@ Protocol V2 |data checksum |32 |int32 |binary-encoded crc32 of the concatenation of the encoded timestamp and datasource values| |metadata checksum |32 |int32 |binary-encoded crc32 of the metadata string (see below) | |number of datasources|32 |int32 |only needed if the metadata has changed - otherwise RRDD can use a cached value | -|timestamp |64 |int64 |Unix epoch | +|timestamp |64 |double|Unix epoch | |datasource values |n * 64 |int64 \| double |n is the number of datasources exported by the plugin, type dependent on the setting in the metadata for value_type [int64\|float] | |metadata length |32 |int32 | | |metadata |(string length)*8|string| | @@ -193,6 +193,3 @@ This means that for a normal update, RRDD will only have to read the header plus the first (16 + 16 + 4 + 8 + 8*n) bytes of data, where n is the number of datasources exported by the plugin. If the metadata changes RRDD will have to read all the data (and parse the metadata). - -n.b. the timestamp reported by plugins is not currently used by RRDD - it uses -its own global timestamp. diff --git a/doc/content/xcp-rrdd/design/plugin-protocol-v2.md b/doc/content/xcp-rrdd/design/plugin-protocol-v2.md index c8581a2aad3..e27f3bec887 100644 --- a/doc/content/xcp-rrdd/design/plugin-protocol-v2.md +++ b/doc/content/xcp-rrdd/design/plugin-protocol-v2.md @@ -1,5 +1,6 @@ --- title: RRDD plugin protocol v2 +layout: default design_doc: true revision: 1 status: released (7.0) @@ -19,7 +20,7 @@ DATASOURCES 000001e4 dba4bf7a84b6d11d565d19ef91f7906e { - "timestamp": 1339685573, + "timestamp": 1339685573.245, "data_sources": { "cpu-temp-cpu0": { "description": "Temperature of CPU 0", @@ -58,9 +59,10 @@ This should always be present. * The JSON data itself, encoding the values and metadata associated with the reported datasources. 
+### Example ``` { - "timestamp": 1339685573, + "timestamp": 1339685573.245, "data_sources": { "cpu-temp-cpu0": { "description": "Temperature of CPU 0", @@ -90,19 +92,32 @@ Protocol V2 |value|bits|format|notes| |-----|----|------|-----| -|header string |(string length)*8|string|"Datasources" as in the V1 protocol | +|header string |(string length)*8|string|"DATASOURCES" as in the V1 protocol | |data checksum |32 |int32 |binary-encoded crc32 of the concatenation of the encoded timestamp and datasource values| |metadata checksum |32 |int32 |binary-encoded crc32 of the metadata string (see below) | |number of datasources|32 |int32 |only needed if the metadata has changed - otherwise RRDD can use a cached value | -|timestamp |64 |int64 |Unix epoch | -|datasource values |n * 64 |int64 |n is the number of datasources exported by the plugin | +|timestamp |64 |double|Unix epoch | +|datasource values |n * 64 |int64 \| double |n is the number of datasources exported by the plugin, type dependent on the setting in the metadata for value_type [int64\|float] | |metadata length |32 |int32 | | |metadata |(string length)*8|string| | -All integers are bigendian. The metadata will have the same JSON-based format as +All integers/double are bigendian. The metadata will have the same JSON-based format as in the V1 protocol, minus the timestamp and `value` key-value pair for each -datasource, for example: +datasource. +| field | values | notes | required | +|-------|--------|-------|----------| +|description|string|Description of the datasource|no| +|owner|host \| vm \| sr|The object to which the data relates|no, default host| +|value_type|int64 \| float|The type of the datasource|yes| +|type|absolute \| derive \| gauge|The type of measurement being sent. 
Absolute for counters which are reset on reading, derive stores the derivative of the recorded values (useful for metrics which continually increase like amount of data written since start), gauge for things like temperature|no, default absolute| +|default|true \| false|Whether the source is default enabled or not|no, default false| +|units||The units the data should be displayed in|no| +|min||The minimum value for the datasource|no, default -infinity| +|max||The maximum value for the datasource|no, default +infinity| + + +### Example ``` { "datasources": { @@ -125,6 +140,27 @@ datasource, for example: "units":"B", "min":"-inf", "max":"inf" + }, + { + "cpu-temp-cpu0": { + "description": "Temperature of CPU 0", + "owner":"host", + "value_type": "float", + "type": "absolute", + "default":"true", + "units": "degC", + "min":"-inf", + "max":"inf" + }, + "cpu-temp-cpu1": { + "description": "Temperature of CPU 1", + "owner":"host", + "value_type": "float", + "type": "absolute", + "default":"true", + "units": "degC", + "min":"-inf", + "max":"inf" } } } @@ -140,13 +176,13 @@ if header != expected_header: raise InvalidHeader() if data_checksum == last_data_checksum: raise NoUpdate() -if data_checksum != md5sum(encoded_timestamp_and_values): +if data_checksum != crc32(encoded_timestamp_and_values): raise InvalidChecksum() if metadata_checksum == last_metadata_checksum: for datasource, value in cached_datasources, values: update(datasource, value) else: - if metadata_checksum != md5sum(metadata): + if metadata_checksum != crc32(metadata): raise InvalidChecksum() cached_datasources = create_datasources(metadata) for datasource, value in cached_datasources, values: @@ -157,6 +193,3 @@ This means that for a normal update, RRDD will only have to read the header plus the first (16 + 16 + 4 + 8 + 8*n) bytes of data, where n is the number of datasources exported by the plugin. If the metadata changes RRDD will have to read all the data (and parse the metadata). - -n.b. 
the timestamp reported by plugins is not currently used by RRDD - it uses -its own global timestamp. From 801dd96cdbcdf0f1fc412859aed46005040b0c57 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Fri, 8 Nov 2024 16:27:26 +0100 Subject: [PATCH 18/86] Increase wait-init-complete timeout When a host starts, the systemd service xapi-wait-init-complete waiting on the creation of the xapi init cookie file may fail on timeout for a matter of seconds. This patch adds 1 minute (300 seconds total) to the timeout passed to the script xapi-wait-init-complete. Signed-off-by: Thierry Escande --- scripts/xapi-wait-init-complete.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/xapi-wait-init-complete.service b/scripts/xapi-wait-init-complete.service index 03cb7f8e9cd..19691c477e6 100644 --- a/scripts/xapi-wait-init-complete.service +++ b/scripts/xapi-wait-init-complete.service @@ -6,7 +6,7 @@ Before=xapi-init-complete.target [Service] Type=oneshot -ExecStart=@OPTDIR@/bin/xapi-wait-init-complete 240 +ExecStart=@OPTDIR@/bin/xapi-wait-init-complete 300 RemainAfterExit=yes [Install] From 20f4dccff3837992a3ab415f6b5a9d1c6f563139 Mon Sep 17 00:00:00 2001 From: Danilo Del Busso Date: Tue, 1 Oct 2024 07:31:40 +0100 Subject: [PATCH 19/86] CP-51694: Add testing of Go date deserialization Signed-off-by: Danilo Del Busso --- .github/workflows/generate-and-build-sdks.yml | 1 + .github/workflows/go-ci/action.yml | 5 + ocaml/sdk-gen/go/autogen/src/convert_test.go | 91 +++++++++++++++++++ ocaml/sdk-gen/go/autogen/src/export_test.go | 37 ++++++++ 4 files changed, 134 insertions(+) create mode 100644 ocaml/sdk-gen/go/autogen/src/convert_test.go create mode 100644 ocaml/sdk-gen/go/autogen/src/export_test.go diff --git a/.github/workflows/generate-and-build-sdks.yml b/.github/workflows/generate-and-build-sdks.yml index 8187c391508..6a5e2a57bf7 100644 --- a/.github/workflows/generate-and-build-sdks.yml +++ b/.github/workflows/generate-and-build-sdks.yml @@ -54,6 
+54,7 @@ jobs: path: | _build/install/default/share/go/* !_build/install/default/share/go/dune + !_build/install/default/share/go/**/*_test.go - name: Store Java SDK source uses: actions/upload-artifact@v4 diff --git a/.github/workflows/go-ci/action.yml b/.github/workflows/go-ci/action.yml index c1b2df7f1e1..30bcbfee923 100644 --- a/.github/workflows/go-ci/action.yml +++ b/.github/workflows/go-ci/action.yml @@ -14,6 +14,11 @@ runs: working-directory: ${{ github.workspace }}/_build/install/default/share/go/src args: --config=${{ github.workspace }}/.golangci.yml + - name: Run Go Tests + shell: bash + working-directory: ${{ github.workspace }}/_build/install/default/share/go/src + run: go test -v + - name: Run CI for Go SDK shell: bash run: | diff --git a/ocaml/sdk-gen/go/autogen/src/convert_test.go b/ocaml/sdk-gen/go/autogen/src/convert_test.go new file mode 100644 index 00000000000..48dabc82898 --- /dev/null +++ b/ocaml/sdk-gen/go/autogen/src/convert_test.go @@ -0,0 +1,91 @@ +/* + * Copyright (c) Cloud Software Group, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1) Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2) Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package xenapi_test + +import ( + "testing" + "time" + + "go/xenapi" +) + +func TestDateDeseralization(t *testing.T) { + dates := map[string]time.Time{ + // no dashes, no colons + "20220101T123045": time.Date(2022, 1, 1, 12, 30, 45, 0, time.UTC), + "20220101T123045Z": time.Date(2022, 1, 1, 12, 30, 45, 0, time.UTC), + "20220101T123045+03": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), // +03 timezone + "20220101T123045+0300": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), + "20220101T123045+03:00": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), + + "20220101T123045.123": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.UTC), + "20220101T123045.123Z": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.UTC), + "20220101T123045.123+03": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.FixedZone("", 3*60*60)), + "20220101T123045.123+0300": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.FixedZone("", 3*60*60)), + "20220101T123045.123+03:00": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.FixedZone("", 3*60*60)), + + // no dashes, with colons + "20220101T12:30:45": time.Date(2022, 1, 1, 12, 30, 45, 0, time.UTC), + "20220101T12:30:45Z": time.Date(2022, 1, 1, 12, 30, 45, 0, time.UTC), + "20220101T12:30:45+03": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), + "20220101T12:30:45+0300": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 
3*60*60)), + "20220101T12:30:45+03:00": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), + + "20220101T12:30:45.123": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.UTC), + "20220101T12:30:45.123Z": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.UTC), + "20220101T12:30:45.123+03": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.FixedZone("", 3*60*60)), + "20220101T12:30:45.123+0300": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.FixedZone("", 3*60*60)), + "20220101T12:30:45.123+03:00": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.FixedZone("", 3*60*60)), + + // dashes and colons + "2022-01-01T12:30:45": time.Date(2022, 1, 1, 12, 30, 45, 0, time.UTC), + "2022-01-01T12:30:45Z": time.Date(2022, 1, 1, 12, 30, 45, 0, time.UTC), + "2022-01-01T12:30:45+03": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), + "2022-01-01T12:30:45+0300": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), + "2022-01-01T12:30:45+03:00": time.Date(2022, 1, 1, 12, 30, 45, 0, time.FixedZone("", 3*60*60)), + + "2022-01-01T12:30:45.123": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.UTC), + "2022-01-01T12:30:45.123Z": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.UTC), + "2022-01-01T12:30:45.123+03": time.Date(2022, 1, 1, 12, 30, 45, 123000000, time.FixedZone("", 3*60*60)), + } + for input, expected := range dates { + t.Run("Input:"+input, func(t *testing.T) { + result, err := xenapi.DeserializeTime("", input) + if err == nil { + matching := expected.Equal(result) + if !matching { + t.Fatalf(`Failed to find match for '%s'`, input) + } + } else { + t.Fatalf(`Failed to find match for '%s'`, input) + } + }) + } +} diff --git a/ocaml/sdk-gen/go/autogen/src/export_test.go b/ocaml/sdk-gen/go/autogen/src/export_test.go new file mode 100644 index 00000000000..5dbdbeb47e3 --- /dev/null +++ b/ocaml/sdk-gen/go/autogen/src/export_test.go @@ -0,0 +1,37 @@ +/* + * Copyright (c) Cloud Software Group, Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1) Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2) Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// This file contains exports of private functions specifically for testing purposes. +// It allows test code to access and verify the behavior of internal functions within the `xenapi` package. + +package xenapi + +// DeserializeTime is a private function that deserializes a time value. +// It is exported for testing to allow verification of its functionality. 
+var DeserializeTime = deserializeTime From ca5d3b64d7c48413ff4ab3119fe81cf3c5efb9db Mon Sep 17 00:00:00 2001 From: Danilo Del Busso Date: Tue, 12 Nov 2024 15:19:19 +0000 Subject: [PATCH 20/86] Set non-UTC timezone for date time unit test runners Signed-off-by: Danilo Del Busso --- .github/workflows/generate-and-build-sdks.yml | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/.github/workflows/generate-and-build-sdks.yml b/.github/workflows/generate-and-build-sdks.yml index 6a5e2a57bf7..39645cf68bf 100644 --- a/.github/workflows/generate-and-build-sdks.yml +++ b/.github/workflows/generate-and-build-sdks.yml @@ -24,6 +24,14 @@ jobs: shell: bash run: opam exec -- make sdk + # sdk-ci runs some Go unit tests. + # This setting ensures that SDK date time + # tests are run on a machine that + # isn't using UTC + - name: Set Timezone to Tokyo for datetime tests + run: | + sudo timedatectl set-timezone Asia/Tokyo + - name: Run CI for SDKs uses: ./.github/workflows/sdk-ci @@ -111,6 +119,14 @@ jobs: java-version: '17' distribution: 'temurin' + # Java Tests are run at compile time. + # This setting ensures that SDK date time + # tests are run on a machine that + # isn't using UTC + - name: Set Timezone to Tokyo for datetime tests + run: | + sudo timedatectl set-timezone Asia/Tokyo + - name: Build Java SDK shell: bash run: | @@ -139,6 +155,14 @@ jobs: name: SDK_Source_CSharp path: source/ + # All tests builds and pipelines should + # work on other timezones. 
This setting ensures that + # SDK date time tests are run on a machine that + # isn't using UTC + - name: Set Timezone to Tokyo for datetime tests + shell: pwsh + run: Set-TimeZone -Id "Tokyo Standard Time" + - name: Test C# SDK shell: pwsh run: | From 5bcef81fe68a1218e62706c8dda9df511d01ee22 Mon Sep 17 00:00:00 2001 From: Danilo Del Busso Date: Wed, 13 Nov 2024 09:32:31 +0000 Subject: [PATCH 21/86] Fix parsing of timezone agnostic date strings in Java deserializer Signed-off-by: Danilo Del Busso --- .../xenapi/CustomDateDeserializer.java | 151 ++++++++++-------- 1 file changed, 85 insertions(+), 66 deletions(-) diff --git a/ocaml/sdk-gen/java/autogen/xen-api/src/main/java/com/xensource/xenapi/CustomDateDeserializer.java b/ocaml/sdk-gen/java/autogen/xen-api/src/main/java/com/xensource/xenapi/CustomDateDeserializer.java index 3ba135e0a40..63be5c1c458 100644 --- a/ocaml/sdk-gen/java/autogen/xen-api/src/main/java/com/xensource/xenapi/CustomDateDeserializer.java +++ b/ocaml/sdk-gen/java/autogen/xen-api/src/main/java/com/xensource/xenapi/CustomDateDeserializer.java @@ -49,7 +49,7 @@ public class CustomDateDeserializer extends StdDeserializer { /** * Array of {@link SimpleDateFormat} objects representing the date formats * used in xen-api responses. - * + *
* RFC-3339 date formats can be returned in either Zulu or time zone agnostic. * This list is not an exhaustive list of formats supported by RFC-3339, rather * a set of formats that will enable the deserialization of xen-api dates. @@ -57,17 +57,24 @@ public class CustomDateDeserializer extends StdDeserializer { * to this list, please ensure the order is kept. */ private static final SimpleDateFormat[] dateFormatsUtc = { - // Most commonly returned formats - new SimpleDateFormat("yyyyMMdd'T'HHmmss'Z'"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"), - new SimpleDateFormat("ss.SSS"), - - // Other - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss'Z'"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSS'Z'"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSS'Z'"), - + // Most commonly returned formats + new SimpleDateFormat("yyyyMMdd'T'HHmmss'Z'"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"), + new SimpleDateFormat("ss.SSS"), + + // Other + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss'Z'"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSS'Z'"), + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSS'Z'"), + + // Formats without timezone info default to UTC in xapi + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSS"), + new SimpleDateFormat("yyyyMMdd'T'HHmmss"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSS"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"), }; /** @@ -78,61 +85,55 @@ public class CustomDateDeserializer extends StdDeserializer { * to this list, please ensure the order is kept. 
*/ private static final SimpleDateFormat[] dateFormatsLocal = { - // no dashes, no colons - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSZZZ"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSZZ"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSZ"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSXXX"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSXX"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSX"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSS"), - - new SimpleDateFormat("yyyyMMdd'T'HHmmssZZZ"), - new SimpleDateFormat("yyyyMMdd'T'HHmmssZZ"), - new SimpleDateFormat("yyyyMMdd'T'HHmmssZ"), - new SimpleDateFormat("yyyyMMdd'T'HHmmssXXX"), - new SimpleDateFormat("yyyyMMdd'T'HHmmssXX"), - new SimpleDateFormat("yyyyMMdd'T'HHmmssX"), - new SimpleDateFormat("yyyyMMdd'T'HHmmss"), - - // no dashes, with colons - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSZZZ"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSZZ"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSZ"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSXXX"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSXX"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSX"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSS"), - - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssZZZ"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssZZ"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssZ"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssXXX"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssXX"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssX"), - new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss"), - - // dashes and colons - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZZZ"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZZ"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXX"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSX"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS"), - - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZZZ"), - new 
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZZ"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXX"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX"), - new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"), + // no dashes, no colons + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSZZZ"), + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSZZ"), + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSZ"), + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSXXX"), + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSXX"), + new SimpleDateFormat("yyyyMMdd'T'HHmmss.SSSX"), + + new SimpleDateFormat("yyyyMMdd'T'HHmmssZZZ"), + new SimpleDateFormat("yyyyMMdd'T'HHmmssZZ"), + new SimpleDateFormat("yyyyMMdd'T'HHmmssZ"), + new SimpleDateFormat("yyyyMMdd'T'HHmmssXXX"), + new SimpleDateFormat("yyyyMMdd'T'HHmmssXX"), + new SimpleDateFormat("yyyyMMdd'T'HHmmssX"), + + // no dashes, with colons + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSZZZ"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSZZ"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSZ"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSXXX"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSXX"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ss.SSSX"), + + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssZZZ"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssZZ"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssZ"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssXXX"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssXX"), + new SimpleDateFormat("yyyyMMdd'T'HH:mm:ssX"), + + // dashes and colons + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZZZ"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZZ"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXX"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSX"), + + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZZZ"), + new 
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZZ"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXX"), + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX"), }; /** * Constructs a {@link CustomDateDeserializer} instance. - */ + */ public CustomDateDeserializer() { this(null); } @@ -163,9 +164,13 @@ public CustomDateDeserializer(Class t) { @Override public Date deserialize(JsonParser jsonParser, DeserializationContext deserializationContext) throws IOException { var text = jsonParser.getText(); + Date localDate = null; + Date utcDate = null; + for (SimpleDateFormat formatter : dateFormatsUtc) { try { - return formatter.parse(text); + utcDate = formatter.parse(text); + break; } catch (ParseException e) { // ignore } @@ -173,12 +178,26 @@ public Date deserialize(JsonParser jsonParser, DeserializationContext deserializ for (SimpleDateFormat formatter : dateFormatsLocal) { try { - return formatter.parse(text); + localDate = formatter.parse(text); + break; } catch (ParseException e) { // ignore } } - throw new IOException("Failed to deserialize a Date value."); + // Some dates such as 20220101T12:30:45.123+03:00 will match both with a UTC + // and local date format. In that case, we pick the date returned by the + // local formatter, as it's more precise. 
+ // This allows us to match strings with no timezone information (such as 20220101T12:30:45.123) + // as UTC, while correctly parsing more precise date representations + if (localDate != null && utcDate != null) { + return localDate; // Prioritize local format if both match + } else if (localDate != null) { + return localDate; + } else if (utcDate != null) { + return utcDate; + } else { + throw new IOException("Failed to deserialize a Date value."); + } } } From b81d11ef4457b919f8f99ecbf2ad631fee3f2a35 Mon Sep 17 00:00:00 2001 From: Danilo Del Busso Date: Wed, 13 Nov 2024 18:28:40 +0900 Subject: [PATCH 22/86] Ensure C# date tests work when running under any timezone Without the change, the tests only pass when running on a runner with UTC as a timezone Signed-off-by: Danilo Del Busso --- .../autogen/XenServerTest/DateTimeTests.cs | 79 +++++++++++-------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs b/ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs index 0bda9474eb0..981204df714 100644 --- a/ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs +++ b/ocaml/sdk-gen/csharp/autogen/XenServerTest/DateTimeTests.cs @@ -51,16 +51,25 @@ public class DateTimeTests [TestMethod] [DynamicData(nameof(GetTestData), DynamicDataSourceType.Method, DynamicDataDisplayName = nameof(GetCustomDynamicDataDisplayName))] - public void TestXenDateTimeConverter(string dateString, DateTime expectedDateTime) + public void TestXenDateTimeConverter(string dateString, DateTime expectedDateTime, DateTimeKind expectedDateTimeKind) { try { var jsonDateString = "{ \"Date\" : \"" + dateString + "\" }"; - var actualDateTime = JsonConvert.DeserializeObject(jsonDateString, _settings); + var actualDateTimeObject = JsonConvert.DeserializeObject(jsonDateString, _settings); - Assert.IsNotNull(actualDateTime, $"Failed to convert '{dateString}'"); - Assert.IsTrue(expectedDateTime.Equals(actualDateTime.Date), - 
$"Conversion of '{dateString}' resulted in an incorrect DateTime value"); + + Assert.IsNotNull(actualDateTimeObject?.Date, $"Failed to convert '{dateString}'"); + var actualDateTime = actualDateTimeObject.Date; + Assert.IsTrue(expectedDateTimeKind.Equals(actualDateTime.Kind)); + + // expected times are in UTC to ensure these tests do + // not fail when running in other timezones + if (expectedDateTimeKind == DateTimeKind.Local) + actualDateTime = actualDateTime.ToUniversalTime(); + + Assert.IsTrue(expectedDateTime.Equals(actualDateTime), + $"Conversion of '{dateString}' resulted in an incorrect DateTime value. Expected '{expectedDateTime} but instead received '{actualDateTime}'"); } catch (Exception ex) { @@ -78,62 +87,62 @@ public static string GetCustomDynamicDataDisplayName(MethodInfo methodInfo, obje public static IEnumerable GetTestData() { // no dashes, no colons - yield return new object[] { "20220101T123045", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Unspecified) }; - yield return new object[] { "20220101T123045Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc) }; - yield return new object[] { "20220101T123045+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; - yield return new object[] { "20220101T123045+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; - yield return new object[] { "20220101T123045+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + yield return new object[] { "20220101T123045", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc), DateTimeKind.Unspecified }; + yield return new object[] { "20220101T123045Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc), DateTimeKind.Utc }; + yield return new object[] { "20220101T123045+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; + yield return new object[] { "20220101T123045+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; + yield return new object[] { 
"20220101T123045+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "20220101T123045.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Unspecified) }; + { "20220101T123045.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Unspecified }; yield return new object[] - { "20220101T123045.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc) }; + { "20220101T123045.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Utc }; yield return new object[] - { "20220101T123045.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "20220101T123045.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "20220101T123045.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "20220101T123045.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "20220101T123045.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "20220101T123045.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; // no dashes, with colons yield return new object[] - { "20220101T12:30:45", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Unspecified) }; - yield return new object[] { "20220101T12:30:45Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc) }; - yield return new object[] { "20220101T12:30:45+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; - yield return new object[] { "20220101T12:30:45+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + { "20220101T12:30:45", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc), DateTimeKind.Unspecified }; + yield return new object[] { "20220101T12:30:45Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc), DateTimeKind.Utc }; + yield 
return new object[] { "20220101T12:30:45+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; + yield return new object[] { "20220101T12:30:45+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "20220101T12:30:45+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + { "20220101T12:30:45+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "20220101T12:30:45.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Unspecified) }; + { "20220101T12:30:45.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Unspecified }; yield return new object[] - { "20220101T12:30:45.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc) }; + { "20220101T12:30:45.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Utc }; yield return new object[] - { "20220101T12:30:45.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "20220101T12:30:45.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "20220101T12:30:45.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "20220101T12:30:45.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "20220101T12:30:45.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "20220101T12:30:45.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; // dashes and colons yield return new object[] - { "2022-01-01T12:30:45", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Unspecified) }; - yield return new object[] { "2022-01-01T12:30:45Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc) }; - yield return new object[] { "2022-01-01T12:30:45+03", new DateTime(2022, 
1, 1, 9, 30, 45, DateTimeKind.Local) }; + { "2022-01-01T12:30:45", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc), DateTimeKind.Unspecified }; + yield return new object[] { "2022-01-01T12:30:45Z", new DateTime(2022, 1, 1, 12, 30, 45, DateTimeKind.Utc), DateTimeKind.Utc }; + yield return new object[] { "2022-01-01T12:30:45+03", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "2022-01-01T12:30:45+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + { "2022-01-01T12:30:45+0300", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "2022-01-01T12:30:45+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Local) }; + { "2022-01-01T12:30:45+03:00", new DateTime(2022, 1, 1, 9, 30, 45, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "2022-01-01T12:30:45.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Unspecified) }; + { "2022-01-01T12:30:45.123", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Unspecified }; yield return new object[] - { "2022-01-01T12:30:45.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc) }; + { "2022-01-01T12:30:45.123Z", new DateTime(2022, 1, 1, 12, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Utc }; yield return new object[] - { "2022-01-01T12:30:45.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "2022-01-01T12:30:45.123+03", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "2022-01-01T12:30:45.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "2022-01-01T12:30:45.123+0300", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; yield return new object[] - { "2022-01-01T12:30:45.123+03:00", new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Local) }; + { "2022-01-01T12:30:45.123+03:00", 
new DateTime(2022, 1, 1, 9, 30, 45, 123, DateTimeKind.Utc), DateTimeKind.Local }; } } From 630aeadb6730dd1b5933d550f696b8584e3a4d31 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 18 Nov 2024 17:39:10 +0000 Subject: [PATCH 23/86] CA-402326: Fetch SM records from the pool to avoid race Previously the SM feature check was done in two parts, fetch all the SM ref from the coordinator, and then fetch their information such as types and features from the coordinator basedon the refs. This can cause race conditions, where the previously fetched refs might have been deleted when we fetch the SM features. The deletion might happen due to the way SM registeration works for shared SRs such as GFS2, where each `PBD.plug` will trigger a deregister of existing SM and re-register (which will delete existing SMs), and create another (very likely) identical SM. To avoid this race, instead of fetch SM refs and their features separately, do this in one go so we get a consistent snapshot of the db state. Also add a bit more debugging messages. Signed-off-by: Vincent Liu --- ocaml/xapi/xapi_pool.ml | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index eb716ce766e..3a7dee78735 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -840,6 +840,10 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = ) in let assert_sm_features_compatible () = + debug + "%s Checking whether SM features on the joining host is compatible with \ + the pool" + __FUNCTION__ ; (* We consider the case where the existing pool has FOO/m, and the candidate having FOO/n, where n >= m, to be compatible. Not vice versa. *) let features_compatible coor_features candidate_features = @@ -847,15 +851,16 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = the other way around. 
*) Smint.compat_features coor_features candidate_features = coor_features in - - let master_sms = Client.SM.get_all ~rpc ~session_id in + let pool_sms = Client.SM.get_all_records ~rpc ~session_id in List.iter - (fun sm -> - let master_sm_type = Client.SM.get_type ~rpc ~session_id ~self:sm in + (fun (sm_ref, sm_rec) -> + let pool_sm_type = sm_rec.API.sM_type in + debug "%s Checking SM %s of name %s in the pool" __FUNCTION__ + (Ref.string_of sm_ref) sm_rec.sM_name_label ; let candidate_sm_ref, candidate_sm_rec = match Db.SM.get_records_where ~__context - ~expr:(Eq (Field "type", Literal master_sm_type)) + ~expr:(Eq (Field "type", Literal pool_sm_type)) with | [(sm_ref, sm_rec)] -> (sm_ref, sm_rec) @@ -864,25 +869,24 @@ Api_errors.( Server_error ( pool_joining_sm_features_incompatible - , [Ref.string_of sm; ""] + , [Ref.string_of sm_ref; ""] ) ) in - let coor_sm_features = - Client.SM.get_features ~rpc ~session_id ~self:sm - in + let pool_sm_features = sm_rec.sM_features in + let candidate_sm_features = candidate_sm_rec.API.sM_features in - if not (features_compatible coor_sm_features candidate_sm_features) then + if not (features_compatible pool_sm_features candidate_sm_features) then raise Api_errors.( Server_error ( pool_joining_sm_features_incompatible - , [Ref.string_of sm; Ref.string_of candidate_sm_ref] + , [Ref.string_of sm_ref; Ref.string_of candidate_sm_ref] ) ) ) - master_sms + pool_sms in (* call pre-join asserts *) From 21d6773291fce4e3205dcb8fea18a2fc87c31c60 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Wed, 20 Nov 2024 09:36:20 +0000 Subject: [PATCH 24/86] Minimize xenstore accesses during domid-to-uuid lookups Bake in assumptions that have been constant ever since xenstore was created: getdomainpath always returns "/local/domain/<domid>", /local/domain/<domid>/vm returns "/vm/<uuid>", so there's no need to look up that path to get the uuid again. This reduces the number of xenstore accesses from 3 to 1
with no functional change. As suggested in: https://github.com/xapi-project/xen-api/pull/6068#pullrequestreview-2446644349 Signed-off-by: Andrii Sultanov --- ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml | 13 +++++++++--- ocaml/xenopsd/xc/xenops_helpers.ml | 20 +++++++++++++------ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml b/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml index c7dab55ac94..bd31674a03a 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml @@ -138,9 +138,16 @@ let generate_netdev_dss () = let uuid_of_domid domid = try Xenstore.with_xs (fun xs -> - let vm = xs.Xenstore.Xs.getdomainpath domid ^ "/vm" in - let vm_dir = xs.Xenstore.Xs.read vm in - xs.Xenstore.Xs.read (vm_dir ^ "/uuid") + let vm_uuid_path = + Printf.sprintf "/local/domain/%d/vm" domid + |> xs.Xenstore.Xs.read + |> String.split_on_char '/' + in + match vm_uuid_path with + | [_; _; uuid] -> + uuid + | _ -> + raise (Invalid_argument "Incorrect xenstore node") ) with e -> fail "Failed to find uuid corresponding to domid: %d (%s)" domid diff --git a/ocaml/xenopsd/xc/xenops_helpers.ml b/ocaml/xenopsd/xc/xenops_helpers.ml index 602ef72d40f..383219dd602 100644 --- a/ocaml/xenopsd/xc/xenops_helpers.ml +++ b/ocaml/xenopsd/xc/xenops_helpers.ml @@ -28,12 +28,20 @@ exception Domain_not_found let uuid_of_domid ~xs domid = try - let vm = xs.Xs.getdomainpath domid ^ "/vm" in - let vm_dir = xs.Xs.read vm in - match Uuidx.of_string (xs.Xs.read (vm_dir ^ "/uuid")) with - | Some uuid -> - uuid - | None -> + let vm_uuid_path = + Printf.sprintf "/local/domain/%d/vm" domid + |> xs.Xs.read + |> String.split_on_char '/' + in + match vm_uuid_path with + | [_; _; uuid] -> ( + match Uuidx.of_string uuid with + | Some uuid -> + uuid + | None -> + raise Domain_not_found + ) + | _ -> raise Domain_not_found with _ -> raise Domain_not_found From 228071ae9896941bec3265f9fd0e45a38e6b9fa9 Mon Sep 17 
00:00:00 2001 From: Andrii Sultanov Date: Mon, 18 Nov 2024 15:47:51 +0000 Subject: [PATCH 25/86] CP-52524 - dbsync_slave: stop calculating boot time ourselves Completes the 15+ years old TODO, at the expense of removing an ultimate example of a "not invented here" attitude. Signed-off-by: Andrii Sultanov --- ocaml/xapi/dbsync_slave.ml | 27 +++++++++++++++------------ quality-gate.sh | 2 +- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 942d3081071..80793c06838 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -63,21 +63,24 @@ let create_localhost ~__context info = in () -(* TODO cat /proc/stat for btime ? *) let get_start_time () = try - debug "Calculating boot time..." ; - let now = Unix.time () in - let uptime = Unixext.string_of_file "/proc/uptime" in - let uptime = String.trim uptime in - let uptime = String.split ' ' uptime in - let uptime = List.hd uptime in - let uptime = float_of_string uptime in - let boot_time = Date.of_unix_time (now -. uptime) in - debug " system booted at %s" (Date.to_rfc3339 boot_time) ; - boot_time + match + Unixext.string_of_file "/proc/stat" + |> String.trim + |> String.split '\n' + |> List.find (fun s -> String.starts_with ~prefix:"btime" s) + |> String.split ' ' + with + | _ :: btime :: _ -> + let boot_time = Date.of_unix_time (float_of_string btime) in + debug "%s: system booted at %s" __FUNCTION__ (Date.to_rfc3339 boot_time) ; + boot_time + | _ -> + failwith "Couldn't parse /proc/stat" with e -> - debug "Calculating boot time failed with '%s'" (ExnHelper.string_of_exn e) ; + debug "%s: Calculating boot time failed with '%s'" __FUNCTION__ + (ExnHelper.string_of_exn e) ; Date.epoch (* not sufficient just to fill in this data on create time [Xen caps may change if VT enabled in BIOS etc.] 
*) diff --git a/quality-gate.sh b/quality-gate.sh index db8444b53e0..b1d170041f1 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -3,7 +3,7 @@ set -e list-hd () { - N=294 + N=293 LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$LIST_HD" -eq "$N" ]; then echo "OK counted $LIST_HD List.hd usages" From f0c9b4ce8c22302a213a64ef20d75ed78b7a06cf Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 19 Nov 2024 09:45:05 +0000 Subject: [PATCH 26/86] CP-52524: Generate an alert when various host kernel taints are set Issue an alert about a broken host kernel if bits 4, 5, 7, 9, or 14 are set in /proc/sys/kernel/tainted, indicating some kind of error was encountered and the future behaviour of the kernel might not be predictable or safe anymore (though it generally should reasonably recover). Only one alert per tainted bit per boot can be present (more than one can be issued, if the user dismissed the alerts and restarted the toolstack). Distinguish between Major (4,5,7 - these are all things that might cause a host crash, but are unlikely to corrupt whatever data has been written out) and Warning (9, 14 - might be a concern and could be raised to Support but usually are not severe enough to crash the host) levels of errors as suggested by the Foundations team. This should serve as an indicator during issue investigation to look for the cause of the taint. 
Signed-off-by: Andrii Sultanov --- ocaml/xapi-consts/api_messages.ml | 6 ++ ocaml/xapi/xapi_host.ml | 75 ++++++++++++++++++++++ ocaml/xapi/xapi_host.mli | 2 + ocaml/xapi/xapi_periodic_scheduler_init.ml | 7 ++ 4 files changed, 90 insertions(+) diff --git a/ocaml/xapi-consts/api_messages.ml b/ocaml/xapi-consts/api_messages.ml index ff436199a76..d5215415c15 100644 --- a/ocaml/xapi-consts/api_messages.ml +++ b/ocaml/xapi-consts/api_messages.ml @@ -360,6 +360,12 @@ let host_internal_certificate_expiring_07 = let failed_login_attempts = addMessage "FAILED_LOGIN_ATTEMPTS" 3L +let kernel_is_broken which = + addMessage ("HOST_KERNEL_ENCOUNTERED_ERROR_" ^ which) 2L + +let kernel_is_broken_warning which = + addMessage ("HOST_KERNEL_ENCOUNTERED_WARNING_" ^ which) 3L + let tls_verification_emergency_disabled = addMessage "TLS_VERIFICATION_EMERGENCY_DISABLED" 3L diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 9f84923fe2e..377d97d22fe 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -2923,6 +2923,81 @@ let emergency_reenable_tls_verification ~__context = Helpers.touch_file Constants.verify_certificates_path ; Db.Host.set_tls_verification_enabled ~__context ~self ~value:true +(** Issue an alert if /proc/sys/kernel/tainted indicates particular kernel + errors. Will send only one alert per reboot *) +let alert_if_kernel_broken = + let __context = Context.make "host_kernel_error_alert_startup_check" in + (* Only add an alert if + (a) an alert wasn't already issued for the currently booted kernel *) + let possible_alerts = + ref + ( lazy + ((* Check all the alerts since last reboot. 
Only done once at toolstack + startup, we track if alerts have been issued afterwards internally *) + let self = Helpers.get_localhost ~__context in + let boot_time = + Db.Host.get_other_config ~__context ~self + |> List.assoc "boot_time" + |> float_of_string + in + let all_alerts = + [ + (* processor reported a Machine Check Exception (MCE) *) + (4, Api_messages.kernel_is_broken "MCE") + ; (* bad page referenced or some unexpected page flags *) + (5, Api_messages.kernel_is_broken "BAD_PAGE") + ; (* kernel died recently, i.e. there was an OOPS or BUG *) + (7, Api_messages.kernel_is_broken "BUG") + ; (* kernel issued warning *) + (9, Api_messages.kernel_is_broken_warning "WARN") + ; (* soft lockup occurred *) + (14, Api_messages.kernel_is_broken_warning "SOFT_LOCKUP") + ] + in + all_alerts + |> List.filter (fun (_, alert_message) -> + let alert_already_issued_for_this_boot = + Helpers.call_api_functions ~__context (fun rpc session_id -> + Client.Client.Message.get_all_records ~rpc ~session_id + |> List.exists (fun (_, record) -> + record.API.message_name = fst alert_message + && API.Date.is_later + ~than:(API.Date.of_unix_time boot_time) + record.API.message_timestamp + ) + ) + in + alert_already_issued_for_this_boot + ) + ) + ) + in + (* and (b) if we found a problem *) + fun ~__context -> + let self = Helpers.get_localhost ~__context in + possible_alerts := + Lazy.from_val + (Lazy.force !possible_alerts + |> List.filter (fun (alert_bit, alert_message) -> + let is_bit_tainted = + Unixext.string_of_file "/proc/sys/kernel/tainted" + |> int_of_string + in + let is_bit_tainted = (is_bit_tainted lsr alert_bit) land 1 = 1 in + if is_bit_tainted then ( + let host = Db.Host.get_name_label ~__context ~self in + let body = + Printf.sprintf "%s" host + in + Xapi_alert.add ~msg:alert_message ~cls:`Host + ~obj_uuid:(Db.Host.get_uuid ~__context ~self) + ~body ; + false (* alert issued, remove from the list *) + ) else + true (* keep in the list, alert can be issued later *) + ) 
+ ) + let alert_if_tls_verification_was_emergency_disabled ~__context = let tls_verification_enabled_locally = Stunnel_client.get_verify_by_default () diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index c303ee69597..ec2e3b0fcfd 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -539,6 +539,8 @@ val set_numa_affinity_policy : val emergency_disable_tls_verification : __context:Context.t -> unit +val alert_if_kernel_broken : __context:Context.t -> unit + val alert_if_tls_verification_was_emergency_disabled : __context:Context.t -> unit diff --git a/ocaml/xapi/xapi_periodic_scheduler_init.ml b/ocaml/xapi/xapi_periodic_scheduler_init.ml index 5b49ebcde50..d74b349e240 100644 --- a/ocaml/xapi/xapi_periodic_scheduler_init.ml +++ b/ocaml/xapi/xapi_periodic_scheduler_init.ml @@ -106,6 +106,13 @@ let register ~__context = (Xapi_periodic_scheduler.Periodic freq) freq Xapi_pool.alert_failed_login_attempts ) ; + Xapi_periodic_scheduler.add_to_queue "broken_kernel" + (Xapi_periodic_scheduler.Periodic 600.) 600. (fun () -> + Server_helpers.exec_with_new_task + "Periodic alert if the running kernel is broken in some serious way." + (fun __context -> Xapi_host.alert_if_kernel_broken ~__context + ) + ) ; Xapi_periodic_scheduler.add_to_queue "Period alert if TLS verification emergency disabled" (Xapi_periodic_scheduler.Periodic 600.) 600. (fun () -> From aaabb6cad3a01a5418344d95953e176ffd7d9f8c Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 22 Nov 2024 09:14:00 +0000 Subject: [PATCH 27/86] xenopsd: Optimize lazy evaluation The manual notes that `Lazy.from_fun` "should only be used if the function f is already defined. In particular it is always less efficient to write `from_fun (fun () -> expr)` than `lazy expr`. 
So, replace `Lazy.from_fun` with `lazy` in this particular case Compare the lambda dump for `lazy (fun () -> [| 1;2;3 |] |> Array.map (fun x -> x+1))`: ``` (seq (let (l/269 = (function param/319[int] (apply (field_imm 12 (global Stdlib__Array!)) (function x/318[int] : int (+ x/318 1)) (makearray[int] 1 2 3)))) (setfield_ptr(root-init) 0 (global Main!) l/269)) 0) ``` with the lambda dump of the `let x = Lazy.from_fun (fun () -> [| 1;2;3 |] |> Array.map (fun x -> x+1))`: ``` (seq (let (x/269 = (apply (field_imm 5 (global Stdlib__Lazy!)) (function param/332[int] (apply (field_imm 12 (global Stdlib__Array!)) (function x/331[int] : int (+ x/331 1)) (makearray[int] 1 2 3))))) (setfield_ptr(root-init) 0 (global Main!) x/269)) 0) ``` See: https://patricoferris.github.io/js_of_ocamlopt/#code=bGV0IGwgPSBsYXp5IChmdW4gKCkgLT4gW3wgMTsyOzMgfF0gfD4gQXJyYXkubWFwIChmdW4geCAtPiB4KzEpKQoKbGV0IHggPSBMYXp5LmZyb21fZnVuIChmdW4gKCkgLT4gW3wgMTsyOzMgfF0gfD4gQXJyYXkubWFwIChmdW4geCAtPiB4KzEpKQ%3D%3D Signed-off-by: Andrii Sultanov --- ocaml/xenopsd/xc/domain.ml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 7b31011aabe..d33fc482e5f 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -835,12 +835,12 @@ let create_channels ~xc uuid domid = let numa_hierarchy = let open Xenctrlext in let open Topology in - Lazy.from_fun (fun () -> - let xcext = get_handle () in - let distances = (numainfo xcext).distances in - let cpu_to_node = cputopoinfo xcext |> Array.map (fun t -> t.node) in - NUMA.make ~distances ~cpu_to_node - ) + lazy + (let xcext = get_handle () in + let distances = (numainfo xcext).distances in + let cpu_to_node = cputopoinfo xcext |> Array.map (fun t -> t.node) in + NUMA.make ~distances ~cpu_to_node + ) let numa_mutex = Mutex.create () From 860843f5175e1edaeb3d7897ce931f6edd62c457 Mon Sep 17 00:00:00 2001 From: Colin James Date: Fri, 22 Nov 2024 09:26:09 +0000 Subject: 
[PATCH 28/86] CA-402654: Partially revert 3e2e970af A module binding appeared to be unused but was being evaluated for its effect alone. We reintroduce it here and don't bind a name. Signed-off-by: Colin James --- ocaml/xapi/rbac.ml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ocaml/xapi/rbac.ml b/ocaml/xapi/rbac.ml index 2b311a7e56d..2a8555cc9a9 100644 --- a/ocaml/xapi/rbac.ml +++ b/ocaml/xapi/rbac.ml @@ -243,6 +243,12 @@ let assert_permission_name ~__context ~permission = let assert_permission ~__context ~permission = assert_permission_name ~__context ~permission:permission.role_name_label +(* Populates assert_permission_fn on behalf of TaskHelper to + avoid a dependency cycle. *) +let () = + if !TaskHelper.rbac_assert_permission_fn = None then + TaskHelper.rbac_assert_permission_fn := Some assert_permission + let has_permission_name ~__context ~permission = let session_id = get_session_of_context ~__context ~permission in is_access_allowed ~__context ~session_id ~permission From 40834941413714b4add385844fc51f1f77feb1ba Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 21 Nov 2024 17:59:16 +0000 Subject: [PATCH 29/86] CA-402263, xapi_sr_operations: don't include all API storage operations in all_ops Otherwise the allowed_operations field in SRs can contain newly added operations, which can break cross-pool migrations when using `xe` Partially reverts 723a4983ab1555b24222b417350cddebbf6dc1f5 Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/xapi_sr_operations.ml | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/xapi_sr_operations.ml b/ocaml/xapi/xapi_sr_operations.ml index 56f4c466ce6..eef09a7d9eb 100644 --- a/ocaml/xapi/xapi_sr_operations.ml +++ b/ocaml/xapi/xapi_sr_operations.ml @@ -26,7 +26,32 @@ open Client open Record_util -let all_ops = API.storage_operations__all +(* This is a subset of the API enumeration. 
Not all values can be included + because older versions which don't have them are unable to migrate VMs to + the versions that have new fields in allowed operations *) +let all_ops : API.storage_operations_set = + [ + `scan + ; `destroy + ; `forget + ; `plug + ; `unplug + ; `vdi_create + ; `vdi_destroy + ; `vdi_resize + ; `vdi_clone + ; `vdi_snapshot + ; `vdi_mirror + ; `vdi_enable_cbt + ; `vdi_disable_cbt + ; `vdi_data_destroy + ; `vdi_list_changed_blocks + ; `vdi_set_on_boot + ; `vdi_introduce + ; `update + ; `pbd_create + ; `pbd_destroy + ] (* This list comes from https://github.com/xenserver/xen-api/blob/tampa-bugfix/ocaml/xapi/xapi_sr_operations.ml#L36-L38 *) let all_rpu_ops : API.storage_operations_set = From 3f59ae15f24aaa27b3e9313bcfda856215e43262 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Mon, 25 Nov 2024 12:00:00 +0100 Subject: [PATCH 30/86] NUMA docs: Fix typos and extend the intro for the best-effort mode Signed-off-by: Bernhard Kaindl --- doc/content/toolstack/features/NUMA/index.md | 74 +++++++++++++------- 1 file changed, 47 insertions(+), 27 deletions(-) diff --git a/doc/content/toolstack/features/NUMA/index.md b/doc/content/toolstack/features/NUMA/index.md index ee7f52c98fe..5f4d3fe41e8 100644 --- a/doc/content/toolstack/features/NUMA/index.md +++ b/doc/content/toolstack/features/NUMA/index.md @@ -49,7 +49,7 @@ There is also I/O NUMA where a cost is similarly associated to where a PCIe is p NUMA does have advantages though: if each node accesses only its local memory, then each node can independently achieve maximum throughput.
-For best performance we should: +For best performance, we should: - minimize the amount of interconnect bandwidth we are using - run code that accesses memory allocated on the closest NUMA node - maximize the number of NUMA nodes that we use in the system as a whole @@ -62,39 +62,59 @@ The Xen scheduler supports 2 kinds of constraints: * hard pinning: a vCPU may only run on the specified set of pCPUs and nowhere else * soft pinning: a vCPU is *preferably* run on the specified set of pCPUs, but if they are all busy then it may run elsewhere -The former is useful if you want strict separation, but it can potentially leave part of the system idle while another part is bottlenecked with lots of vCPUs all competing for the same limited set of pCPUs. +Hard pinning can be used to partition the system. But, it can potentially leave part of the system idle while another part is bottlenecked by many vCPUs competing for the same limited set of pCPUs. -Xen does not migrate workloads between NUMA nodes on its own (the Linux kernel does), although it is possible to achieve a similar effect with explicit migration. -However migration introduces additional delays and is best avoided for entire VMs. +Xen does not migrate workloads between NUMA nodes on its own (the Linux kernel can). Although, it is possible to achieve a similar effect with explicit migration. +However, migration introduces additional delays and is best avoided for entire VMs. -The latter (soft pinning) is preferred: running a workload now, even on a potentially suboptimal pCPU (higher NUMA latency) is still better than not running it at all and waiting until a pCPU is freed up. +Therefore, soft pinning is preferred: Running on a potentially suboptimal pCPU that uses remote memory could still be better than not running it at all until a pCPU is free to run it. 
-Xen will also allocate memory for the VM according to the vCPU (soft) pinning: if the vCPUs are pinned only to NUMA nodes A and B, then it will allocate the VM's memory from NUMA nodes A and B (in a round-robin way, resulting in interleaving). +Xen will also allocate memory for the VM according to the vCPU (soft) pinning: If the vCPUs are pinned to NUMA nodes A and B, Xen allocates memory from NUMA nodes A and B in a round-robin way, resulting in interleaving. -By default (no pinning) it will interleave memory from all NUMA nodes, which provides average performance, but individual tasks' performance may be significantly higher or lower depending on which NUMA node the application may have "landed" on. -Furthermore restarting processes will speed them up or slow them down as address space randomization picks different memory regions inside a VM. +### Current default: No vCPU pinning + +By default, when no vCPU pinning is used, Xen interleaves memory from all NUMA nodes. This averages the memory performance, but individual tasks' performance may be significantly higher or lower depending on which NUMA node the application may have "landed" on. +As a result, restarting processes will speed them up or slow them down as address space randomization picks different memory regions inside a VM. + +This uses the memory bandwidth of all memory controllers and distributes the load across all nodes. +However, the memory latency is higher as the NUMA interconnects are used for most memory accesses and vCPU synchronization within the Domains. Note that this is not the worst case: the worst case would be for memory to be allocated on one NUMA node, but the vCPU always running on the furthest away NUMA node. ## Best effort NUMA-aware memory allocation for VMs -By default Xen stripes the VM's memory accross all NUMA nodes of the host, which means that every VM has to go through all the interconnects. 
+ +### Summary + +The best-effort mode attempts to fit Domains into NUMA nodes and to balance memory usage. +It soft-pins Domains on the NUMA node with the most available memory when adding the Domain. +Memory is currently allocated when booting the VM (or while constructing the resuming VM). + +Parallel boot issue: Memory is not pre-allocated on creation, but allocated during boot. +The result is that parallel VM creation and boot can exhaust the memory of NUMA nodes. + +### Goals + +By default, Xen stripes the VM's memory across all NUMA nodes of the host, which means that every VM has to go through all the interconnects. The goal here is to find a better allocation than the default, not necessarily an optimal allocation. -An optimal allocation would require knowing what VMs you would start/create in the future, and planning across hosts too. +An optimal allocation would require knowing what VMs you would start/create in the future, and planning across hosts. +This allows the host to use all NUMA nodes to take advantage of the full memory bandwidth available on the pool hosts. -Overall we want to balance the VMs across NUMA nodes, such that we use all NUMA nodes to take advantage of the maximum memory bandwidth available on the system. +Overall, we want to balance the VMs across NUMA nodes, such that we use all NUMA nodes to take advantage of the maximum memory bandwidth available on the system. For now this proposed balancing will be done only by balancing memory usage: always heuristically allocating VMs on the NUMA node that has the most available memory. -Note that this allocation has a race condition for now when multiple VMs are booted in parallel, because we don't wait until Xen has constructed the domain for each one (that'd serialize domain construction, which is currently parallel). 
+For now, this allocation has a race condition: This happens when multiple VMs are booted in parallel, because we don't wait until Xen has constructed the domain for each one (that'd serialize domain construction, which is currently parallel). This may be improved in the future by having an API to query Xen where it has allocated the memory, and to explicitly ask it to place memory on a given NUMA node (instead of best_effort). If a VM doesn't fit into a single node then it is not so clear what the best approach is. One criteria to consider is minimizing the NUMA distance between the nodes chosen for the VM. -Large NUMA systems may not be fully connected in a mesh requiring multiple hops to each a node, or even have assymetric links, or links with different bitwidth. -These tradeoff should be approximatively reflected in the ACPI SLIT tables, as a matrix of distances between nodes. +Large NUMA systems may not be fully connected in a mesh requiring multiple hops to each a node, or even have asymmetric links, or links with different bandwidth. +The specific NUMA topology is provided by the ACPI SLIT table as the matrix of distances between nodes. It is possible that 3 NUMA nodes have a smaller average/maximum distance than 2, so we need to consider all possibilities. For N nodes there would be 2^N possibilities, so [Topology.NUMA.candidates] limits the number of choices to 65520+N (full set of 2^N possibilities for 16 NUMA nodes, and a reduced set of choices for larger systems). +### Implementation + [Topology.NUMA.candidates] is a sorted sequence of node sets, in ascending order of maximum/average distances. Once we've eliminated the candidates not suitable for this VM (that do not have enough total memory/pCPUs) we are left with a monotonically increasing sequence of nodes. There are still multiple possibilities with same average distance. @@ -110,19 +130,19 @@ See page 13 in [^AMD_numa] for a diagram of an AMD Opteron 6272 system. 
* Booting multiple VMs in parallel will result in potentially allocating both on the same NUMA node (race condition) * When we're about to run out of host memory we'll fall back to striping memory again, but the soft affinity mask won't reflect that (this needs an API to query Xen on where it has actually placed the VM, so we can fix up the mask accordingly) -* XAPI is not aware of NUMA balancing across a pool, and choses hosts purely based on total amount of free memory, even if a better NUMA placement could be found on another host +* XAPI is not aware of NUMA balancing across a pool, and chooses NUMA nodes purely based on amount of free memory on the NUMA nodes of the host, even if a better NUMA placement could be found on another host * Very large (>16 NUMA nodes) systems may only explore a limited number of choices (fit into a single node vs fallback to full interleaving) * The exact VM placement is not yet controllable * Microbenchmarks with a single VM on a host show both performance improvements and regressions on memory bandwidth usage: previously a single VM may have been able to take advantage of the bandwidth of both NUMA nodes if it happened to allocate memory from the right places, whereas now it'll be forced to use just a single node. As soon as you have more than 1 VM that is busy on a system enabling NUMA balancing should almost always be an improvement though. -* it is not supported to combine hard vCPU masks with soft affinity: if hard affinities are used then no NUMA scheduling is done by the toolstack and we obey exactly what the user has asked for with hard affinities. +* It is not supported to combine hard vCPU masks with soft affinity: if hard affinities are used, then no NUMA scheduling is done by the toolstack, and we obey exactly what the user has asked for with hard affinities. This shouldn't affect other VMs since the memory used by hard-pinned VMs will still be reflected in overall less memory available on individual NUMA nodes. 
* Corner case: the ACPI standard allows certain NUMA nodes to be unreachable (distance `0xFF` = `-1` in the Xen bindings). This is not supported and will cause an exception to be raised. If this is an issue in practice the NUMA matrix could be pre-filtered to contain only reachable nodes. - NUMA nodes with 0 CPUs *are* accepted (it can result from hard affinity pinnings) + NUMA nodes with 0 CPUs *are* accepted (it can result from hard affinity pinning) * NUMA balancing is not considered during HA planning -* Dom0 is a single VM that needs to communicate with all other VMs, so NUMA balancing is not applied to it (we'd need to expose NUMA topology to the Dom0 kernel so it can better allocate processes) +* Dom0 is a single VM that needs to communicate with all other VMs, so NUMA balancing is not applied to it (we'd need to expose NUMA topology to the Dom0 kernel, so it can better allocate processes) * IO NUMA is out of scope for now ## XAPI datamodel design @@ -139,9 +159,9 @@ Meaning of the policy: * `best_effort`: the algorithm described in this document, where soft pinning is used to achieve better balancing and lower latency * `default_policy`: when the admin hasn't expressed a preference -* Currently `default_policy` is treated as `any`, but the admin can change it, and then the system will remember that change across upgrades. +* Currently, `default_policy` is treated as `any`, but the admin can change it, and then the system will remember that change across upgrades. If we didn't have a `default_policy` then changing the "default" policy on an upgrade would be tricky: we either risk overriding an explicit choice of the admin, or existing installs cannot take advantage of the improved performance from `best_effort` -* Future XAPI versions may change `default_policy` to mean `best_effort`. +* Future, XAPI versions may change `default_policy` to mean `best_effort`. Admins can still override it to `any` if they wish on a host by host basis. 
It is not expected that users would have to change `best_effort`, unless they run very specific workloads, so a pool level control is not provided at this moment. @@ -149,7 +169,7 @@ It is not expected that users would have to change `best_effort`, unless they ru There is also no separate feature flag: this host flag acts as a feature flag that can be set through the API without restarting the toolstack. Although obviously only new VMs will benefit. -Debugging the allocator is done by running `xl vcpu-list` and investigating the soft pinning masks, and by analyzing xensource.log. +Debugging the allocator is done by running `xl vcpu-list` and investigating the soft pinning masks, and by analyzing `xensource.log`. ### Xenopsd implementation @@ -166,18 +186,18 @@ This avoids exponential state space explosion on very large systems (>16 NUMA no * [Topology.NUMA.choose] will choose one NUMA node deterministically, while trying to keep overall NUMA node usage balanced. * [Domain.numa_placement] builds a [NUMARequest] and uses the above [Topology] and [Softaffinity] functions to compute and apply a plan. -We used to have a `xenopsd.conf` configuration option to enable numa placement, for backwards compatibility this is still supported, but only if the admin hasn't set an explicit policy on the Host. +We used to have a `xenopsd.conf` configuration option to enable NUMA placement, for backwards compatibility this is still supported, but only if the admin hasn't set an explicit policy on the Host. It is best to remove the experimental `xenopsd.conf` entry though, a future version may completely drop it. Tests are in [test_topology.ml] which checks balancing properties and whether the plan has improved best/worst/average-case access times in a simulated test based on 2 predefined NUMA distance matrixes (one from Intel and one from an AMD system). 
## Future work -* enable 'best_effort' mode by default once more testing has been done -* an API to query Xen where it has actually allocated the VM's memory. - Currently only an `xl debug-keys` interface exists which is not supported in production as it can result in killing the host via the watchdog, and is not a proper API, but a textual debug output with no stability guarantees. -* more host policies (e.g. `strict`). - Requires the XAPI pool scheduler to be NUMA aware and consider it as part of chosing hosts. +* Enable 'best_effort' mode by default once more testing has been done +* Add an API to query Xen for the NUMA node memory placement (where it has actually allocated the VM's memory). + Currently, only the `xl debug-keys` interface exists which is not supported in production as it can result in killing the host via the watchdog, and is not a proper API, but a textual debug output with no stability guarantees. +* More host policies, e.g. `strict`. + Requires the XAPI pool scheduler to be NUMA aware and consider it as part of choosing hosts. * VM level policy that can set a NUMA affinity index, mapped to a NUMA node modulo NUMA nodes available on the system (this is needed so that after migration we don't end up trying to allocate vCPUs to a non-existent NUMA node) * VM level anti-affinity rules for NUMA placement (can be achieved by setting unique NUMA affinity indexes) From fc6919e193ffbd2456e030d6eff89441a24e7d43 Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 16 Oct 2024 13:58:51 +0100 Subject: [PATCH 31/86] CP-51772: Remove traceparent from Http.Request This is a breaking change to http-lib that removes the special treatment of traceparent from the Http.Request module record. In order to more easily include other tracing-related headers in future we propose that we create an aggregate data structure (named something like "trace context") which can (optionally) contain traceparent, baggage, tracestate, etc. 
and then we "inject" those into the additional_headers of the request we wish to endow with tracing-related information. On the receiving end, a similar "extract" function will perform the dual operation, extracting tracing context from the request: Generally, for some carrier, we intend to provide two operations: val inject : context -> carrier -> carrier val extract : carrier -> context The "carrier" is whatever transport is being used to propagate tracing across service boundaries. In our case, HTTP requests. Signed-off-by: Colin James --- ocaml/libs/http-lib/http.ml | 67 +++++++++++++--------------- ocaml/libs/http-lib/http.mli | 4 -- ocaml/libs/http-lib/http_svr.ml | 2 - ocaml/libs/http-lib/xmlrpc_client.ml | 15 ++++--- 4 files changed, 38 insertions(+), 50 deletions(-) diff --git a/ocaml/libs/http-lib/http.ml b/ocaml/libs/http-lib/http.ml index a19745576ce..75f8a5880b7 100644 --- a/ocaml/libs/http-lib/http.ml +++ b/ocaml/libs/http-lib/http.ml @@ -132,8 +132,6 @@ module Hdr = struct let location = "location" - let traceparent = "traceparent" - let hsts = "strict-transport-security" end @@ -522,7 +520,6 @@ module Request = struct ; mutable close: bool ; additional_headers: (string * string) list ; body: string option - ; traceparent: string option } [@@deriving rpc] @@ -546,12 +543,11 @@ module Request = struct ; close= true ; additional_headers= [] ; body= None - ; traceparent= None } let make ?(frame = false) ?(version = "1.1") ?(keep_alive = true) ?accept ?cookie ?length ?auth ?subtask_of ?body ?(headers = []) ?content_type - ?host ?(query = []) ?traceparent ~user_agent meth path = + ?host ?(query = []) ~user_agent meth path = { empty with version @@ -570,7 +566,6 @@ module Request = struct ; body ; accept ; query - ; traceparent } let get_version x = x.version @@ -582,8 +577,7 @@ module Request = struct Printf.sprintf "{ frame = %b; method = %s; uri = %s; query = [ %s ]; content_length = [ \ %s ]; transfer encoding = %s; version = %s; cookie = [ %s ]; task 
= %s; \ - subtask_of = %s; content-type = %s; host = %s; user_agent = %s; \ - traceparent = %s }" + subtask_of = %s; content-type = %s; host = %s; user_agent = %s; }" x.frame (string_of_method_t x.m) x.uri (kvpairs x.query) (Option.fold ~none:"" ~some:Int64.to_string x.content_length) (Option.value ~default:"" x.transfer_encoding) @@ -593,7 +587,6 @@ module Request = struct (Option.value ~default:"" x.content_type) (Option.value ~default:"" x.host) (Option.value ~default:"" x.user_agent) - (Option.value ~default:"" x.traceparent) let to_header_list x = let kvpairs x = @@ -643,11 +636,6 @@ module Request = struct ~some:(fun x -> [Hdr.user_agent ^ ": " ^ x]) x.user_agent in - let traceparent = - Option.fold ~none:[] - ~some:(fun x -> [Hdr.traceparent ^ ": " ^ x]) - x.traceparent - in let close = [(Hdr.connection ^ ": " ^ if x.close then "close" else "keep-alive")] in @@ -665,7 +653,6 @@ module Request = struct @ content_type @ host @ user_agent - @ traceparent @ close @ List.map (fun (k, v) -> k ^ ": " ^ v) x.additional_headers @@ -688,28 +675,34 @@ module Request = struct let frame_header = if x.frame then make_frame_header headers else "" in frame_header ^ headers ^ body - let traceparent_of req = - let open Tracing in - let ( let* ) = Option.bind in - let* traceparent = req.traceparent in - let* span_context = SpanContext.of_traceparent traceparent in - let span = Tracer.span_of_span_context span_context req.uri in - Some span - - let with_tracing ?attributes ~name req f = - let open Tracing in - let parent = traceparent_of req in - with_child_trace ?attributes parent ~name (fun (span : Span.t option) -> - match span with - | Some span -> - let traceparent = - Some (span |> Span.get_context |> SpanContext.to_traceparent) - in - let req = {req with traceparent} in - f req - | None -> - f req - ) + (* let traceparent_of req = *) + (* let open Tracing in *) + (* let ( let* ) = Option.bind in *) + (* let* traceparent = req.traceparent in *) + (* let* span_context = 
SpanContext.of_traceparent traceparent in *) + (* let span = Tracer.span_of_span_context span_context req.uri in *) + (* Some span *) + + (* let with_tracing ?attributes ~name req f = *) + (* let open Tracing in *) + (* let parent = traceparent_of req in *) + (* with_child_trace ?attributes parent ~name (fun (span : Span.t option) -> *) + (* match span with *) + (* | Some span -> *) + (* let traceparent = *) + (* Some (span |> Span.get_context |> SpanContext.to_traceparent) *) + (* in *) + (* let req = {req with traceparent} in *) + (* f req *) + (* | None -> *) + (* f req *) + (* ) *) + + let traceparent_of _ = None + + let with_tracing ?attributes ~name = + ignore (attributes, name) ; + Fun.flip ( @@ ) end module Response = struct diff --git a/ocaml/libs/http-lib/http.mli b/ocaml/libs/http-lib/http.mli index 3fbae8e4c6f..66557a76fe9 100644 --- a/ocaml/libs/http-lib/http.mli +++ b/ocaml/libs/http-lib/http.mli @@ -86,7 +86,6 @@ module Request : sig ; mutable close: bool ; additional_headers: (string * string) list ; body: string option - ; traceparent: string option } val rpc_of_t : t -> Rpc.t @@ -109,7 +108,6 @@ module Request : sig -> ?content_type:string -> ?host:string -> ?query:(string * string) list - -> ?traceparent:string -> user_agent:string -> method_t -> string @@ -229,8 +227,6 @@ module Hdr : sig val location : string - val traceparent : string - val hsts : string (** Header used for HTTP Strict Transport Security *) end diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index 54a8b96ba73..2240d811797 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -409,8 +409,6 @@ let read_request_exn ~proxy_seen ~read_timeout ~total_timeout ~max_length fd = {req with host= Some v} | k when k = Http.Hdr.user_agent -> {req with user_agent= Some v} - | k when k = Http.Hdr.traceparent -> - {req with traceparent= Some v} | k when k = Http.Hdr.connection && lowercase v = "close" -> {req with close= true} | k 
diff --git a/ocaml/libs/http-lib/xmlrpc_client.ml b/ocaml/libs/http-lib/xmlrpc_client.ml index 5bf43b0268c..f7390f8fb46 100644 --- a/ocaml/libs/http-lib/xmlrpc_client.ml +++ b/ocaml/libs/http-lib/xmlrpc_client.ml @@ -50,15 +50,16 @@ let connect ?session_id ?task_id ?subtask_of path = let xmlrpc ?frame ?version ?keep_alive ?task_id ?cookie ?length ?auth ?subtask_of ?query ?body ?(tracing = None) path = - let traceparent = - let open Tracing in - Option.map - (fun span -> Span.get_context span |> SpanContext.to_traceparent) - tracing - in + (* let traceparent = *) + (* let open Tracing in *) + (* Option.map *) + (* (fun span -> Span.get_context span |> SpanContext.to_traceparent) *) + (* tracing *) + (* in *) + ignore tracing ; let headers = Option.map (fun x -> [(Http.Hdr.task_id, x)]) task_id in Http.Request.make ~user_agent ?frame ?version ?keep_alive ?cookie ?headers - ?length ?auth ?subtask_of ?query ?body ?traceparent Http.Post path + ?length ?auth ?subtask_of ?query ?body Http.Post path (** Thrown when ECONNRESET is caught which suggests the remote crashed or restarted *) exception Connection_reset From fe66bc4f8e5f207c05603b5a711866361db76468 Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 16 Oct 2024 14:13:32 +0100 Subject: [PATCH 32/86] CP-51772: Remove external usage of traceparent Temporarily remove parts of the code that worked with requests directly, manipulating (or using) traceparents. 
Signed-off-by: Colin James --- ocaml/xapi-cli-server/xapi_cli.ml | 13 +++++++------ ocaml/xapi/context.ml | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/ocaml/xapi-cli-server/xapi_cli.ml b/ocaml/xapi-cli-server/xapi_cli.ml index bc2389d4c44..57f09e4aafc 100644 --- a/ocaml/xapi-cli-server/xapi_cli.ml +++ b/ocaml/xapi-cli-server/xapi_cli.ml @@ -190,12 +190,13 @@ let uninteresting_cmd_postfixes = ["help"; "-get"; "-list"] let exec_command req cmd s session args = let params = get_params cmd in let tracing = - Option.bind - Http.Request.(req.traceparent) - Tracing.SpanContext.of_traceparent - |> Option.map (fun span_context -> - Tracing.Tracer.span_of_span_context span_context (get_cmdname cmd) - ) + (* Option.bind *) + (* Http.Request.(req.traceparent) *) + (* Tracing.SpanContext.of_traceparent *) + (* |> Option.map (fun span_context -> *) + (* Tracing.Tracer.span_of_span_context span_context (get_cmdname cmd) *) + (* ) *) + None in let minimal = if List.mem_assoc "minimal" params then diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index 41faa238bd5..f2cb485e770 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -221,8 +221,8 @@ let parent_of_origin (origin : origin) span_name = let open Tracing in let ( let* ) = Option.bind in match origin with - | Http (req, _) -> - let* traceparent = req.Http.Request.traceparent in + | Http (_req, _) -> + let* traceparent = (* req.Http.Request.traceparent *) None in let* span_context = SpanContext.of_traceparent traceparent in let span = Tracer.span_of_span_context span_context span_name in Some span From 7b95bd6725a267833e142a528c3401db8e6ad88b Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 16 Oct 2024 15:56:41 +0100 Subject: [PATCH 33/86] CP-51772: Add TraceContext to Tracing Introduces a more general trace context record that will encapsulate the metadata of tracing that can be propagated across service boundaries. 
Signed-off-by: Colin James --- ocaml/libs/tracing/tracing.ml | 25 ++++++++++++++++++++++--- ocaml/libs/tracing/tracing.mli | 18 ++++++++++++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index 3f521f6f29c..cad2a8b2069 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -209,15 +209,34 @@ end = struct let compare = Int64.compare end +(* The context of a trace that can be propagated across service boundaries. *) +module TraceContext = struct + type traceparent = string + + type baggage = (string * string) list + + type t = {traceparent: traceparent option; baggage: baggage option} + + let empty = {traceparent= None; baggage= None} + + let with_traceparent traceparent ctx = {ctx with traceparent} + + let with_baggage baggage ctx = {ctx with baggage} + + let traceparent_of ctx = ctx.traceparent + + let baggage_of ctx = ctx.baggage +end + module SpanContext = struct type t = {trace_id: Trace_id.t; span_id: Span_id.t} [@@deriving rpcty] let context trace_id span_id = {trace_id; span_id} let to_traceparent t = - Printf.sprintf "00-%s-%s-01" - (Trace_id.to_string t.trace_id) - (Span_id.to_string t.span_id) + let tid = Trace_id.to_string t.trace_id in + let sid = Span_id.to_string t.span_id in + Printf.sprintf "00-%s-%s-01" tid sid let of_traceparent traceparent = let elements = String.split_on_char '-' traceparent in diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index 18b248cc881..5163a166002 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -78,6 +78,24 @@ module Trace_id : sig val to_string : t -> string end +module TraceContext : sig + type t + + val empty : t + + type traceparent = string + + type baggage = (string * string) list + + val with_traceparent : traceparent option -> t -> t + + val with_baggage : baggage option -> t -> t + + val traceparent_of : t -> traceparent option + + val 
baggage_of : t -> baggage option +end + module SpanContext : sig type t From e14981737948416c872caf6c99abf0959881f1cb Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 16 Oct 2024 15:58:10 +0100 Subject: [PATCH 34/86] CP-51772: Add Http Request Propagator In a new library, tracing_propagator, simple injection and extraction routines are provided for rewriting HTTP requests (Http.Request) to have trace-related information. It must be a new library as a cycle would be introduced if we attempted to make tracing depend on http-lib (as http-lib depends on tracing). Signed-off-by: Colin James --- ocaml/libs/tracing/dune | 5 ++ ocaml/libs/tracing/propagator.ml | 108 ++++++++++++++++++++++++++++++ ocaml/libs/tracing/propagator.mli | 23 +++++++ 3 files changed, 136 insertions(+) create mode 100644 ocaml/libs/tracing/propagator.ml create mode 100644 ocaml/libs/tracing/propagator.mli diff --git a/ocaml/libs/tracing/dune b/ocaml/libs/tracing/dune index 8c53962c579..71e5c7b7473 100644 --- a/ocaml/libs/tracing/dune +++ b/ocaml/libs/tracing/dune @@ -28,6 +28,11 @@ (preprocess (pps ppx_deriving_rpc))) +(library + (name tracing_propagator) + (modules propagator) + (libraries astring http-lib tracing)) + (test (name test_tracing) (modules test_tracing) diff --git a/ocaml/libs/tracing/propagator.ml b/ocaml/libs/tracing/propagator.ml new file mode 100644 index 00000000000..13c48bafce3 --- /dev/null +++ b/ocaml/libs/tracing/propagator.ml @@ -0,0 +1,108 @@ +(* + * Copyright (c) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + *) + +module type S = sig + type carrier + + val inject_into : Tracing.TraceContext.t -> carrier -> carrier + + val extract_from : carrier -> Tracing.TraceContext.t +end + +let ( let* ) = Option.bind + +let ( >> ) f g x = g (f x) + +let maybe f = function Some _ as o -> f o | _ -> Fun.id + +let[@tail_mod_cons] rec filter_append p xs ys = + match xs with + | [] -> + ys + | x :: xs when p x -> + x :: filter_append p xs ys + | _ :: xs -> + filter_append p xs ys + +module Http = struct + type carrier = Http.Request.t + + open struct + let hdr_traceparent = "traceparent" + + let hdr_baggage = "baggage" + end + + let alloc_assoc k kvs = + List.filter_map + (fun (key, value) -> if key = k then Some value else None) + kvs + |> function + | [] -> + None + | xs -> + Some xs + + let parse = + let open Astring.String in + let trim_pair (key, value) = (trim key, trim value) in + cuts ~sep:";" + >> List.map (cut ~sep:"=" >> Option.map trim_pair) + >> List.filter_map Fun.id + + let inject_into ctx req = + let open Tracing in + let traceparent = (hdr_traceparent, TraceContext.traceparent_of ctx) in + let baggage = + let encoded = + let encode = + List.map (fun (k, v) -> Printf.sprintf "%s=%s" k v) + >> String.concat ";" + in + TraceContext.baggage_of ctx |> Option.map encode + in + (hdr_baggage, encoded) + in + let entries = [traceparent; baggage] in + let filter_entries entries = + let tbl = Hashtbl.create 47 in + let record (k, v) = + match v with + | Some v -> + Hashtbl.replace tbl k () ; + Some (k, v) + | _ -> + None + in + let entries = List.filter_map record entries in + (entries, fst >> Hashtbl.mem tbl) + in + let entries, to_replace = filter_entries entries in + let headers = req.Http.Request.additional_headers in + let additional_headers = + filter_append (Fun.negate to_replace) headers entries + in + {req with additional_headers} + + let extract_from req = + let open Tracing in + let headers = 
req.Http.Request.additional_headers in + let traceparent = List.assoc_opt hdr_traceparent headers in + let baggage = + let* all = alloc_assoc hdr_baggage headers in + Some (List.concat_map parse all) + in + let open TraceContext in + empty |> maybe with_traceparent traceparent |> maybe with_baggage baggage +end diff --git a/ocaml/libs/tracing/propagator.mli b/ocaml/libs/tracing/propagator.mli new file mode 100644 index 00000000000..36780d14c86 --- /dev/null +++ b/ocaml/libs/tracing/propagator.mli @@ -0,0 +1,23 @@ +(* + * Copyright (c) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module type S = sig + type carrier + + val inject_into : Tracing.TraceContext.t -> carrier -> carrier + + val extract_from : carrier -> Tracing.TraceContext.t +end + +module Http : S with type carrier = Http.Request.t From 0d996b3e079d7affc9875ae352816e11e22309dd Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 16 Oct 2024 16:24:10 +0100 Subject: [PATCH 35/86] CP-51772: Extract traceparent back out Restore code that was previously disabled. 
Signed-off-by: Colin James --- ocaml/libs/tracing/dune | 1 + ocaml/xapi-cli-server/dune | 1 + ocaml/xapi-cli-server/xapi_cli.ml | 20 +++++++++----------- ocaml/xapi/context.ml | 5 +++-- ocaml/xapi/dune | 1 + 5 files changed, 15 insertions(+), 13 deletions(-) diff --git a/ocaml/libs/tracing/dune b/ocaml/libs/tracing/dune index 71e5c7b7473..cf28881793a 100644 --- a/ocaml/libs/tracing/dune +++ b/ocaml/libs/tracing/dune @@ -31,6 +31,7 @@ (library (name tracing_propagator) (modules propagator) + (wrapped false) (libraries astring http-lib tracing)) (test diff --git a/ocaml/xapi-cli-server/dune b/ocaml/xapi-cli-server/dune index c1a8269dbb6..2c297d1da9f 100644 --- a/ocaml/xapi-cli-server/dune +++ b/ocaml/xapi-cli-server/dune @@ -42,6 +42,7 @@ xapi-stdext-threads xapi-stdext-unix xapi-tracing + tracing_propagator xmlm xml-light2 ) diff --git a/ocaml/xapi-cli-server/xapi_cli.ml b/ocaml/xapi-cli-server/xapi_cli.ml index 57f09e4aafc..5ea0f949210 100644 --- a/ocaml/xapi-cli-server/xapi_cli.ml +++ b/ocaml/xapi-cli-server/xapi_cli.ml @@ -190,19 +190,17 @@ let uninteresting_cmd_postfixes = ["help"; "-get"; "-list"] let exec_command req cmd s session args = let params = get_params cmd in let tracing = - (* Option.bind *) - (* Http.Request.(req.traceparent) *) - (* Tracing.SpanContext.of_traceparent *) - (* |> Option.map (fun span_context -> *) - (* Tracing.Tracer.span_of_span_context span_context (get_cmdname cmd) *) - (* ) *) - None + let open Tracing in + let ( let* ) = Option.bind in + let context = Propagator.Http.extract_from req in + let* traceparent = TraceContext.traceparent_of context in + let* span_context = SpanContext.of_traceparent traceparent in + let span = Tracer.span_of_span_context span_context (get_cmdname cmd) in + Some span in let minimal = - if List.mem_assoc "minimal" params then - bool_of_string (List.assoc "minimal" params) - else - false + List.assoc_opt "minimal" params + |> Option.fold ~none:false ~some:bool_of_string in let u = try List.assoc 
"username" params with _ -> "" in let p = try List.assoc "password" params with _ -> "" in diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index f2cb485e770..efb6ee61318 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -221,8 +221,9 @@ let parent_of_origin (origin : origin) span_name = let open Tracing in let ( let* ) = Option.bind in match origin with - | Http (_req, _) -> - let* traceparent = (* req.Http.Request.traceparent *) None in + | Http (req, _) -> + let context = Propagator.Http.extract_from req in + let* traceparent = TraceContext.traceparent_of context in let* span_context = SpanContext.of_traceparent traceparent in let span = Tracer.span_of_span_context span_context span_name in Some span diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index 9f3e5f825fa..dbcb9eb284f 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -68,6 +68,7 @@ xapi_database mtime tracing + tracing_propagator uuid rpclib.core threads.posix From 069ca95e9ee84031666c2d9c679e80553e4b7ad1 Mon Sep 17 00:00:00 2001 From: Colin James Date: Wed, 16 Oct 2024 17:34:43 +0100 Subject: [PATCH 36/86] CP-51772: Remove tracing dependency from http-lib The tracing library is removed as a dependency from http-lib. It is still a dependency of http-svr. This is currently a breaking change. The plan is to: - Factor out - and generalise - the helpers defined in place of removing the functions Http.Request.traceparent_of and Http.Request.with_tracing. The comment describes how this should be done: consolidate the impl into the tracing library itself and then provide inject and extract routines to do it for arbitrary carriers. - Rewrite xmlrpc to accept an arbitrary request rewriter function such as (Http.Request.t -> Http.Request.t) before it dispatches. Then, tracing can be injected in by the user. 
Signed-off-by: Colin James --- ocaml/libs/http-lib/dune | 2 +- ocaml/libs/http-lib/http.ml | 29 ---------- ocaml/libs/http-lib/http.mli | 5 -- ocaml/libs/http-lib/http_svr.ml | 81 +++++++++++++++++++++++++-- ocaml/libs/http-lib/xmlrpc_client.ml | 9 +-- ocaml/libs/http-lib/xmlrpc_client.mli | 1 - 6 files changed, 79 insertions(+), 48 deletions(-) diff --git a/ocaml/libs/http-lib/dune b/ocaml/libs/http-lib/dune index 2990fda2453..42286576aa4 100644 --- a/ocaml/libs/http-lib/dune +++ b/ocaml/libs/http-lib/dune @@ -30,7 +30,6 @@ xapi-stdext-threads xapi-stdext-unix xml-light2 - tracing ) ) @@ -46,6 +45,7 @@ polly threads.posix tracing + tracing_propagator uri xapi-log xapi-stdext-pervasives diff --git a/ocaml/libs/http-lib/http.ml b/ocaml/libs/http-lib/http.ml index 75f8a5880b7..554f3ed6217 100644 --- a/ocaml/libs/http-lib/http.ml +++ b/ocaml/libs/http-lib/http.ml @@ -674,35 +674,6 @@ module Request = struct let headers, body = to_headers_and_body x in let frame_header = if x.frame then make_frame_header headers else "" in frame_header ^ headers ^ body - - (* let traceparent_of req = *) - (* let open Tracing in *) - (* let ( let* ) = Option.bind in *) - (* let* traceparent = req.traceparent in *) - (* let* span_context = SpanContext.of_traceparent traceparent in *) - (* let span = Tracer.span_of_span_context span_context req.uri in *) - (* Some span *) - - (* let with_tracing ?attributes ~name req f = *) - (* let open Tracing in *) - (* let parent = traceparent_of req in *) - (* with_child_trace ?attributes parent ~name (fun (span : Span.t option) -> *) - (* match span with *) - (* | Some span -> *) - (* let traceparent = *) - (* Some (span |> Span.get_context |> SpanContext.to_traceparent) *) - (* in *) - (* let req = {req with traceparent} in *) - (* f req *) - (* | None -> *) - (* f req *) - (* ) *) - - let traceparent_of _ = None - - let with_tracing ?attributes ~name = - ignore (attributes, name) ; - Fun.flip ( @@ ) end module Response = struct diff --git 
a/ocaml/libs/http-lib/http.mli b/ocaml/libs/http-lib/http.mli index 66557a76fe9..13b8bcaa4fa 100644 --- a/ocaml/libs/http-lib/http.mli +++ b/ocaml/libs/http-lib/http.mli @@ -126,11 +126,6 @@ module Request : sig val to_wire_string : t -> string (** [to_wire_string t] returns a string which could be sent to a server *) - - val traceparent_of : t -> Tracing.Span.t option - - val with_tracing : - ?attributes:(string * string) list -> name:string -> t -> (t -> 'a) -> 'a end (** Parsed form of the HTTP response *) diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index 2240d811797..68ef0197d54 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -99,9 +99,82 @@ let response_of_request req hdrs = ~headers:(connection :: cache :: hdrs) "200" "OK" +(* These functions have been factored out of Http.Request. + The carrier protocol should generally have no knowledge of tracing. *) +module type HelperS = sig + val traceparent_of : Http.Request.t -> Tracing.Span.t option + + val with_tracing : + ?attributes:(string * string) list + -> name:string + -> Http.Request.t + -> (Http.Request.t -> 'a) + -> 'a +end + +module Helper : HelperS = struct + (* This code can probably be relocated into the tracing library + with the following generalisation: + + val with_tracing : + ?attributes:(string * string) list + -> name:string + -> Http.Request.t + -> (Http.Request.t -> 'a) + -> 'a + + can become: + + val with_tracing : + ?attributes:(string * string) list + -> inject: (TraceContext.t -> 'carrier -> 'carrier) + -> extract: ('carrier -> TraceContext.t) + -> name:string + -> 'carrier + -> ('carrier -> 'a) + -> 'a + + Can possibly pass a first-class module, but there may be a dependency cycle, + so functions are a more universal interface. + *) + + let traceparent_of req = + (* TODO: The extracted TraceContext must be propagated through the + spans. 
Simple approach is to add it to the SpanContext, and then + inherit it properly (substituting/creating only identity-related). *) + let open Tracing in + let ( let* ) = Option.bind in + let trace_context = Propagator.Http.extract_from req in + let* parent = TraceContext.traceparent_of trace_context in + let* span_context = SpanContext.of_traceparent parent in + Some (Tracer.span_of_span_context span_context req.uri) + + let with_tracing ?attributes ~name req f = + ignore (attributes, name) ; + let open Tracing in + let trace_context = Propagator.Http.extract_from req in + let parent = traceparent_of req in + let continue_with_child = function + | Some child -> + (* Here, "traceparent" is terminology for the [version-trace_id-span_id-flags] structure. + Therefore, the purpose of the code below is to decorate the request with the derived (child) span's ID. + This function only gets called if parent is not None. *) + let span_context = Span.get_context child in + let traceparent = SpanContext.to_traceparent span_context in + let trace_context' = + TraceContext.with_traceparent (Some traceparent) trace_context + in + let req' = Propagator.Http.inject_into trace_context' req in + f req' + | _ -> + f req + in + with_child_trace ?attributes parent ~name continue_with_child +end + let response_fct req ?(hdrs = []) s (response_length : int64) (write_response_to_fd_fn : Unix.file_descr -> unit) = - let@ req = Http.Request.with_tracing ~name:__FUNCTION__ req in + let@ req = Helper.with_tracing ~name:__FUNCTION__ req in let res = { (response_of_request req hdrs) with @@ -445,7 +518,7 @@ let read_request ?proxy_seen ~read_timeout ~total_timeout ~max_length fd = let r, proxy = read_request_exn ~proxy_seen ~read_timeout ~total_timeout ~max_length fd in - let parent_span = Http.Request.traceparent_of r in + let parent_span = Helper.traceparent_of r in let loop_span = Option.fold ~none:None ~some:(fun span -> @@ -489,8 +562,8 @@ let read_request ?proxy_seen ~read_timeout 
~total_timeout ~max_length fd = (None, None) let handle_one (x : 'a Server.t) ss context req = - let@ req = Http.Request.with_tracing ~name:__FUNCTION__ req in - let span = Http.Request.traceparent_of req in + let@ req = Helper.with_tracing ~name:__FUNCTION__ req in + let span = Helper.traceparent_of req in let finished = ref false in try D.debug "Request %s" (Http.Request.to_string req) ; diff --git a/ocaml/libs/http-lib/xmlrpc_client.ml b/ocaml/libs/http-lib/xmlrpc_client.ml index f7390f8fb46..e23ccd69f73 100644 --- a/ocaml/libs/http-lib/xmlrpc_client.ml +++ b/ocaml/libs/http-lib/xmlrpc_client.ml @@ -49,14 +49,7 @@ let connect ?session_id ?task_id ?subtask_of path = ?subtask_of Http.Connect path let xmlrpc ?frame ?version ?keep_alive ?task_id ?cookie ?length ?auth - ?subtask_of ?query ?body ?(tracing = None) path = - (* let traceparent = *) - (* let open Tracing in *) - (* Option.map *) - (* (fun span -> Span.get_context span |> SpanContext.to_traceparent) *) - (* tracing *) - (* in *) - ignore tracing ; + ?subtask_of ?query ?body path = let headers = Option.map (fun x -> [(Http.Hdr.task_id, x)]) task_id in Http.Request.make ~user_agent ?frame ?version ?keep_alive ?cookie ?headers ?length ?auth ?subtask_of ?query ?body Http.Post path diff --git a/ocaml/libs/http-lib/xmlrpc_client.mli b/ocaml/libs/http-lib/xmlrpc_client.mli index 00d77b45937..52fb074db50 100644 --- a/ocaml/libs/http-lib/xmlrpc_client.mli +++ b/ocaml/libs/http-lib/xmlrpc_client.mli @@ -72,7 +72,6 @@ val xmlrpc : -> ?subtask_of:string -> ?query:(string * string) list -> ?body:string - -> ?tracing:Tracing.Span.t option -> string -> Http.Request.t (** Returns an HTTP.Request.t representing an XMLRPC request *) From c4962a31874b9009c57e3344354e31339aa078ee Mon Sep 17 00:00:00 2001 From: Colin James Date: Thu, 17 Oct 2024 09:20:51 +0100 Subject: [PATCH 37/86] CP-51772: Consolidate propagation into tracing lib Generalise the pattern used in http_svr.ml into a functor in the tracing library. 
In particular, so long as you can provide a way to name a carrier (to endow its child trace with), inject tracing context into a carrier, and extract trace context into a carrier, you can use this generic pattern to propagate tracing across arbitrary carriers (where the derived span is exported, but the incoming one is not). To this end, we factor it out of http_svr.ml and redefine the helper module used there in terms of one constructed from the new tracing propagator functor. The tracing_propagator library is used to provide definitions in the input module for defining trace propagation across HTTP headers. Signed-off-by: Colin James --- ocaml/libs/http-lib/http_svr.ml | 75 +++------------------------------ ocaml/libs/tracing/tracing.ml | 60 ++++++++++++++++++++++++++ ocaml/libs/tracing/tracing.mli | 30 +++++++++++++ 3 files changed, 95 insertions(+), 70 deletions(-) diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index 68ef0197d54..c4f7be5460c 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -99,77 +99,12 @@ let response_of_request req hdrs = ~headers:(connection :: cache :: hdrs) "200" "OK" -(* These functions have been factored out of Http.Request. - The carrier protocol should generally have no knowledge of tracing. 
*) -module type HelperS = sig - val traceparent_of : Http.Request.t -> Tracing.Span.t option - - val with_tracing : - ?attributes:(string * string) list - -> name:string - -> Http.Request.t - -> (Http.Request.t -> 'a) - -> 'a -end +module Helper = struct + include Tracing.Propagator.Make (struct + include Propagator.Http -module Helper : HelperS = struct - (* This code can probably be relocated into the tracing library - with the following generalisation: - - val with_tracing : - ?attributes:(string * string) list - -> name:string - -> Http.Request.t - -> (Http.Request.t -> 'a) - -> 'a - - can become: - - val with_tracing : - ?attributes:(string * string) list - -> inject: (TraceContext.t -> 'carrier -> 'carrier) - -> extract: ('carrier -> TraceContext.t) - -> name:string - -> 'carrier - -> ('carrier -> 'a) - -> 'a - - Can possibly pass a first-class module, but there may be a dependency cycle, - so functions are a more universal interface. - *) - - let traceparent_of req = - (* TODO: The extracted TraceContext must be propagated through the - spans. Simple approach is to add it to the SpanContext, and then - inherit it properly (substituting/creating only identity-related). *) - let open Tracing in - let ( let* ) = Option.bind in - let trace_context = Propagator.Http.extract_from req in - let* parent = TraceContext.traceparent_of trace_context in - let* span_context = SpanContext.of_traceparent parent in - Some (Tracer.span_of_span_context span_context req.uri) - - let with_tracing ?attributes ~name req f = - ignore (attributes, name) ; - let open Tracing in - let trace_context = Propagator.Http.extract_from req in - let parent = traceparent_of req in - let continue_with_child = function - | Some child -> - (* Here, "traceparent" is terminology for the [version-trace_id-span_id-flags] structure. - Therefore, the purpose of the code below is to decorate the request with the derived (child) span's ID. - This function only gets called if parent is not None. 
*) - let span_context = Span.get_context child in - let traceparent = SpanContext.to_traceparent span_context in - let trace_context' = - TraceContext.with_traceparent (Some traceparent) trace_context - in - let req' = Propagator.Http.inject_into trace_context' req in - f req' - | _ -> - f req - in - with_child_trace ?attributes parent ~name continue_with_child + let name_span req = req.Http.Request.uri + end) end let response_fct req ?(hdrs = []) s (response_length : int64) diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index cad2a8b2069..d0adde3e776 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -788,3 +788,63 @@ module EnvHelpers = struct Some (span |> Span.get_context |> SpanContext.to_traceparent) |> of_traceparent end + +module Propagator = struct + module type S = sig + type carrier + + val traceparent_of : carrier -> Span.t option + + val with_tracing : + ?attributes:(string * string) list + -> name:string + -> carrier + -> (carrier -> 'a) + -> 'a + end + + module type PropS = sig + type carrier + + val inject_into : TraceContext.t -> carrier -> carrier + + val extract_from : carrier -> TraceContext.t + + val name_span : carrier -> string + end + + module Make (P : PropS) : S with type carrier = P.carrier = struct + type carrier = P.carrier + + let traceparent_of carrier = + (* TODO: The extracted TraceContext must be propagated through the + spans. Simple approach is to add it to the SpanContext, and then + inherit it properly (substituting/creating only identity-related). 
*) + let ( let* ) = Option.bind in + let trace_context = P.extract_from carrier in + let* parent = TraceContext.traceparent_of trace_context in + let* span_context = SpanContext.of_traceparent parent in + let name = P.name_span carrier in + Some (Tracer.span_of_span_context span_context name) + + let with_tracing ?attributes ~name carrier f = + let trace_context = P.extract_from carrier in + let parent = traceparent_of carrier in + let continue_with_child = function + | Some child -> + (* Here, "traceparent" is terminology for the [version-trace_id-span_id-flags] structure. + Therefore, the purpose of the code below is to decorate the request with the derived (child) span's ID. + This function only gets called if parent is not None. *) + let span_context = Span.get_context child in + let traceparent = SpanContext.to_traceparent span_context in + let trace_context' = + TraceContext.with_traceparent (Some traceparent) trace_context + in + let carrier' = P.inject_into trace_context' carrier in + f carrier' + | _ -> + f carrier + in + with_child_trace ?attributes parent ~name continue_with_child + end +end diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index 5163a166002..e2d8c8d947d 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -297,3 +297,33 @@ module EnvHelpers : sig If [span] is [None], it returns an empty list. *) end + +(** [Propagator] is a utility module for creating trace propagators over arbitrary carriers. *) +module Propagator : sig + module type S = sig + type carrier + + val traceparent_of : carrier -> Span.t option + (** [traceparent_of carrier] creates a span whose context is that encoded within the [carrier] input. + If there is no traceparent encoded within the carrier, the function returns [None]. 
*) + + val with_tracing : + ?attributes:(string * string) list + -> name:string + -> carrier + -> (carrier -> 'a) + -> 'a + end + + module type PropS = sig + type carrier + + val inject_into : TraceContext.t -> carrier -> carrier + + val extract_from : carrier -> TraceContext.t + + val name_span : carrier -> string + end + + module Make : functor (P : PropS) -> S with type carrier = P.carrier +end From 6dad697a21691e258815f86840609b7a4f672b58 Mon Sep 17 00:00:00 2001 From: Colin James Date: Thu, 17 Oct 2024 09:47:38 +0100 Subject: [PATCH 38/86] CP-51772: Repair xapi-cli-server's tracing Use trace propagators to endow the Http.Request.t used for XML-RPC requests with in-service information, in order to propagate it. As before, the parent span is named "xe " and its span-id is what makes its way into the request. If the endpoint receiving the request wishes to, they can derive subsequent in-service tracing, rooted from this, by using with_tracing as define in Tracing's Propagator. Signed-off-by: Colin James --- ocaml/xapi-cli-server/xapi_cli.ml | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/ocaml/xapi-cli-server/xapi_cli.ml b/ocaml/xapi-cli-server/xapi_cli.ml index 5ea0f949210..21950bd2618 100644 --- a/ocaml/xapi-cli-server/xapi_cli.ml +++ b/ocaml/xapi-cli-server/xapi_cli.ml @@ -121,6 +121,21 @@ let with_session ~local rpc u p session f = (fun () -> f session) (fun () -> do_logout ()) +module TraceHelper = struct + include Tracing.Propagator.Make (struct + include Propagator.Http + + let name_span req = req.Http.Request.uri + end) + + let inject_span_into_req (span : Tracing.Span.t option) = + let module T = Tracing in + let span_context = Option.map T.Span.get_context span in + let traceparent = Option.map T.SpanContext.to_traceparent span_context in + let trace_context = T.TraceContext.(with_traceparent traceparent empty) in + Propagator.Http.inject_into trace_context +end + let do_rpcs _req s username password minimal 
cmd session args tracing = let cmdname = get_cmdname cmd in let cspec = @@ -137,9 +152,9 @@ let do_rpcs _req s username password minimal cmd session args tracing = try let generic_rpc = get_rpc () in (* NB the request we've received is for the /cli. We need an XMLRPC request for the API *) - Tracing.with_tracing ~parent:tracing ~name:("xe " ^ cmdname) - @@ fun tracing -> - let req = Xmlrpc_client.xmlrpc ~version:"1.1" ~tracing "/" in + Tracing.with_tracing ~parent:tracing ~name:("xe " ^ cmdname) @@ fun span -> + let req = Xmlrpc_client.xmlrpc ~version:"1.1" "/" in + let req = TraceHelper.inject_span_into_req span req in let rpc = generic_rpc req s in if do_forward then with_session ~local:false rpc username password session (fun sess -> @@ -190,9 +205,9 @@ let uninteresting_cmd_postfixes = ["help"; "-get"; "-list"] let exec_command req cmd s session args = let params = get_params cmd in let tracing = - let open Tracing in let ( let* ) = Option.bind in let context = Propagator.Http.extract_from req in + let open Tracing in let* traceparent = TraceContext.traceparent_of context in let* span_context = SpanContext.of_traceparent traceparent in let span = Tracer.span_of_span_context span_context (get_cmdname cmd) in From 673525e2945416c4da10279b18bd0fa3be1e2546 Mon Sep 17 00:00:00 2001 From: Colin James Date: Thu, 17 Oct 2024 12:34:36 +0100 Subject: [PATCH 39/86] CP-51772: Repair tracing in xapi Signed-off-by: Colin James --- ocaml/xapi/api_server.ml | 18 +++++++++++++----- ocaml/xapi/context.ml | 2 +- ocaml/xapi/dune | 2 ++ ocaml/xapi/helpers.ml | 18 +++++++++++++++--- ocaml/xapi/message_forwarding.ml | 10 ++++------ ocaml/xapi/server_helpers.ml | 10 +++++++++- ocaml/xapi/system_domains.ml | 3 ++- ocaml/xapi/xapi_pool.ml | 3 ++- 8 files changed, 48 insertions(+), 18 deletions(-) diff --git a/ocaml/xapi/api_server.ml b/ocaml/xapi/api_server.ml index 35cb14103e3..d79d2f659e7 100644 --- a/ocaml/xapi/api_server.ml +++ b/ocaml/xapi/api_server.ml @@ -3,9 +3,17 @@ module 
Server = Server.Make (Actions) (Forwarder) let ( let@ ) f x = f x +module Helper = struct + include Tracing.Propagator.Make (struct + include Propagator.Http + + let name_span req = req.Http.Request.uri + end) +end + (* This bit is called directly by the fake_rpc callback *) let callback1 ?(json_rpc_version = Jsonrpc.V1) is_json req fd call = - let@ req = Http.Request.with_tracing ~name:__FUNCTION__ req in + let@ req = Helper.with_tracing ~name:__FUNCTION__ req in (* We now have the body string, the xml and the call name, and can also tell *) (* if we're a master or slave and whether the call came in on the unix domain socket or the tcp socket *) (* If we're a slave, and the call is from the unix domain socket or from the HIMN, and the call *isn't* *) @@ -24,7 +32,7 @@ let callback1 ?(json_rpc_version = Jsonrpc.V1) is_json req fd call = forward req call is_json else let response = - let@ req = Http.Request.with_tracing ~name:"Server.dispatch_call" req in + let@ req = Helper.with_tracing ~name:"Server.dispatch_call" req in Server.dispatch_call req fd call in let translated = @@ -91,8 +99,8 @@ let create_thumbprint_header req response = (** HTML callback that dispatches an RPC and returns the response. *) let callback is_json req fd _ = - let@ req = Http.Request.with_tracing ~name:__FUNCTION__ req in - let span = Http.Request.traceparent_of req in + let@ req = Helper.with_tracing ~name:__FUNCTION__ req in + let span = Helper.traceparent_of req in (* fd only used for writing *) let body = Http_svr.read_body ~limit:Constants.http_limit_max_rpc_size req fd @@ -145,7 +153,7 @@ let callback is_json req fd _ = (** HTML callback that dispatches an RPC and returns the response. 
*) let jsoncallback req fd _ = - let@ req = Http.Request.with_tracing ~name:__FUNCTION__ req in + let@ req = Helper.with_tracing ~name:__FUNCTION__ req in (* fd only used for writing *) let body = Http_svr.read_body ~limit:Xapi_database.Db_globs.http_limit_max_rpc_size req diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index efb6ee61318..5df47bd2a57 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -218,11 +218,11 @@ let span_kind_of_parent parent = Option.fold ~none:SpanKind.Internal ~some:(fun _ -> SpanKind.Server) parent let parent_of_origin (origin : origin) span_name = - let open Tracing in let ( let* ) = Option.bind in match origin with | Http (req, _) -> let context = Propagator.Http.extract_from req in + let open Tracing in let* traceparent = TraceContext.traceparent_of context in let* span_context = SpanContext.of_traceparent traceparent in let span = Tracer.span_of_span_context span_context span_name in diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index dbcb9eb284f..5602e62d152 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -154,6 +154,7 @@ tar-unix threads.posix tracing + tracing_propagator unixpwd uri uuid @@ -240,6 +241,7 @@ stunnel threads.posix tracing + tracing_propagator xapi-backtrace xapi-client xapi-consts diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 30965068f3f..f2f4f9747a1 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -387,6 +387,15 @@ let update_pif_addresses ~__context = Option.iter (fun (pif, bridge) -> set_DNS ~__context ~pif ~bridge) dns_if ; List.iter (fun self -> update_pif_address ~__context ~self) pifs +module TraceHelper = struct + let inject_span_into_req (span : Tracing.Span.t option) = + let module T = Tracing in + let span_context = Option.map T.Span.get_context span in + let traceparent = Option.map T.SpanContext.to_traceparent span_context in + let trace_context = T.TraceContext.(with_traceparent traceparent empty) in + Propagator.Http.inject_into 
trace_context +end + (* Note that both this and `make_timeboxed_rpc` are almost always * partially applied, returning a function of type 'Rpc.request -> Rpc.response'. * The body is therefore not evaluated until the RPC call is actually being @@ -395,7 +404,8 @@ let make_rpc ~__context rpc : Rpc.response = let subtask_of = Ref.string_of (Context.get_task_id __context) in let open Xmlrpc_client in let tracing = Context.set_client_span __context in - let http = xmlrpc ~subtask_of ~version:"1.1" "/" ~tracing in + let http = xmlrpc ~subtask_of ~version:"1.1" "/" in + let http = TraceHelper.inject_span_into_req tracing http in let transport = if Pool_role.is_master () then Unix Xapi_globs.unix_domain_socket @@ -418,7 +428,8 @@ let make_timeboxed_rpc ~__context timeout rpc : Rpc.response = * the task has acquired we make a new one specifically for the stunnel pid *) let open Xmlrpc_client in let tracing = Context.set_client_span __context in - let http = xmlrpc ~subtask_of ~version:"1.1" ~tracing "/" in + let http = xmlrpc ~subtask_of ~version:"1.1" "/" in + let http = TraceHelper.inject_span_into_req tracing http in let task_id = Context.get_task_id __context in let cancel () = let resources = @@ -486,7 +497,8 @@ let make_remote_rpc ?(verify_cert = Stunnel_client.pool ()) ~__context SSL (SSL.make ~verify_cert (), remote_address, !Constants.https_port) in let tracing = Context.tracing_of __context in - let http = xmlrpc ~version:"1.0" ~tracing "/" in + let http = xmlrpc ~version:"1.0" "/" in + let http = TraceHelper.inject_span_into_req tracing http in XMLRPC_protocol.rpc ~srcstr:"xapi" ~dststr:"remote_xapi" ~transport ~http xml (* Helper type for an object which may or may not be in the local database. 
*) diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index cb0b82aa7fd..6423e8d7be3 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -60,9 +60,8 @@ let remote_rpc_no_retry _context hostname (task_opt : API.ref_task option) xml = in let tracing = Context.set_client_span _context in let http = - xmlrpc - ?task_id:(Option.map Ref.string_of task_opt) - ~version:"1.0" ~tracing "/" + xmlrpc ?task_id:(Option.map Ref.string_of task_opt) ~version:"1.0" "/" + |> Helpers.TraceHelper.inject_span_into_req tracing in XMLRPC_protocol.rpc ~srcstr:"xapi" ~dststr:"dst_xapi" ~transport ~http xml @@ -80,9 +79,8 @@ let remote_rpc_retry _context hostname (task_opt : API.ref_task option) xml = in let tracing = Context.set_client_span _context in let http = - xmlrpc - ?task_id:(Option.map Ref.string_of task_opt) - ~version:"1.1" ~tracing "/" + xmlrpc ?task_id:(Option.map Ref.string_of task_opt) ~version:"1.1" "/" + |> Helpers.TraceHelper.inject_span_into_req tracing in XMLRPC_protocol.rpc ~srcstr:"xapi" ~dststr:"dst_xapi" ~transport ~http xml diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index e4952769c2f..ef50491c518 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -119,10 +119,18 @@ let dispatch_exn_wrapper f = let code, params = ExnHelper.error_of_exn exn in API.response_of_failure code params +module Helper = struct + include Tracing.Propagator.Make (struct + include Propagator.Http + + let name_span req = req.Http.Request.uri + end) +end + let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name op_fn marshaller fd http_req label sync_ty generate_task_for = (* if the call has been forwarded to us, then they are responsible for completing the task, so we don't need to complete it *) - let@ http_req = Http.Request.with_tracing ~name:__FUNCTION__ http_req in + let@ http_req = Helper.with_tracing ~name:__FUNCTION__ http_req in let 
called_async = sync_ty <> `Sync in if called_async && not supports_async then API.response_of_fault diff --git a/ocaml/xapi/system_domains.ml b/ocaml/xapi/system_domains.ml index 5fb394605b1..0453c205566 100644 --- a/ocaml/xapi/system_domains.ml +++ b/ocaml/xapi/system_domains.ml @@ -181,7 +181,8 @@ let pingable ip () = let queryable ~__context transport () = let open Xmlrpc_client in let tracing = Context.set_client_span __context in - let http = xmlrpc ~version:"1.0" ~tracing "/" in + let http = xmlrpc ~version:"1.0" "/" in + let http = Helpers.TraceHelper.inject_span_into_req tracing http in let rpc = XMLRPC_protocol.rpc ~srcstr:"xapi" ~dststr:"remote_smapiv2" ~transport ~http in diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 3a7dee78735..dd4bca70e26 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -3406,7 +3406,8 @@ let perform ~local_fn ~__context ~host op = let verify_cert = Some Stunnel.pool (* verify! *) in let task_id = Option.map Ref.string_of task_opt in let tracing = Context.set_client_span __context in - let http = xmlrpc ?task_id ~version:"1.0" ~tracing "/" in + let http = xmlrpc ?task_id ~version:"1.0" "/" in + let http = Helpers.TraceHelper.inject_span_into_req tracing http in let port = !Constants.https_port in let transport = SSL (SSL.make ~verify_cert ?task_id (), hostname, port) in XMLRPC_protocol.rpc ~srcstr:"xapi" ~dststr:"dst_xapi" ~transport ~http xml From 8e20e3e68f19e806e82e5b40872eceb8e1a6e886 Mon Sep 17 00:00:00 2001 From: Colin James Date: Mon, 21 Oct 2024 08:37:11 +0100 Subject: [PATCH 40/86] Restructuring - Wrap tracing_propagator library - Drop point-free style in parse Signed-off-by: Colin James --- ocaml/libs/http-lib/http_svr.ml | 2 +- ocaml/libs/tracing/dune | 1 - ocaml/libs/tracing/propagator.ml | 9 +++++---- ocaml/xapi-cli-server/xapi_cli.ml | 14 +++++++------- ocaml/xapi/api_server.ml | 2 +- ocaml/xapi/context.ml | 2 +- ocaml/xapi/helpers.ml | 10 +++++----- 
ocaml/xapi/server_helpers.ml | 2 +- 8 files changed, 21 insertions(+), 21 deletions(-) diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index c4f7be5460c..a7d52b23a31 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -101,7 +101,7 @@ let response_of_request req hdrs = module Helper = struct include Tracing.Propagator.Make (struct - include Propagator.Http + include Tracing_propagator.Propagator.Http let name_span req = req.Http.Request.uri end) diff --git a/ocaml/libs/tracing/dune b/ocaml/libs/tracing/dune index cf28881793a..71e5c7b7473 100644 --- a/ocaml/libs/tracing/dune +++ b/ocaml/libs/tracing/dune @@ -31,7 +31,6 @@ (library (name tracing_propagator) (modules propagator) - (wrapped false) (libraries astring http-lib tracing)) (test diff --git a/ocaml/libs/tracing/propagator.ml b/ocaml/libs/tracing/propagator.ml index 13c48bafce3..babd0c90476 100644 --- a/ocaml/libs/tracing/propagator.ml +++ b/ocaml/libs/tracing/propagator.ml @@ -54,12 +54,13 @@ module Http = struct | xs -> Some xs - let parse = + let parse input = let open Astring.String in let trim_pair (key, value) = (trim key, trim value) in - cuts ~sep:";" - >> List.map (cut ~sep:"=" >> Option.map trim_pair) - >> List.filter_map Fun.id + input + |> cuts ~sep:";" + |> List.map (cut ~sep:"=" >> Option.map trim_pair) + |> List.filter_map Fun.id let inject_into ctx req = let open Tracing in diff --git a/ocaml/xapi-cli-server/xapi_cli.ml b/ocaml/xapi-cli-server/xapi_cli.ml index 21950bd2618..a38115fd831 100644 --- a/ocaml/xapi-cli-server/xapi_cli.ml +++ b/ocaml/xapi-cli-server/xapi_cli.ml @@ -123,17 +123,17 @@ let with_session ~local rpc u p session f = module TraceHelper = struct include Tracing.Propagator.Make (struct - include Propagator.Http + include Tracing_propagator.Propagator.Http let name_span req = req.Http.Request.uri end) let inject_span_into_req (span : Tracing.Span.t option) = - let module T = Tracing in - let span_context = 
Option.map T.Span.get_context span in - let traceparent = Option.map T.SpanContext.to_traceparent span_context in - let trace_context = T.TraceContext.(with_traceparent traceparent empty) in - Propagator.Http.inject_into trace_context + let open Tracing in + let span_context = Option.map Span.get_context span in + let traceparent = Option.map SpanContext.to_traceparent span_context in + let trace_context = TraceContext.(with_traceparent traceparent empty) in + Tracing_propagator.Propagator.Http.inject_into trace_context end let do_rpcs _req s username password minimal cmd session args tracing = @@ -206,8 +206,8 @@ let exec_command req cmd s session args = let params = get_params cmd in let tracing = let ( let* ) = Option.bind in - let context = Propagator.Http.extract_from req in let open Tracing in + let context = Tracing_propagator.Propagator.Http.extract_from req in let* traceparent = TraceContext.traceparent_of context in let* span_context = SpanContext.of_traceparent traceparent in let span = Tracer.span_of_span_context span_context (get_cmdname cmd) in diff --git a/ocaml/xapi/api_server.ml b/ocaml/xapi/api_server.ml index d79d2f659e7..e6864bd80e1 100644 --- a/ocaml/xapi/api_server.ml +++ b/ocaml/xapi/api_server.ml @@ -5,7 +5,7 @@ let ( let@ ) f x = f x module Helper = struct include Tracing.Propagator.Make (struct - include Propagator.Http + include Tracing_propagator.Propagator.Http let name_span req = req.Http.Request.uri end) diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index 5df47bd2a57..56829d37d75 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -221,7 +221,7 @@ let parent_of_origin (origin : origin) span_name = let ( let* ) = Option.bind in match origin with | Http (req, _) -> - let context = Propagator.Http.extract_from req in + let context = Tracing_propagator.Propagator.Http.extract_from req in let open Tracing in let* traceparent = TraceContext.traceparent_of context in let* span_context = SpanContext.of_traceparent 
traceparent in diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index f2f4f9747a1..d75c4dce1c9 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -389,11 +389,11 @@ let update_pif_addresses ~__context = module TraceHelper = struct let inject_span_into_req (span : Tracing.Span.t option) = - let module T = Tracing in - let span_context = Option.map T.Span.get_context span in - let traceparent = Option.map T.SpanContext.to_traceparent span_context in - let trace_context = T.TraceContext.(with_traceparent traceparent empty) in - Propagator.Http.inject_into trace_context + let open Tracing in + let span_context = Option.map Span.get_context span in + let traceparent = Option.map SpanContext.to_traceparent span_context in + let trace_context = TraceContext.(with_traceparent traceparent empty) in + Tracing_propagator.Propagator.Http.inject_into trace_context end (* Note that both this and `make_timeboxed_rpc` are almost always diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index ef50491c518..1e8261b38f1 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -121,7 +121,7 @@ let dispatch_exn_wrapper f = module Helper = struct include Tracing.Propagator.Make (struct - include Propagator.Http + include Tracing_propagator.Propagator.Http let name_span req = req.Http.Request.uri end) From 4eb7185c0150010888f4135c3cfb7bfab06f2381 Mon Sep 17 00:00:00 2001 From: Colin James Date: Tue, 22 Oct 2024 09:05:39 +0100 Subject: [PATCH 41/86] CP-51772: Forward baggage from xe-cli If baggage is present in the environment, it will be sent to xapi-cli-server. 
Signed-off-by: Colin James --- ocaml/xe-cli/newcli.ml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ocaml/xe-cli/newcli.ml b/ocaml/xe-cli/newcli.ml index 56279d6a324..bb3a40d74de 100644 --- a/ocaml/xe-cli/newcli.ml +++ b/ocaml/xe-cli/newcli.ml @@ -817,6 +817,9 @@ let main () = let args = String.concat "\n" args in Printf.fprintf oc "User-agent: xe-cli/Unix/%d.%d\r\n" major minor ; Option.iter (Printf.fprintf oc "traceparent: %s\r\n") traceparent ; + Option.iter + (Printf.fprintf oc "baggage: %s\r\n") + (Sys.getenv_opt "BAGGAGE") ; Printf.fprintf oc "content-length: %d\r\n\r\n" (String.length args) ; Printf.fprintf oc "%s" args ; flush_all () ; From c5fe9baa5ad91974928639ca2454d46c215198f7 Mon Sep 17 00:00:00 2001 From: Colin James Date: Tue, 22 Oct 2024 14:34:06 +0100 Subject: [PATCH 42/86] CP-51772: Propagate trace context through spans Adds TraceContext to the SpanContext data structure and attempts to ensure its inheritance through each part of the code base. The API exposed by the tracing library is a bit problematic, it ought to be simplified and adapted to various use cases. Signed-off-by: Colin James --- ocaml/libs/http-lib/http_svr.ml | 15 +++++-- ocaml/libs/tracing/tracing.ml | 67 +++++++++++++++++++++------- ocaml/libs/tracing/tracing.mli | 7 +++ ocaml/libs/tracing/tracing_export.ml | 20 +++++++-- ocaml/xapi-cli-server/xapi_cli.ml | 36 +++++++++------ ocaml/xapi/context.ml | 3 +- ocaml/xapi/helpers.ml | 8 +++- 7 files changed, 116 insertions(+), 40 deletions(-) diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index a7d52b23a31..017587f3737 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -442,17 +442,24 @@ let read_request_exn ~proxy_seen ~read_timeout ~total_timeout ~max_length fd = already sent back a suitable error code and response to the client. 
*) let read_request ?proxy_seen ~read_timeout ~total_timeout ~max_length fd = try + (* TODO: Restore functionality of tracing this function. We rely on the request + to contain information we want spans to inherit. However, it is the reading of the + request that we intend to trace. *) + let r, proxy = + read_request_exn ~proxy_seen ~read_timeout ~total_timeout ~max_length fd + in + let trace_context = Tracing_propagator.Propagator.Http.extract_from r in let tracer = Tracing.Tracer.get_tracer ~name:"http_tracer" in let loop_span = - match Tracing.Tracer.start ~tracer ~name:__FUNCTION__ ~parent:None () with + match + Tracing.Tracer.start ~tracer ~trace_context ~name:__FUNCTION__ + ~parent:None () + with | Ok span -> span | Error _ -> None in - let r, proxy = - read_request_exn ~proxy_seen ~read_timeout ~total_timeout ~max_length fd - in let parent_span = Helper.traceparent_of r in let loop_span = Option.fold ~none:None diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index d0adde3e776..8beff835cec 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -95,7 +95,7 @@ let validate_attribute (key, value) = && W3CBaggage.Key.is_valid_key key module SpanKind = struct - type t = Server | Consumer | Client | Producer | Internal [@@deriving rpcty] + type t = Server | Consumer | Client | Producer | Internal let to_string = function | Server -> @@ -127,7 +127,7 @@ let endpoint_to_string = function let ok_none = Ok None module Status = struct - type status_code = Unset | Ok | Error [@@deriving rpcty] + type status_code = Unset | Ok | Error type t = {status_code: status_code; _description: string option} end @@ -229,9 +229,14 @@ module TraceContext = struct end module SpanContext = struct - type t = {trace_id: Trace_id.t; span_id: Span_id.t} [@@deriving rpcty] + type t = { + trace_id: Trace_id.t + ; span_id: Span_id.t + ; trace_context: TraceContext.t + } - let context trace_id span_id = {trace_id; span_id} + let context 
trace_id span_id = + {trace_id; span_id; trace_context= TraceContext.empty} let to_traceparent t = let tid = Trace_id.to_string t.trace_id in @@ -246,6 +251,7 @@ module SpanContext = struct { trace_id= Trace_id.of_string trace_id ; span_id= Span_id.of_string span_id + ; trace_context= TraceContext.empty } | _ -> None @@ -253,6 +259,15 @@ module SpanContext = struct let trace_id_of_span_context t = t.trace_id let span_id_of_span_context t = t.span_id + + let context_of_span_context t = t.trace_context + + let with_trace_context trace_context t = {t with trace_context} + + let of_trace_context trace_context = + let traceparent = TraceContext.traceparent_of trace_context in + let span_context = Option.(join (map of_traceparent traceparent)) in + Option.map (with_trace_context trace_context) span_context end module SpanLink = struct @@ -282,16 +297,25 @@ module Span = struct let get_context t = t.context - let start ?(attributes = Attributes.empty) ~name ~parent ~span_kind () = - let trace_id = + let start ?(attributes = Attributes.empty) + ?(trace_context : TraceContext.t option) ~name ~parent ~span_kind () = + let trace_id, extra_context = match parent with | None -> - Trace_id.make () + (Trace_id.make (), TraceContext.empty) | Some span_parent -> - span_parent.context.trace_id + (span_parent.context.trace_id, span_parent.context.trace_context) in let span_id = Span_id.make () in - let context : SpanContext.t = {trace_id; span_id} in + let context : SpanContext.t = + {trace_id; span_id; trace_context= extra_context} + in + let context = + (* If trace_context is provided to the call, override any inherited trace context. 
*) + Option.fold ~none:context + ~some:(Fun.flip SpanContext.with_trace_context context) + trace_context + in (* Using gettimeofday over Mtime as it is better for sharing timestamps between the systems *) let begin_time = Unix.gettimeofday () in let end_time = None in @@ -669,15 +693,18 @@ module Tracer = struct ; attributes= Attributes.empty } - let start ~tracer:t ?(attributes = []) ?(span_kind = SpanKind.Internal) ~name - ~parent () : (Span.t option, exn) result = + let start ~tracer:t ?(attributes = []) ?trace_context + ?(span_kind = SpanKind.Internal) ~name ~parent () : + (Span.t option, exn) result = let open TracerProvider in (* Do not start span if the TracerProvider is disabled*) if not t.enabled then ok_none else let attributes = Attributes.merge_into t.attributes attributes in - let span = Span.start ~attributes ~name ~parent ~span_kind () in + let span = + Span.start ~attributes ?trace_context ~name ~parent ~span_kind () + in Spans.add_to_spans ~span ; Ok (Some span) let update_span_with_parent span (parent : Span.t option) = @@ -691,9 +718,11 @@ module Tracer = struct |> Option.map (fun existing_span -> let old_context = Span.get_context existing_span in let new_context : SpanContext.t = + let trace_context = span.Span.context.trace_context in SpanContext.context (SpanContext.trace_id_of_span_context parent.context) old_context.span_id + |> SpanContext.with_trace_context trace_context in let updated_span = {existing_span with parent= Some parent} in let updated_span = {updated_span with context= new_context} in @@ -730,10 +759,10 @@ end let enable_span_garbage_collector ?(timeout = 86400.) 
() = Spans.GC.initialise_thread ~timeout -let with_tracing ?(attributes = []) ?(parent = None) ~name f = +let with_tracing ?(attributes = []) ?(parent = None) ?trace_context ~name f = let tracer = Tracer.get_tracer ~name in if tracer.enabled then ( - match Tracer.start ~tracer ~attributes ~name ~parent () with + match Tracer.start ~tracer ?trace_context ~attributes ~name ~parent () with | Ok span -> ( try let result = f span in @@ -751,12 +780,12 @@ let with_tracing ?(attributes = []) ?(parent = None) ~name f = ) else f None -let with_child_trace ?attributes parent ~name f = +let with_child_trace ?attributes ?trace_context parent ~name f = match parent with | None -> f None | Some _ as parent -> - with_tracing ?attributes ~parent ~name f + with_tracing ?attributes ?trace_context ~parent ~name f module EnvHelpers = struct let traceparent_key = "TRACEPARENT" @@ -824,6 +853,9 @@ module Propagator = struct let trace_context = P.extract_from carrier in let* parent = TraceContext.traceparent_of trace_context in let* span_context = SpanContext.of_traceparent parent in + let span_context = + SpanContext.with_trace_context trace_context span_context + in let name = P.name_span carrier in Some (Tracer.span_of_span_context span_context name) @@ -845,6 +877,7 @@ module Propagator = struct | _ -> f carrier in - with_child_trace ?attributes parent ~name continue_with_child + with_child_trace ?attributes ~trace_context parent ~name + continue_with_child end end diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index e2d8c8d947d..d20fda8c2e1 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -103,9 +103,13 @@ module SpanContext : sig val of_traceparent : string -> t option + val of_trace_context : TraceContext.t -> t option + val trace_id_of_span_context : t -> Trace_id.t val span_id_of_span_context : t -> Span_id.t + + val context_of_span_context : t -> TraceContext.t end module Span : sig @@ -164,6 +168,7 @@ module 
Tracer : sig val start : tracer:t -> ?attributes:(string * string) list + -> ?trace_context:TraceContext.t -> ?span_kind:SpanKind.t -> name:string -> parent:Span.t option @@ -250,12 +255,14 @@ val enable_span_garbage_collector : ?timeout:float -> unit -> unit val with_tracing : ?attributes:(string * string) list -> ?parent:Span.t option + -> ?trace_context:TraceContext.t -> name:string -> (Span.t option -> 'a) -> 'a val with_child_trace : ?attributes:(string * string) list + -> ?trace_context:TraceContext.t -> Span.t option -> name:string -> (Span.t option -> 'a) diff --git a/ocaml/libs/tracing/tracing_export.ml b/ocaml/libs/tracing/tracing_export.ml index 43761cdde1c..592a12bbb26 100644 --- a/ocaml/libs/tracing/tracing_export.ml +++ b/ocaml/libs/tracing/tracing_export.ml @@ -82,6 +82,16 @@ module Content = struct {timestamp; value} ) in + let tags = + let span_context = Span.get_context s in + let trace_context = + SpanContext.context_of_span_context span_context + in + let baggage = + TraceContext.baggage_of trace_context |> Option.value ~default:[] + in + Span.get_attributes s @ baggage + in { id= s @@ -117,7 +127,7 @@ module Content = struct |> Option.map SpanKind.to_string ; localEndpoint= {serviceName} ; annotations - ; tags= Span.get_attributes s + ; tags } let content_of (spans : Span.t list) = @@ -270,7 +280,10 @@ module Destination = struct ; ("xs.tracing.finished_spans_table.count", string_of_int count) ] in - let@ _ = with_tracing ~parent ~attributes ~name in + let@ _ = + with_tracing ~trace_context:TraceContext.empty ~parent ~attributes + ~name + in all_spans |> Content.Json.ZipkinV2.content_of |> export @@ -283,7 +296,8 @@ module Destination = struct let ((_span_list, span_count) as span_info) = Spans.since () in let attributes = [("export.traces.count", string_of_int span_count)] in let@ parent = - with_tracing ~parent:None ~attributes ~name:"Tracing.flush_spans" + with_tracing ~trace_context:TraceContext.empty ~parent:None ~attributes + 
~name:"Tracing.flush_spans" in TracerProvider.get_tracer_providers () |> List.filter TracerProvider.get_enabled diff --git a/ocaml/xapi-cli-server/xapi_cli.ml b/ocaml/xapi-cli-server/xapi_cli.ml index a38115fd831..72057550ffd 100644 --- a/ocaml/xapi-cli-server/xapi_cli.ml +++ b/ocaml/xapi-cli-server/xapi_cli.ml @@ -132,11 +132,17 @@ module TraceHelper = struct let open Tracing in let span_context = Option.map Span.get_context span in let traceparent = Option.map SpanContext.to_traceparent span_context in - let trace_context = TraceContext.(with_traceparent traceparent empty) in + let trace_context = + Option.map SpanContext.context_of_span_context span_context + in + let trace_context = + Option.value ~default:TraceContext.empty trace_context + |> TraceContext.with_traceparent traceparent + in Tracing_propagator.Propagator.Http.inject_into trace_context end -let do_rpcs _req s username password minimal cmd session args tracing = +let do_rpcs req s username password minimal cmd session args = let cmdname = get_cmdname cmd in let cspec = try Hashtbl.find cmdtable cmdname @@ -151,8 +157,21 @@ let do_rpcs _req s username password minimal cmd session args tracing = let _ = check_required_keys cmd cspec.reqd in try let generic_rpc = get_rpc () in + let trace_context = Tracing_propagator.Propagator.Http.extract_from req in + let parent = + (* This is a "faux" span in the sense that it's not exported by the program. It exists + so that the derived child span can refer to its span-id as its parent during exportation + (along with inheriting the trace-id). *) + let open Tracing in + let ( let* ) = Option.bind in + let* traceparent = TraceContext.traceparent_of trace_context in + let* span_context = SpanContext.of_traceparent traceparent in + let span = Tracer.span_of_span_context span_context (get_cmdname cmd) in + Some span + in (* NB the request we've received is for the /cli. 
We need an XMLRPC request for the API *) - Tracing.with_tracing ~parent:tracing ~name:("xe " ^ cmdname) @@ fun span -> + Tracing.with_tracing ~trace_context ~parent ~name:("xe " ^ cmdname) + @@ fun span -> let req = Xmlrpc_client.xmlrpc ~version:"1.1" "/" in let req = TraceHelper.inject_span_into_req span req in let rpc = generic_rpc req s in @@ -204,15 +223,6 @@ let uninteresting_cmd_postfixes = ["help"; "-get"; "-list"] let exec_command req cmd s session args = let params = get_params cmd in - let tracing = - let ( let* ) = Option.bind in - let open Tracing in - let context = Tracing_propagator.Propagator.Http.extract_from req in - let* traceparent = TraceContext.traceparent_of context in - let* span_context = SpanContext.of_traceparent traceparent in - let span = Tracer.span_of_span_context span_context (get_cmdname cmd) in - Some span - in let minimal = List.assoc_opt "minimal" params |> Option.fold ~none:false ~some:bool_of_string @@ -271,7 +281,7 @@ let exec_command req cmd s session args = params ) ) ; - do_rpcs req s u p minimal cmd session args tracing + do_rpcs req s u p minimal cmd session args let get_line str i = try diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index 56829d37d75..5f357e110af 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -223,8 +223,7 @@ let parent_of_origin (origin : origin) span_name = | Http (req, _) -> let context = Tracing_propagator.Propagator.Http.extract_from req in let open Tracing in - let* traceparent = TraceContext.traceparent_of context in - let* span_context = SpanContext.of_traceparent traceparent in + let* span_context = SpanContext.of_trace_context context in let span = Tracer.span_of_span_context span_context span_name in Some span | _ -> diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index d75c4dce1c9..8c2f91fc2a3 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -392,7 +392,13 @@ module TraceHelper = struct let open Tracing in let span_context = Option.map 
Span.get_context span in let traceparent = Option.map SpanContext.to_traceparent span_context in - let trace_context = TraceContext.(with_traceparent traceparent empty) in + let trace_context = + Option.map SpanContext.context_of_span_context span_context + in + let trace_context = + Option.value ~default:TraceContext.empty trace_context + |> TraceContext.with_traceparent traceparent + in Tracing_propagator.Propagator.Http.inject_into trace_context end From 252c05ebe580b30dc611e589fc190fdcd34bfff8 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Tue, 26 Nov 2024 12:17:19 +0100 Subject: [PATCH 43/86] Apply fix by psafont: [Xenopsd] chooses NUMA nodes purely based on amount of free memory on the NUMA nodes of the host Co-authored-by: Pau Ruiz Safont Signed-off-by: Bernhard Kaindl --- doc/content/toolstack/features/NUMA/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/content/toolstack/features/NUMA/index.md b/doc/content/toolstack/features/NUMA/index.md index 5f4d3fe41e8..8fa637be2ca 100644 --- a/doc/content/toolstack/features/NUMA/index.md +++ b/doc/content/toolstack/features/NUMA/index.md @@ -130,7 +130,7 @@ See page 13 in [^AMD_numa] for a diagram of an AMD Opteron 6272 system. * Booting multiple VMs in parallel will result in potentially allocating both on the same NUMA node (race condition) * When we're about to run out of host memory we'll fall back to striping memory again, but the soft affinity mask won't reflect that (this needs an API to query Xen on where it has actually placed the VM, so we can fix up the mask accordingly) -* XAPI is not aware of NUMA balancing across a pool, and chooses NUMA nodes purely based on amount of free memory on the NUMA nodes of the host, even if a better NUMA placement could be found on another host +* XAPI is not aware of NUMA balancing across a pool. 
Xenopsd chooses NUMA nodes purely based on amount of free memory on the NUMA nodes of the host, even if a better NUMA placement could be found on another host * Very large (>16 NUMA nodes) systems may only explore a limited number of choices (fit into a single node vs fallback to full interleaving) * The exact VM placement is not yet controllable * Microbenchmarks with a single VM on a host show both performance improvements and regressions on memory bandwidth usage: previously a single VM may have been able to take advantage of the bandwidth of both NUMA nodes if it happened to allocate memory from the right places, whereas now it'll be forced to use just a single node. From aa542372139053e75e0cc05d36fe236ab3d6d551 Mon Sep 17 00:00:00 2001 From: Bernhard Kaindl Date: Tue, 26 Nov 2024 12:18:35 +0100 Subject: [PATCH 44/86] Apply fix by psafont: "Future XAPI versions may change `default_policy` to mean `best_effort`." Co-authored-by: Pau Ruiz Safont Signed-off-by: Bernhard Kaindl --- doc/content/toolstack/features/NUMA/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/content/toolstack/features/NUMA/index.md b/doc/content/toolstack/features/NUMA/index.md index 8fa637be2ca..5f89a5eaa93 100644 --- a/doc/content/toolstack/features/NUMA/index.md +++ b/doc/content/toolstack/features/NUMA/index.md @@ -161,7 +161,7 @@ Meaning of the policy: * Currently, `default_policy` is treated as `any`, but the admin can change it, and then the system will remember that change across upgrades. If we didn't have a `default_policy` then changing the "default" policy on an upgrade would be tricky: we either risk overriding an explicit choice of the admin, or existing installs cannot take advantage of the improved performance from `best_effort` -* Future, XAPI versions may change `default_policy` to mean `best_effort`. +* Future XAPI versions may change `default_policy` to mean `best_effort`. 
Admins can still override it to `any` if they wish on a host by host basis. It is not expected that users would have to change `best_effort`, unless they run very specific workloads, so a pool level control is not provided at this moment. From eeec845b39350a997b93eadb58b178d6304b224d Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 28 Nov 2024 12:48:01 +0000 Subject: [PATCH 45/86] xe-cli completion: Use grep -E instead of egrep Otherwise newer packages in XS9 issue "egrep: warning: egrep is obsolescent; using grep -E" warnings all over the place. Signed-off-by: Andrii Sultanov --- ocaml/xe-cli/bash-completion | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xe-cli/bash-completion b/ocaml/xe-cli/bash-completion index b4ba6127138..84fd3656a65 100644 --- a/ocaml/xe-cli/bash-completion +++ b/ocaml/xe-cli/bash-completion @@ -675,7 +675,7 @@ description() __process_params() { - echo "$1" | cut -d: -f2- | egrep -v "^ $" | cut -c 2- | \ + echo "$1" | cut -d: -f2- | grep -Ev "^ $" | cut -c 2- | \ sed -e 's/,/=,/g' -e 's/$/=/g' -e 's/:=/:/g' -e 's/-=/-/g' -e 's/ //g' } From 7ada7349447cd4e46d3a1954356c7241d0716a9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 7 Aug 2024 17:11:32 +0100 Subject: [PATCH 46/86] CA-388210: factor out computing the domain parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are going to change how we compute it, so factor it out into a function. No functional change. 
Signed-off-by: Edwin Török --- ocaml/xapi-storage-script/main.ml | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index c69d28847d1..a3457ae3094 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -66,6 +66,8 @@ let backend_backtrace_error name args backtrace = let missing_uri () = backend_error "MISSING_URI" ["Please include a URI in the device-config"] +let domain_of ~dp:_ ~vm' = Storage_interface.Vm.string_of vm' + (** Functions to wrap calls to the above client modules and convert their exceptions and errors into SMAPIv2 errors of type [Storage_interface.Exception.exnty]. The above client modules should only @@ -1432,9 +1434,9 @@ let bind ~volume_script_dir = |> wrap in S.VDI.introduce vdi_introduce_impl ; - let vdi_attach3_impl dbg _dp sr vdi' vm' _readwrite = + let vdi_attach3_impl dbg dp sr vdi' vm' _readwrite = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = domain_of ~dp ~vm' in vdi_attach_common dbg sr vdi domain >>>= fun response -> let convert_implementation = function | Xapi_storage.Data.XenDisk {params; extra; backend_type} -> @@ -1456,9 +1458,9 @@ let bind ~volume_script_dir = |> wrap in S.VDI.attach3 vdi_attach3_impl ; - let vdi_activate_common dbg sr vdi' vm' readonly = + let vdi_activate_common dbg dp sr vdi' vm' readonly = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = domain_of ~dp ~vm' in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> @@ -1483,17 +1485,17 @@ let bind ~volume_script_dir = ) |> wrap in - let vdi_activate3_impl dbg _dp sr vdi' vm' = - vdi_activate_common dbg sr vdi' vm' false + let vdi_activate3_impl dbg dp sr vdi' vm' = + vdi_activate_common dbg dp sr vdi' vm' 
false in S.VDI.activate3 vdi_activate3_impl ; - let vdi_activate_readonly_impl dbg _dp sr vdi' vm' = - vdi_activate_common dbg sr vdi' vm' true + let vdi_activate_readonly_impl dbg dp sr vdi' vm' = + vdi_activate_common dbg dp sr vdi' vm' true in S.VDI.activate_readonly vdi_activate_readonly_impl ; - let vdi_deactivate_impl dbg _dp sr vdi' vm' = + let vdi_deactivate_impl dbg dp sr vdi' vm' = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = domain_of ~dp ~vm' in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> @@ -1514,9 +1516,9 @@ let bind ~volume_script_dir = |> wrap in S.VDI.deactivate vdi_deactivate_impl ; - let vdi_detach_impl dbg _dp sr vdi' vm' = + let vdi_detach_impl dbg dp sr vdi' vm' = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = domain_of ~dp ~vm' in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> @@ -1627,9 +1629,9 @@ let bind ~volume_script_dir = S.VDI.epoch_end vdi_epoch_end_impl ; let vdi_set_persistent_impl _dbg _sr _vdi _persistent = return () |> wrap in S.VDI.set_persistent vdi_set_persistent_impl ; - let dp_destroy2 dbg _dp sr vdi' vm' _allow_leak = + let dp_destroy2 dbg dp sr vdi' vm' _allow_leak = (let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in + let domain = domain_of ~dp ~vm' in Attached_SRs.find sr >>>= fun sr -> (* Discover the URIs using Volume.stat *) stat ~dbg ~sr ~vdi >>>= fun response -> From 864d73420e654b5395a6e98a60615ed292a6e1dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 7 Aug 2024 17:12:33 +0100 Subject: [PATCH 47/86] CA-388210: SMAPIv3 concurrency safety: send the (unique) datapath argument as domain for Dom0 MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Dom0 allows multiple attaches of RO disks, used e.g. for VDI.copy. Send a unique value to SMAPIv3 plugins to avoid bugs to the lack of reference counting in those plugins. XAPI already sends a unique value here, either vbd/domid/device, or a fresh UUID (for storage migration). Signed-off-by: Edwin Török --- ocaml/xapi-storage-script/main.ml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index a3457ae3094..d76ff039e7d 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -66,7 +66,22 @@ let backend_backtrace_error name args backtrace = let missing_uri () = backend_error "MISSING_URI" ["Please include a URI in the device-config"] -let domain_of ~dp:_ ~vm' = Storage_interface.Vm.string_of vm' +(** return a unique 'domain' string for Dom0, so that we can plug disks + multiple times (e.g. for copy). + + XAPI should give us a unique 'dp' (datapath) string, e.g. a UUID for storage migration, + or vbd/domid/device. + For regular guests keep the domain as passed by XAPI (an integer). + *) +let domain_of ~dp ~vm' = + let vm = Storage_interface.Vm.string_of vm' in + match vm with + | "0" -> + (* SM tries to use this in filesystem paths, so cannot have /, + and systemd might be a bit unhappy with - *) + "u0-" ^ dp |> String.map ~f:(function '/' | '-' -> '_' | c -> c) + | _ -> + vm (** Functions to wrap calls to the above client modules and convert their exceptions and errors into SMAPIv2 errors of type From d5cd034ad38e892831ae103b595455dcee9486af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 7 Aug 2024 14:24:58 +0100 Subject: [PATCH 48/86] CA-388210: SMAPIv3 debugging: log PID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Log PID on successful and failed operations, and log full cmdline for newly spawned processes. 
This can be used to debug stuck scripts, so that we know which invocation is the one that is stuck. Signed-off-by: Edwin Török --- ocaml/xapi-storage-script/lib.ml | 4 +++- ocaml/xapi-storage-script/lib.mli | 3 ++- ocaml/xapi-storage-script/main.ml | 28 ++++++++++++++++----------- ocaml/xapi-storage-script/test_lib.ml | 25 ++++++++++++++++++++---- 4 files changed, 43 insertions(+), 17 deletions(-) diff --git a/ocaml/xapi-storage-script/lib.ml b/ocaml/xapi-storage-script/lib.ml index 9c9059432bf..a3beb9a8009 100644 --- a/ocaml/xapi-storage-script/lib.ml +++ b/ocaml/xapi-storage-script/lib.ml @@ -131,6 +131,7 @@ module Process = struct type t = { exit_status: (unit, exit_or_signal) Result.t + ; pid: int ; stdout: string ; stderr: string } @@ -176,6 +177,7 @@ module Process = struct let run ~env ~prog ~args ~input = let ( let@ ) f x = f x in let@ p = with_process ~env ~prog ~args in + let pid = p#pid in let sender = send p#stdin input in let receiver_out = receive p#stdout in let receiver_err = receive p#stderr in @@ -185,7 +187,7 @@ module Process = struct Lwt.both sender receiver >>= fun ((), (stdout, stderr)) -> p#status >>= fun status -> let exit_status = Output.exit_or_signal_of_unix status in - Lwt.return {Output.exit_status; stdout; stderr} + Lwt.return {Output.exit_status; pid; stdout; stderr} ) (function | Lwt.Canceled as exn -> diff --git a/ocaml/xapi-storage-script/lib.mli b/ocaml/xapi-storage-script/lib.mli index a55c4b81fbc..eae9183a174 100644 --- a/ocaml/xapi-storage-script/lib.mli +++ b/ocaml/xapi-storage-script/lib.mli @@ -65,6 +65,7 @@ module Process : sig type t = { exit_status: (unit, exit_or_signal) result + ; pid: int ; stdout: string ; stderr: string } @@ -78,7 +79,7 @@ module Process : sig -> Output.t Lwt.t (** Runs a cli program, writes [input] into its stdin, then closing the fd, and finally waits for the program to finish and returns the exit status, - its stdout and stderr. *) + the pid, and its stdout and stderr. 
*) end module DirWatcher : sig diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index d76ff039e7d..a0b99ae91ab 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -79,7 +79,7 @@ let domain_of ~dp ~vm' = | "0" -> (* SM tries to use this in filesystem paths, so cannot have /, and systemd might be a bit unhappy with - *) - "u0-" ^ dp |> String.map ~f:(function '/' | '-' -> '_' | c -> c) + "u0-" ^ dp |> String.map (function '/' | '-' -> '_' | c -> c) | _ -> vm @@ -477,6 +477,8 @@ let fork_exec_rpc : ) >>>= fun input -> let input = compat_in input |> Jsonrpc.to_string in + debug (fun m -> m "Running %s" @@ Filename.quote_command script_name args) + >>= fun () -> Process.run ~env ~prog:script_name ~args ~input >>= fun output -> let fail_because ~cause description = fail @@ -500,12 +502,13 @@ let fork_exec_rpc : with | Error _ -> error (fun m -> - m "%s failed and printed bad error json: %s" script_name - output.Process.Output.stdout + m "%s[%d] failed and printed bad error json: %s" script_name + output.pid output.Process.Output.stdout ) >>= fun () -> error (fun m -> - m "%s failed, stderr: %s" script_name output.Process.Output.stderr + m "%s[%d] failed, stderr: %s" script_name output.pid + output.Process.Output.stderr ) >>= fun () -> fail_because "non-zero exit and bad json on stdout" @@ -516,12 +519,12 @@ let fork_exec_rpc : with | Error _ -> error (fun m -> - m "%s failed and printed bad error json: %s" script_name - output.Process.Output.stdout + m "%s[%d] failed and printed bad error json: %s" script_name + output.pid output.Process.Output.stdout ) >>= fun () -> error (fun m -> - m "%s failed, stderr: %s" script_name + m "%s[%d] failed, stderr: %s" script_name output.pid output.Process.Output.stderr ) >>= fun () -> @@ -532,7 +535,9 @@ let fork_exec_rpc : ) ) | Error (Signal signal) -> - error (fun m -> m "%s caught a signal and failed" script_name) + error (fun m -> + m "%s[%d] caught a signal 
and failed" script_name output.pid + ) >>= fun () -> fail_because "signalled" ~cause:(Signal.to_string signal) | Ok () -> ( (* Parse the json on stdout. We get back a JSON-RPC @@ -544,8 +549,8 @@ let fork_exec_rpc : with | Error _ -> error (fun m -> - m "%s succeeded but printed bad json: %s" script_name - output.Process.Output.stdout + m "%s[%d] succeeded but printed bad json: %s" script_name + output.pid output.Process.Output.stdout ) >>= fun () -> fail @@ -554,7 +559,8 @@ let fork_exec_rpc : ) | Ok response -> info (fun m -> - m "%s succeeded: %s" script_name output.Process.Output.stdout + m "%s[%d] succeeded: %s" script_name output.pid + output.Process.Output.stdout ) >>= fun () -> let response = compat_out response in diff --git a/ocaml/xapi-storage-script/test_lib.ml b/ocaml/xapi-storage-script/test_lib.ml index e016d1368a4..ca1d0a07a1c 100644 --- a/ocaml/xapi-storage-script/test_lib.ml +++ b/ocaml/xapi-storage-script/test_lib.ml @@ -103,12 +103,20 @@ let test_run_status = let module P = Process in let test () = let* output = P.run ~prog:"true" ~args:[] ~input:"" ~env:[] in - let expected = P.Output.{exit_status= Ok (); stdout= ""; stderr= ""} in + let expected = + P.Output.{exit_status= Ok (); pid= output.pid; stdout= ""; stderr= ""} + in Alcotest.(check output_c) "Exit status is correct" expected output ; let* output = P.run ~prog:"false" ~args:[] ~input:"" ~env:[] in let expected = - P.Output.{exit_status= Error (Exit_non_zero 1); stdout= ""; stderr= ""} + P.Output. + { + exit_status= Error (Exit_non_zero 1) + ; pid= output.pid + ; stdout= "" + ; stderr= "" + } in Alcotest.(check output_c) "Exit status is correct" expected output ; @@ -121,7 +129,10 @@ let test_run_output = let test () = let content = "@@@@@@" in let* output = P.run ~prog:"cat" ~args:["-"] ~input:content ~env:[] in - let expected = P.Output.{exit_status= Ok (); stdout= content; stderr= ""} in + let expected = + P.Output. 
+ {exit_status= Ok (); pid= output.pid; stdout= content; stderr= ""} + in Alcotest.(check output_c) "Stdout is correct" expected output ; let* output = P.run ~prog:"cat" ~args:[content] ~input:content ~env:[] in @@ -129,7 +140,13 @@ let test_run_output = Printf.sprintf "cat: %s: No such file or directory\n" content in let expected = - P.Output.{exit_status= Error (Exit_non_zero 1); stdout= ""; stderr} + P.Output. + { + exit_status= Error (Exit_non_zero 1) + ; pid= output.pid + ; stdout= "" + ; stderr + } in Alcotest.(check output_c) "Stderr is correct" expected output ; Lwt.return () From ce82302b5244f248a1dd736421b506499a5d402d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Wed, 7 Aug 2024 14:24:58 +0100 Subject: [PATCH 49/86] CA-388210: SMAPIv3 concurrency: turn on concurrent operations by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is believed that the deadlocks in SMAPIv3 were caused by VDI.copy operations that attach the same disk RO multiple times in Dom0. With the previous commits we now use a unique identifier and spawn a separate qemu-dp process in the SMAPIv3 plugins, which should prevent the deadlocks and IO errors due to lack of refcounting. 
Signed-off-by: Edwin Török --- ocaml/xapi-storage-script/main.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index a0b99ae91ab..e29f4937ab5 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -1796,7 +1796,7 @@ let rec diff a b = (* default false due to bugs in SMAPIv3 plugins, once they are fixed this should be set to true *) -let concurrent = ref false +let concurrent = ref true type reload = All | Files of string list | Nothing From 4c97bfbf386bcf67318578f8434fc5d0729319b9 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Sun, 1 Dec 2024 13:25:19 +0000 Subject: [PATCH 50/86] Improve Delay test Test was pretty minimal. Add test for result from "wait" function. Check that "signal" calls are collapsed as stated. Check using multiple threads. Check timeout not calling "signal". Old "test_wait" test done by "no_signal". Signed-off-by: Frediano Ziglio --- .../xapi-stdext/lib/xapi-stdext-threads/dune | 2 +- .../lib/xapi-stdext-threads/threadext_test.ml | 82 ++++++++++++++----- 2 files changed, 63 insertions(+), 21 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune index f7e9141c3a9..663c24437c7 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune @@ -12,5 +12,5 @@ (name threadext_test) (package xapi-stdext-threads) (modules threadext_test) - (libraries xapi_stdext_threads alcotest mtime.clock.os mtime fmt) + (libraries xapi_stdext_threads alcotest mtime.clock.os mtime fmt threads.posix) ) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml index c21cd62e8c0..0442e302ab7 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml +++ 
b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml @@ -1,5 +1,5 @@ (* - * Copyright (C) 2006-2009 Citrix Systems Inc. + * Copyright (C) 2006-2024 Citrix Systems Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -14,22 +14,64 @@ module Delay = Xapi_stdext_threads.Threadext.Delay -let span_approx ~max_error = - let eq_within a b = - let diff = Mtime.Span.abs_diff a b in - Mtime.Span.compare diff max_error < 0 - in - Alcotest.testable Mtime.Span.pp @@ eq_within - -let test_wait () = - let m = Delay.make () in - let c = Mtime_clock.counter () in - let time = 1 in - let expected = Mtime.Span.(time * s) in - let max_error = Mtime.Span.(10 * ms) in - let _ = Delay.wait m (float_of_int time) in - let wait_time = Mtime_clock.count c in - Alcotest.check' (span_approx ~max_error) ~msg:"diff is smaller than max error" - ~expected ~actual:wait_time - -let () = Alcotest.run "Threadext" [("wait", [("wait", `Quick, test_wait)])] +let delay_wait_check ~min ~max delay timeout expected = + let cnt = Mtime_clock.counter () in + let res = Delay.wait delay timeout in + let elapsed = (Mtime_clock.count cnt |> Mtime.Span.to_float_ns) *. 1e-9 in + Alcotest.(check bool) "expected result" expected res ; + if elapsed < min || elapsed > max then + let msg = Printf.sprintf "%f not in range %f-%f" elapsed min max in + Alcotest.(check bool) msg true false + +(* +Single simple signal stored +- signal +- wait on same thread should succeed quickly +*) +let simple () = + let d = Delay.make () in + Delay.signal d ; + delay_wait_check ~min:0. 
~max:0.01 d 1.0 false + +(* +No signal +- wait on same thread should timeout more or less on delay +*) +let no_signal () = + let d = Delay.make () in + delay_wait_check ~min:0.1 ~max:0.11 d 0.1 true + +(* +Signal twice, collapsed +- signal +- signal +- wait on same thread should succeed quickly +- wait on same thread should timeout +*) +let collapsed () = + let d = Delay.make () in + Delay.signal d ; + Delay.signal d ; + delay_wait_check ~min:0. ~max:0.01 d 0.1 false ; + delay_wait_check ~min:0.1 ~max:0.11 d 0.1 true + +(* +Signal from another thread +- signal on another thread after a while +- wait on same thread should succeed more or less on other thread sleep +*) +let other_thread () = + let d = Delay.make () in + let th = Thread.create (fun d -> Thread.delay 0.1 ; Delay.signal d) d in + delay_wait_check ~min:0.1 ~max:0.11 d 1.0 false ; + Thread.join th + +let tests = + [ + ("simple", `Quick, simple) + ; ("no_signal", `Quick, no_signal) + ; ("collapsed", `Quick, collapsed) + ; ("other_thread", `Quick, other_thread) + ] + +let () = Alcotest.run "Threadext" [("Delay", tests)] From 78d6df36b3fb4e48b6ebf9cab93a3f0bbfa5e1cd Mon Sep 17 00:00:00 2001 From: Mark Syms Date: Tue, 12 Nov 2024 09:27:25 +0000 Subject: [PATCH 51/86] CP-42675: add new SM GC message ID Signed-off-by: Mark Syms --- ocaml/xapi-consts/api_messages.ml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ocaml/xapi-consts/api_messages.ml b/ocaml/xapi-consts/api_messages.ml index ff436199a76..a81eb1a426f 100644 --- a/ocaml/xapi-consts/api_messages.ml +++ b/ocaml/xapi-consts/api_messages.ml @@ -370,3 +370,5 @@ let xapi_startup_blocked_as_version_higher_than_coordinator = let all_running_vms_in_anti_affinity_grp_on_single_host = addMessage "ALL_RUNNING_VMS_IN_ANTI_AFFINITY_GRP_ON_SINGLE_HOST" 3L + +let sm_gc_no_space = addMessage "SM_GC_NO_SPACE" 3L From 21d115613f06461d14c6b58b8a07d5de24258117 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Fri, 29 Nov 2024 11:48:14 +0800 Subject: [PATCH 52/86] 
CA-403101: Keep host.last_update_hash for host joined a pool Signed-off-by: Gang Ji --- ocaml/idl/datamodel_host.ml | 9 +++++++++ ocaml/tests/common/test_common.ml | 3 ++- ocaml/tests/test_host.ml | 2 +- ocaml/xapi/dbsync_slave.ml | 2 +- ocaml/xapi/xapi_host.ml | 8 ++++---- ocaml/xapi/xapi_host.mli | 1 + ocaml/xapi/xapi_pool.ml | 1 + 7 files changed, 19 insertions(+), 7 deletions(-) diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index b0fb9a6aace..78b68a35722 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -1291,6 +1291,15 @@ let create_params = ; param_release= dundee_release ; param_default= Some (VDateTime Date.epoch) } + ; { + param_type= String + ; param_name= "last_update_hash" + ; param_doc= + "The SHA256 checksum of updateinfo of the most recently applied update \ + on the host" + ; param_release= numbered_release "24.39.0-next" + ; param_default= Some (VString "") + } ] let create = diff --git a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index 297a68398ca..7ac0868c84b 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -170,12 +170,13 @@ let make_host ~__context ?(uuid = make_uuid ()) ?(name_label = "host") ?(external_auth_service_name = "") ?(external_auth_configuration = []) ?(license_params = []) ?(edition = "free") ?(license_server = []) ?(local_cache_sr = Ref.null) ?(chipset_info = []) ?(ssl_legacy = false) - ?(last_software_update = Date.epoch) () = + ?(last_software_update = Date.epoch) ?(last_update_hash = "") () = let host = Xapi_host.create ~__context ~uuid ~name_label ~name_description ~hostname ~address ~external_auth_type ~external_auth_service_name ~external_auth_configuration ~license_params ~edition ~license_server ~local_cache_sr ~chipset_info ~ssl_legacy ~last_software_update + ~last_update_hash in Db.Host.set_cpu_info ~__context ~self:host ~value:default_cpu_info ; host diff --git a/ocaml/tests/test_host.ml 
b/ocaml/tests/test_host.ml index 80e72f4f113..60c735e2aff 100644 --- a/ocaml/tests/test_host.ml +++ b/ocaml/tests/test_host.ml @@ -23,7 +23,7 @@ let add_host __context name = ~external_auth_service_name:"" ~external_auth_configuration:[] ~license_params:[] ~edition:"" ~license_server:[] ~local_cache_sr:Ref.null ~chipset_info:[] ~ssl_legacy:false - ~last_software_update:Xapi_stdext_date.Date.epoch + ~last_software_update:Xapi_stdext_date.Date.epoch ~last_update_hash:"" ) (* Creates an unlicensed pool with the maximum number of hosts *) diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 942d3081071..10678562e79 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -59,7 +59,7 @@ let create_localhost ~__context info = ~external_auth_configuration:[] ~license_params:[] ~edition:"" ~license_server:[("address", "localhost"); ("port", "27000")] ~local_cache_sr:Ref.null ~chipset_info:[] ~ssl_legacy:false - ~last_software_update:Date.epoch + ~last_software_update:Date.epoch ~last_update_hash:"" in () diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 9f84923fe2e..1b22a037b30 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -991,7 +991,7 @@ let is_host_alive ~__context ~host = let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~external_auth_type ~external_auth_service_name ~external_auth_configuration ~license_params ~edition ~license_server ~local_cache_sr ~chipset_info - ~ssl_legacy:_ ~last_software_update = + ~ssl_legacy:_ ~last_software_update ~last_update_hash = (* fail-safe. We already test this on the joining host, but it's racy, so multiple concurrent pool-join might succeed. 
Note: we do it in this order to avoid a problem checking restrictions during the initial setup of the database *) @@ -1053,9 +1053,9 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ) ~control_domain:Ref.null ~updates_requiring_reboot:[] ~iscsi_iqn:"" ~multipathing:false ~uefi_certificates:"" ~editions:[] ~pending_guidances:[] - ~tls_verification_enabled ~last_software_update ~recommended_guidances:[] - ~latest_synced_updates_applied:`unknown ~pending_guidances_recommended:[] - ~pending_guidances_full:[] ~last_update_hash:"" ; + ~tls_verification_enabled ~last_software_update ~last_update_hash + ~recommended_guidances:[] ~latest_synced_updates_applied:`unknown + ~pending_guidances_recommended:[] ~pending_guidances_full:[] ; (* If the host we're creating is us, make sure its set to live *) Db.Host_metrics.set_last_updated ~__context ~self:metrics ~value:(Date.now ()) ; Db.Host_metrics.set_live ~__context ~self:metrics ~value:host_is_us ; diff --git a/ocaml/xapi/xapi_host.mli b/ocaml/xapi/xapi_host.mli index c303ee69597..bafc47f6de0 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -129,6 +129,7 @@ val create : -> chipset_info:(string * string) list -> ssl_legacy:bool -> last_software_update:API.datetime + -> last_update_hash:string -> [`host] Ref.t val destroy : __context:Context.t -> self:API.ref_host -> unit diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index dd4bca70e26..93e03cd15a0 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -968,6 +968,7 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : ~local_cache_sr ~chipset_info:host.API.host_chipset_info ~ssl_legacy:false ~last_software_update:host.API.host_last_software_update + ~last_update_hash:host.API.host_last_update_hash in (* Copy other-config into newly created host record: *) no_exn From 07724733e6de2a805fa5b22064b8fdf83c749da8 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: 
Tue, 3 Dec 2024 08:41:57 +0000 Subject: [PATCH 53/86] xapi_message: Fix incorrect slow path invocation (and its logs) get_since_for_events checks if the cache contains entries older than the 'since' timepoint requested. If so, this means the cache contains all entries strictly after (>) 'since'. If not, this means the cache needs to be reconstructed, since some entries strictly after (>) 'since' might be just out of cache. Previous code would reconstruct the cache when the oldest entry in the cache was equal to the 'since' timepoint requested, which is incorrect and caused a deluge of "x is older than x itself" in the logs. So, change the check for 'entries older than the since timestamp' to be less-or-equal (as the opposite of 'strictly after'), and clarify the debug message. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_message.ml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi/xapi_message.ml b/ocaml/xapi/xapi_message.ml index 8bc43cc48e8..2bc570925b9 100644 --- a/ocaml/xapi/xapi_message.ml +++ b/ocaml/xapi/xapi_message.ml @@ -647,7 +647,7 @@ let get_since_for_events ~__context since = let cached_result = with_lock in_memory_cache_mutex (fun () -> match List.rev !in_memory_cache with - | (last_in_memory, _, _) :: _ when last_in_memory < since -> + | (oldest_in_memory, _, _) :: _ when oldest_in_memory <= since -> Some (List.filter_map (fun (gen, _ref, msg) -> @@ -658,11 +658,11 @@ let get_since_for_events ~__context since = ) !in_memory_cache ) - | (last_in_memory, _, _) :: _ -> + | (oldest_in_memory, _, _) :: _ -> debug - "%s: cache (%Ld) is older than requested time (%Ld): Using slow \ - message lookup" - __FUNCTION__ last_in_memory since ; + "%s: cache (%Ld) might not contain all messages since the \ + requested time (%Ld): Using slow message lookup" + __FUNCTION__ oldest_in_memory since ; None | _ -> debug "%s: empty cache; Using slow message lookup" __FUNCTION__ ; From 9e3ad1c11e466d02e9663ccebbdde5dbb85a73ec Mon 
Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 27 Nov 2024 16:16:28 +0000 Subject: [PATCH 54/86] xapi: move the 'periodic' scheduler to xapi-stdext-threads This allows the scheduler based on threads to be used by components that aren't xapi, like forkexecd. Signed-off-by: Pau Ruiz Safont --- .../xapi-stdext/lib/xapi-stdext-threads/dune | 10 +++- .../lib/xapi-stdext-threads}/ipq.ml | 0 .../lib/xapi-stdext-threads/scheduler.ml} | 22 +++++---- .../lib/xapi-stdext-threads/scheduler.mli} | 0 ocaml/tests/common/dune | 1 + ocaml/tests/common/test_event_common.ml | 8 ++-- ocaml/xapi/dune | 2 + ocaml/xapi/extauth_plugin_ADwinbind.ml | 16 +++---- ocaml/xapi/helpers.ml | 7 +-- ocaml/xapi/pool_periodic_update_sync.ml | 6 +-- ocaml/xapi/xapi.ml | 2 +- ocaml/xapi/xapi_event.ml | 6 +-- ocaml/xapi/xapi_host.ml | 14 +++--- ocaml/xapi/xapi_periodic_scheduler_init.ml | 47 +++++++++++-------- 14 files changed, 81 insertions(+), 60 deletions(-) rename ocaml/{xapi => libs/xapi-stdext/lib/xapi-stdext-threads}/ipq.ml (100%) rename ocaml/{xapi/xapi_periodic_scheduler.ml => libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml} (85%) rename ocaml/{xapi/xapi_periodic_scheduler.mli => libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.mli} (100%) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune index 663c24437c7..81bf6e67af2 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune @@ -1,13 +1,21 @@ (library (public_name xapi-stdext-threads) (name xapi_stdext_threads) - (modules :standard \ threadext_test) + (modules :standard \ ipq scheduler threadext_test) (libraries threads.posix unix xapi-stdext-unix xapi-stdext-pervasives) ) + +(library + (public_name xapi-stdext-threads.scheduler) + (name xapi_stdext_threads_scheduler) + (modules ipq scheduler) + (libraries mtime mtime.clock threads.posix unix xapi-log xapi-stdext-threads) +) + (test (name 
threadext_test) (package xapi-stdext-threads) diff --git a/ocaml/xapi/ipq.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml similarity index 100% rename from ocaml/xapi/ipq.ml rename to ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml diff --git a/ocaml/xapi/xapi_periodic_scheduler.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml similarity index 85% rename from ocaml/xapi/xapi_periodic_scheduler.ml rename to ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml index 1edcb938857..b3d44caa62e 100644 --- a/ocaml/xapi/xapi_periodic_scheduler.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml @@ -12,7 +12,7 @@ * GNU Lesser General Public License for more details. *) -module D = Debug.Make (struct let name = "backgroundscheduler" end) +module D = Debug.Make (struct let name = __MODULE__ end) open D module Delay = Xapi_stdext_threads.Threadext.Delay @@ -30,16 +30,20 @@ let (queue : t Ipq.t) = Ipq.create 50 let lock = Mutex.create () module Clock = struct - (** time span of s seconds *) let span s = Mtime.Span.of_uint64_ns (Int64.of_float (s *. 1e9)) + let span_to_s span = + Mtime.Span.to_uint64_ns span |> Int64.to_float |> fun ns -> ns /. 1e9 + let add_span clock secs = + (* return mix or max available value if the add overflows *) match Mtime.add_span clock (span secs) with | Some t -> t + | None when secs > 0. -> + Mtime.max_stamp | None -> - raise - Api_errors.(Server_error (internal_error, ["clock overflow"; __LOC__])) + Mtime.min_stamp end let add_to_queue ?(signal = true) name ty start newfunc = @@ -59,7 +63,7 @@ let remove_from_queue name = Ipq.remove queue index let loop () = - debug "Periodic scheduler started" ; + debug "%s started" __MODULE__ ; try while true do let empty = with_lock lock (fun () -> Ipq.is_empty queue) in @@ -82,8 +86,8 @@ let loop () = ) else (* Sleep until next event. 
*) let sleep = Mtime.(span next.Ipq.time now) - |> Mtime.Span.add (Clock.span 0.001) - |> Scheduler.span_to_s + |> Mtime.Span.(add ms) + |> Clock.span_to_s in try ignore (Delay.wait delay sleep) with e -> @@ -102,5 +106,5 @@ let loop () = done with _ -> error - "Periodic scheduler died! Xapi will no longer function well and should \ - be restarted." + "Scheduler thread died! This daemon will no longer function well and \ + should be restarted." diff --git a/ocaml/xapi/xapi_periodic_scheduler.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.mli similarity index 100% rename from ocaml/xapi/xapi_periodic_scheduler.mli rename to ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.mli diff --git a/ocaml/tests/common/dune b/ocaml/tests/common/dune index c578f5f9785..29acca3d2cb 100644 --- a/ocaml/tests/common/dune +++ b/ocaml/tests/common/dune @@ -26,6 +26,7 @@ xapi-test-utils xapi-types xapi-stdext-date + xapi-stdext-threads.scheduler xapi-stdext-unix ) ) diff --git a/ocaml/tests/common/test_event_common.ml b/ocaml/tests/common/test_event_common.ml index 149a27d5ea8..9d37c038ab4 100644 --- a/ocaml/tests/common/test_event_common.ml +++ b/ocaml/tests/common/test_event_common.ml @@ -2,16 +2,16 @@ let ps_start = ref false let scheduler_mutex = Mutex.create () +module Scheduler = Xapi_stdext_threads_scheduler.Scheduler + let start_periodic_scheduler () = Mutex.lock scheduler_mutex ; if !ps_start then () else ( - Xapi_periodic_scheduler.add_to_queue "dummy" - (Xapi_periodic_scheduler.Periodic 60.0) 0.0 (fun () -> () - ) ; + Scheduler.add_to_queue "dummy" (Scheduler.Periodic 60.0) 0.0 (fun () -> ()) ; Xapi_event.register_hooks () ; - ignore (Thread.create Xapi_periodic_scheduler.loop ()) ; + ignore (Thread.create Scheduler.loop ()) ; ps_start := true ) ; Mutex.unlock scheduler_mutex diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index 5602e62d152..d2e2fb17de8 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -196,6 +196,7 @@ xapi-stdext-pervasives 
xapi-stdext-std xapi-stdext-threads + xapi-stdext-threads.scheduler xapi-stdext-unix xapi-stdext-zerocheck xapi-tracing @@ -256,6 +257,7 @@ xapi-stdext-pervasives xapi-stdext-std xapi-stdext-threads + xapi-stdext-threads.scheduler xapi-stdext-unix xapi-types xapi_aux diff --git a/ocaml/xapi/extauth_plugin_ADwinbind.ml b/ocaml/xapi/extauth_plugin_ADwinbind.ml index fc0aa01ad0b..6f51eea9cc5 100644 --- a/ocaml/xapi/extauth_plugin_ADwinbind.ml +++ b/ocaml/xapi/extauth_plugin_ADwinbind.ml @@ -22,6 +22,7 @@ end) open D open Xapi_stdext_std.Xstringext open Auth_signature +module Scheduler = Xapi_stdext_threads_scheduler.Scheduler let finally = Xapi_stdext_pervasives.Pervasiveext.finally @@ -1172,16 +1173,14 @@ module ClosestKdc = struct let trigger_update ~start = if Pool_role.is_master () then ( debug "Trigger task: %s" periodic_update_task_name ; - Xapi_periodic_scheduler.add_to_queue periodic_update_task_name - (Xapi_periodic_scheduler.Periodic - !Xapi_globs.winbind_update_closest_kdc_interval - ) + Scheduler.add_to_queue periodic_update_task_name + (Scheduler.Periodic !Xapi_globs.winbind_update_closest_kdc_interval) start update ) let stop_update () = if Pool_role.is_master () then - Xapi_periodic_scheduler.remove_from_queue periodic_update_task_name + Scheduler.remove_from_queue periodic_update_task_name end module RotateMachinePassword = struct @@ -1302,11 +1301,10 @@ module RotateMachinePassword = struct let trigger_rotate ~start = debug "Trigger task: %s" task_name ; - Xapi_periodic_scheduler.add_to_queue task_name - (Xapi_periodic_scheduler.Periodic !Xapi_globs.winbind_machine_pwd_timeout) - start rotate + Scheduler.add_to_queue task_name + (Scheduler.Periodic !Xapi_globs.winbind_machine_pwd_timeout) start rotate - let stop_rotate () = Xapi_periodic_scheduler.remove_from_queue task_name + let stop_rotate () = Scheduler.remove_from_queue task_name end let build_netbios_name ~config_params = diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 
8c2f91fc2a3..68368754e72 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -443,8 +443,9 @@ let make_timeboxed_rpc ~__context timeout rpc : Rpc.response = in List.iter Locking_helpers.kill_resource resources in - Xapi_periodic_scheduler.add_to_queue (Ref.string_of task_id) - Xapi_periodic_scheduler.OneShot timeout cancel ; + let module Scheduler = Xapi_stdext_threads_scheduler.Scheduler in + Scheduler.add_to_queue (Ref.string_of task_id) Scheduler.OneShot timeout + cancel ; let transport = if Pool_role.is_master () then Unix Xapi_globs.unix_domain_socket @@ -459,7 +460,7 @@ let make_timeboxed_rpc ~__context timeout rpc : Rpc.response = let result = XMLRPC_protocol.rpc ~srcstr:"xapi" ~dststr:"xapi" ~transport ~http rpc in - Xapi_periodic_scheduler.remove_from_queue (Ref.string_of task_id) ; + Scheduler.remove_from_queue (Ref.string_of task_id) ; result ) diff --git a/ocaml/xapi/pool_periodic_update_sync.ml b/ocaml/xapi/pool_periodic_update_sync.ml index 45aacf82a9c..a9755d0cf1e 100644 --- a/ocaml/xapi/pool_periodic_update_sync.ml +++ b/ocaml/xapi/pool_periodic_update_sync.ml @@ -16,6 +16,7 @@ module D = Debug.Make (struct let name = __MODULE__ end) open D open Client +module Scheduler = Xapi_stdext_threads_scheduler.Scheduler type frequency = Daily | Weekly of int @@ -162,12 +163,11 @@ let rec update_sync () = ) and add_to_queue ~__context () = - let open Xapi_periodic_scheduler in - add_to_queue periodic_update_sync_task_name OneShot + Scheduler.add_to_queue periodic_update_sync_task_name Scheduler.OneShot (seconds_until_next_schedule ~__context) update_sync let set_enabled ~__context ~value = - Xapi_periodic_scheduler.remove_from_queue periodic_update_sync_task_name ; + Scheduler.remove_from_queue periodic_update_sync_task_name ; if value then add_to_queue ~__context () diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index ca87e740efb..fd5c0650266 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -1114,7 +1114,7 @@ let server_init () 
= ) ; ( "Starting periodic scheduler" , [Startup.OnThread] - , Xapi_periodic_scheduler.loop + , Xapi_stdext_threads_scheduler.Scheduler.loop ) ; ( "Synchronising host configuration files" , [] diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index 8c7432106ab..600d2859dd3 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -425,12 +425,12 @@ module From = struct && (not (session_is_invalid call)) && Unix.gettimeofday () < deadline do - Xapi_periodic_scheduler.add_to_queue timeoutname - Xapi_periodic_scheduler.OneShot + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue timeoutname + Xapi_stdext_threads_scheduler.Scheduler.OneShot (deadline -. Unix.gettimeofday () +. 0.5) (fun () -> Condition.broadcast c) ; Condition.wait c m ; - Xapi_periodic_scheduler.remove_from_queue timeoutname + Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue timeoutname done ) ; if session_is_invalid call then ( diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 46e06a9b4d7..cd6ae3a7d35 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -938,12 +938,12 @@ let ask_host_if_it_is_a_slave ~__context ~host = "ask_host_if_it_is_a_slave: host taking a long time to respond - IP: \ %s; uuid: %s" ip uuid ; - Xapi_periodic_scheduler.add_to_queue task_name - Xapi_periodic_scheduler.OneShot timeout + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue task_name + Xapi_stdext_threads_scheduler.Scheduler.OneShot timeout (log_host_slow_to_respond (min (2. *. 
timeout) 300.)) in - Xapi_periodic_scheduler.add_to_queue task_name - Xapi_periodic_scheduler.OneShot timeout + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue task_name + Xapi_stdext_threads_scheduler.Scheduler.OneShot timeout (log_host_slow_to_respond timeout) ; let res = Message_forwarding.do_op_on_localsession_nolivecheck ~local_fn ~__context @@ -951,7 +951,7 @@ let ask_host_if_it_is_a_slave ~__context ~host = Client.Client.Pool.is_slave ~rpc ~session_id ~host ) in - Xapi_periodic_scheduler.remove_from_queue task_name ; + Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue task_name ; res in Server_helpers.exec_with_subtask ~__context "host.ask_host_if_it_is_a_slave" @@ -1497,8 +1497,8 @@ let sync_data ~__context ~host = Xapi_sync.sync_host ~__context host (* Nb, no attempt to wrap exceptions yet *) let backup_rrds ~__context ~host:_ ~delay = - Xapi_periodic_scheduler.add_to_queue "RRD backup" - Xapi_periodic_scheduler.OneShot delay (fun _ -> + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue "RRD backup" + Xapi_stdext_threads_scheduler.Scheduler.OneShot delay (fun _ -> let master_address = Pool_role.get_master_address_opt () in log_and_ignore_exn (Rrdd.backup_rrds master_address) ; log_and_ignore_exn (fun () -> diff --git a/ocaml/xapi/xapi_periodic_scheduler_init.ml b/ocaml/xapi/xapi_periodic_scheduler_init.ml index d74b349e240..39866292460 100644 --- a/ocaml/xapi/xapi_periodic_scheduler_init.ml +++ b/ocaml/xapi/xapi_periodic_scheduler_init.ml @@ -76,46 +76,53 @@ let register ~__context = let update_all_subjects_delay = 10.0 in (* initial delay = 10 seconds *) if master then - Xapi_periodic_scheduler.add_to_queue "Synchronising RRDs/messages" - (Xapi_periodic_scheduler.Periodic sync_timer) sync_delay sync_func ; + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue + "Synchronising RRDs/messages" + (Xapi_stdext_threads_scheduler.Scheduler.Periodic sync_timer) sync_delay + sync_func ; if master then - Xapi_periodic_scheduler.add_to_queue 
"Backing up RRDs" - (Xapi_periodic_scheduler.Periodic rrdbackup_timer) rrdbackup_delay - rrdbackup_func ; + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue "Backing up RRDs" + (Xapi_stdext_threads_scheduler.Scheduler.Periodic rrdbackup_timer) + rrdbackup_delay rrdbackup_func ; if master then - Xapi_periodic_scheduler.add_to_queue + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue "Revalidating externally-authenticated sessions" - (Xapi_periodic_scheduler.Periodic + (Xapi_stdext_threads_scheduler.Scheduler.Periodic !Xapi_globs.session_revalidation_interval - ) session_revalidation_delay session_revalidation_func ; + ) + session_revalidation_delay session_revalidation_func ; if master then - Xapi_periodic_scheduler.add_to_queue + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue "Trying to update subjects' info using external directory service (if \ any)" - (Xapi_periodic_scheduler.Periodic !Xapi_globs.update_all_subjects_interval) + (Xapi_stdext_threads_scheduler.Scheduler.Periodic + !Xapi_globs.update_all_subjects_interval + ) update_all_subjects_delay update_all_subjects_func ; - Xapi_periodic_scheduler.add_to_queue "Periodic scheduler heartbeat" - (Xapi_periodic_scheduler.Periodic hb_timer) 240.0 hb_func ; - Xapi_periodic_scheduler.add_to_queue "Update monitor configuration" - (Xapi_periodic_scheduler.Periodic 3600.0) 3600.0 + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue + "Periodic scheduler heartbeat" + (Xapi_stdext_threads_scheduler.Scheduler.Periodic hb_timer) 240.0 hb_func ; + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue + "Update monitor configuration" + (Xapi_stdext_threads_scheduler.Scheduler.Periodic 3600.0) 3600.0 Monitor_master.update_configuration_from_master ; ( if master then let freq = !Xapi_globs.failed_login_alert_freq |> float_of_int in - Xapi_periodic_scheduler.add_to_queue + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue "Periodic alert failed login attempts" - (Xapi_periodic_scheduler.Periodic freq) freq + 
(Xapi_stdext_threads_scheduler.Scheduler.Periodic freq) freq Xapi_pool.alert_failed_login_attempts ) ; - Xapi_periodic_scheduler.add_to_queue "broken_kernel" - (Xapi_periodic_scheduler.Periodic 600.) 600. (fun () -> + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue "broken_kernel" + (Xapi_stdext_threads_scheduler.Scheduler.Periodic 600.) 600. (fun () -> Server_helpers.exec_with_new_task "Periodic alert if the running kernel is broken in some serious way." (fun __context -> Xapi_host.alert_if_kernel_broken ~__context ) ) ; - Xapi_periodic_scheduler.add_to_queue + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue "Period alert if TLS verification emergency disabled" - (Xapi_periodic_scheduler.Periodic 600.) 600. (fun () -> + (Xapi_stdext_threads_scheduler.Scheduler.Periodic 600.) 600. (fun () -> Server_helpers.exec_with_new_task "Period alert if TLS verification emergency disabled" (fun __context -> Xapi_host.alert_if_tls_verification_was_emergency_disabled ~__context From cd98e9db047f67cd07579038060dc7c1cc930eb7 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Tue, 26 Nov 2024 16:44:18 +0000 Subject: [PATCH 55/86] Check index before using it removing an element from Imperative priority queue The queue uses an Array larger than the current size. It's possible that code check the index much later returning error but also setting the queue in an invalid state. So to avoid structure corruption check the index as soon as possible. 
Signed-off-by: Frediano Ziglio --- .../xapi-stdext/lib/xapi-stdext-threads/dune | 10 ++--- .../lib/xapi-stdext-threads/ipq.ml | 2 + .../lib/xapi-stdext-threads/ipq_test.ml | 40 +++++++++++++++++++ .../lib/xapi-stdext-threads/ipq_test.mli | 0 4 files changed, 47 insertions(+), 5 deletions(-) create mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml create mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.mli diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune index 81bf6e67af2..cdaf50ee02f 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune @@ -1,7 +1,7 @@ (library (public_name xapi-stdext-threads) (name xapi_stdext_threads) - (modules :standard \ ipq scheduler threadext_test) + (modules :standard \ ipq scheduler threadext_test ipq_test) (libraries threads.posix unix @@ -16,9 +16,9 @@ (libraries mtime mtime.clock threads.posix unix xapi-log xapi-stdext-threads) ) -(test - (name threadext_test) +(tests + (names threadext_test ipq_test) (package xapi-stdext-threads) - (modules threadext_test) - (libraries xapi_stdext_threads alcotest mtime.clock.os mtime fmt threads.posix) + (modules threadext_test ipq_test) + (libraries xapi_stdext_threads alcotest mtime.clock.os mtime fmt threads.posix xapi_stdext_threads_scheduler) ) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml index ba56825ebe0..cba404293e7 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml @@ -64,6 +64,8 @@ let maximum h = let remove h s = if h.size <= 0 then raise EmptyHeap ; + if s < 0 || s >= h.size then + invalid_arg (Printf.sprintf "%s: index %d out of bounds" __FUNCTION__ s) ; let n = h.size - 1 in h.size <- n ; let d = h.data in diff --git 
a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml new file mode 100644 index 00000000000..b6a614b302c --- /dev/null +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml @@ -0,0 +1,40 @@ +(* + * Copyright (C) 2024 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module Ipq = Xapi_stdext_threads_scheduler.Ipq + +(* test we get "out of bound" exception calling Ipq.remove *) +let test_out_of_index () = + let q = Ipq.create 10 in + Ipq.add q {Ipq.ev= 123; Ipq.time= Mtime_clock.now ()} ; + let is_oob = function + | Invalid_argument s when String.ends_with ~suffix:" out of bounds" s -> + true + | _ -> + false + in + let oob_check n = + (Alcotest.match_raises "out of bound" is_oob @@ fun () -> Ipq.remove q n) ; + Alcotest.(check bool) "same value" false (Ipq.is_empty q) + in + oob_check 10 ; + oob_check (-1) ; + oob_check 9 ; + oob_check 1 ; + (* this should succeed *) + Ipq.remove q 0 + +let tests = [("test_out_of_index", `Quick, test_out_of_index)] + +let () = Alcotest.run "Ipq" [("generic", tests)] diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.mli new file mode 100644 index 00000000000..e69de29bb2d From 414deefc1996df1bc4dbbd151e10c02dd7c80bbb Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Wed, 27 Nov 2024 18:45:44 +0000 Subject: [PATCH 56/86] Fix removing elements from Imperative priority 
queue The queue is implemented using heap tree saved as an array. This is pretty standard. To do a removal you replace the element to remove with the last element and adjust the heap order. This can result in having to move the moved element up or down base its the value. Code only moved down, so also move it up if needed. Signed-off-by: Frediano Ziglio --- .../xapi-stdext/lib/xapi-stdext-threads/ipq.ml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml index cba404293e7..9fd99c59892 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml @@ -67,9 +67,17 @@ let remove h s = if s < 0 || s >= h.size then invalid_arg (Printf.sprintf "%s: index %d out of bounds" __FUNCTION__ s) ; let n = h.size - 1 in - h.size <- n ; let d = h.data in let x = d.(n) in + (* moving [x] up in the heap *) + let rec moveup i = + let fi = (i - 1) / 2 in + if i > 0 && Mtime.is_later d.(fi).time ~than:x.time then ( + d.(i) <- d.(fi) ; + moveup fi + ) else + d.(i) <- x + in (* moving [x] down in the heap *) let rec movedown i = let j = (2 * i) + 1 in @@ -86,7 +94,13 @@ let remove h s = else d.(i) <- x in - movedown s + if s = n then + () + else if Mtime.is_later d.(s).time ~than:x.time then + moveup s + else + movedown s ; + h.size <- n let find h ev = let rec iter n = From dab868476a00efe77e391ca8c4b13f9fc5323100 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Tue, 26 Nov 2024 17:38:56 +0000 Subject: [PATCH 57/86] Remove possible systematic leak in Imperative priority queue The queue is implemented with an Array larger than the content. Instead of copying an initial value taken from first insert or potentially later when the Array is expanded provide a "default" value to use as a filler. 
This allows to provide a value not having references to external object so not extending their lifetime in an unpredictable way. Signed-off-by: Frediano Ziglio --- .../lib/xapi-stdext-threads/ipq.ml | 14 +++-- .../lib/xapi-stdext-threads/ipq.mli | 55 +++++++++++++++++++ .../lib/xapi-stdext-threads/ipq_test.ml | 23 +++++++- .../lib/xapi-stdext-threads/scheduler.ml | 4 +- quality-gate.sh | 2 +- 5 files changed, 89 insertions(+), 9 deletions(-) create mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml index 9fd99c59892..17c1a6e9792 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml @@ -15,15 +15,16 @@ type 'a event = {ev: 'a; time: Mtime.t} -type 'a t = {mutable size: int; mutable data: 'a event array} +type 'a t = {default: 'a event; mutable size: int; mutable data: 'a event array} exception EmptyHeap -let create n = +let create n default = if n <= 0 then invalid_arg "create" else - {size= -n; data= [||]} + let default = {ev= default; time= Mtime_clock.now ()} in + {default; size= -n; data= [||]} let is_empty h = h.size <= 0 @@ -32,14 +33,14 @@ let resize h = assert (n > 0) ; let n' = 2 * n in let d = h.data in - let d' = Array.make n' d.(0) in + let d' = Array.make n' h.default in Array.blit d 0 d' 0 n ; h.data <- d' let add h x = (* first addition: we allocate the array *) if h.size < 0 then ( - h.data <- Array.make (-h.size) x ; + h.data <- Array.make (-h.size) h.default ; h.size <- 0 ) ; let n = h.size in @@ -69,6 +70,7 @@ let remove h s = let n = h.size - 1 in let d = h.data in let x = d.(n) in + d.(n) <- h.default ; (* moving [x] up in the heap *) let rec moveup i = let fi = (i - 1) / 2 in @@ -134,11 +136,13 @@ let iter f h = f d.(i) done +(* let fold f h x0 = let n = h.size in let d = h.data in let rec foldrec x i = if i >= n then x else foldrec (f 
d.(i) x) (succ i) in foldrec x0 0 +*) (* let _ = diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli new file mode 100644 index 00000000000..f470bcc8d32 --- /dev/null +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli @@ -0,0 +1,55 @@ +(* + * Copyright (C) 2024 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type 'a event = {ev: 'a; time: Mtime.t} + +type 'a t + +exception EmptyHeap + +val create : int -> 'a -> 'a t +(** [create n default] creates an empty Imperative priority queue. + The queue initially is initialized to store [n] elements. + The queue will expand beyond [n] automatically if needed. + [default] value will the used to fill unused data. *) + +val is_empty : 'a t -> bool +(** Check if the queue is empty *) + +val add : 'a t -> 'a event -> unit +(** Add an event to the queue *) + +val remove : 'a t -> int -> unit +(** Remove an event from the queue passing the index. + @raise EmptyHeap if the queue is empty. + @raise Invalid_argument if the index is invalid. *) + +val find_p : 'a t -> ('a -> bool) -> int +(** Find the index of an event which matches a given condition + or -1 if not found *) + +val find : 'a t -> 'a -> int +(** Find the index of an event which matches a given event + or -1 if not found *) + +val maximum : 'a t -> 'a event +(** Return a copy of the event with the next time. + @raise EmptyHeap if the queue is empty. 
*) + +val pop_maximum : 'a t -> 'a event +(** Return and remove the event with the next time. + @raise EmptyHeap if the queue is empty. *) + +val iter : ('a event -> unit) -> 'a t -> unit +(** Iterate given function on the list of events in the queue *) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml index b6a614b302c..51fb39367d9 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml @@ -16,7 +16,7 @@ module Ipq = Xapi_stdext_threads_scheduler.Ipq (* test we get "out of bound" exception calling Ipq.remove *) let test_out_of_index () = - let q = Ipq.create 10 in + let q = Ipq.create 10 0 in Ipq.add q {Ipq.ev= 123; Ipq.time= Mtime_clock.now ()} ; let is_oob = function | Invalid_argument s when String.ends_with ~suffix:" out of bounds" s -> @@ -35,6 +35,25 @@ let test_out_of_index () = (* this should succeed *) Ipq.remove q 0 -let tests = [("test_out_of_index", `Quick, test_out_of_index)] +(* check queue does not retain some data after being removed *) +let test_leak () = + let default () = () in + let q = Ipq.create 10 default in + let array = Array.make 1024 'x' in + let use_array () = array.(0) <- 'a' in + let allocated = Atomic.make true in + Gc.finalise (fun _ -> Atomic.set allocated false) array ; + Ipq.add q {Ipq.ev= use_array; Ipq.time= Mtime_clock.now ()} ; + Ipq.remove q 0 ; + Gc.full_major () ; + Gc.full_major () ; + Alcotest.(check bool) "allocated" false (Atomic.get allocated) ; + Ipq.add q {Ipq.ev= default; Ipq.time= Mtime_clock.now ()} + +let tests = + [ + ("test_out_of_index", `Quick, test_out_of_index) + ; ("test_leak", `Quick, test_leak) + ] let () = Alcotest.run "Ipq" [("generic", tests)] diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml index b3d44caa62e..3e8543ec04d 100644 --- 
a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml @@ -25,7 +25,9 @@ type t = {func: unit -> unit; ty: func_ty; name: string} let delay = Delay.make () -let (queue : t Ipq.t) = Ipq.create 50 +let queue_default = {func= (fun () -> ()); ty= OneShot; name= ""} + +let (queue : t Ipq.t) = Ipq.create 50 queue_default let lock = Mutex.create () diff --git a/quality-gate.sh b/quality-gate.sh index b1d170041f1..b72ca099aa7 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -25,7 +25,7 @@ verify-cert () { } mli-files () { - N=498 + N=497 # do not count ml files from the tests in ocaml/{tests/perftest/quicktest} MLIS=$(git ls-files -- '**/*.mli' | grep -vE "ocaml/tests|ocaml/perftest|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) MLS=$(git ls-files -- '**/*.ml' | grep -vE "ocaml/tests|ocaml/perftest|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) From 4f5bb1a2aba55696aeb215fb87621dee05aede66 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Tue, 26 Nov 2024 22:30:31 +0000 Subject: [PATCH 58/86] Add test for is_empty for Imperative priority queue Signed-off-by: Frediano Ziglio --- .../xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml index 51fb39367d9..e08c1902fd0 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml @@ -50,10 +50,20 @@ let test_leak () = Alcotest.(check bool) "allocated" false (Atomic.get allocated) ; Ipq.add q {Ipq.ev= default; Ipq.time= Mtime_clock.now ()} +(* test Ipq.is_empty call *) +let test_empty () = + let q = Ipq.create 10 0 in + Alcotest.(check bool) "same value" true (Ipq.is_empty q) ; + Ipq.add q {Ipq.ev= 
123; Ipq.time= Mtime_clock.now ()} ; + Alcotest.(check bool) "same value" false (Ipq.is_empty q) ; + Ipq.remove q 0 ; + Alcotest.(check bool) "same value" true (Ipq.is_empty q) + let tests = [ ("test_out_of_index", `Quick, test_out_of_index) ; ("test_leak", `Quick, test_leak) + ; ("test_empty", `Quick, test_empty) ] let () = Alcotest.run "Ipq" [("generic", tests)] From 1a8a1f3dff351342ada0e5a9ee8382a56dc4f3a9 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Wed, 27 Nov 2024 21:58:45 +0000 Subject: [PATCH 59/86] Move and improve old test for Imperative priority queue Add 100 elements with random time. Remove first 50 inserted. Remove last 50 in the order of time. While doing that check the content of the structure and the structure itself. Signed-off-by: Frediano Ziglio --- .../lib/xapi-stdext-threads/ipq.ml | 26 ++----- .../lib/xapi-stdext-threads/ipq.mli | 3 + .../lib/xapi-stdext-threads/ipq_test.ml | 74 +++++++++++++++++++ 3 files changed, 85 insertions(+), 18 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml index 17c1a6e9792..0c0a458e2ce 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml @@ -130,6 +130,14 @@ let pop_maximum h = let m = maximum h in remove h 0 ; m +let check h = + let d = h.data in + for i = 1 to h.size - 1 do + let fi = (i - 1) / 2 in + let ordered = Mtime.is_later d.(i).time ~than:d.(fi).time in + assert ordered + done + let iter f h = let d = h.data in for i = 0 to h.size - 1 do @@ -143,21 +151,3 @@ let fold f h x0 = let rec foldrec x i = if i >= n then x else foldrec (f d.(i) x) (succ i) in foldrec x0 0 *) - -(* -let _ = - let test : int t = create 100 in - for i=0 to 99 do - let e = {time=Random.float 10.0; ev=i} in - add test e - done; - for i=0 to 49 do - let xx=find test i in - remove test xx - done; -(* remove test xx;*) - for i=0 to 49 do - let e=pop_maximum test in - 
Printf.printf "time: %f, site: %d\n" e.time e.ev - done -*) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli index f470bcc8d32..b7c4974e642 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli @@ -53,3 +53,6 @@ val pop_maximum : 'a t -> 'a event val iter : ('a event -> unit) -> 'a t -> unit (** Iterate given function on the list of events in the queue *) + +val check : 'a t -> unit +(** Check internal queue state, used for debugging *) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml index e08c1902fd0..e8e64093e16 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml @@ -59,11 +59,85 @@ let test_empty () = Ipq.remove q 0 ; Alcotest.(check bool) "same value" true (Ipq.is_empty q) +module Int64Set = Set.Make (Int64) + +let check = Ipq.check + +(* get size of the queue *) +let size queue = + let l = ref 0 in + Ipq.iter (fun _ -> l := !l + 1) queue ; + !l + +(* get a set of times from the queue *) +let set queue = + let s = ref Int64Set.empty in + Ipq.iter + (fun d -> + let t = d.time in + let t = Mtime.to_uint64_ns t in + s := Int64Set.add t !s + ) + queue ; + !s + +let test_old () = + let test : int Ipq.t = Ipq.create 100 0 in + let s = ref Int64Set.empty in + let add i = + let ti = Random.int64 1000000L in + let t = Mtime.of_uint64_ns ti in + let e = {Ipq.time= t; Ipq.ev= i} in + Ipq.add test e ; + s := Int64Set.add ti !s + in + for i = 0 to 49 do + add i + done ; + let first_half = set test in + for i = 50 to 99 do + add i + done ; + check test ; + (* we should have all elements *) + Alcotest.(check int) "100 elements" 100 (size test) ; + + let all = set test in + Alcotest.(check int) "same list" 0 (Int64Set.compare !s all) ; + + (* remove half of the 
elements *) + for i = 0 to 49 do + let xx = Ipq.find test i in + Printf.printf "Removing element %d position %d\n%!" i xx ; + Ipq.remove test xx ; + check test + done ; + Alcotest.(check int) "50 elements" 50 (size test) ; + + (* make sure we have the right elements in the list *) + let s = set test in + let second_half = Int64Set.diff all first_half in + Alcotest.(check int) "same list" 0 (Int64Set.compare s second_half) ; + + (* remove test *) + let prev = ref 0L in + for _ = 0 to 49 do + let e = Ipq.pop_maximum test in + let t = Mtime.to_uint64_ns e.time in + Alcotest.(check bool) + (Printf.sprintf "%Ld bigger than %Ld" t !prev) + true (t >= !prev) ; + Printf.printf "time: %Ld, site: %d\n" t e.ev ; + prev := t ; + check test + done + let tests = [ ("test_out_of_index", `Quick, test_out_of_index) ; ("test_leak", `Quick, test_leak) ; ("test_empty", `Quick, test_empty) + ; ("test_old", `Quick, test_old) ] let () = Alcotest.run "Ipq" [("generic", tests)] From 32020588430afea87f78c8da1d5058d98535519e Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Thu, 28 Nov 2024 09:12:18 +0000 Subject: [PATCH 60/86] Initialise Imperative priority queue array on creation No need to wait. 
Signed-off-by: Frediano Ziglio --- ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml index 0c0a458e2ce..4cf29ed3d9b 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml @@ -24,7 +24,7 @@ let create n default = invalid_arg "create" else let default = {ev= default; time= Mtime_clock.now ()} in - {default; size= -n; data= [||]} + {default; size= 0; data= Array.make n default} let is_empty h = h.size <= 0 @@ -38,11 +38,6 @@ let resize h = h.data <- d' let add h x = - (* first addition: we allocate the array *) - if h.size < 0 then ( - h.data <- Array.make (-h.size) h.default ; - h.size <- 0 - ) ; let n = h.size in (* resizing if needed *) if n = Array.length h.data then resize h ; From 87ca90ec087f58ea34a94999006f8e324b4d2605 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 1 Oct 2024 17:18:49 +0100 Subject: [PATCH 61/86] CA-399757: Add CAS style check for SR scan SR.scan is currently not an atomic operation, and this has caused problems as during the scan itself, there might be other calls changing the state of the database, such as VDI.db_introduce called by SM, if using SMAPIv1. This will confuse SR.scan as it sees an outdated snapshot. The proposed workaround would be to add a CAS style check for SR.scan, which will refuse to update the db if it detects changes. This is still subject to the TOCTOU problem, but should reduce the racing window. 
Signed-off-by: Vincent Liu --- ocaml/xapi/xapi_sr.ml | 63 ++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/ocaml/xapi/xapi_sr.ml b/ocaml/xapi/xapi_sr.ml index 7a83493b2de..8af5bd6e62f 100644 --- a/ocaml/xapi/xapi_sr.ml +++ b/ocaml/xapi/xapi_sr.ml @@ -786,26 +786,51 @@ let scan ~__context ~sr = SRScanThrottle.execute (fun () -> transform_storage_exn (fun () -> let sr_uuid = Db.SR.get_uuid ~__context ~self:sr in - let vs, sr_info = - C.SR.scan2 (Ref.string_of task) - (Storage_interface.Sr.of_string sr_uuid) - in - let db_vdis = - Db.VDI.get_records_where ~__context - ~expr:(Eq (Field "SR", Literal sr')) - in - update_vdis ~__context ~sr db_vdis vs ; - let virtual_allocation = - List.fold_left Int64.add 0L - (List.map (fun v -> v.Storage_interface.virtual_size) vs) + (* CA-399757: Do not update_vdis unless we are sure that the db was not + changed during the scan. If it was, retry the scan operation. This + change might be a result of the SMAPIv1 call back into xapi with + the db_introduce call, for example. 
+ + Note this still suffers TOCTOU problem, but a complete operation is not easily + implementable without rearchitecting the storage apis *) + let rec scan_rec limit = + let find_vdis () = + Db.VDI.get_records_where ~__context + ~expr:(Eq (Field "SR", Literal sr')) + in + let db_vdis_before = find_vdis () in + let vs, sr_info = + C.SR.scan2 (Ref.string_of task) + (Storage_interface.Sr.of_string sr_uuid) + in + let db_vdis_after = find_vdis () in + if limit > 0 && db_vdis_after <> db_vdis_before then + (scan_rec [@tailcall]) (limit - 1) + else if limit = 0 then + raise + (Api_errors.Server_error + (Api_errors.internal_error, ["SR.scan retry limit exceeded"]) + ) + else ( + update_vdis ~__context ~sr db_vdis_after vs ; + let virtual_allocation = + List.fold_left + (fun acc v -> Int64.add v.Storage_interface.virtual_size acc) + 0L vs + in + Db.SR.set_virtual_allocation ~__context ~self:sr + ~value:virtual_allocation ; + Db.SR.set_physical_size ~__context ~self:sr + ~value:sr_info.total_space ; + Db.SR.set_physical_utilisation ~__context ~self:sr + ~value:(Int64.sub sr_info.total_space sr_info.free_space) ; + Db.SR.remove_from_other_config ~__context ~self:sr ~key:"dirty" ; + Db.SR.set_clustered ~__context ~self:sr ~value:sr_info.clustered + ) in - Db.SR.set_virtual_allocation ~__context ~self:sr - ~value:virtual_allocation ; - Db.SR.set_physical_size ~__context ~self:sr ~value:sr_info.total_space ; - Db.SR.set_physical_utilisation ~__context ~self:sr - ~value:(Int64.sub sr_info.total_space sr_info.free_space) ; - Db.SR.remove_from_other_config ~__context ~self:sr ~key:"dirty" ; - Db.SR.set_clustered ~__context ~self:sr ~value:sr_info.clustered + (* XXX Retry 10 times, and then give up. 
We should not really expect to + reach this retry limit though, unless something really bad has happened.*) + scan_rec 10 ) ) From 6d275ae0bb674b61bca6028a023ca9f47d4b15d4 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 3 Dec 2024 16:56:59 +0000 Subject: [PATCH 62/86] xapi-stdext-threads: use mtime.clock.os mtime.clock is not yet available on the xs-opam available when building xenserver Signed-off-by: Pau Ruiz Safont --- ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune index cdaf50ee02f..e0437e881ab 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune @@ -13,7 +13,7 @@ (public_name xapi-stdext-threads.scheduler) (name xapi_stdext_threads_scheduler) (modules ipq scheduler) - (libraries mtime mtime.clock threads.posix unix xapi-log xapi-stdext-threads) + (libraries mtime mtime.clock.os threads.posix unix xapi-log xapi-stdext-threads) ) (tests From c85270a2223c74057595df5c43e9a36828e2ac1f Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 4 Dec 2024 11:28:19 +0000 Subject: [PATCH 63/86] Remove unused ocaml/perftest Signed-off-by: Rob Hoes --- ocaml/perftest/createVM.ml | 171 ------- ocaml/perftest/createpool.ml | 751 ------------------------ ocaml/perftest/cumulative_time.ml | 145 ------ ocaml/perftest/dune | 24 - ocaml/perftest/gnuplot.ml | 165 ------- ocaml/perftest/graphutil.ml | 134 ------ ocaml/perftest/histogram.ml | 230 --------- ocaml/perftest/perfdebug.ml | 24 - ocaml/perftest/perftest.ml | 195 -------- ocaml/perftest/perfutil.ml | 101 ---- ocaml/perftest/scenario.ml | 157 ------- ocaml/perftest/statistics.ml | 155 ------ ocaml/perftest/tests.ml | 493 -------------------- ocaml/perftest/testtypes.ml | 49 -- 14 files changed, 2794 deletions(-) delete mode 100644 ocaml/perftest/createVM.ml delete mode 100644 
ocaml/perftest/createpool.ml delete mode 100644 ocaml/perftest/cumulative_time.ml delete mode 100644 ocaml/perftest/dune delete mode 100644 ocaml/perftest/gnuplot.ml delete mode 100644 ocaml/perftest/graphutil.ml delete mode 100644 ocaml/perftest/histogram.ml delete mode 100644 ocaml/perftest/perfdebug.ml delete mode 100644 ocaml/perftest/perftest.ml delete mode 100644 ocaml/perftest/perfutil.ml delete mode 100644 ocaml/perftest/scenario.ml delete mode 100644 ocaml/perftest/statistics.ml delete mode 100644 ocaml/perftest/tests.ml delete mode 100644 ocaml/perftest/testtypes.ml diff --git a/ocaml/perftest/createVM.ml b/ocaml/perftest/createVM.ml deleted file mode 100644 index e3496223488..00000000000 --- a/ocaml/perftest/createVM.ml +++ /dev/null @@ -1,171 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- *) -open Scenario -open Perfutil -open Client -open Perfdebug - -let iscsi_vm_iso = "xenserver-iscsi-target.iso" - -let iscsi_vm_template = "Other install media" - -let innertemplate = "Windows XP SP3" - -let make_iscsi_ip pool = Printf.sprintf "192.168.%d.200" (pool.ipbase + 2) - -let find_iscsi_iso session_id = - let vdis = Client.VDI.get_all ~rpc ~session_id in - try - Some - (List.find - (fun vdi -> - Client.VDI.get_name_label ~rpc ~session_id ~self:vdi = iscsi_vm_iso - ) - vdis - ) - with _ -> None - -(** Create the VM with the iscsi iso attached *) -let make_iscsi session_id pool network = - try - let iscsi_iso = - match find_iscsi_iso session_id with - | Some vdi -> - vdi - | None -> - failwith "iSCSI VM iso not found" - in - let template = - List.hd - (Client.VM.get_by_name_label ~rpc ~session_id ~label:iscsi_vm_template) - in - let newvm = - Client.VM.clone ~rpc ~session_id ~vm:template - ~new_name:"ISCSI target server" - in - Client.VM.provision ~rpc ~session_id ~vm:newvm ; - let _ (* isovbd *) = - Client.VBD.create ~rpc ~session_id ~vM:newvm ~vDI:iscsi_iso ~device:"" - ~userdevice:"0" ~bootable:true ~mode:`RO ~_type:`CD ~unpluggable:false - ~empty:false ~other_config:[] ~currently_attached:false - ~qos_algorithm_type:"" ~qos_algorithm_params:[] - in - let realpool = List.hd (Client.Pool.get_all ~rpc ~session_id) in - let defaultsr = - Client.Pool.get_default_SR ~rpc ~session_id ~self:realpool - in - for i = 0 to pool.iscsi_luns - 1 do - let storage_vdi_label = Printf.sprintf "SCSI VDI %d" i in - let storage_vdi = - Client.VDI.create ~rpc ~session_id ~name_label:storage_vdi_label - ~name_description:"" ~sR:defaultsr ~virtual_size:sr_disk_size - ~_type:`user ~sharable:false ~read_only:false - ~other_config:[(oc_key, pool.key)] - ~xenstore_data:[] ~sm_config:[] ~tags:[] - in - let userdevice = Printf.sprintf "%d" (i + 1) in - ignore - (Client.VBD.create ~rpc ~session_id ~vM:newvm ~vDI:storage_vdi - ~device:"" ~userdevice ~bootable:false ~mode:`RW 
~_type:`Disk - ~unpluggable:false ~empty:false ~other_config:[] - ~currently_attached:false ~qos_algorithm_type:"" - ~qos_algorithm_params:[] - ) - done ; - Client.VM.set_PV_bootloader ~rpc ~session_id ~self:newvm ~value:"pygrub" ; - Client.VM.set_PV_args ~rpc ~session_id ~self:newvm - ~value: - (Printf.sprintf "net_ip=%s net_mask=255.255.255.0" (make_iscsi_ip pool)) ; - Client.VM.set_HVM_boot_policy ~rpc ~session_id ~self:newvm ~value:"" ; - let (_ : API.ref_VIF) = - Client.VIF.create ~rpc ~session_id ~device:"0" ~network ~vM:newvm ~mAC:"" - ~mTU:1500L - ~other_config:[(oc_key, pool.key)] - ~currently_attached:false ~qos_algorithm_type:"" - ~qos_algorithm_params:[] ~locking_mode:`network_default ~ipv4_allowed:[] - ~ipv6_allowed:[] - in - Client.VM.add_to_other_config ~rpc ~session_id ~self:newvm ~key:oc_key - ~value:pool.key ; - let uuid = Inventory.lookup "INSTALLATION_UUID" in - let host = Client.Host.get_by_uuid ~rpc ~session_id ~uuid in - Client.VM.start_on ~rpc ~session_id ~vm:newvm ~host ~start_paused:false - ~force:false ; - Some newvm - with e -> - debug "Caught exception with iscsi VM: %s" (Printexc.to_string e) ; - None - -let make ~rpc ~session_id ~pool:_ ~vm ~networks ~storages = - let wintemplate = - List.hd (Client.VM.get_by_name_label ~rpc ~session_id ~label:innertemplate) - in - let host_refs = Array.of_list (Client.Host.get_all ~rpc ~session_id) in - for i = 0 to Array.length storages - 1 do - Printf.printf "Creating %d VMs in SR %d\n%!" 
vm.num i ; - for j = 0 to vm.num - 1 do - let newname = - Printf.sprintf "VM %d%s%s" j - ( if Array.length storages > 1 then - Printf.sprintf " in SR %d" i - else - "" - ) - (if vm.tag <> "" then " - " ^ vm.tag else "") - in - let clone = - Client.VM.clone ~rpc ~session_id ~vm:wintemplate ~new_name:newname - in - Client.VM.add_tags ~rpc ~session_id ~self:clone ~value:vm.tag ; - Client.VM.remove_from_other_config ~rpc ~session_id ~self:clone - ~key:"disks" ; - for userdevice = 0 to vm.vbds - 1 do - Printf.printf " - creating VDI %d for VM %d on SR %d of %d\n%!" - userdevice j i (Array.length storages) ; - let newdisk = - Client.VDI.create ~rpc ~session_id ~name_label:"Guest disk" - ~name_description:"" ~sR:storages.(i) ~virtual_size:4194304L - ~_type:`user ~sharable:false ~read_only:false ~xenstore_data:[] - ~other_config:[] ~sm_config:[] ~tags:[] - in - ignore - (Client.VBD.create ~rpc ~session_id ~vM:clone ~vDI:newdisk - ~userdevice:(string_of_int userdevice) ~bootable:false ~mode:`RW - ~_type:`Disk ~unpluggable:true ~empty:false ~qos_algorithm_type:"" - ~qos_algorithm_params:[] ~other_config:[] ~device:"" - ~currently_attached:false - ) - done ; - Client.VM.provision ~rpc ~session_id ~vm:clone ; - for device = 0 to min vm.vifs (Array.length networks) - 1 do - ignore - (Client.VIF.create ~rpc ~session_id ~device:(string_of_int device) - ~network:networks.(device) ~vM:clone ~mAC:"" ~mTU:1500L - ~other_config:[] ~qos_algorithm_type:"" ~qos_algorithm_params:[] - ~locking_mode:`network_default ~ipv4_allowed:[] ~ipv6_allowed:[] - ~currently_attached:false - ) - done ; - Client.VM.set_memory_static_min ~rpc ~session_id ~self:clone - ~value:16777216L ; - Client.VM.set_memory_dynamic_min ~rpc ~session_id ~self:clone - ~value:16777216L ; - Client.VM.set_memory_dynamic_max ~rpc ~session_id ~self:clone - ~value:16777216L ; - Client.VM.set_memory_static_max ~rpc ~session_id ~self:clone - ~value:16777216L ; - if vm.has_affinity && Array.length storages = Array.length host_refs 
then - Client.VM.set_affinity ~rpc ~session_id ~self:clone ~value:host_refs.(i) - done - done diff --git a/ocaml/perftest/createpool.ml b/ocaml/perftest/createpool.ml deleted file mode 100644 index bf96cfb7c36..00000000000 --- a/ocaml/perftest/createpool.ml +++ /dev/null @@ -1,751 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -(* Create a pool of SDKs *) - -open Client -open Perfutil -open Xapi_stdext_std -open Scenario -open Perfdebug - -let master_of_pool = "master_of_pool" - -let management_ip = "management_ip" - -let get_network_num_from_interface pool i = - if i < pool.bonds * 2 then - i / 2 - else - i - pool.bonds - -(** Only storage types supporting active thin-provisioned disks allow us to - create a 2TiB disk, clone it and attach it to a bunch of VMs without - running out of space. In particular the hybrid thin/thick behaviour of - LVHD won't work so we can't use LVM over iSCSI or FC. It's probably easiest - to include a whitelist here rather than find an EQL array to test this. 
*) -let sr_is_suitable session_id sr = - let t = - String.lowercase_ascii (Client.SR.get_type ~rpc ~session_id ~self:sr) - in - t = "ext" || t = "nfs" - -let default_sr_must_be_suitable session_id = - let realpool = List.hd (Client.Pool.get_all ~rpc ~session_id) in - let defaultsr = Client.Pool.get_default_SR ~rpc ~session_id ~self:realpool in - if not (sr_is_suitable session_id defaultsr) then - failwith - "Pool's default SR is unsuitable for the local storage on the template" - -let initialise session_id template pool = - (* First, create the networks the hosts will have their interfaces on *) - let networks_to_create = pool.interfaces_per_host - pool.bonds in - debug "Creating %d networks..." networks_to_create ; - let networks = - Array.init networks_to_create (fun i -> - Client.Network.create ~rpc ~session_id - ~name_label:(Printf.sprintf "perftestnet%d" i) - ~name_description:"" ~mTU:1500L - ~other_config:[(oc_key, pool.key)] - ~bridge:"" ~managed:true ~tags:[] - ) - in - (* Set up the template - create the VIFs *) - debug "Setting up the template. 
Creating VIFs on networks" ; - let interfaces = - Array.init pool.interfaces_per_host (fun i -> - let net = networks.(get_network_num_from_interface pool i) in - Client.VIF.create ~rpc ~session_id ~device:(string_of_int i) - ~network:net ~vM:template ~mAC:"" ~mTU:1500L - ~other_config:[(oc_key, pool.key)] - ~qos_algorithm_type:"" ~qos_algorithm_params:[] - ~locking_mode:`network_default ~ipv4_allowed:[] ~ipv6_allowed:[] - ~currently_attached:false - ) - in - (* Create a disk for local storage *) - debug "Creating a disk for local storage on the template" ; - default_sr_must_be_suitable session_id ; - let realpool = List.hd (Client.Pool.get_all ~rpc ~session_id) in - let defaultsr = Client.Pool.get_default_SR ~rpc ~session_id ~self:realpool in - let newdisk = - Client.VDI.create ~rpc ~session_id ~name_label:"SDK storage" - ~name_description:"" ~sR:defaultsr ~virtual_size:sr_disk_size ~_type:`user - ~sharable:false ~read_only:false ~xenstore_data:[] - ~other_config:[(oc_key, pool.key)] - ~sm_config:[] ~tags:[] - in - let (_ : API.ref_VBD) = - Client.VBD.create ~rpc ~session_id ~vM:template ~vDI:newdisk - ~userdevice:sr_disk_device ~bootable:false ~mode:`RW ~_type:`Disk - ~unpluggable:true ~empty:false ~qos_algorithm_type:"" - ~qos_algorithm_params:[] - ~other_config:[(oc_key, pool.key)] - ~device:"" ~currently_attached:false - in - debug "Setting up xenstore keys" ; - (* Set up the various xenstore keys *) - Client.VM.set_PV_args ~rpc ~session_id ~self:template ~value:"noninteractive" ; - (* no password setting step *) - Client.VM.add_to_xenstore_data ~rpc ~session_id ~self:template - ~key:"vm-data/provision/interfaces/0/admin" ~value:"true" ; - Array.iteri - (fun i _ -> - Client.VM.add_to_xenstore_data ~rpc ~session_id ~self:template - ~key:(Printf.sprintf "vm-data/provision/interfaces/%d/mode" i) - ~value:"static" ; - Client.VM.add_to_xenstore_data ~rpc ~session_id ~self:template - ~key:(Printf.sprintf "vm-data/provision/interfaces/%d/netmask" i) - 
~value:"255.255.255.0" - ) - interfaces ; - debug "Setting memory to 128 Megs" ; - Client.VM.set_memory_static_min ~rpc ~session_id ~self:template - ~value:(Int64.mul 128L 1048576L) ; - Client.VM.set_memory_dynamic_min ~rpc ~session_id ~self:template - ~value:(Int64.mul 128L 1048576L) ; - Client.VM.set_memory_dynamic_max ~rpc ~session_id ~self:template - ~value:(Int64.mul 128L 1048576L) ; - Client.VM.set_memory_static_max ~rpc ~session_id ~self:template - ~value:(Int64.mul 128L 1048576L) ; - Client.VM.remove_from_other_config ~rpc ~session_id ~self:template ~key:oc_key ; - Client.VM.add_to_other_config ~rpc ~session_id ~self:template ~key:oc_key - ~value:pool.key ; - interfaces - -let reset_template session_id template = - (* Destroy template's VIFs *) - debug "Resetting template to factory settings" ; - let vifs = Client.VM.get_VIFs ~rpc ~session_id ~self:template in - List.iter - (fun vif -> - try - if - List.mem_assoc oc_key - (Client.VIF.get_other_config ~rpc ~session_id ~self:vif) - then - Client.VIF.destroy ~rpc ~session_id ~self:vif - with _ -> () - ) - vifs ; - (* Destroy template's sr disk *) - let vbds = Client.VM.get_VBDs ~rpc ~session_id ~self:template in - List.iter - (fun vbd -> - if - List.mem_assoc oc_key - (Client.VBD.get_other_config ~rpc ~session_id ~self:vbd) - then ( - let vdi = Client.VBD.get_VDI ~rpc ~session_id ~self:vbd in - assert ( - List.mem_assoc oc_key - (Client.VDI.get_other_config ~rpc ~session_id ~self:vdi) - ) ; - Client.VDI.destroy ~rpc ~session_id ~self:vdi ; - try Client.VBD.destroy ~rpc ~session_id ~self:vbd with _ -> () - ) - ) - vbds ; - (* Remove xenstore keys *) - Client.VM.set_xenstore_data ~rpc ~session_id ~self:template ~value:[] ; - Client.VM.set_PV_args ~rpc ~session_id ~self:template ~value:"" ; - try - Client.VM.remove_from_other_config ~rpc ~session_id ~self:template - ~key:oc_key - with _ -> () - -let uninitialise session_id _template key = - (* Shut down and uninstall any VMs *) - debug "Shutting down and 
uninstalling any VMs" ; - let vms = Client.VM.get_all ~rpc ~session_id in - List.iter - (fun vm -> - let is_a_template = - Client.VM.get_is_a_template ~rpc ~session_id ~self:vm - in - let is_control_domain = - Client.VM.get_is_control_domain ~rpc ~session_id ~self:vm - in - let is_managed = - try - List.assoc oc_key - (Client.VM.get_other_config ~rpc ~session_id ~self:vm) - = key - with _ -> false - in - let running = - Client.VM.get_power_state ~rpc ~session_id ~self:vm = `Running - in - if (not is_a_template) && (not is_control_domain) && is_managed then ( - if running then Client.VM.hard_shutdown ~rpc ~session_id ~vm ; - let vbds = Client.VM.get_VBDs ~rpc ~session_id ~self:vm in - let vdis = - List.map - (fun vbd -> Client.VBD.get_VDI ~rpc ~session_id ~self:vbd) - vbds - in - List.iter - (fun vdi -> - try Client.VDI.destroy ~rpc ~session_id ~self:vdi with _ -> () - ) - vdis ; - List.iter - (fun vbd -> - try Client.VBD.destroy ~rpc ~session_id ~self:vbd with _ -> () - ) - vbds ; - List.iter - (fun vif -> - try Client.VIF.destroy ~rpc ~session_id ~self:vif with _ -> () - ) - (Client.VM.get_VIFs ~rpc ~session_id ~self:vm) ; - Client.VM.destroy ~rpc ~session_id ~self:vm - ) - ) - vms ; - (* Destroy networks *) - debug "Destroying networks" ; - let nets = Client.Network.get_all_records ~rpc ~session_id in - let mynets = - List.filter - (fun (_, r) -> - List.mem_assoc oc_key r.API.network_other_config - && List.assoc oc_key r.API.network_other_config = key - ) - nets - in - List.iter - (fun (net, _) -> Client.Network.destroy ~rpc ~session_id ~self:net) - mynets ; - let nets = Client.Network.get_all_records ~rpc ~session_id in - debug "Destroying any bridges" ; - let ic = - Unix.open_process_in "ifconfig -a | grep \"^xapi\" | awk '{print $1}'" - in - let netdevs = - let rec doline () = - try - let x = input_line ic in - x :: doline () - with _ -> [] - in - doline () - in - List.iter - (fun netdev -> - if not (List.exists (fun (_, net) -> net.API.network_bridge = 
netdev) nets) - then ( - ignore - (Sys.command (Printf.sprintf "ifconfig %s down 2>/dev/null" netdev)) ; - ignore (Sys.command (Printf.sprintf "brctl delbr %s 2>/dev/null" netdev)) - ) - ) - netdevs - -let destroy_sdk_pool session_id sdkname key = - let template = - List.hd (Client.VM.get_by_name_label ~rpc ~session_id ~label:sdkname) - in - uninitialise session_id template key - -let describe_pool template_name pool_name key = - let pool = Scenario.get pool_name in - let pool = {pool with key} in - Printf.sprintf "Base template: %s" template_name :: description_of_pool pool - -let iscsi_vm_iso_must_exist session_id = - (* The iSCSI VM iso must exist *) - if CreateVM.find_iscsi_iso session_id = None then - failwith - (Printf.sprintf "The iSCSI target VM iso could not be found (%s)" - CreateVM.iscsi_vm_iso - ) - -let create_sdk_pool session_id sdkname pool_name key ipbase = - iscsi_vm_iso_must_exist session_id ; - default_sr_must_be_suitable session_id ; - let pool = List.find (fun p -> p.id = pool_name) pools in - let pool = {pool with key; ipbase} in - let template = - try List.hd (Client.VM.get_by_name_label ~rpc ~session_id ~label:sdkname) - with _ -> - debug ~out:stderr "template '%s' not found" sdkname ; - exit 1 - in - let uuid = Client.VM.get_uuid ~rpc ~session_id ~self:template in - debug "Creating test pool '%s' using SDK template uuid=%s" pool.id uuid ; - (* Clear up any leftover state on the template *) - reset_template session_id template ; - let interfaces = initialise session_id template pool in - Printf.printf "Creating iSCSI target VM serving %d LUNs\n%!" 
pool.iscsi_luns ; - let (_ : API.ref_VM option) = - CreateVM.make_iscsi session_id pool - (Client.VIF.get_network ~rpc ~session_id ~self:interfaces.(2)) - in - debug "Creating %d SDK VMs" pool.hosts ; - let hosts = - Array.init pool.hosts (fun i -> - let n = i + 1 in - let vm = - Client.VM.clone ~rpc ~session_id ~vm:template - ~new_name:(Printf.sprintf "perftestpool%d" n) - in - Client.VM.provision ~rpc ~session_id ~vm ; - Array.iteri - (fun i _ -> - ignore - (Client.VM.add_to_xenstore_data ~rpc ~session_id ~self:vm - ~key:(Printf.sprintf "vm-data/provision/interfaces/%d/ip" i) - ~value:(Printf.sprintf "192.168.%d.%d" (i + pool.ipbase) n) - ) - ) - interfaces ; - vm - ) - in - debug "Setting memory on master to be 256 Megs" ; - Client.VM.set_memory_static_max ~rpc ~session_id ~self:hosts.(0) - ~value:(Int64.mul 256L 1048576L) ; - Client.VM.set_memory_static_min ~rpc ~session_id ~self:hosts.(0) - ~value:(Int64.mul 256L 1048576L) ; - Client.VM.set_memory_dynamic_max ~rpc ~session_id ~self:hosts.(0) - ~value:(Int64.mul 256L 1048576L) ; - Client.VM.set_memory_dynamic_min ~rpc ~session_id ~self:hosts.(0) - ~value:(Int64.mul 256L 1048576L) ; - Client.VM.add_to_other_config ~rpc ~session_id ~self:hosts.(0) - ~key:master_of_pool ~value:pool.key ; - Client.VM.add_to_other_config ~rpc ~session_id ~self:hosts.(0) - ~key:management_ip - ~value:(Printf.sprintf "192.168.%d.1" pool.ipbase) ; - let localhost_uuid = Inventory.lookup "INSTALLATION_UUID" in - Array.iteri - (fun i host -> - debug "Starting VM %d" i ; - Client.VM.start_on ~rpc ~session_id ~vm:host - ~host:(Client.Host.get_by_uuid ~rpc ~session_id ~uuid:localhost_uuid) - ~start_paused:false ~force:false - ) - hosts ; - ignore - (Sys.command - (Printf.sprintf "ifconfig %s 192.168.%d.200 up" - (Client.Network.get_bridge ~rpc ~session_id - ~self:(Client.VIF.get_network ~rpc ~session_id ~self:interfaces.(0)) - ) - pool.ipbase - ) - ) ; - reset_template session_id template ; - debug "Guests are now booting..." 
; - let pingable = Array.make (Array.length hosts) false in - let firstboot = Array.make (Array.length hosts) false in - let string_of_status () = - Array.to_seq pingable - |> Seq.mapi (fun i ping -> - let boot = firstboot.(i) in - match (ping, boot) with - | false, false -> - '.' - | true, false -> - 'P' - | true, true -> - 'B' - | _, _ -> - '?' - ) - |> String.of_seq - in - let has_guest_booted i _vm = - let ip = Printf.sprintf "192.168.%d.%d" pool.ipbase (i + 1) in - let is_pingable () = - if pingable.(i) then - true - else if - Sys.command - (Printf.sprintf "ping -W 1 -c 1 %s 2>/dev/null >/dev/null" ip) - = 0 - then ( - pingable.(i) <- true ; - debug "Individual host status: %s" (string_of_status ()) ; - true - ) else - false - in - let firstbooted () = - if firstboot.(i) then - true - else - let rpc = remoterpc ip in - try - let session_id = - Client.Session.login_with_password ~rpc ~uname:"root" - ~pwd:"xensource" ~version:"1.1" ~originator:"perftest" - in - Xapi_stdext_pervasives.Pervasiveext.finally - (fun () -> - let host = List.hd (Client.Host.get_all ~rpc ~session_id) in - (* only one host because it hasn't joined the pool yet *) - let other_config = - Client.Host.get_other_config ~rpc ~session_id ~self:host - in - let key = "firstboot-complete" in - (* Since these are 'fresh' hosts which have never booted, the key goes from missing -> present *) - if List.mem_assoc key other_config then ( - firstboot.(i) <- true ; - debug "Individual host status: %s" (string_of_status ()) ; - true - ) else - false - ) - (fun () -> Client.Session.logout ~rpc ~session_id) - with _ -> false - in - is_pingable () && firstbooted () - in - let wait_until_guests_have_booted () = - for i = 0 to Array.length pingable - 1 do - pingable.(i) <- false - done ; - let finished = ref false in - while not !finished do - finished := - List.fold_left ( && ) true - (Array.to_list (Array.mapi has_guest_booted hosts)) ; - Unix.sleep 20 - done - in - wait_until_guests_have_booted () ; - debug 
"Guests have booted; issuing Pool.joins." ; - let host_uuids = - Array.mapi - (fun i _ -> - let n = i + 1 in - let rpc = remoterpc (Printf.sprintf "192.168.%d.%d" pool.ipbase n) in - let session_id = - Client.Session.login_with_password ~rpc ~uname:"root" ~pwd:"xensource" - ~version:"1.1" ~originator:"perftest" - in - let h = List.hd (Client.Host.get_all ~rpc ~session_id) in - let u = Client.Host.get_uuid ~rpc ~session_id ~self:h in - debug "Setting name of host %d" n ; - Client.Host.set_name_label ~rpc ~session_id ~self:h - ~value:(Printf.sprintf "perftest host %d" i) ; - if i <> 0 then ( - debug "Joining to pool" ; - Client.Pool.join ~rpc ~session_id - ~master_address:(Printf.sprintf "192.168.%d.1" pool.ipbase) - ~master_username:"root" ~master_password:"xensource" - ) ; - u - ) - hosts - in - let poolrpc = remoterpc (Printf.sprintf "192.168.%d.1" pool.ipbase) in - let poolses = - Client.Session.login_with_password ~rpc:poolrpc ~uname:"root" - ~pwd:"xensource" ~version:"1.1" ~originator:"perftest" - in - let vpool = List.hd (Client.Pool.get_all ~rpc:poolrpc ~session_id:poolses) in - Client.Pool.add_to_other_config ~rpc:poolrpc ~session_id:poolses ~self:vpool - ~key:"scenario" ~value:pool_name ; - debug "Waiting for all hosts to become live and enabled" ; - let hosts = - Array.of_list (Client.Host.get_all ~rpc:poolrpc ~session_id:poolses) - in - let live = Array.make (Array.length hosts) false in - let enabled = Array.make (Array.length hosts) false in - let string_of_status () = - Array.to_seq live - |> Seq.mapi (fun i live -> - let enabled = enabled.(i) in - match (live, enabled) with - | false, false -> - '.' - | true, false -> - 'L' - | true, true -> - 'E' - | _, _ -> - '?' 
- ) - |> String.of_seq - in - let has_host_booted rpc session_id i host = - try - if live.(i) && enabled.(i) then - true - else - let metrics = Client.Host.get_metrics ~rpc ~session_id ~self:host in - let live' = - Client.Host_metrics.get_live ~rpc ~session_id ~self:metrics - in - let enabled' = Client.Host.get_enabled ~rpc ~session_id ~self:host in - if live.(i) <> live' || enabled.(i) <> enabled' then - debug "Individual host status: %s" (string_of_status ()) ; - live.(i) <- live' ; - enabled.(i) <- enabled' ; - live' && enabled' - with _ -> false - in - let finished = ref false in - while not !finished do - Unix.sleep 20 ; - finished := - List.fold_left ( && ) true - (Array.to_list (Array.mapi (has_host_booted poolrpc poolses) hosts)) - done ; - debug "All hosts are ready." ; - let mypool = List.hd (Client.Pool.get_all ~rpc:poolrpc ~session_id:poolses) in - let master = - Client.Pool.get_master ~rpc:poolrpc ~session_id:poolses ~self:mypool - in - let iscsi_vm_ip = CreateVM.make_iscsi_ip pool in - let xml = - try - Client.SR.probe ~rpc:poolrpc ~session_id:poolses ~host:master - ~device_config:[("target", iscsi_vm_ip)] - ~sm_config:[] ~_type:"lvmoiscsi" - with Api_errors.Server_error ("SR_BACKEND_FAILURE_96", [xml; _]) -> xml - in - let iqns = parse_sr_probe_for_iqn xml in - if iqns = [] then - failwith "iSCSI target VM failed again - maybe you should fix it this time?" ; - let iqn = List.hd iqns in - let xml = - try - Client.SR.probe ~rpc:poolrpc ~session_id:poolses ~host:master - ~device_config:[("target", iscsi_vm_ip); ("targetIQN", iqn)] - ~sm_config:[] ~_type:"lvmoiscsi" - with Api_errors.Server_error ("SR_BACKEND_FAILURE_107", [xml; _]) -> xml - in - (* Create an SR for each LUN found *) - Printf.printf "Creating LVMoISCSI SRs (one for each of %d LUNs)\n%!" 
- pool.iscsi_luns ; - let scsiids = Array.of_list (parse_sr_probe_for_scsiids xml) in - if Array.length scsiids <> pool.iscsi_luns then - failwith - (Printf.sprintf - "We created %d VDIs on the iSCSI target VM but found %d LUNs" - pool.iscsi_luns (Array.length scsiids) - ) ; - let lun_srs = - Array.init pool.iscsi_luns (fun i -> - Printf.printf " - Creating shared LVMoISCSI SR %d...\n%!" i ; - let name_label = Printf.sprintf "LVMoISCSI-%d" i in - Client.SR.create ~rpc:poolrpc ~session_id:poolses ~host:master - ~device_config: - [ - ("target", iscsi_vm_ip) - ; ("targetIQN", iqn) - ; ("SCSIid", scsiids.(i)) - ] - ~physical_size:0L ~name_label ~name_description:"" ~_type:"lvmoiscsi" - ~content_type:"" ~shared:true ~sm_config:[] - ) - in - let local_srs = - Array.mapi - (fun i host_uuid -> - let h = - Client.Host.get_by_uuid ~rpc:poolrpc ~session_id:poolses - ~uuid:host_uuid - in - let name_label = Printf.sprintf "Local LVM on host %d" i in - Client.SR.create ~rpc:poolrpc ~session_id:poolses ~host:h - ~device_config:[("device", "/dev/" ^ sr_disk_device)] - ~physical_size:0L ~name_label ~name_description:"" ~_type:"lvm" - ~content_type:"" ~shared:false ~sm_config:[] - ) - host_uuids - in - let pifs = Client.PIF.get_all ~rpc:poolrpc ~session_id:poolses in - let bondednets = - Array.init pool.bonds (fun i -> - Client.Network.create ~rpc:poolrpc ~session_id:poolses - ~name_label:(Printf.sprintf "Network associated with bond%d" i) - ~name_description:"" ~mTU:1500L ~other_config:[] ~bridge:"" - ~managed:true ~tags:[] - ) - in - let unused_nets = - ref - (Listext.List.setify - (List.map - (fun pif -> - Client.PIF.get_network ~rpc:poolrpc ~session_id:poolses ~self:pif - ) - pifs - ) - ) - in - (* Reconfigure the master's networking last as this will be the most destructive *) - let master_uuid = - Client.Host.get_uuid ~rpc:poolrpc ~session_id:poolses ~self:master - in - let slave_uuids = - List.filter (fun x -> x <> master_uuid) (Array.to_list host_uuids) - in - let host_uuids 
= Array.of_list (slave_uuids @ [master_uuid]) in - let (_ : API.ref_Bond array array) = - Array.map - (fun host_uuid -> - let host_ref = - Client.Host.get_by_uuid ~rpc:poolrpc ~session_id:poolses - ~uuid:host_uuid - in - let pifs = - List.filter - (fun pif -> - Client.PIF.get_host ~rpc:poolrpc ~session_id:poolses ~self:pif - = host_ref - ) - pifs - in - Array.init pool.bonds (fun bnum -> - let device = Printf.sprintf "eth%d" (bnum * 2) in - let device2 = Printf.sprintf "eth%d" ((bnum * 2) + 1) in - let master = - List.find - (fun pif -> - Client.PIF.get_device ~rpc:poolrpc ~session_id:poolses - ~self:pif - = device - ) - pifs - in - let pifs = - List.filter - (fun pif -> - let d = - Client.PIF.get_device ~rpc:poolrpc ~session_id:poolses - ~self:pif - in - d = device || d = device2 - ) - pifs - in - let nets = - List.map - (fun pif -> - Client.PIF.get_network ~rpc:poolrpc ~session_id:poolses - ~self:pif - ) - pifs - in - unused_nets := - List.filter (fun net -> not (List.mem net nets)) !unused_nets ; - let mac = - Client.PIF.get_MAC ~rpc:poolrpc ~session_id:poolses ~self:master - in - let bond = - Client.Bond.create ~rpc:poolrpc ~session_id:poolses - ~network:bondednets.(bnum) ~members:pifs ~mAC:mac - ~mode:`balanceslb ~properties:[] - in - let bondpif = - Client.Bond.get_master ~rpc:poolrpc ~session_id:poolses ~self:bond - in - Client.PIF.reconfigure_ip ~rpc:poolrpc ~session_id:poolses - ~self:bondpif ~mode:`Static - ~iP: - (Client.PIF.get_IP ~rpc:poolrpc ~session_id:poolses ~self:master) - ~netmask:"255.255.255.0" ~gateway:"" ~dNS:"" ; - if - Client.PIF.get_management ~rpc:poolrpc ~session_id:poolses - ~self:master - then ( - ( try - Client.Host.management_reconfigure ~rpc:poolrpc - ~session_id:poolses ~pif:bondpif - with _ -> () - ) ; - debug "Reconfigured management interface to be on the bond." 
; - (* In case we've lost our network connection *) - wait_until_guests_have_booted () - ) ; - bond - ) - ) - host_uuids - in - debug "Waiting for all guests to be pingable again." ; - wait_until_guests_have_booted () ; - debug "Successfully pinged all virtual hosts." ; - (* We'll use the Windows XP SP3 template to create the VMs required *) - let nets_for_vms = !unused_nets @ Array.to_list bondednets in - debug "Nets for VMs: %s" - (String.concat "," - (List.map - (fun net -> - Client.Network.get_name_label ~rpc:poolrpc ~session_id:poolses - ~self:net - ) - nets_for_vms - ) - ) ; - let networks = Array.of_list nets_for_vms in - Printf.printf "Creating VMs (%s)\n%!" - (if pool.use_shared_storage then "on shared storage" else "on local storage") ; - let storages = if pool.use_shared_storage then lun_srs else local_srs in - List.iter - (fun vm -> - CreateVM.make ~rpc:poolrpc ~session_id:poolses ~networks ~storages ~pool - ~vm - ) - pool.vms - -let create_pool session_id _ pool_name key _ = - iscsi_vm_iso_must_exist session_id ; - default_sr_must_be_suitable session_id ; - let pool = Scenario.get pool_name in - let pool = {pool with key} in - if pool.Scenario.hosts <> 1 then ( - debug ~out:stderr - "At the moment, multiple host pool is supported only for SDK pool" ; - exit 1 - ) ; - let host = List.hd (Client.Host.get_all ~rpc ~session_id) in - (* 1/ forget the local lvm storages *) - List.iter - (fun lvm_sr -> - List.iter - (fun pbd -> Client.PBD.unplug ~rpc ~session_id ~self:pbd) - (Client.SR.get_PBDs ~rpc ~session_id ~self:lvm_sr) ; - Client.SR.forget ~rpc ~session_id ~sr:lvm_sr - ) - (Client.SR.get_by_name_label ~rpc ~session_id ~label:"Local storage") ; - (* 2/ create an default ext storage *) - let storages = - match Client.SR.get_by_name_label ~rpc ~session_id ~label:"Local vhd" with - | [] -> - [| - Client.SR.create ~rpc ~session_id ~_type:"ext" - ~name_label:"Local vhd" ~name_description:"" - ~device_config:[("device", "/dev/sda3")] - ~host 
~physical_size:Scenario.sr_disk_size ~shared:true - ~sm_config:[] ~content_type:"" - |] - | l -> - Array.of_list l - in - let pool_ref = List.hd (Client.Pool.get_all ~rpc ~session_id) in - Client.Pool.set_default_SR ~rpc ~session_id ~self:pool_ref ~value:storages.(0) ; - Client.Pool.set_crash_dump_SR ~rpc ~session_id ~self:pool_ref - ~value:storages.(0) ; - Client.Pool.set_suspend_image_SR ~rpc ~session_id ~self:pool_ref - ~value:storages.(0) ; - (* 3/ building the VMs *) - let networks = Array.of_list (Client.Network.get_all ~rpc ~session_id) in - List.iter - (fun vm -> CreateVM.make ~rpc ~session_id ~networks ~storages ~pool ~vm) - pool.vms diff --git a/ocaml/perftest/cumulative_time.ml b/ocaml/perftest/cumulative_time.ml deleted file mode 100644 index 5c7ff17d4e9..00000000000 --- a/ocaml/perftest/cumulative_time.ml +++ /dev/null @@ -1,145 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -open Graphutil - -let _ = - let inputs = ref [] in - let format = ref `X11 in - let separate_graphs = ref false in - let graphic_filename = ref "" in - Arg.parse - [ - ( "-format" - , Arg.Symbol - ( ["eps"; "gif"; "x11"] - , function - | "eps" -> - format := `Eps - | "gif" -> - format := `Gif - | "x11" -> - format := `X11 - | _ -> - failwith "huh ?" 
- ) - , " Set output format (default: X11)" - ) - ; ( "-output" - , Arg.Set_string graphic_filename - , " Set default output file (for non-X11 modes)" - ) - ; ( "-separate" - , Arg.Set separate_graphs - , " Plot each data series on separate axes" - ) - ] - (fun x -> inputs := x :: !inputs) - "Generate a histogram by convolving sample points with a gaussian.\nusage:" ; - if !inputs = [] then failwith "Needs at least one input filename" ; - if !format <> `X11 && !graphic_filename = "" then - failwith "This format needs an -output" ; - let inputs = get_info ~separate:!separate_graphs !inputs in - let output_files = - List.map (fun _ -> Filename.temp_file "cumulative" "dat") inputs - in - let all = List.combine inputs output_files in - Xapi_stdext_pervasives.Pervasiveext.finally - (fun () -> - let max_readings = ref 0 in - List.iter - (fun ((info, points), output_file) -> - let (_ : string) = get_result info in - let num_points = List.length points in - max_readings := max num_points !max_readings ; - let open Xapi_stdext_unix in - Unixext.with_file output_file - [Unix.O_WRONLY; Unix.O_TRUNC; Unix.O_CREAT] 0o644 (fun fd -> - let points_array = Array.of_list (List.rev points) in - let cumulative = ref 0. in - for i = 0 to num_points - 1 do - cumulative := points_array.(i) +. !cumulative ; - Unixext.really_write_string fd - (Printf.sprintf "%d %f %f\n" (i + 1) !cumulative - points_array.(i) - ) - done - ) - ) - all ; - (* Plot a line for (a) elapsed time and (b) this particular duration *) - let ls = - List.concat - (List.mapi - (fun i ((info, _floats), output) -> - let graph_one_label = - Printf.sprintf "Cumulative time, SR %d (left axis)" (i + 1) - in - let graph_two_label = - Printf.sprintf "Time per VM, SR %d (right axis)" (i + 1) - in - [ - { - Gnuplot.filename= output - ; title= graph_one_label - ; graphname= get_result info - ; field= 2 - ; yaxis= 1 - ; scale= 1. /. 3600. 
- ; style= "lines" - } - ; { - Gnuplot.filename= output - ; title= graph_two_label - ; graphname= get_result info - ; field= 3 - ; yaxis= 2 - ; scale= 1. - ; style= "lines" - } - ] - ) - all - ) - in - List.iter - (fun result -> - let g = - { - Gnuplot.xlabel= - Printf.sprintf "Number of %s" (string_of_result result) - ; ylabel= "Elapsed time (h)" - ; y2label= Some "Duration (s)" - ; lines= List.filter (fun l -> l.Gnuplot.graphname = result) ls - ; log_x_axis= false - ; xrange= Some (0., float_of_int !max_readings) - ; normal_probability_y_axis= None - } - in - let output = - match !format with - | `Eps -> - Gnuplot.Ps (Printf.sprintf "%s-%s.eps" !graphic_filename result) - | `Gif -> - Gnuplot.Gif (Printf.sprintf "%s-%s.gif" !graphic_filename result) - | `X11 -> - Gnuplot.X11 - in - ignore (Gnuplot.render g output) - ) - (get_result_types inputs) - ) - (fun () -> - List.iter (fun f -> Xapi_stdext_unix.Unixext.unlink_safe f) output_files - ) diff --git a/ocaml/perftest/dune b/ocaml/perftest/dune deleted file mode 100644 index 38d7a0efd16..00000000000 --- a/ocaml/perftest/dune +++ /dev/null @@ -1,24 +0,0 @@ -(executable - (modes exe) - (name perftest) - (public_name perftest) - (package xapi-debug) - (libraries - - http_lib - rpclib.core - threads.posix - xapi-consts - xapi-cli-protocol - xapi-client - xapi-datamodel - xapi-inventory - xapi-types - xapi-stdext-pervasives - xapi-stdext-std - xapi-stdext-threads - xapi-stdext-unix - xml-light2 - ) -) - diff --git a/ocaml/perftest/gnuplot.ml b/ocaml/perftest/gnuplot.ml deleted file mode 100644 index c39ca01475e..00000000000 --- a/ocaml/perftest/gnuplot.ml +++ /dev/null @@ -1,165 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -(** Module to drive gnuplot *) - -open Xapi_stdext_pervasives.Pervasiveext - -type line = { - graphname: string - ; filename: string - ; title: string - ; field: int - ; yaxis: int - ; (* 1 -> left axis; 2 -> right axis *) - scale: float - ; (* multiply the values by this factor *) - style: string (* 'linespoints', 'lines', etc *) -} - -type t = { - xlabel: string - ; ylabel: string - ; y2label: string option - ; log_x_axis: bool - ; xrange: (float * float) option - ; normal_probability_y_axis: (float * float) option - ; lines: line list -} - -type output = Ps of string | Gif of string | X11 - -let make_normal_probability_tics tics = - Printf.sprintf "set ytics (%s)" - (String.concat ", " - (List.map (fun tic -> Printf.sprintf "\"%.2f\" invnorm(%f)" tic tic) tics) - ) - -let make_log_tics tics = - Printf.sprintf "set xtics (%s)" - (String.concat ", " - (List.map (fun tic -> Printf.sprintf "\"%.2f\" %f" tic tic) tics) - ) - -let invnorm (x : t) (y : string) = - if x.normal_probability_y_axis = None then - y - else - Printf.sprintf "invnorm(%s)" y - -let render (x : t) output = - let line (y : line) = - let field = - if x.normal_probability_y_axis = None then - Printf.sprintf "($%d*%f)" y.field y.scale - else - Printf.sprintf "(invnorm($%d*%f))" y.field y.scale - in - Printf.sprintf "\"%s\" using 1:%s axis x1y%d title \"%s\" with %s" - y.filename field y.yaxis y.title y.style - in - let config = - [ - Printf.sprintf "set terminal %s" - ( match output with - | Ps _ -> - "postscript eps enhanced color" - | Gif _ -> - "gif" - | X11 -> - "wxt 0" - ) - ; Printf.sprintf "set output %s" - ( match output with - | Ps filename -> - "\"" ^ filename ^ "\"" - | Gif filename -> - "\"" ^ filename ^ "\"" - | X11 -> - "" - 
) - ; Printf.sprintf "set xlabel \"%s\"" x.xlabel - ; Printf.sprintf "set ylabel \"%s\"" x.ylabel - ] - @ ( match x.y2label with - | None -> - [] - | Some label -> - [ - Printf.sprintf "set y2label \"%s\"" label - ; "set ytics nomirror" - ; "set y2tics auto" - ; "set y2range [0:]" - ] - ) - @ ( match x.normal_probability_y_axis with - | Some (min, max) -> - [ - make_normal_probability_tics - [ - 0.001 - ; 0.01 - ; 0.05 - ; 0.1 - ; 0.2 - ; 0.3 - ; 0.4 - ; 0.5 - ; 0.6 - ; 0.7 - ; 0.8 - ; 0.9 - ; 0.95 - ; 0.99 - ; 0.999 - ] - ; Printf.sprintf "set yrange [invnorm(%f):invnorm(%f)]" min max - ] - | None -> - [] - ) - @ ( match x.log_x_axis with - | true -> - [ - "set logscale x" - ; "set grid" - ; "set xtics (\"1\" 1, \"2\" 2, \"3\" 3, \"4\" 4, \"5\" 5, \"6\" 6, \ - \"7\" 7, \"8\" 8, \"9\" 9, \"10\" 10, \"11\" 11, \"12\" 12, \ - \"13\" 13, \"14\" 14, \"15\" 15, \"20\" 20, \"30\" 30)" - ] - | false -> - [] - ) - @ [ - (if x.log_x_axis then "set logscale x" else "") - ; ( match x.xrange with - | None -> - "set xrange [*:*]" - | Some (min, max) -> - Printf.sprintf "set xrange [%f:%f]" min max - ) - ; Printf.sprintf "plot %s" (String.concat ", " (List.map line x.lines)) - ] - in - let f = Filename.temp_file "gnuplot" "gnuplot" in - let open Xapi_stdext_unix in - Unixext.write_string_to_file f (String.concat "\n" config) ; - finally - (fun () -> - Unix.system - (Printf.sprintf "gnuplot %s %s" - (if output = X11 then "-persist" else "") - f - ) - ) - (fun () -> Unixext.unlink_safe f) diff --git a/ocaml/perftest/graphutil.ml b/ocaml/perftest/graphutil.ml deleted file mode 100644 index e2b0880ed46..00000000000 --- a/ocaml/perftest/graphutil.ml +++ /dev/null @@ -1,134 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. 
with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -open Xapi_stdext_std -open Xapi_stdext_unix -open Testtypes -open Perfdebug - -type short_info = string * string * string - -type info = short_info * float list - -let merge_infos (infos : info list) = - let names = - Listext.List.setify - (List.map - (fun ((file, result, subtest), _) -> (file, result, subtest)) - infos - ) - in - let floats ((file, result, subtest) as i) = - ( i - , List.concat_map - (fun ((f, r, s), fl) -> - if file = f && result = r && subtest = s then fl else [] - ) - infos - ) - in - let merge_infos = List.map floats names in - debug "Available data:" ; - List.iter - (fun ((f, r, s), fl) -> - debug "\t* in file: %s \t%s \t%s \t-- %i points" f r s (List.length fl) - ) - merge_infos ; - merge_infos - -let clone_cnt = ref 0 - -let info_from_raw_result ?(separate = false) file result : info list = - match result.rawresult with - | StartTest floats | ShutdownTest floats -> - [((file, result.resultname, result.subtest), floats)] - | CloneTest floats -> - (* Pretend that we got the data from separate files, so they are considered as separate data series *) - let file = Printf.sprintf "%s-%d" file !clone_cnt in - (* Make the resultnames distinct to force the lines onto separate graphs *) - let resultname = - if separate then - Printf.sprintf "%s-%d" result.resultname !clone_cnt - else - result.resultname - in - let subtest = result.subtest in - clone_cnt := !clone_cnt + 1 ; - [((file, resultname, subtest), floats)] - | _ -> - [] - -let floats_from_file fname = - let floats = ref [] in - Unixext.readfile_line - (fun line -> floats := float_of_string (String.trim line) :: !floats) - fname ; - !floats - -let get_info 
?(separate = false) files : info list = - let aux f = - match Testtypes.from_string (Unixext.string_of_file f) with - | None -> - [((f, "", ""), floats_from_file f)] - | Some results -> - List.concat_map (info_from_raw_result ~separate f) results - in - merge_infos (List.concat_map aux files) - -let short_info_to_string ((file, result, subtest) : short_info) = - Printf.sprintf "%s.%s.%s" result subtest file - -let short_info_to_title ((_, _, subtest) : short_info) = subtest - -let get_result ((_, result, _) : short_info) = result - -let get_result_types (all_info : info list) = - Listext.List.setify (List.map (fun ((_, result, _), _) -> result) all_info) - -let replace_assoc r n l = - if List.mem_assoc r l then - (r, n) :: List.remove_assoc r l - else - (r, n) :: l - -let get_op op extremum (infos : info list) = - let mem : (string * float) list ref = ref [] in - let aux ((_, result, _), floats) = - if List.mem_assoc result !mem then - mem := - (result, List.fold_left op (List.assoc result !mem) floats) - :: List.remove_assoc result !mem - else - mem := (result, List.fold_left op extremum floats) :: !mem - in - List.iter aux infos ; !mem - -let get_min = get_op min max_float - -let get_max = get_op max min_float - -let string_of_result = function - | "startall" -> - "sequential VM.start" - | "stopall" -> - "sequential VM.stop" - | "parallel_startall" -> - "parallel VM.start" - | "parallel_stopall" -> - "parallel VM.stop" - | "clone" -> - "parallel VM.clone" - | s when Xstringext.String.startswith "clone-" s -> - "parallel VM.clone" - | _ -> - "???" diff --git a/ocaml/perftest/histogram.ml b/ocaml/perftest/histogram.ml deleted file mode 100644 index 19afe0db278..00000000000 --- a/ocaml/perftest/histogram.ml +++ /dev/null @@ -1,230 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -open Perfdebug -open Statistics -open Graphutil - -let _ = - let sigma = ref 0.1 in - let inputs = ref [] in - let format = ref `X11 in - let graphic_filename = ref "" in - let integrate = ref false in - let normal = ref false in - let log_axis = ref false in - let min_percentile = ref 1. in - let max_percentile = ref 95. in - Arg.parse - [ - ( "-format" - , Arg.Symbol - ( ["eps"; "gif"; "x11"] - , function - | "eps" -> - format := `Eps - | "gif" -> - format := `Gif - | "x11" -> - format := `X11 - | _ -> - failwith "huh ?" 
- ) - , " Set output format (default: X11)" - ) - ; ( "-output" - , Arg.Set_string graphic_filename - , " Set default output file (for non-X11 modes)" - ) - ; ( "-sigma" - , Arg.Set_float sigma - , Printf.sprintf " Set sigma for the gaussian (default %f)" !sigma - ) - ; ( "-integrate" - , Arg.Set integrate - , Printf.sprintf - " Integrate the probability density function (default: %b)" !integrate - ) - ; ( "-normal" - , Arg.Set normal - , Printf.sprintf " Use a 'normal probability axis' (default: %b)" !normal - ) - ; ( "-log" - , Arg.Set log_axis - , Printf.sprintf " Use a log x axis (default: %b)" !log_axis - ) - ; ( "-minpercentile" - , Arg.Set_float min_percentile - , Printf.sprintf " Minimum percentile to plot (default: %.2f)" - !min_percentile - ) - ; ( "-maxpercentile" - , Arg.Set_float max_percentile - , Printf.sprintf " Maximum percentile to plot (default: %.2f)" - !max_percentile - ) - ] - (fun x -> inputs := x :: !inputs) - "Generate a histogram by convolving sample points with a gaussian.\nusage:" ; - if !inputs = [] then failwith "Needs at least one input filename" ; - if !format <> `X11 && !graphic_filename = "" then - failwith "This format needs an -output" ; - let sigma = !sigma in - let inputs = get_info !inputs in - let output_files = - List.map (fun _ -> Filename.temp_file "histogram" "dat") inputs - in - let all = List.combine inputs output_files in - Xapi_stdext_pervasives.Pervasiveext.finally - (fun () -> - (* Write some summary statistics on stderr *) - List.iter - (fun (info, points) -> - debug ~out:stderr "%s has lognormal mean %f +/- %f" - (short_info_to_string info) - (LogNormal.mean points) (LogNormal.sigma points) - ) - inputs ; - let min_point = get_min inputs in - let max_point = get_max inputs in - (* To make sure that each added gaussian really adds 1 unit of area, we extend the bins - 3 sigmas to the left and right *) - let min_point = List.map (fun (r, n) -> (r, n -. (3. *. 
sigma))) min_point - and max_point = - List.map (fun (r, n) -> (r, n +. (3. *. sigma))) max_point - in - (* Attempt to zoom the graph in on the 10% to 90% region *) - let xrange_min = ref max_point and xrange_max = ref min_point in - List.iter - (fun ((info, points), output_file) -> - let result = get_result info in - let x = - Hist.make - (List.assoc result min_point) - (List.assoc result max_point) - 1000 - in - - (* -- Apply the Weierstrass transform -- *) - - (* NB Each call to Hist.convolve (i.e. each VM timing measured) increases the total area under the curve by 1. - By dividing through by 'n' (where 'n' is the total number of VMs i.e. points) we make the total area under - the curve equal 1 so we can consider the result as a probability density function. In particular this means - we can directly compare curves for 10, 100, 1000 measurements without worrying about scale factors and - also trade speed for estimation accuracy. *) - let num_points = float_of_int (List.length points) in - List.iter - (fun y -> - Hist.convolve x (fun z -> gaussian y sigma z /. num_points) - ) - points ; - (* Sanity-check: area under histogram should be almost 1.0 *) - let total_area = - Hist.fold x - (fun bin_start bin_end height acc -> - ((bin_end -. bin_start) *. height) +. acc - ) - 0. - in - if abs_float (1. -. 
total_area) > 0.01 then - debug ~out:stderr - "WARNING: area under histogram should be 1.0 but is %f" total_area ; - let cumulative = Hist.integrate x in - let t_10 = Hist.find_x cumulative 0.1 in - let t_80 = Hist.find_x cumulative 0.8 in - let t_90 = Hist.find_x cumulative 0.9 in - let t_95 = Hist.find_x cumulative 0.95 in - debug ~out:stderr "10th percentile: %f" t_10 ; - debug ~out:stderr "80th percentile: %f" t_80 ; - debug ~out:stderr "90th percentile: %f" t_90 ; - debug ~out:stderr "95th percentile: %f" t_95 ; - debug ~out:stderr "Clipping data between %.0f and %.0f percentiles" - !min_percentile !max_percentile ; - xrange_min := - replace_assoc result - (min - (List.assoc result !xrange_min) - (Hist.find_x cumulative (!min_percentile /. 100.)) - ) - !xrange_min ; - xrange_max := - replace_assoc result - (max - (List.assoc result !xrange_max) - (Hist.find_x cumulative (!max_percentile /. 100.)) - ) - !xrange_max ; - let x = if !integrate then Hist.integrate x else x in - Xapi_stdext_unix.Unixext.with_file output_file - [Unix.O_WRONLY; Unix.O_TRUNC; Unix.O_CREAT] - 0o644 (Hist.to_gnuplot x) - ) - all ; - let ls = - List.map - (fun ((info, _floats), output) -> - { - Gnuplot.filename= output - ; title= short_info_to_title info - ; graphname= get_result info - ; field= 2 - ; yaxis= 1 - ; scale= 1. - ; style= "linespoints" - } - ) - all - in - let ylabel = - if !integrate then - "Cumulative probability" - else - "Estimate of the probability density function" - in - List.iter - (fun result -> - let g = - { - Gnuplot.xlabel= - Printf.sprintf "Time for %s XenAPI calls to complete / seconds" - (string_of_result result) - ; ylabel - ; y2label= None - ; lines= List.filter (fun l -> l.Gnuplot.graphname = result) ls - ; log_x_axis= !log_axis - ; xrange= - Some - (List.assoc result !xrange_min, List.assoc result !xrange_max) - ; normal_probability_y_axis= - ( if !normal then - Some (!min_percentile /. 100., !max_percentile /. 100.) 
- else - None - ) - } - in - let output = - match !format with - | `Eps -> - Gnuplot.Ps (Printf.sprintf "%s-%s.eps" !graphic_filename result) - | `Gif -> - Gnuplot.Gif (Printf.sprintf "%s-%s.gif" !graphic_filename result) - | `X11 -> - Gnuplot.X11 - in - ignore (Gnuplot.render g output) - ) - (get_result_types inputs) - ) - (fun () -> List.iter Xapi_stdext_unix.Unixext.unlink_safe output_files) diff --git a/ocaml/perftest/perfdebug.ml b/ocaml/perftest/perfdebug.ml deleted file mode 100644 index 4c71c8e8ce1..00000000000 --- a/ocaml/perftest/perfdebug.ml +++ /dev/null @@ -1,24 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -let stdout_m = Mutex.create () - -let debug ?(out = stdout) (fmt : ('a, unit, string, unit) format4) = - Xapi_stdext_threads.Threadext.Mutex.execute stdout_m (fun () -> - Printf.ksprintf - (fun s -> - Printf.fprintf out "%s\n" s ; - flush stdout - ) - fmt - ) diff --git a/ocaml/perftest/perftest.ml b/ocaml/perftest/perftest.ml deleted file mode 100644 index c9b744676be..00000000000 --- a/ocaml/perftest/perftest.ml +++ /dev/null @@ -1,195 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -(* Performance testing *) - -open Client -open Perfutil -open Testtypes -open Perfdebug - -let xenrtfname = ref "perftest-xenrt.log" - -let marshall_xenrt pool metadata results = - let oc = open_out !xenrtfname in - Printf.fprintf oc "\n" ; - Printf.fprintf oc "%s\n" (Scenario.xml_of_scenario pool) ; - Printf.fprintf oc " \n" ; - List.iter - (fun (k, v) -> Printf.fprintf oc " %s%s\n" k v) - metadata ; - Printf.fprintf oc " \n \n" ; - List.iter - (fun r -> - Printf.fprintf oc " %f\n" - r.resultname r.subtest r.xenrtresult - ) - results ; - Printf.fprintf oc " \n" ; - close_out oc - -let rawfname = ref "" - -let marshall_raw (raw_results : Testtypes.result list) = - if !rawfname <> "" then ( - let oc = open_out !rawfname in - Printf.fprintf oc "%s" (Testtypes.to_string raw_results) ; - close_out oc - ) - -let marshall pool metadata results = - marshall_raw results ; - marshall_xenrt pool metadata results - -let string_of_set l = Printf.sprintf "{%s}" (String.concat ", " l) - -let get_metadata rpc session_id = - let pool = List.hd (Client.Pool.get_all ~rpc ~session_id) in - let master = Client.Pool.get_master ~rpc ~session_id ~self:pool in - let sv = Client.Host.get_software_version ~rpc ~session_id ~self:master in - sv - -let _ = - let template_name = ref "sdk-gold" in - let key = ref "" in - let scenario = ref "xendesktop" in - let ipbase = ref 0 in - let mode = ref "" in - let run_all = ref false in - let iter = ref 1 in - let possible_modes = ["initpool"; "destroypool"; "run"; "describe"] in - Arg.parse - (Arg.align - [ - ( "-template" - , Arg.Set_string template_name - , Printf.sprintf " Clone VMs from named base template (default is %s)" - !template_name - ) - ; ( "-scenario" - , Arg.Set_string 
scenario - , Printf.sprintf - " Choose scenario (default is %s; possibilities are %s" !scenario - (string_of_set (Scenario.get_all ())) - ) - ; ("-key", Arg.Set_string key, " Key name to identify the Pool instance") - ; ( "-ipbase" - , Arg.Set_int ipbase - , Printf.sprintf - " Choose base IP address (default is %d for 192.168.%d.1)" !ipbase - !ipbase - ) - ; ( "-xenrtoutput" - , Arg.Set_string xenrtfname - , " Set output filename for xenrt (defaults to perftest-xenrt.log)" - ) - ; ( "-rawoutput" - , Arg.Set_string rawfname - , " Set output filename for raw results (by default, do not output \ - results)" - ) - ; ( "-runall" - , Arg.Set run_all - , Printf.sprintf " Run tests %s (tests run by default are %s)" - (string_of_set Tests.testnames) - (string_of_set Tests.runtestnames) - ) - ; ( "-iter" - , Arg.Set_int iter - , Printf.sprintf " Number of iterations (default is %i)" !iter - ) - ] - ) - (fun x -> - if !mode = "" then - mode := x - else - debug ~out:stderr "Ignoring unexpected argument: %s" x - ) - (Printf.sprintf - "Configure and run a simulated test\nUsage: %s -key %s" - Sys.argv.(0) - (string_of_set possible_modes) - ) ; - if not (List.mem !mode possible_modes) then ( - debug ~out:stderr "Unknown mode: \"%s\" (possibilities are %s)" !mode - (string_of_set possible_modes) ; - exit 1 - ) ; - if not (List.mem !scenario (Scenario.get_all ())) then ( - debug ~out:stderr "Unknown scenario: \"%s\" (possibilities are %s)" - !scenario - (string_of_set (Scenario.get_all ())) ; - exit 1 - ) ; - if !key = "" then ( - debug ~out:stderr "Must set a -key to identify the Pool instance" ; - exit 1 - ) ; - try - match !mode with - | "describe" -> - let lines = Createpool.describe_pool !template_name !scenario !key in - List.iter (fun x -> debug "* %s" x) lines - | _ -> - let session = - Client.Session.login_with_password ~rpc ~uname:"root" ~pwd:"xenroot" - ~version:"1.2" ~originator:"perftest" - in - let (_ : API.string_to_string_map) = get_metadata rpc session in - let open 
Xapi_stdext_pervasives in - Pervasiveext.finally - (fun () -> - let pool = Scenario.get !scenario in - match !mode with - | "initpool" when pool.Scenario.sdk -> - Createpool.create_sdk_pool session !template_name !scenario !key - !ipbase - | "initpool" -> - Createpool.create_pool session !template_name !scenario !key - !ipbase - | "destroypool" when pool.Scenario.sdk -> - Createpool.destroy_sdk_pool session !template_name !key - | "destroypool" -> - debug ~out:stderr "Not yet implemented ... " - | "run" -> - let newrpc = - if pool.Scenario.sdk then - remoterpc (Printf.sprintf "192.168.%d.1" !ipbase) - else - rpc - in - let session = - if pool.Scenario.sdk then - Client.Session.login_with_password ~rpc:newrpc ~uname:"root" - ~pwd:"xensource" ~version:"1.2" ~originator:"perftest" - else - session - in - Pervasiveext.finally - (fun () -> - marshall pool - (get_metadata newrpc session) - (Tests.run newrpc session !key !run_all !iter) - ) - (fun () -> - if pool.Scenario.sdk then - Client.Session.logout ~rpc:newrpc ~session_id:session - ) - | _ -> - failwith (Printf.sprintf "unknown mode: %s" !mode) - ) - (fun () -> Client.Session.logout ~rpc ~session_id:session) - with Api_errors.Server_error (code, params) -> - debug ~out:stderr "Caught API error: %s [ %s ]" code - (String.concat "; " params) diff --git a/ocaml/perftest/perfutil.ml b/ocaml/perftest/perfutil.ml deleted file mode 100644 index f1ebe69c93b..00000000000 --- a/ocaml/perftest/perfutil.ml +++ /dev/null @@ -1,101 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -(* Utilities for performance monitor *) - -open Client - -let rpc xml = - let open Xmlrpc_client in - XMLRPC_protocol.rpc ~srcstr:"perftest" ~dststr:"xapi" - ~transport:(Unix (Filename.concat "/var/lib/xcp" "xapi")) - ~http:(xmlrpc ~version:"1.0" "/") - xml - -let remoterpc host xml = - let open Xmlrpc_client in - XMLRPC_protocol.rpc ~srcstr:"perftest" ~dststr:"remotexapi" - ~transport:(SSL (SSL.make ~verify_cert:None (), host, 443)) - ~http:(xmlrpc ~version:"1.1" "/") - xml - -(* Rewrite the provisioning XML fragment to create all disks on a new, specified SR. This is cut-n-pasted from cli_util.ml *) -let rewrite_provisioning_xml rpc session_id new_vm sr_uuid = - let rewrite_xml xml newsrname = - let rewrite_disk = function - | Xml.Element ("disk", params, []) -> - Xml.Element - ( "disk" - , List.map - (fun (x, y) -> if x <> "sr" then (x, y) else ("sr", newsrname)) - params - , [] - ) - | x -> - x - in - match xml with - | Xml.Element ("provision", [], disks) -> - Xml.Element ("provision", [], List.map rewrite_disk disks) - | x -> - x - in - let other_config = Client.VM.get_other_config ~rpc ~session_id ~self:new_vm in - if List.mem_assoc "disks" other_config then ( - let xml = Xml.parse_string (List.assoc "disks" other_config) in - Client.VM.remove_from_other_config ~rpc ~session_id ~self:new_vm - ~key:"disks" ; - let newdisks = rewrite_xml xml sr_uuid in - Client.VM.add_to_other_config ~rpc ~session_id ~self:new_vm ~key:"disks" - ~value:(Xml.to_string newdisks) - ) - -let parse_sr_probe_for_iqn (xml : string) : string list = - match Xml.parse_string xml with - | Xml.Element ("iscsi-target-iqns", _, children) -> - let parse_tgts = function - | Xml.Element ("TGT", _, children) -> - let parse_kv = 
function - | Xml.Element (key, _, [Xml.PCData v]) -> - (key, String.trim v) - | _ -> - failwith "Malformed key/value pair" - in - let all = List.map parse_kv children in - List.assoc "TargetIQN" all - | _ -> - failwith "Malformed or missing " - in - List.map parse_tgts children - | _ -> - failwith "Missing element" - -let parse_sr_probe_for_scsiids (xml : string) : string list = - match Xml.parse_string xml with - | Xml.Element ("iscsi-target", _, children) -> - let parse_luns = function - | Xml.Element ("LUN", _, children) -> - let parse_kv = function - | Xml.Element (key, _, [Xml.PCData v]) -> - (key, String.trim v) - | _ -> - failwith "Malformed key/value pair" - in - let all = List.map parse_kv children in - List.assoc "SCSIid" all - | _ -> - failwith "Malformed or missing " - in - List.map parse_luns children - | _ -> - failwith "Missing element" diff --git a/ocaml/perftest/scenario.ml b/ocaml/perftest/scenario.ml deleted file mode 100644 index 0db7210a044..00000000000 --- a/ocaml/perftest/scenario.ml +++ /dev/null @@ -1,157 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- *) -(* VMs *) -type vm = {vbds: int; vifs: int; tag: string; num: int; has_affinity: bool} - -let default_vm num = - {vbds= 1; vifs= 4; tag= "everything"; num; has_affinity= true} - -let string_of_vm (x : vm) = - let vbds = - Printf.sprintf "%s VBDs" (if x.vbds = 0 then "no" else string_of_int x.vbds) - in - let vifs = - Printf.sprintf "%s VIFs" (if x.vifs = 0 then "no" else string_of_int x.vifs) - in - Printf.sprintf "%d VMs per host (tag %s) with %s, %s and affinity%s set" x.num - x.tag vbds vifs - (if x.has_affinity then "" else " not") - -(* Pools *) -type pool = { - id: string - ; sdk: bool - ; hosts: int - ; interfaces_per_host: int - ; vms: vm list - ; bonds: int - ; (* Needs to be less than or equal to interfaces_per_host / 2 *) - key: string - ; ipbase: int - ; iscsi_luns: int - ; use_shared_storage: bool -} - -let default = - { - id= "default" - ; sdk= true - ; hosts= 1 - ; interfaces_per_host= 6 - ; vms= - [ - default_vm 20 - ; {(default_vm 20) with vifs= 0; tag= "novifs"} - ; {(default_vm 20) with vbds= 0; tag= "novbds"} - ; {(default_vm 20) with vifs= 0; vbds= 0; tag= "novbdsnovifs"} - ] - ; bonds= 2 - ; key= "" - ; ipbase= 0 - ; iscsi_luns= 1 - ; use_shared_storage= false - } - -let description_of_pool (x : pool) = - [ - Printf.sprintf "Scenario: %s" x.id - ; Printf.sprintf "Key: %s" x.key - ; Printf.sprintf - "%d hosts, each with %d network interfaces, %d of which are paired into \ - %d bonds" - x.hosts x.interfaces_per_host (x.bonds * 2) x.bonds - ] - @ List.map string_of_vm x.vms - -let pools = - [ - {default with id= "pool0"; hosts= 1} - ; {default with id= "pool1"; hosts= 4} - ; {default with id= "pool2"; hosts= 16} - ; {default with id= "pool3"; hosts= 48} - ; { - default with - id= "real1" - ; hosts= 1 - ; sdk= false - ; bonds= 0 - ; interfaces_per_host= 0 - ; vms= [{(default_vm 50) with tag= ""}] - } - ; { - default with - id= "xendesktop" - ; hosts= 8 - ; vms= - [ - { - (default_vm 50) with - vbds= 0 - ; vifs= 1 - ; tag= "xendesktop" - ; 
has_affinity= false - } - ] - } - ; { - default with - id= "empty" - ; hosts= 1 - ; (* we won't be starting VMs in the clone test so we don't need any hosts *) - vms= [{(default_vm 1) with tag= "winxp-gold"; vifs= 1; vbds= 1}] - ; (* 1 per host *) - iscsi_luns= 6 - ; use_shared_storage= true - } - ] - -let get_all () = List.map (fun p -> p.id) pools - -let get name = List.find (fun p -> p.id = name) pools - -let xml_of_scenario s = - String.concat "\n" - ([ - "" - ; Printf.sprintf " %s" s.id - ; Printf.sprintf " %s" s.key - ; Printf.sprintf " %b" s.sdk - ; Printf.sprintf " %d" s.hosts - ; Printf.sprintf " %d" - s.interfaces_per_host - ; Printf.sprintf " " - ] - @ List.map - (fun vm -> - Printf.sprintf - " " - vm.vbds vm.vifs vm.tag vm.num vm.has_affinity - ) - s.vms - @ [ - " " - ; Printf.sprintf " %d" s.bonds - ; Printf.sprintf " %d" s.ipbase - ; "" - ] - ) - -let oc_key = "perftestsetup" - -let sr_disk_size = Int64.mul 1048576L 2093049L - -(* limit of 1 vhd ~2 terabytes (megs, gigs, t.. what?) *) - -let sr_disk_device = "xvde" diff --git a/ocaml/perftest/statistics.ml b/ocaml/perftest/statistics.ml deleted file mode 100644 index 49c5bc29aa8..00000000000 --- a/ocaml/perftest/statistics.ml +++ /dev/null @@ -1,155 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -(** Useful stats-related functions for plotting graphs and analysing the results of perftest *) - -let pi = atan 1. *. 4. - -let gaussian mu sigma x = - 1.0 - /. 
(sigma *. sqrt (2.0 *. pi)) - *. exp (-.((x -. mu) ** 2.0) /. (2.0 *. sigma *. sigma)) - -module Hist = struct - type t = { - bin_start: float array - ; bin_end: float array - ; bin_count: float array - (* height of each bin: multiply by width to get area *) - } - - (** Initialise a histogram covering values from [min:max] in 'n' uniform steps *) - let make (min : float) (max : float) (n : int) = - let range = max -. min in - { - bin_start= - Array.init n (fun i -> - (range /. float_of_int n *. float_of_int i) +. min - ) - ; bin_end= - Array.init n (fun i -> - (range /. float_of_int n *. float_of_int (i + 1)) +. min - ) - ; bin_count= Array.init n (fun _ -> 0.) - } - - let integrate (x : t) = - let n = Array.length x.bin_start in - let result = - make x.bin_start.(0) x.bin_end.(Array.length x.bin_end - 1) n - in - let area = ref 0. in - for i = 0 to Array.length x.bin_start - 1 do - assert (x.bin_start.(i) = result.bin_start.(i)) ; - let width = x.bin_end.(i) -. x.bin_start.(i) in - area := !area +. (x.bin_count.(i) *. width) ; - result.bin_count.(i) <- !area - done ; - result - - (** Call 'f' with the start, end and height of each bin *) - let iter (x : t) (f : float -> float -> float -> unit) = - for i = 0 to Array.length x.bin_start - 1 do - let width = x.bin_end.(i) -. x.bin_start.(i) in - f x.bin_start.(i) x.bin_end.(i) (x.bin_count.(i) /. width) - done - - (** Fold 'f' over the bins calling it with 'bin_start' 'bin_end' 'height' and 'acc' *) - let fold (x : t) (f : float -> float -> float -> 'a -> 'a) (init : 'a) = - let acc = ref init in - iter x (fun bin_start bin_end height -> - acc := f bin_start bin_end height !acc - ) ; - !acc - - (** Write output to a file descriptor in gnuplot format *) - let to_gnuplot (x : t) (fd : Unix.file_descr) = - iter x (fun bin_start bin_end height -> - let center = (bin_start +. bin_end) /. 
2.0 in - let line = Printf.sprintf "%f %f\n" center height |> Bytes.of_string in - let (_ : int) = Unix.write fd line 0 (Bytes.length line) in - () - ) - - exception Stop - - (** Add a sample point *) - let add (x : t) (y : float) = - try - for i = 0 to Array.length x.bin_start - 1 do - if x.bin_start.(i) <= y && y <= x.bin_end.(i + 1) then ( - x.bin_count.(i) <- x.bin_count.(i) +. 1.0 ; - raise Stop - ) - done - with Stop -> () - - (** Evaluate 'f' given the center of each bin and add the result to the bin count *) - let convolve (x : t) (f : float -> float) = - for i = 0 to Array.length x.bin_start - 1 do - let center = (x.bin_start.(i) +. x.bin_end.(i)) /. 2.0 in - let width = x.bin_end.(i) -. x.bin_start.(i) in - let result = f center in - x.bin_count.(i) <- x.bin_count.(i) +. (result *. width) - done - - (** Given a monotonically increasing histogram find the 'x' value given a 'y' *) - let find_x (x : t) (y : float) = - match - fold x - (fun bin_start bin_end height acc -> - match acc with - | Some _ -> - acc (* got it already *) - | None -> - if height > y then - Some ((bin_start +. bin_end) /. 2.) (* no interpolation *) - else - None - ) - None - with - | Some x -> - x - | None -> - raise Not_found -end - -module Normal = struct - let mean (points : float list) = - List.fold_left ( +. ) 0. points /. float_of_int (List.length points) - - let sigma (points : float list) = - let sum_x = List.fold_left ( +. ) 0. points - and sum_xx = List.fold_left ( +. ) 0. (List.map (fun x -> x *. x) points) in - let n = float_of_int (List.length points) in - sqrt ((n *. sum_xx) -. (sum_x *. sum_x)) /. n -end - -module LogNormal = struct - let mean (points : float list) = - let points = List.map log points in - let normal_sigma = Normal.sigma points in - let normal_mean = Normal.mean points in - exp (normal_mean +. (normal_sigma *. normal_sigma /. 
2.)) - - let sigma (points : float list) = - let points = List.map log points in - let normal_sigma = Normal.sigma points in - let normal_mean = Normal.mean points in - let v = - (exp (normal_sigma *. normal_sigma) -. 1.) - *. exp ((2. *. normal_mean) +. (normal_sigma *. normal_sigma)) - in - sqrt v -end diff --git a/ocaml/perftest/tests.ml b/ocaml/perftest/tests.ml deleted file mode 100644 index 731d0fa1200..00000000000 --- a/ocaml/perftest/tests.ml +++ /dev/null @@ -1,493 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -(* Tests *) - -open Client -open Xapi_stdext_std -open Xapi_stdext_pervasives.Pervasiveext -open Testtypes -open Perfdebug - -let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute - -type test = { - run: bool - ; key: string - ; testname: string - ; func: (Rpc.call -> Rpc.response) -> API.ref_session -> test -> result list -} - -let time f = - let now = Unix.gettimeofday () in - f () ; - let elapsed = Unix.gettimeofday () -. 
now in - elapsed - -let subtest_string key tag = - if tag = "" then - key - else - Printf.sprintf "%s (%s)" key tag - -let startall rpc session_id test = - let vms = Client.VM.get_all_records ~rpc ~session_id in - let tags = List.map (fun (_, vmr) -> vmr.API.vM_tags) vms in - let tags = Listext.List.setify (List.concat tags) in - List.map - (fun tag -> - debug "Starting VMs with tag: %s" tag ; - let vms = - List.filter (fun (_, vmr) -> List.mem tag vmr.API.vM_tags) vms - in - let vms = - List.sort - (fun (_, vmr1) (_, vmr2) -> - compare vmr1.API.vM_affinity vmr2.API.vM_affinity - ) - vms - in - let vms_names_uuids = - List.map - (fun (vm, vmr) -> (vm, vmr.API.vM_name_label, vmr.API.vM_uuid)) - vms - in - let times = - List.map - (fun (vm, name_label, uuid) -> - debug "Starting VM uuid '%s' (%s)" uuid name_label ; - let result = - time (fun () -> - Client.VM.start ~rpc ~session_id ~vm ~start_paused:false - ~force:false - ) - in - debug "Elapsed time: %f" result ; - result - ) - vms_names_uuids - in - { - resultname= test.testname - ; subtest= subtest_string test.key tag - ; xenrtresult= List.fold_left ( +. ) 0.0 times - ; rawresult= StartTest times - } - ) - tags - -let parallel_with_vms async_op opname n vms rpc session_id test subtest_name = - (* Not starting in affinity order *) - let vms_names_uuids = - List.map (fun (vm, vmr) -> (vm, vmr.API.vM_name_label, vmr.API.vM_uuid)) vms - in - - (* Manage a set of active tasks using the event system. This could be factored out into a more generic - service if/when necessary *) - - (* Start 'n' at a time *) - let active_tasks = ref [] in - let vms_to_start = ref vms_names_uuids in - let vm_to_start_time = Hashtbl.create 10 in - let tasks_to_vm = Hashtbl.create 10 in - let m = Mutex.create () in - let c = Condition.create () in - let results = ref [] in - (* Take a set of tasks which have finished, update structures and return true if there are no more active tasks - left. 
*) - let process_finished_tasks finished = - let to_delete = ref [] in - let finished = - with_lock m (fun () -> - List.iter - (fun task -> - if List.mem task !active_tasks then ( - ( match Hashtbl.find_opt tasks_to_vm task with - | None -> - debug ~out:stderr - "Ignoring completed task which doesn't correspond to a \ - VM %s" - opname - | Some uuid -> - let started = Hashtbl.find vm_to_start_time uuid in - let time_taken = Unix.gettimeofday () -. started in - results := time_taken :: !results ; - debug "%sing VM uuid '%s'" opname uuid ; - debug "Elapsed time: %f" time_taken ; - Hashtbl.remove vm_to_start_time uuid ; - Hashtbl.remove tasks_to_vm task - ) ; - active_tasks := List.filter (fun x -> x <> task) !active_tasks ; - Condition.signal c ; - to_delete := task :: !to_delete - ) - ) - finished ; - !active_tasks = [] (* true if no active tasks left *) - ) - in - List.iter - (fun task -> Client.Task.destroy ~rpc ~session_id ~self:task) - !to_delete ; - finished - in - (* Run this in a thread body to create a thread which will process each task completion and then terminate when all the - tasks have finished. 
*) - let check_active_tasks () = - let classes = ["task"] in - finally - (fun () -> - let finished = ref false in - while not !finished do - Client.Event.register ~rpc ~session_id ~classes ; - try - (* Need to check once after registering to avoid a race *) - let finished_tasks = - List.filter - (fun task -> - Client.Task.get_status ~rpc ~session_id ~self:task <> `pending - ) - (with_lock m (fun () -> !active_tasks)) - in - finished := process_finished_tasks finished_tasks ; - while not !finished do - (* debug ~out:stderr "Polling for events (%d active tasks)" (with_lock m (fun () -> List.length !active_tasks)); *) - let events = - Event_types.events_of_rpc (Client.Event.next ~rpc ~session_id) - in - let events = List.map Event_helper.record_of_event events in - let finished_tasks = - List.concat_map - (function - | Event_helper.Task (t, Some t_rec) -> - if - t_rec.API.task_status <> `pending - || t_rec.API.task_current_operations <> [] - then - [t] - else - [] - | Event_helper.Task (t, None) -> - [t] - | _ -> - [] - ) - events - in - - finished := process_finished_tasks finished_tasks - done - with - | Api_errors.Server_error (code, _) - when code = Api_errors.events_lost - -> - debug ~out:stderr "Caught EVENTS_LOST; reregistering" ; - Client.Event.unregister ~rpc ~session_id ~classes - done - ) - (fun () -> Client.Event.unregister ~rpc ~session_id ~classes) - in - let control_task = - Client.Task.create ~rpc ~session_id - ~label:("Parallel VM " ^ opname ^ " test") - ~description:"" - in - active_tasks := [control_task] ; - let thread = Thread.create check_active_tasks () in - while !vms_to_start <> [] do - let start_one () = - let vm, _, uuid = List.hd !vms_to_start in - vms_to_start := List.tl !vms_to_start ; - with_lock m (fun () -> - let task = async_op ~rpc ~session_id ~vm in - debug ~out:stderr "Issued VM %s for '%s'" opname uuid ; - Hashtbl.add tasks_to_vm task uuid ; - Hashtbl.add vm_to_start_time uuid (Unix.gettimeofday ()) ; - active_tasks := task :: 
!active_tasks - ) - in - (* Only start at most 'n' at once. Note that the active_task list includes a master control task *) - with_lock m (fun () -> - while List.length !active_tasks > n do - Condition.wait c m - done - ) ; - start_one () - done ; - Client.Task.cancel ~rpc ~session_id ~task:control_task ; - debug ~out:stderr "Finished %sing VMs" opname ; - Thread.join thread ; - { - resultname= test.testname - ; subtest= subtest_name - ; xenrtresult= List.fold_left ( +. ) 0.0 !results - ; rawresult= StartTest !results - } - -(** @param n the maximum number of concurrent invocations of async_op *) -let parallel async_op opname n rpc session_id test = - let vms = Client.VM.get_all_records ~rpc ~session_id in - let tags = List.map (fun (_, vmr) -> vmr.API.vM_tags) vms in - let tags = Listext.List.setify (List.concat tags) in - Printf.printf "Tags are [%s]\n%!" (String.concat "; " tags) ; - List.map - (fun tag -> - let vms = - List.filter (fun (_, vmr) -> List.mem tag vmr.API.vM_tags) vms - in - Printf.printf "%sing %d VMs with tag: %s\n%!" 
opname (List.length vms) tag ; - parallel_with_vms async_op opname n vms rpc session_id test - (subtest_string test.key tag) - ) - tags - -let parallel_startall = - parallel (Client.Async.VM.start ~start_paused:false ~force:false) "start" - -let parallel_stopall = parallel Client.Async.VM.hard_shutdown "stop" - -let stopall rpc session_id test = - let vms = Client.VM.get_all_records ~rpc ~session_id in - let tags = List.map (fun (_, vmr) -> vmr.API.vM_tags) vms in - let tags = Listext.List.setify (List.concat tags) in - List.map - (fun tag -> - debug "Starting VMs with tag: %s" tag ; - let vms = - List.filter (fun (_, vmr) -> List.mem tag vmr.API.vM_tags) vms - in - let vms = - List.sort - (fun (_, vmr1) (_, vmr2) -> - compare vmr1.API.vM_affinity vmr2.API.vM_affinity - ) - vms - in - let vms_names_uuids = - List.map - (fun (vm, vmr) -> (vm, vmr.API.vM_name_label, vmr.API.vM_uuid)) - vms - in - let times = - List.map - (fun (vm, name_label, uuid) -> - debug "Stopping VM uuid '%s' (%s)" uuid name_label ; - let result = - time (fun () -> Client.VM.hard_shutdown ~rpc ~session_id ~vm) - in - debug "Elapsed time: %f" result ; - result - ) - vms_names_uuids - in - { - resultname= test.testname - ; subtest= subtest_string test.key tag - ; xenrtresult= List.fold_left ( +. ) 0.0 times - ; rawresult= ShutdownTest times - } - ) - tags - -let clone num_clones rpc session_id test = - Printf.printf "Doing clone test\n%!" ; - let vms = Client.VM.get_all_records ~rpc ~session_id in - let tags = List.map (fun (_, vmr) -> vmr.API.vM_tags) vms in - let tags = Listext.List.setify (List.concat tags) in - Printf.printf "Tags are [%s]\n%!" (String.concat "; " tags) ; - List.concat_map - (fun tag -> - let vms = - List.filter (fun (_, vmr) -> List.mem tag vmr.API.vM_tags) vms - in - Printf.printf "We've got %d VMs\n%!" 
(List.length vms) ; - (* Start a thread to clone each one n times *) - let body (vm, vmr, res, clone_refs) = - let name_label = vmr.API.vM_name_label in - Printf.printf "Performing %d clones of '%s' within thread...\n%!" - num_clones name_label ; - for j = 0 to num_clones - 1 do - let result = - time (fun () -> - let clone = - Client.VM.clone ~rpc ~session_id ~vm ~new_name:"clone" - in - clone_refs := clone :: !clone_refs - ) - in - Printf.printf "clone %d of '%s' finished: %f\n%!" j name_label result ; - res := result :: !res - done - in - let threads_and_results = - List.map - (fun (vm, vmr) -> - let res : float list ref = ref [] in - let clones : API.ref_VM list ref = ref [] in - let t = Thread.create body (vm, vmr, res, clones) in - (t, (res, clones)) - ) - vms - in - let threads, times_and_clones = List.split threads_and_results in - let times, clones = List.split times_and_clones in - Printf.printf "Waiting for threads to finish...\n%!" ; - List.iter (fun t -> Thread.join t) threads ; - Printf.printf "Threads have finished\n%!" ; - (* times is a list of (list of floats, each being the time to clone a VM), one per SR *) - let times = List.map (fun x -> !x) times in - Printf.printf "Times are: [%s]\n%!" - (String.concat ", " - (List.map - (fun x -> - Printf.sprintf "[%s]" - (String.concat ", " - (List.map (fun x -> Printf.sprintf "%f" x) x) - ) - ) - times - ) - ) ; - let clones = List.map (fun x -> !x) clones in - (* Output the results for cloning each gold VM as a separate record *) - let results = - List.map - (fun x -> - { - resultname= test.testname - ; subtest= subtest_string test.key tag - ; xenrtresult= List.fold_left ( +. ) 0.0 (List.concat times) - ; rawresult= CloneTest x - } - ) - times - in - (* Best-effort clean-up *) - ignore_exn (fun () -> - Printf.printf "Cleaning up...\n%!" 
; - (* Create a thread to clean up each set of clones *) - let threads = - List.mapi - (fun i clones -> - Thread.create - (fun clones -> - List.iteri - (fun j clone -> - Printf.printf "Thread %d destroying VM %d...\n%!" i j ; - let vbds = - Client.VM.get_VBDs ~rpc ~session_id ~self:clone - in - let vdis = - List.map - (fun vbd -> - Client.VBD.get_VDI ~rpc ~session_id ~self:vbd - ) - vbds - in - List.iter - (fun vdi -> - Client.VDI.destroy ~rpc ~session_id ~self:vdi - ) - vdis ; - Client.VM.destroy ~rpc ~session_id ~self:clone - ) - clones - ) - clones - ) - clones - in - Printf.printf "Waiting for clean-up threads to finish...\n%!" ; - List.iter (fun t -> Thread.join t) threads ; - Printf.printf "Clean-up threads have finished\n%!" - ) ; - (* Finally, return the results *) - results - ) - tags - -let recordssize rpc session_id test = - let doxmlrpctest (subtestname, testfn) = - testfn () ; - let res = Int64.to_float !Http_client.last_content_length in - { - resultname= test.testname - ; subtest= subtestname - ; xenrtresult= res - ; rawresult= SizeTest res - } - in - List.map doxmlrpctest - [ - ( "VM records" - , fun () -> ignore (Client.VM.get_all_records ~rpc ~session_id) - ) - ; ( "VBD records" - , fun () -> ignore (Client.VBD.get_all_records ~rpc ~session_id) - ) - ; ( "VIF records" - , fun () -> ignore (Client.VIF.get_all_records ~rpc ~session_id) - ) - ; ( "VDI records" - , fun () -> ignore (Client.VDI.get_all_records ~rpc ~session_id) - ) - ; ( "SR records" - , fun () -> ignore (Client.SR.get_all_records ~rpc ~session_id) - ) - ] - -let tests key = - [ - {run= true; key; testname= "clone"; func= clone 200} - ; {run= true; key; testname= "startall"; func= startall} - ; {run= true; key; testname= "recordssize"; func= recordssize} - ; {run= true; key; testname= "stopall"; func= stopall} - ; {run= false; key; testname= "parallel_startall"; func= parallel_startall 10} - ; {run= false; key; testname= "parallel_stopall"; func= parallel_stopall 10} - ] - -let 
testnames = List.map (fun t -> t.testname) (tests "") - -let runtestnames = - List.map (fun t -> t.testname) (List.filter (fun t -> t.run) (tests "")) - -let runone rpc session_id test = - debug "Running test: %s" test.testname ; - let results = test.func rpc session_id test in - debug "Finished: Results=[%s]" - (String.concat "; " - (List.map - (fun result -> - Printf.sprintf "subtest '%s': %f" result.subtest result.xenrtresult - ) - results - ) - ) ; - results - -let run rpc session_id key run_all iter = - let tests = - if run_all then - tests key - else - List.filter (fun t -> t.run) (tests key) - in - let rec iter_tests n = - if n = 1 then - tests - else - tests @ iter_tests (n - 1) - in - List.fold_left - (fun acc test -> runone rpc session_id test @ acc) - [] (iter_tests iter) diff --git a/ocaml/perftest/testtypes.ml b/ocaml/perftest/testtypes.ml deleted file mode 100644 index 4635c11b898..00000000000 --- a/ocaml/perftest/testtypes.ml +++ /dev/null @@ -1,49 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- *) - -(* Test results *) - -type resultdata = - | StartTest of float list - | SizeTest of float - | ShutdownTest of float list - | CloneTest of float list - -(* one float list per gold VM cloned *) - -type result = { - resultname: string - ; subtest: string - ; xenrtresult: float - ; rawresult: resultdata (* Specific to the actual test *) -} - -let header = "RAW" - -let sep = ':' - -let to_string (results : result list) = - Printf.sprintf "%s%c%s" header sep - (Marshal.to_string results [Marshal.No_sharing]) - -let from_string s : result list option = - let open Xapi_stdext_std.Xstringext.String in - if startswith header s then - match split ~limit:2 sep s with - | [_; r] -> - Some (Marshal.from_string r 0) - | _ -> - None - else - None From 2b02240c32dc9e2723d44b9d58bd8f624c464c52 Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 4 Dec 2024 11:30:23 +0000 Subject: [PATCH 64/86] Remove references to perftest Signed-off-by: Rob Hoes --- ocaml/libs/http-lib/http_client.ml | 6 ------ ocaml/libs/http-lib/http_client.mli | 3 --- quality-gate.sh | 6 +++--- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/ocaml/libs/http-lib/http_client.ml b/ocaml/libs/http-lib/http_client.ml index 5cb67212bcc..fed8f5f7780 100644 --- a/ocaml/libs/http-lib/http_client.ml +++ b/ocaml/libs/http-lib/http_client.ml @@ -200,9 +200,6 @@ let response_of_fd ?(use_fastpath = false) fd = __FUNCTION__ (Printexc.to_string e) ; None -(** See perftest/tests.ml *) -let last_content_length = ref 0L - let http_rpc_recv_response use_fastpath error_msg fd = match response_of_fd ~use_fastpath fd with | None -> @@ -212,9 +209,6 @@ let http_rpc_recv_response use_fastpath error_msg fd = | ("401" | "403" | "500") as http_code -> raise (Http_error (http_code, error_msg)) | "200" -> - Option.iter - (fun x -> last_content_length := x) - response.Http.Response.content_length ; response | code -> raise (Http_request_rejected (Printf.sprintf "%s: %s" code error_msg)) diff --git 
a/ocaml/libs/http-lib/http_client.mli b/ocaml/libs/http-lib/http_client.mli index 68d65649e3c..3d9b6591d5f 100644 --- a/ocaml/libs/http-lib/http_client.mli +++ b/ocaml/libs/http-lib/http_client.mli @@ -40,6 +40,3 @@ val rpc : (** [rpc fd request body f] marshals the HTTP request represented by [request] and [body] through file descriptor [fd] and then applies the response to [f]. On failure an exception is thrown. *) - -val last_content_length : int64 ref -(** See perftest/tests.ml *) diff --git a/quality-gate.sh b/quality-gate.sh index b72ca099aa7..6255ce0575e 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -26,9 +26,9 @@ verify-cert () { mli-files () { N=497 - # do not count ml files from the tests in ocaml/{tests/perftest/quicktest} - MLIS=$(git ls-files -- '**/*.mli' | grep -vE "ocaml/tests|ocaml/perftest|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) - MLS=$(git ls-files -- '**/*.ml' | grep -vE "ocaml/tests|ocaml/perftest|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) + # do not count ml files from the tests in ocaml/{tests/quicktest} + MLIS=$(git ls-files -- '**/*.mli' | grep -vE "ocaml/tests|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) + MLS=$(git ls-files -- '**/*.ml' | grep -vE "ocaml/tests|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) num_mls_without_mlis=$(comm -23 <(sort <<<"$MLS") <(sort <<<"$MLIS") | wc -l) if [ "$num_mls_without_mlis" -eq "$N" ]; then echo "OK counted $num_mls_without_mlis .ml files without an .mli" From fd011dd8f1b3a82bf747cfe9f97f13cb1da414cf Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 4 Dec 2024 11:59:32 +0000 Subject: [PATCH 65/86] Update quality-gate Signed-off-by: Rob Hoes --- quality-gate.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/quality-gate.sh b/quality-gate.sh index 
6255ce0575e..a7ffefea72b 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -3,7 +3,7 @@ set -e list-hd () { - N=293 + N=277 LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$LIST_HD" -eq "$N" ]; then echo "OK counted $LIST_HD List.hd usages" @@ -14,7 +14,7 @@ list-hd () { } verify-cert () { - N=14 + N=13 NONE=$(git grep -r --count 'verify_cert:None' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$NONE" -eq "$N" ]; then echo "OK counted $NONE usages of verify_cert:None" @@ -106,7 +106,7 @@ unixgetenv () { } hashtblfind () { - N=36 + N=35 # Looks for all .ml files except the ones using Core.Hashtbl.find, # which already returns Option HASHTBLFIND=$(git grep -P -r --count 'Hashtbl.find(?!_opt)' -- '**/*.ml' ':!ocaml/xapi-storage-script/main.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) From 03fccc872ff6d894012b1454382dccad44d5e1e6 Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 4 Dec 2024 16:28:34 +0000 Subject: [PATCH 66/86] CA-401075: remove misleading logs from HTTP client Recent changes to Http_client were made to log errors and backtraces in case of unusual errors, in order to better diagnose problems. Unfortunately, this also led to benign exceptions being logged, which caused people to suspect problems where the exception was actually "expected". In particular the following error started appearing in the logs many times: Raised Http_client.Parse_error("Expected initial header []") The case here is that the client has sent an HTTP request, but the server disconnects before sending a response. It turns out that this happens commonly when an external connection is received by xapi and handed over to another process, such as xcp-rrdd. The connection's file descriptor is passed to the other process, and the original HTTP request is forwarded over a local socket. 
The other process continues to handle the request and sends responses to the forwarded socket, but never to the local socket, where xapi is waiting for the response. This is a quick change that makes the caller of the receive function aware that no response was sent, without logging scary errors, to fix the immediate problem. In addition, we should enhance the handover process to actually send responses, although funcionally this is not an issue. Signed-off-by: Rob Hoes --- ocaml/libs/http-lib/http_client.ml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ocaml/libs/http-lib/http_client.ml b/ocaml/libs/http-lib/http_client.ml index 5cb67212bcc..6f1831fb855 100644 --- a/ocaml/libs/http-lib/http_client.ml +++ b/ocaml/libs/http-lib/http_client.ml @@ -119,6 +119,8 @@ let response_of_fd_exn_slow fd = ; additional_headers= !headers ; body= None } + | [] -> + raise End_of_file | _ -> error "Failed to parse HTTP response status line [%s]" line ; raise (Parse_error (Printf.sprintf "Expected initial header [%s]" line)) @@ -192,6 +194,9 @@ let response_of_fd ?(use_fastpath = false) fd = with | Unix.Unix_error (_, _, _) as e -> raise e + | End_of_file -> + info "No response: connection closed by server" ; + None | e -> Backtrace.is_important e ; let bt = Backtrace.get e in From efff0953c92bca3e641607d7ffc6c04788cdd84b Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Wed, 4 Dec 2024 17:10:54 +0000 Subject: [PATCH 67/86] CP-52807: No more cluster stack alert As per the new design, we are no longer telling user that their cluster stack version is out of date, since the corosync upgrade will be part of the compulsory actions enforced by XC upgrade wizard and the host installer. 
Signed-off-by: Vincent Liu --- ocaml/xapi-consts/api_messages.ml | 2 -- ocaml/xapi/xapi_clustering.ml | 59 ++----------------------------- 2 files changed, 3 insertions(+), 58 deletions(-) diff --git a/ocaml/xapi-consts/api_messages.ml b/ocaml/xapi-consts/api_messages.ml index 8e2ed38df7f..812340d1040 100644 --- a/ocaml/xapi-consts/api_messages.ml +++ b/ocaml/xapi-consts/api_messages.ml @@ -311,8 +311,6 @@ let cluster_host_leaving = addMessage "CLUSTER_HOST_LEAVING" 3L let cluster_host_joining = addMessage "CLUSTER_HOST_JOINING" 4L -let cluster_stack_out_of_date = addMessage "CLUSTER_STACK_OUT_OF_DATE" 3L - (* Certificate expiration messages *) let host_server_certificate_expiring = "HOST_SERVER_CERTIFICATE_EXPIRING" diff --git a/ocaml/xapi/xapi_clustering.ml b/ocaml/xapi/xapi_clustering.ml index d2b61be2f55..4bef40e3d4d 100644 --- a/ocaml/xapi/xapi_clustering.ml +++ b/ocaml/xapi/xapi_clustering.ml @@ -562,8 +562,6 @@ module Watcher = struct let finish_watch = Atomic.make false - let cluster_stack_watcher : bool Atomic.t = Atomic.make false - (* This function exists to store the fact that the watcher should be destroyed, to avoid the race that the cluster is destroyed, while the watcher is still waiting/stabilising. 
@@ -632,41 +630,6 @@ module Watcher = struct () done - let watch_cluster_stack_version ~__context ~host = - match find_cluster_host ~__context ~host with - | Some ch -> - let cluster_ref = Db.Cluster_host.get_cluster ~__context ~self:ch in - let cluster_rec = Db.Cluster.get_record ~__context ~self:cluster_ref in - if - Cluster_stack.of_version - ( cluster_rec.API.cluster_cluster_stack - , cluster_rec.API.cluster_cluster_stack_version - ) - = Cluster_stack.Corosync2 - then ( - debug "%s: Detected Corosync 2 running as cluster stack" __FUNCTION__ ; - let body = - "The current cluster stack version of Corosync 2 is out of date, \ - consider updating to Corosync 3" - in - let name, priority = Api_messages.cluster_stack_out_of_date in - let host_uuid = Db.Host.get_uuid ~__context ~self:host in - - Helpers.call_api_functions ~__context (fun rpc session_id -> - let _ : [> `message] Ref.t = - Client.Client.Message.create ~rpc ~session_id ~name ~priority - ~cls:`Host ~obj_uuid:host_uuid ~body - in - () - ) - ) else - debug - "%s: Detected Corosync 3 as cluster stack, not generating a \ - warning messsage" - __FUNCTION__ - | None -> - debug "%s: No cluster host, no need to watch" __FUNCTION__ - (** [create_as_necessary] will create cluster watchers on the coordinator if they are not already created. 
There is no need to destroy them: once the clustering daemon is disabled, @@ -674,7 +637,7 @@ module Watcher = struct let create_as_necessary ~__context ~host = let is_master = Helpers.is_pool_master ~__context ~host in let daemon_enabled = Daemon.is_enabled () in - if is_master && daemon_enabled then ( + if is_master && daemon_enabled then if Atomic.compare_and_set cluster_change_watcher false true then ( debug "%s: create watcher for corosync-notifyd on coordinator" __FUNCTION__ ; @@ -687,24 +650,8 @@ module Watcher = struct (* someone else must have gone into the if branch above and created the thread before us, leave it to them *) debug "%s: not create watcher for corosync-notifyd as it already exists" - __FUNCTION__ ; - - if Xapi_cluster_helpers.corosync3_enabled ~__context then - if Atomic.compare_and_set cluster_stack_watcher false true then ( - debug - "%s: create cluster stack watcher for out-of-date cluster stack \ - (corosync2)" - __FUNCTION__ ; - let _ : Thread.t = - Thread.create - (fun () -> watch_cluster_stack_version ~__context ~host) - () - in - () - ) else - debug "%s: not create watcher for cluster stack as it already exists" - __FUNCTION__ - ) else + __FUNCTION__ + else debug "%s not create watcher because we are %b master and clustering is \ enabled %b " From 92114cd8c51968aeb264da0412fba5929ce1fa37 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Sun, 1 Dec 2024 13:24:56 +0000 Subject: [PATCH 68/86] Rewrite Delay module Old implementation had different issues. Just use mutexes and conditions using C stubs. Current Ocaml Condition module does not have support for timeout waiting for conditions, so use C stubs. This implementation does not require opening a pipe. This reduces the possibilities of errors calling "wait" to zero. Mostly of the times it does not require kernel calls. 
Signed-off-by: Frediano Ziglio --- .../lib/xapi-stdext-threads/delay_stubs.c | 169 ++++++++++++++++++ .../xapi-stdext/lib/xapi-stdext-threads/dune | 5 + .../lib/xapi-stdext-threads/threadext.ml | 76 ++------ .../lib/xapi-stdext-threads/threadext.mli | 6 +- 4 files changed, 195 insertions(+), 61 deletions(-) create mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/delay_stubs.c diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/delay_stubs.c b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/delay_stubs.c new file mode 100644 index 00000000000..05138c263d9 --- /dev/null +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/delay_stubs.c @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2024 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include + +typedef struct delay { + pthread_mutex_t mtx; + pthread_cond_t cond; + bool signaled; +} delay; + +// Initialize delay +// Returns error number or 0 if success +static int delay_init(delay *d) +{ + int err; + pthread_condattr_t cond_attr; + + d->signaled = false; + + err = pthread_condattr_init(&cond_attr); + if (err) + goto err0; + err = pthread_condattr_setclock(&cond_attr, CLOCK_MONOTONIC); + if (!err) + err = pthread_cond_init(&d->cond, &cond_attr); + if (err) + goto err1; + err = pthread_mutex_init(&d->mtx, NULL); + if (err) + goto err2; + pthread_condattr_destroy(&cond_attr); + return 0; + +err2: + pthread_cond_destroy(&d->cond); +err1: + pthread_condattr_destroy(&cond_attr); +err0: + return err; +} + +static void delay_destroy(delay *d) +{ + pthread_cond_destroy(&d->cond); + pthread_mutex_destroy(&d->mtx); +} + +static void delay_signal(delay *d) +{ + // there are quite some chances lock is not held + if (pthread_mutex_trylock(&d->mtx) == 0) { + d->signaled = true; + pthread_cond_signal(&d->cond); + pthread_mutex_unlock(&d->mtx); + return; + } + + // slow way, release engine + caml_release_runtime_system(); + pthread_mutex_lock(&d->mtx); + d->signaled = true; + pthread_cond_signal(&d->cond); + pthread_mutex_unlock(&d->mtx); + caml_acquire_runtime_system(); +} + +// Wait for deadline or signal. +// Returns error number or 0 if success. +// Error can be ETIMEDOUT. 
+int delay_wait(delay *d, const struct timespec *deadline) +{ + int err; + + caml_release_runtime_system(); + pthread_mutex_lock(&d->mtx); + do { + if (d->signaled) { + d->signaled = false; + err = 0; + break; + } + err = pthread_cond_timedwait(&d->cond, &d->mtx, deadline); + } while (err == 0); + pthread_mutex_unlock(&d->mtx); + caml_acquire_runtime_system(); + return err; +} + +#define delay_val(v) (*((delay **)Data_custom_val(v))) + +static void delay_finalize(value v_delay) +{ + delay *d = delay_val(v_delay); + delay_destroy(d); + caml_stat_free(d); +} + +static struct custom_operations delay_ops = { + "xapi.delay", + delay_finalize, + custom_compare_default, + custom_hash_default, + custom_serialize_default, + custom_deserialize_default, + custom_compare_ext_default, + custom_fixed_length_default +}; + +CAMLprim value caml_xapi_delay_create(value v_unit) +{ + CAMLparam1(v_unit); + CAMLlocal1(res); + delay *d; + int err; + + d = caml_stat_alloc(sizeof(*d)); + err = delay_init(d); + if (err) { + caml_stat_free(d); + unix_error(err, "caml_delay_create", Nothing); + } + res = caml_alloc_custom(&delay_ops, sizeof(delay *), 0, 1); + delay_val(res) = d; + CAMLreturn(res); +} + +CAMLprim value caml_xapi_delay_signal(value v_delay) +{ + CAMLparam1(v_delay); + delay *d = delay_val(v_delay); + delay_signal(d); + CAMLreturn(Val_unit); +} + +CAMLprim value caml_xapi_delay_wait(value v_delay, value v_deadline) +{ + CAMLparam2(v_delay, v_deadline); + delay *d = delay_val(v_delay); + uint64_t deadline = (uint64_t) Int64_val(v_deadline); + struct timespec ts = { + deadline / 1000000000u, + deadline % 1000000000u + }; + + int err = delay_wait(d, &ts); + if (err != 0 && err != ETIMEDOUT) + unix_error(err, "caml_delay_wait", Nothing); + + CAMLreturn(err ? 
Val_true : Val_false); +} diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune index e0437e881ab..7fcff9e08c2 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune @@ -3,10 +3,15 @@ (name xapi_stdext_threads) (modules :standard \ ipq scheduler threadext_test ipq_test) (libraries + mtime + mtime.clock.os threads.posix unix xapi-stdext-unix xapi-stdext-pervasives) + (foreign_stubs + (language c) + (names delay_stubs)) ) (library diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml index 311d985ca69..b954a159ddb 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml @@ -55,69 +55,27 @@ let thread_iter f xs = match thread_iter_all_exns f xs with [] -> () | (_, e) :: _ -> raise e module Delay = struct - (* Concrete type is the ends of a pipe *) - type t = { - (* A pipe is used to wake up a thread blocked in wait: *) - mutable pipe_in: Unix.file_descr option - ; (* Indicates that a signal arrived before a wait: *) - mutable signalled: bool - ; m: M.t - } + type t - let make () = {pipe_in= None; signalled= false; m= M.create ()} + external make : unit -> t = "caml_xapi_delay_create" - exception Pre_signalled + external signal : t -> unit = "caml_xapi_delay_signal" - let wait (x : t) (seconds : float) = - let to_close = ref [] in - let close' fd = - if List.mem fd !to_close then Unix.close fd ; - to_close := List.filter (fun x -> fd <> x) !to_close - in - finally - (fun () -> - try - let pipe_out = - Mutex.execute x.m (fun () -> - if x.signalled then ( - x.signalled <- false ; - raise Pre_signalled - ) ; - let pipe_out, pipe_in = Unix.pipe () in - (* these will be unconditionally closed on exit *) - to_close := [pipe_out; pipe_in] ; - x.pipe_in <- Some pipe_in ; - x.signalled 
<- false ; - pipe_out - ) - in - let open Xapi_stdext_unix.Unixext in - (* flush the single byte from the pipe *) - try - let (_ : string) = - time_limited_single_read pipe_out 1 ~max_wait:seconds - in - false - with Timeout -> true - (* return true if we waited the full length of time, false if we were woken *) - with Pre_signalled -> false - ) - (fun () -> - Mutex.execute x.m (fun () -> - x.pipe_in <- None ; - List.iter close' !to_close - ) - ) + external wait : t -> int64 -> bool = "caml_xapi_delay_wait" - let signal (x : t) = - Mutex.execute x.m (fun () -> - match x.pipe_in with - | Some fd -> - ignore (Unix.write fd (Bytes.of_string "X") 0 1) - | None -> - x.signalled <- true - (* If the wait hasn't happened yet then store up the signal *) - ) + let wait d t = + if t <= 0. then + true + else + match Mtime.Span.of_float_ns (t *. 1e9) with + | Some span -> + let now = Mtime_clock.now () in + let deadline = + Mtime.add_span now span |> Option.value ~default:Mtime.max_stamp + in + wait d (Mtime.to_uint64_ns deadline) + | None -> + invalid_arg "Time specified too big" end let wait_timed_read fd timeout = diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli index b5edcff21b8..a1af35ccbeb 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli @@ -31,8 +31,10 @@ module Delay : sig val wait : t -> float -> bool (** Blocks the calling thread for a given period of time with the option of returning early if someone calls 'signal'. Returns true if the full time - period elapsed and false if signalled. Note that multple 'signals' are - coalesced; 'signals' sent before 'wait' is called are not lost. *) + period elapsed and false if signalled. Note that multiple 'signals' are + coalesced; 'signals' sent before 'wait' is called are not lost. 
+ Only one thread should call 'wait' for a given 'Delay', attempts + to call from multiple thread is an undefined behaviour. *) val signal : t -> unit (** Sends a signal to a waiting thread. See 'wait' *) From d3c9a5010fa2a3746b68f6d10681b00965570e65 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Thu, 5 Dec 2024 13:57:59 +0000 Subject: [PATCH 69/86] CA-394851: Update allowed operations on the cloned VBD Default empty allowed_operations on a cloned VBD means that XenCenter does not display the DVD option in the console tab for VMs cloned from templates, for example. Follow the practice in xapi_vbd, and update_allowed_operations immediately after Db.VBD.create. Signed-off-by: Andrii Sultanov --- ocaml/xapi/xapi_vbd_helpers.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/xapi/xapi_vbd_helpers.ml b/ocaml/xapi/xapi_vbd_helpers.ml index f6b1cc260e7..a63fa6edf1f 100644 --- a/ocaml/xapi/xapi_vbd_helpers.ml +++ b/ocaml/xapi/xapi_vbd_helpers.ml @@ -443,4 +443,5 @@ let copy ~__context ?vdi ~vm vbd = ~qos_algorithm_type:all.API.vBD_qos_algorithm_type ~qos_algorithm_params:all.API.vBD_qos_algorithm_params ~qos_supported_algorithms:[] ~runtime_properties:[] ~metrics ; + update_allowed_operations ~__context ~self:new_vbd ; new_vbd From 1586c747cab62ce1cfb7796cbc01f02f5a2548f0 Mon Sep 17 00:00:00 2001 From: Lunfan Zhang Date: Mon, 25 Nov 2024 09:20:09 +0000 Subject: [PATCH 70/86] CP-51429 Avoid redundant processing when full metadata already exists during sync_updates Signed-off-by: Lunfan Zhang --- ocaml/xapi/repository.ml | 12 +++++++++++- ocaml/xapi/repository.mli | 2 +- ocaml/xapi/xapi_pool.ml | 6 +++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index dd123557a49..8024818d4d9 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -25,6 +25,8 @@ module Pkgs = (val Pkg_mgr.get_pkg_mgr) let capacity_in_parallel = 16 +let ( // ) = Filename.concat + (* The cache below is 
protected by pool's current_operations locking mechanism *) let updates_in_cache : (API.ref_host, Yojson.Basic.t) Hashtbl.t = Hashtbl.create 64 @@ -201,7 +203,15 @@ let sync ~__context ~self ~token ~token_id = * I.E. proxy username/password and temporary token file path. *) write_initial_yum_config () - ) + ) ; + (* The custom yum-utils will fully download repository metadata.*) + let repodata_dir = + !Xapi_globs.local_pool_repo_dir + // repo_name + // "repodata" + // "repomd.xml.asc" + in + Sys.file_exists repodata_dir with e -> error "Failed to sync with remote YUM repository: %s" (ExnHelper.string_of_exn e) ; diff --git a/ocaml/xapi/repository.mli b/ocaml/xapi/repository.mli index e7bddad8bad..81e95730ac9 100644 --- a/ocaml/xapi/repository.mli +++ b/ocaml/xapi/repository.mli @@ -40,7 +40,7 @@ val sync : -> self:[`Repository] API.Ref.t -> token:string -> token_id:string - -> unit + -> bool val create_pool_repository : __context:Context.t -> self:[`Repository] API.Ref.t -> unit diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 3a7dee78735..cfe1f5e3234 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -3534,10 +3534,10 @@ let sync_repos ~__context ~self ~repos ~force ~token ~token_id = repos |> List.iter (fun repo -> if force then cleanup_pool_repo ~__context ~self:repo ; - sync ~__context ~self:repo ~token ~token_id ; - (* Dnf sync all the metadata including updateinfo, + let complete = sync ~__context ~self:repo ~token ~token_id in + (* Dnf and custom yum-utils sync all the metadata including updateinfo, * Thus no need to re-create pool repository *) - if Pkgs.manager = Yum then + if Pkgs.manager = Yum && complete = false then create_pool_repository ~__context ~self:repo ) ; let checksum = set_available_updates ~__context in From f936acbd933749ebf9d5a1244e01e6350d694253 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Fri, 6 Dec 2024 14:32:35 +0000 Subject: [PATCH 71/86] Delay: wait a bit more testing the module In some 
environments the time ranges checked are too strict causing test to fail. Previously the maximum error accepted was 10 ms, increase to 50 ms. Also increase timeouts to reduce error/value ratio. Signed-off-by: Frediano Ziglio --- .../lib/xapi-stdext-threads/threadext_test.ml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml index 0442e302ab7..b93df9f47a8 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml @@ -31,7 +31,7 @@ Single simple signal stored let simple () = let d = Delay.make () in Delay.signal d ; - delay_wait_check ~min:0. ~max:0.01 d 1.0 false + delay_wait_check ~min:0. ~max:0.05 d 1.0 false (* No signal @@ -39,7 +39,7 @@ No signal *) let no_signal () = let d = Delay.make () in - delay_wait_check ~min:0.1 ~max:0.11 d 0.1 true + delay_wait_check ~min:0.2 ~max:0.25 d 0.2 true (* Signal twice, collapsed @@ -52,8 +52,8 @@ let collapsed () = let d = Delay.make () in Delay.signal d ; Delay.signal d ; - delay_wait_check ~min:0. ~max:0.01 d 0.1 false ; - delay_wait_check ~min:0.1 ~max:0.11 d 0.1 true + delay_wait_check ~min:0. 
~max:0.05 d 0.2 false ; + delay_wait_check ~min:0.2 ~max:0.25 d 0.2 true (* Signal from another thread @@ -62,8 +62,8 @@ Signal from another thread *) let other_thread () = let d = Delay.make () in - let th = Thread.create (fun d -> Thread.delay 0.1 ; Delay.signal d) d in - delay_wait_check ~min:0.1 ~max:0.11 d 1.0 false ; + let th = Thread.create (fun d -> Thread.delay 0.2 ; Delay.signal d) d in + delay_wait_check ~min:0.2 ~max:0.25 d 1.0 false ; Thread.join th let tests = From 3476a22562171cbb79de61359f11d627abe2586c Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Thu, 28 Nov 2024 14:47:51 +0000 Subject: [PATCH 72/86] Simple test for periodic scheduler Test that the event is correctly executed. Signed-off-by: Frediano Ziglio --- .../xapi-stdext/lib/xapi-stdext-threads/dune | 6 +-- .../lib/xapi-stdext-threads/scheduler_test.ml | 37 +++++++++++++++++++ .../xapi-stdext-threads/scheduler_test.mli | 0 3 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml create mode 100644 ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.mli diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune index 7fcff9e08c2..d8036380cd7 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune @@ -1,7 +1,7 @@ (library (public_name xapi-stdext-threads) (name xapi_stdext_threads) - (modules :standard \ ipq scheduler threadext_test ipq_test) + (modules :standard \ ipq scheduler threadext_test ipq_test scheduler_test) (libraries mtime mtime.clock.os @@ -22,8 +22,8 @@ ) (tests - (names threadext_test ipq_test) + (names threadext_test ipq_test scheduler_test) (package xapi-stdext-threads) - (modules threadext_test ipq_test) + (modules threadext_test ipq_test scheduler_test) (libraries xapi_stdext_threads alcotest mtime.clock.os mtime fmt threads.posix 
xapi_stdext_threads_scheduler) ) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml new file mode 100644 index 00000000000..272b0572943 --- /dev/null +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml @@ -0,0 +1,37 @@ +(* + * Copyright (C) 2024 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module Scheduler = Xapi_stdext_threads_scheduler.Scheduler + +let started = Atomic.make false + +let start_schedule () = + if not (Atomic.exchange started true) then + Thread.create Scheduler.loop () |> ignore + +let send event data = Event.(send event data |> sync) + +let receive event = Event.(receive event |> sync) + +let test_single () = + let finished = Event.new_channel () in + Scheduler.add_to_queue "one" Scheduler.OneShot 0.001 (fun () -> + send finished true + ) ; + start_schedule () ; + Alcotest.(check bool) "result" true (receive finished) + +let tests = [("test_single", `Quick, test_single)] + +let () = Alcotest.run "Scheduler" [("generic", tests)] diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.mli new file mode 100644 index 00000000000..e69de29bb2d From 624926133b9b821391c2f5cfcbde72ed35afdb87 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Fri, 6 Dec 2024 09:56:53 +0000 Subject: [PATCH 73/86] Limit mutex contention in add_to_queue 
Signed-off-by: Frediano Ziglio --- .../lib/xapi-stdext-threads/scheduler.ml | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml index 3e8543ec04d..50c4c17d4b9 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml @@ -49,14 +49,11 @@ module Clock = struct end let add_to_queue ?(signal = true) name ty start newfunc = - with_lock lock (fun () -> - let ( ++ ) = Clock.add_span in - Ipq.add queue - { - Ipq.ev= {func= newfunc; ty; name} - ; Ipq.time= Mtime_clock.now () ++ start - } - ) ; + let ( ++ ) = Clock.add_span in + let item = + {Ipq.ev= {func= newfunc; ty; name}; Ipq.time= Mtime_clock.now () ++ start} + in + with_lock lock (fun () -> Ipq.add queue item) ; if signal then Delay.signal delay let remove_from_queue name = From f86c07666fd098b471511cd742c6bb08b8e21514 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Fri, 6 Dec 2024 10:05:22 +0000 Subject: [PATCH 74/86] Compare correctly Mtime.t Do not use ">" or other operators to compare Mtime.t, the value is intended to be unsigned and should be compared with Int64.unsigned_compare as Mtime functions do. 
Signed-off-by: Frediano Ziglio --- ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml index 50c4c17d4b9..2e0f28f8800 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml @@ -72,7 +72,7 @@ let loop () = else let next = with_lock lock (fun () -> Ipq.maximum queue) in let now = Mtime_clock.now () in - if next.Ipq.time < now then ( + if Mtime.is_earlier next.Ipq.time ~than:now then ( let todo = (with_lock lock (fun () -> Ipq.pop_maximum queue)).Ipq.ev in From 2950dd91f171b1be1297e446a8585fa1a1e10555 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Fri, 6 Dec 2024 10:10:19 +0000 Subject: [PATCH 75/86] Protect queue with mutex in remove_from_queue Signed-off-by: Frediano Ziglio --- ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml index 2e0f28f8800..03ee8ef976e 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml @@ -57,6 +57,7 @@ let add_to_queue ?(signal = true) name ty start newfunc = if signal then Delay.signal delay let remove_from_queue name = + with_lock lock @@ fun () -> let index = Ipq.find_p queue (fun {name= n; _} -> name = n) in if index > -1 then Ipq.remove queue index From 529eeaa98c9d225ac9a49c70094acfde4b6f62c7 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Fri, 6 Dec 2024 10:17:45 +0000 Subject: [PATCH 76/86] Remove signal parameter from add_to_queue The parameter was false only to support an internal usage, external users should always alert the thread loop. 
Signed-off-by: Frediano Ziglio --- .../libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml | 8 ++++++-- .../xapi-stdext/lib/xapi-stdext-threads/scheduler.mli | 3 +-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml index 03ee8ef976e..a8c56dc47e8 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml @@ -48,7 +48,7 @@ module Clock = struct Mtime.min_stamp end -let add_to_queue ?(signal = true) name ty start newfunc = +let add_to_queue_internal ?(signal = true) name ty start newfunc = let ( ++ ) = Clock.add_span in let item = {Ipq.ev= {func= newfunc; ty; name}; Ipq.time= Mtime_clock.now () ++ start} @@ -56,6 +56,9 @@ let add_to_queue ?(signal = true) name ty start newfunc = with_lock lock (fun () -> Ipq.add queue item) ; if signal then Delay.signal delay +let add_to_queue name ty start newfunc = + add_to_queue_internal name ty start newfunc + let remove_from_queue name = with_lock lock @@ fun () -> let index = Ipq.find_p queue (fun {name= n; _} -> name = n) in @@ -82,7 +85,8 @@ let loop () = | OneShot -> () | Periodic timer -> - add_to_queue ~signal:false todo.name todo.ty timer todo.func + add_to_queue_internal ~signal:false todo.name todo.ty timer + todo.func ) else (* Sleep until next event. 
*) let sleep = Mtime.(span next.Ipq.time now) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.mli index b087a35c5cf..d4d19b1f790 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.mli @@ -18,8 +18,7 @@ type func_ty = | OneShot (** Fire just once *) | Periodic of float (** Fire periodically with a given period in seconds *) -val add_to_queue : - ?signal:bool -> string -> func_ty -> float -> (unit -> unit) -> unit +val add_to_queue : string -> func_ty -> float -> (unit -> unit) -> unit (** Start a new timer. *) val remove_from_queue : string -> unit From 2c192c955825b7f8833c2e2565cc02fc887f44a6 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Tue, 26 Nov 2024 22:30:31 +0000 Subject: [PATCH 77/86] Fix multiple issues in periodic scheduler - Do not wait huge amount of time if the queue is empty but use Delay.wait if possible; - Fix delete of periodic events. In case the event is processed it's removed from the queue. Previously remove_from_queue was not able to mark this event as removed; - Do not race between checking the first event and removing it. These 2 actions were done in 2 separate critical section, now they are done in a single one. 
Signed-off-by: Frediano Ziglio --- .../lib/xapi-stdext-threads/scheduler.ml | 79 ++++++++++++------- 1 file changed, 50 insertions(+), 29 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml index a8c56dc47e8..a544ed79bbb 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml @@ -27,6 +27,8 @@ let delay = Delay.make () let queue_default = {func= (fun () -> ()); ty= OneShot; name= ""} +let (pending_event : t option ref) = ref None + let (queue : t Ipq.t) = Ipq.create 50 queue_default let lock = Mutex.create () @@ -48,50 +50,68 @@ module Clock = struct Mtime.min_stamp end -let add_to_queue_internal ?(signal = true) name ty start newfunc = +let add_to_queue name ty start newfunc = let ( ++ ) = Clock.add_span in let item = {Ipq.ev= {func= newfunc; ty; name}; Ipq.time= Mtime_clock.now () ++ start} in with_lock lock (fun () -> Ipq.add queue item) ; - if signal then Delay.signal delay - -let add_to_queue name ty start newfunc = - add_to_queue_internal name ty start newfunc + Delay.signal delay let remove_from_queue name = with_lock lock @@ fun () -> - let index = Ipq.find_p queue (fun {name= n; _} -> name = n) in - if index > -1 then - Ipq.remove queue index + match !pending_event with + | Some ev when ev.name = name -> + pending_event := None + | Some _ | None -> + let index = Ipq.find_p queue (fun {name= n; _} -> name = n) in + if index > -1 then + Ipq.remove queue index + +let add_periodic_pending () = + with_lock lock @@ fun () -> + match !pending_event with + | Some ({ty= Periodic timer; _} as ev) -> + let ( ++ ) = Clock.add_span in + let item = {Ipq.ev; Ipq.time= Mtime_clock.now () ++ timer} in + Ipq.add queue item ; + pending_event := None + | Some {ty= OneShot; _} -> + pending_event := None + | None -> + () let loop () = debug "%s started" __MODULE__ ; try while true do - let empty = 
with_lock lock (fun () -> Ipq.is_empty queue) in - if empty then - Thread.delay 10.0 - (* Doesn't happen often - the queue isn't usually empty *) - else - let next = with_lock lock (fun () -> Ipq.maximum queue) in - let now = Mtime_clock.now () in - if Mtime.is_earlier next.Ipq.time ~than:now then ( - let todo = - (with_lock lock (fun () -> Ipq.pop_maximum queue)).Ipq.ev - in + let now = Mtime_clock.now () in + let deadline, item = + with_lock lock @@ fun () -> + (* empty: wait till we get something *) + if Ipq.is_empty queue then + (Clock.add_span now 10.0, None) + else + let next = Ipq.maximum queue in + if Mtime.is_later next.Ipq.time ~than:now then + (* not expired: wait till time or interrupted *) + (next.Ipq.time, None) + else ( + (* remove expired item *) + Ipq.pop_maximum queue |> ignore ; + (* save periodic to be scheduled again *) + if next.Ipq.ev.ty <> OneShot then pending_event := Some next.Ipq.ev ; + (now, Some next.Ipq.ev) + ) + in + match item with + | Some todo -> (try todo.func () with _ -> ()) ; - match todo.ty with - | OneShot -> - () - | Periodic timer -> - add_to_queue_internal ~signal:false todo.name todo.ty timer - todo.func - ) else (* Sleep until next event. *) + add_periodic_pending () + | None -> ( + (* Sleep until next event. *) let sleep = - Mtime.(span next.Ipq.time now) - |> Mtime.Span.(add ms) - |> Clock.span_to_s + Mtime.(span deadline now) |> Mtime.Span.(add ms) |> Clock.span_to_s in try ignore (Delay.wait delay sleep) with e -> @@ -107,6 +127,7 @@ let loop () = normal delay. New events may be missed." detailed_msg ; Thread.delay sleep + ) done with _ -> error From 935c84f865bcdafac73fe203d6c5c1f058a4f22d Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Thu, 28 Nov 2024 15:19:12 +0000 Subject: [PATCH 78/86] Add test for removing periodic event in periodic scheduler Potentially a periodic event can be cancelled while this is processed. Simulate this behavior removing the event inside the handler. 
This was fixed by previous commit. Signed-off-by: Frediano Ziglio --- .../lib/xapi-stdext-threads/scheduler_test.ml | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml index 272b0572943..8b0baeb74b1 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml @@ -24,6 +24,12 @@ let send event data = Event.(send event data |> sync) let receive event = Event.(receive event |> sync) +let elapsed_ms cnt = + let elapsed_ns = Mtime_clock.count cnt |> Mtime.Span.to_uint64_ns in + Int64.(div elapsed_ns 1000000L |> to_int) + +let is_less = Alcotest.(testable (pp int)) Stdlib.( > ) + let test_single () = let finished = Event.new_channel () in Scheduler.add_to_queue "one" Scheduler.OneShot 0.001 (fun () -> @@ -32,6 +38,28 @@ let test_single () = start_schedule () ; Alcotest.(check bool) "result" true (receive finished) -let tests = [("test_single", `Quick, test_single)] +let test_remove_self () = + let which = Event.new_channel () in + Scheduler.add_to_queue "self" (Scheduler.Periodic 0.001) 0.001 (fun () -> + (* this should remove the periodic scheduling *) + Scheduler.remove_from_queue "self" ; + (* add an operation to stop the test *) + Scheduler.add_to_queue "stop" Scheduler.OneShot 0.1 (fun () -> + send which "stop" + ) ; + send which "self" + ) ; + start_schedule () ; + let cnt = Mtime_clock.counter () in + Alcotest.(check string) "same event name" "self" (receive which) ; + Alcotest.(check string) "same event name" "stop" (receive which) ; + let elapsed_ms = elapsed_ms cnt in + Alcotest.check is_less "small time" 300 elapsed_ms + +let tests = + [ + ("test_single", `Quick, test_single) + ; ("test_remove_self", `Quick, test_remove_self) + ] let () = Alcotest.run "Scheduler" [("generic", tests)] From 
60e12576ac08f6db431b1ddb251cba0b54c8d30e Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Thu, 28 Nov 2024 16:32:38 +0000 Subject: [PATCH 79/86] Add test for handling event if queue was empty in periodic scheduler Previously if the queue was empty and the loop thread was active the scheduler took quite some time to pick up the new event. Check that this is done in a timely fashion to avoid regressions in code. Signed-off-by: Frediano Ziglio --- .../lib/xapi-stdext-threads/scheduler_test.ml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml index 8b0baeb74b1..2828b3a10a3 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml @@ -56,10 +56,28 @@ let test_remove_self () = let elapsed_ms = elapsed_ms cnt in Alcotest.check is_less "small time" 300 elapsed_ms +let test_empty () = + let finished = Event.new_channel () in + Scheduler.add_to_queue "one" Scheduler.OneShot 0.001 (fun () -> + send finished true + ) ; + start_schedule () ; + Alcotest.(check bool) "finished" true (receive finished) ; + (* wait loop to go to wait with no work to do *) + Thread.delay 0.1 ; + Scheduler.add_to_queue "two" Scheduler.OneShot 0.001 (fun () -> + send finished true + ) ; + let cnt = Mtime_clock.counter () in + Alcotest.(check bool) "finished" true (receive finished) ; + let elapsed_ms = elapsed_ms cnt in + Alcotest.check is_less "small time" 100 elapsed_ms + let tests = [ ("test_single", `Quick, test_single) ; ("test_remove_self", `Quick, test_remove_self) + ; ("test_empty", `Quick, test_empty) ] let () = Alcotest.run "Scheduler" [("generic", tests)] From 21b56b4159eacb3468334757dab9d70e5a1d2043 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 5 Dec 2024 15:05:17 +0000 Subject: [PATCH 80/86] xapi_sr: remove commented code from 2009 
Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/xapi_sr.ml | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/ocaml/xapi/xapi_sr.ml b/ocaml/xapi/xapi_sr.ml index 8af5bd6e62f..0c4c88aa5d0 100644 --- a/ocaml/xapi/xapi_sr.ml +++ b/ocaml/xapi/xapi_sr.ml @@ -360,23 +360,6 @@ let create ~__context ~host ~device_config ~(physical_size : int64) ~name_label Helpers.assert_rolling_upgrade_not_in_progress ~__context ; debug "SR.create name_label=%s sm_config=[ %s ]" name_label (String.concat "; " (List.map (fun (k, v) -> k ^ " = " ^ v) sm_config)) ; - (* This breaks the udev SR which doesn't support sr_probe *) - (* - let probe_result = probe ~__context ~host ~device_config ~_type ~sm_config in - begin - match Xml.parse_string probe_result with - | Xml.Element("SRlist", _, children) -> () - | _ -> - (* Figure out what was missing, then throw the appropriate error *) - match String.lowercase_ascii _type with - | "lvmoiscsi" -> - if not (List.exists (fun (s,_) -> "targetiqn" = String.lowercase_ascii s) device_config) - then raise (Api_errors.Server_error ("SR_BACKEND_FAILURE_96",["";"";probe_result])) - else if not (List.exists (fun (s,_) -> "scsiid" = String.lowercase_ascii s) device_config) - then raise (Api_errors.Server_error ("SR_BACKEND_FAILURE_107",["";"";probe_result])) - | _ -> () - end; -*) let sr_uuid = Uuidx.make () in let sr_uuid_str = Uuidx.to_string sr_uuid in (* Create the SR in the database before creating on disk, so the backends can read the sm_config field. If an error happens here From 88dd4d9f5f2969532c43fce16177b21d9d7ac6e8 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Mon, 9 Dec 2024 14:50:27 +0000 Subject: [PATCH 81/86] Add a test to check the loop is woken up adding a new event Similar to test_empty test however the state of the loop should be different. 
Signed-off-by: Frediano Ziglio --- .../lib/xapi-stdext-threads/scheduler_test.ml | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml index 2828b3a10a3..0a4a847403f 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml @@ -73,11 +73,31 @@ let test_empty () = let elapsed_ms = elapsed_ms cnt in Alcotest.check is_less "small time" 100 elapsed_ms +let test_wakeup () = + let which = Event.new_channel () in + (* schedule a long event *) + Scheduler.add_to_queue "long" Scheduler.OneShot 2.0 (fun () -> + send which "long" + ) ; + start_schedule () ; + (* wait loop to go to wait with no work to do *) + Thread.delay 0.1 ; + let cnt = Mtime_clock.counter () in + (* schedule a quick event, should wake up the loop *) + Scheduler.add_to_queue "quick" Scheduler.OneShot 0.1 (fun () -> + send which "quick" + ) ; + Alcotest.(check string) "same event name" "quick" (receive which) ; + Scheduler.remove_from_queue "long" ; + let elapsed_ms = elapsed_ms cnt in + Alcotest.check is_less "small time" 150 elapsed_ms + let tests = [ ("test_single", `Quick, test_single) ; ("test_remove_self", `Quick, test_remove_self) ; ("test_empty", `Quick, test_empty) + ; ("test_wakeup", `Quick, test_wakeup) ] let () = Alcotest.run "Scheduler" [("generic", tests)] From 098546a8d36ec4956ae3877d68847d079ab26729 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 5 Dec 2024 15:06:36 +0000 Subject: [PATCH 82/86] CA-390025: do not override SR's client-set metadata on update Some plugins may not store the client-set metadata, and return a static value when replying to the update. This would override the values that a client used when the SR was created, or set afterwards, which is unexpected. 
Now name_label and name_description fields returned by the plugins are ignored on update. Current set_name_label and set_name_description rely on the update mechanism to work. Instead add database call at the end of the methods to ensure both xapi and the SR backend are synchronized, even when the latter fails to update the values. Signed-off-by: Pau Ruiz Safont --- ocaml/xapi/xapi_sr.ml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ocaml/xapi/xapi_sr.ml b/ocaml/xapi/xapi_sr.ml index 0c4c88aa5d0..12ab2bef924 100644 --- a/ocaml/xapi/xapi_sr.ml +++ b/ocaml/xapi/xapi_sr.ml @@ -575,9 +575,6 @@ let update ~__context ~sr = Db.SR.get_uuid ~__context ~self:sr |> Storage_interface.Sr.of_string in let sr_info = C.SR.stat (Ref.string_of task) sr' in - Db.SR.set_name_label ~__context ~self:sr ~value:sr_info.name_label ; - Db.SR.set_name_description ~__context ~self:sr - ~value:sr_info.name_description ; Db.SR.set_physical_size ~__context ~self:sr ~value:sr_info.total_space ; Db.SR.set_physical_utilisation ~__context ~self:sr ~value:(Int64.sub sr_info.total_space sr_info.free_space) ; @@ -846,7 +843,7 @@ let set_name_label ~__context ~sr ~value = (Storage_interface.Sr.of_string sr') value ) ; - update ~__context ~sr + Db.SR.set_name_label ~__context ~self:sr ~value let set_name_description ~__context ~sr ~value = let open Storage_access in @@ -860,7 +857,7 @@ let set_name_description ~__context ~sr ~value = (Storage_interface.Sr.of_string sr') value ) ; - update ~__context ~sr + Db.SR.set_name_description ~__context ~self:sr ~value let set_virtual_allocation ~__context ~self ~value = Db.SR.set_virtual_allocation ~__context ~self ~value From ea46f81b3e8da3cb9bff4da42db46b934c57148f Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 10 Dec 2024 09:21:09 +0000 Subject: [PATCH 83/86] xe-cli completion: Hide COMPREPLY manipulation behind functions Though the majority of completions were already using set_completions and the like to add completion 
suggestions, there were two leftovers still needlessly changing COMPREPLY themselves. This caused bugs, as in the case of xe vm-import filename= autocompleting all of the filenames into the prompt instead of presenting the choice. Let only these functions operate on COMPREPLY directly. Signed-off-by: Andrii Sultanov --- ocaml/xe-cli/bash-completion | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ocaml/xe-cli/bash-completion b/ocaml/xe-cli/bash-completion index 84fd3656a65..aae832f4d67 100644 --- a/ocaml/xe-cli/bash-completion +++ b/ocaml/xe-cli/bash-completion @@ -143,9 +143,9 @@ _xe() IFS=$'\n,' # Here we actually WANT file name completion, so using compgen is OK. local comp_files=$(compgen -f "$value") - COMPREPLY=( "$comp_files" ) __xe_debug "triggering filename completion for the value:" __xe_debug $(__tab_delimited_newline_array "$comp_files") + set_completions "$comp_files" "$value" return 0 ;; @@ -156,7 +156,6 @@ _xe() if [ "${OLDSTYLE_WORDS[1]}" == "pif-reconfigure-ip" ]; then IFS=$'\n,' suggested_modes="dhcp,static,none" - COMPREPLY=( $(compgen -W "dhcp ,static ,none" -- "$value") ) elif [ "${COMP_WORDS[1]}" == "pif-reconfigure-ipv6" ]; then IFS=$'\n,' suggested_modes="dhcp,static,none,autoconf" From 3e70a6d5b31024f3894577e674b5c79d73c1069c Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Tue, 10 Dec 2024 14:19:00 +0000 Subject: [PATCH 84/86] Improve the scan comparison logic For the scan retry, previously we were comparing the entire vdi data structure from the database using the (<>) operator. This is a bit wasteful and not very stable. Instead let us just compare the vdi refs, since the race here comes from `VDI.db_{introduce,forget}`, which would only add/remove vdis from the db, but not change its actual data structure. Also add a bit more logging when retrying, since this should not happen very often. 
Signed-off-by: Vincent Liu --- ocaml/xapi/xapi_sr.ml | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi/xapi_sr.ml b/ocaml/xapi/xapi_sr.ml index 12ab2bef924..a40a644ba04 100644 --- a/ocaml/xapi/xapi_sr.ml +++ b/ocaml/xapi/xapi_sr.ml @@ -778,15 +778,34 @@ let scan ~__context ~sr = Db.VDI.get_records_where ~__context ~expr:(Eq (Field "SR", Literal sr')) in + (* It is sufficient to just compare the refs in two db_vdis, as this + is what update_vdis uses to determine what to delete *) + let vdis_ref_equal db_vdi1 db_vdi2 = + Listext.List.set_difference (List.map fst db_vdi1) + (List.map fst db_vdi2) + = [] + in let db_vdis_before = find_vdis () in let vs, sr_info = C.SR.scan2 (Ref.string_of task) (Storage_interface.Sr.of_string sr_uuid) in let db_vdis_after = find_vdis () in - if limit > 0 && db_vdis_after <> db_vdis_before then + if limit > 0 && not (vdis_ref_equal db_vdis_before db_vdis_after) + then ( + debug + "%s detected db change while scanning, before scan vdis %s, \ + after scan vdis %s, retry limit left %d" + __FUNCTION__ + (List.map (fun (_, v) -> v.vDI_uuid) db_vdis_before + |> String.concat "," + ) + (List.map (fun (_, v) -> v.vDI_uuid) db_vdis_after + |> String.concat "," + ) + limit ; (scan_rec [@tailcall]) (limit - 1) - else if limit = 0 then + ) else if limit = 0 then raise (Api_errors.Server_error (Api_errors.internal_error, ["SR.scan retry limit exceeded"]) From 9ad46260077d799bb43d7debfb8d123cb99b7aa4 Mon Sep 17 00:00:00 2001 From: Changlei Li Date: Tue, 10 Dec 2024 14:18:03 +0800 Subject: [PATCH 85/86] CA-402901: Update leaked dp to Sr When add leaked datapath: 1. add leaked datapath to Sr.vdis 2. write to db file 3. log enhance If there are storage exceptions raised when destroying datapath, the procedure fails and the state of VDI becomes incorrect, which leads to various abnormalresults in subsequent operations. 
To handle this, the leaked datapath is designed to redestroy the datapath and refresh the state before next storage operation via function remove_datapaths_andthen_nolock. But this mechanism doesn't take effect in current code. This commit is to fix this bug. Leaked datapath should be added to Sr.vdis to make the leaked datapath really work. And write to db file to avoid losing the leaked datapath if xapi restarts. Signed-off-by: Changlei Li --- ocaml/xapi/storage_smapiv1_wrapper.ml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index ae1f21f72f3..55067efd9de 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -453,6 +453,9 @@ functor List.fold_left perform_one vdi_t ops let perform_nolock context ~dbg ~dp ~sr ~vdi ~vm this_op = + debug "%s dp=%s, sr=%s, vdi=%s, vm=%s, op=%s" __FUNCTION__ dp + (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) + (Vdi_automaton.string_of_op this_op) ; match Host.find sr !Host.host with | None -> raise (Storage_error (Sr_not_attached (s_of_sr sr))) @@ -473,6 +476,15 @@ functor superstate to superstate'. These may fail: if so we revert the datapath+VDI state to the most appropriate value. 
*) let ops = Vdi_automaton.( - ) superstate superstate' in + debug "%s %s -> %s: %s" __FUNCTION__ + (Vdi_automaton.string_of_state superstate) + (Vdi_automaton.string_of_state superstate') + (String.concat ", " + (List.map + (fun (op, _) -> Vdi_automaton.string_of_op op) + ops + ) + ) ; side_effects context dbg dp sr sr_t vdi vdi_t vm ops with e -> let e = @@ -529,7 +541,8 @@ functor ) with e -> if not allow_leak then ( - ignore (Vdi.add_leaked dp vdi_t) ; + Sr.add_or_replace vdi (Vdi.add_leaked dp vdi_t) sr_t ; + Everything.to_file !host_state_path (Everything.make ()) ; raise e ) else ( (* allow_leak means we can forget this dp *) From a540ac83579381583438df85d6c54ee38b866de8 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 12 Dec 2024 10:29:21 +0000 Subject: [PATCH 86/86] CA-403633: Keep vPCI devices in the same order QEMU orders devices by the time of plugging. Parallelizing them introduces randomness, which breaks the assumption that devices are ordered in a deterministic way. Serialize all PCI and VUSB plugs to restore behaviour. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/lib/xenops_server.ml | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index e5d8016bedb..f4c784faa11 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -1630,12 +1630,11 @@ let rec atomics_of_operation = function ] ; [VM_create_device_model (id, false)] (* PCI and USB devices are hot-plugged into HVM guests via QEMU, so the - following operations occur after creating the device models *) - ; parallel_concat "Devices.plug (qemu)" ~id - [ - List.map (fun pci -> PCI_plug (pci.Pci.id, true)) pcis_other - ; List.map (fun vusb -> VUSB_plug vusb.Vusb.id) vusbs - ] + following operations occur after creating the device models. + The order of PCI devices depends on the order they are plugged, they + must be kept serialized. 
*) + ; List.map (fun pci -> PCI_plug (pci.Pci.id, true)) pcis_other + ; List.map (fun vusb -> VUSB_plug vusb.Vusb.id) vusbs (* At this point the domain is considered survivable. *) ; [VM_set_domain_action_request (id, None)] ] @@ -1698,10 +1697,10 @@ let rec atomics_of_operation = function ) ; [VM_create_device_model (id, true)] (* PCI and USB devices are hot-plugged into HVM guests via QEMU, so - the following operations occur after creating the device models *) - ; parallel_map "PCIs.plug" ~id pcis_other (fun pci -> - [PCI_plug (pci.Pci.id, true)] - ) + the following operations occur after creating the device models. + The order of PCI devices depends on the order they are plugged, they + must be kept serialized. *) + ; List.map (fun pci -> PCI_plug (pci.Pci.id, true)) pcis_other ] |> List.concat | VM_poweroff (id, timeout) ->