From e781c8005bd839b12ea0a85c920080bb4d1d8ba7 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Fri, 23 Aug 2024 13:49:11 +0700 Subject: [PATCH 01/20] introduce performance regression tests --- src/SIL.Harmony.Sample/CrdtSampleKernel.cs | 15 +- .../DataModelPerformanceTests.cs | 186 ++++++++++++++++++ src/SIL.Harmony.Tests/DataModelTestBase.cs | 27 ++- src/SIL.Harmony.Tests/RepositoryTests.cs | 4 +- .../SIL.Harmony.Tests.csproj | 2 + src/SIL.Harmony/DataModel.cs | 18 +- src/SIL.Harmony/Db/CrdtRepository.cs | 33 +++- 7 files changed, 259 insertions(+), 26 deletions(-) create mode 100644 src/SIL.Harmony.Tests/DataModelPerformanceTests.cs diff --git a/src/SIL.Harmony.Sample/CrdtSampleKernel.cs b/src/SIL.Harmony.Sample/CrdtSampleKernel.cs index 4a8969c..188b4b7 100644 --- a/src/SIL.Harmony.Sample/CrdtSampleKernel.cs +++ b/src/SIL.Harmony.Sample/CrdtSampleKernel.cs @@ -1,4 +1,5 @@ -using System.Diagnostics; +using System.Data.Common; +using System.Diagnostics; using SIL.Harmony.Changes; using SIL.Harmony.Linq2db; using SIL.Harmony.Sample.Changes; @@ -11,11 +12,21 @@ namespace SIL.Harmony.Sample; public static class CrdtSampleKernel { public static IServiceCollection AddCrdtDataSample(this IServiceCollection services, string dbPath) + { + return services.AddCrdtDataSample(builder => builder.UseSqlite($"Data Source={dbPath}")); + } + public static IServiceCollection AddCrdtDataSample(this IServiceCollection services, DbConnection connection) + { + return services.AddCrdtDataSample(builder => builder.UseSqlite(connection, true)); + } + + public static IServiceCollection AddCrdtDataSample(this IServiceCollection services, + Action optionsBuilder) { services.AddDbContext((provider, builder) => { builder.UseLinqToDbCrdt(provider); - builder.UseSqlite($"Data Source={dbPath}"); + optionsBuilder(builder); builder.EnableDetailedErrors(); builder.EnableSensitiveDataLogging(); #if DEBUG diff --git a/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs b/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs new file mode 100644 index 0000000..cb02bfb --- /dev/null +++ b/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs @@ -0,0 +1,186 @@ +using System.Diagnostics; +using System.Text; +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Columns; +using BenchmarkDotNet.Configs; +using BenchmarkDotNet.Engines; +using BenchmarkDotNet.Loggers; +using BenchmarkDotNet.Running; +using Microsoft.Data.Sqlite; +using SIL.Harmony.Changes; +using SIL.Harmony.Core; +using SIL.Harmony.Db; +using SIL.Harmony.Sample.Models; +using Xunit.Abstractions; + +namespace SIL.Harmony.Tests; + +public class DataModelPerformanceTests(ITestOutputHelper output) : DataModelTestBase +{ + [Fact] + public void AddingChangePerformance() + { + using var fileStream = File.Open("DataModelPerformanceTests.txt", FileMode.Create); + var summary = + BenchmarkRunner.Run( + ManualConfig.CreateEmpty() + .AddColumnProvider(DefaultColumnProviders.Instance) + .AddLogger(new XUnitBenchmarkLogger(output)) + .AddLogger(new TextLogger(new StreamWriter(fileStream))) + ); + foreach (var benchmarkCase in summary.BenchmarksCases.Where(b => !summary.IsBaseline(b))) + { + var ratio = double.Parse(BaselineRatioColumn.RatioMean.GetValue(summary, benchmarkCase)); + ratio.Should().BeInRange(0, 2, "performance should not get worse, benchmark " + benchmarkCase.DisplayInfo); + } + } + + [Fact] + public async Task SimpleAddChangePerformanceTest() + { + var dataModelTest = new DataModelTestBase(new SqliteConnection("Data Source=test.db")); + // warmup the code, this causes jit to run and keeps our actual test below consistent + var word1Id = Guid.NewGuid(); + await dataModelTest.WriteNextChange(dataModelTest.SetWord(word1Id, "entity 0")); + var start = Stopwatch.GetTimestamp(); + var parentHash = (await dataModelTest.WriteNextChange(dataModelTest.SetWord(word1Id, "entity 1"))).Hash; + var runtimeAddChange1Snapshot = Stopwatch.GetElapsedTime(start); + for (var i = 0; i < 10_000; i++) + { + var change = dataModelTest.SetWord(Guid.NewGuid(), $"entity {i}"); + var commitId = Guid.NewGuid(); + var commit = new Commit(commitId) + { + ClientId = Guid.NewGuid(), + HybridDateTime = new HybridDateTime(dataModelTest.NextDate(), 0), + ChangeEntities = + [ + new ChangeEntity() + { + Change = change, + Index = 0, + CommitId = commitId, + EntityId = change.EntityId + } + ] + }; + commit.SetParentHash(parentHash); + parentHash = commit.Hash; + dataModelTest.DbContext.Commits.Add(commit); + dataModelTest.DbContext.Snapshots.Add(new ObjectSnapshot(await change.NewEntity(commit, null!), commit, true)); + } + + await dataModelTest.DbContext.SaveChangesAsync(); + + start = Stopwatch.GetTimestamp(); + await dataModelTest.WriteNextChange(dataModelTest.SetWord(Guid.NewGuid(), "entity1")); + // var snapshots = await dataModelTest.CrdtRepository.CurrenSimpleSnapshots().ToArrayAsync(); + var runtimeAddChange10000Snapshots = Stopwatch.GetElapsedTime(start); + runtimeAddChange10000Snapshots.Should() + .BeCloseTo(runtimeAddChange1Snapshot, runtimeAddChange1Snapshot / 10); + // snapshots.Should().HaveCount(1002); + await dataModelTest.DisposeAsync(); + } + + private class XUnitBenchmarkLogger(ITestOutputHelper output) : ILogger + { + public string Id => nameof(XUnitBenchmarkLogger); + public int Priority => 0; + private StringBuilder? sb; + + public void Write(LogKind logKind, string text) + { + if (sb == null) + { + sb = new StringBuilder(); + } + + sb.Append(text); + } + + public void WriteLine() + { + if (sb is not null) + { + output.WriteLine(sb.ToString()); + sb.Clear(); + } + else + output.WriteLine(string.Empty); + } + + public void WriteLine(LogKind logKind, string text) + { + if (sb is not null) + { + output.WriteLine(sb.Append(text).ToString()); + sb.Clear(); + } + else + output.WriteLine(text); + } + + public void Flush() + { + if (sb is not null) + { + output.WriteLine(sb.ToString()); + sb.Clear(); + } + } + } +} + +[SimpleJob(RunStrategy.Monitoring, warmupCount: 2)] +public class DataModelPerformanceBenchmarks +{ + private DataModelTestBase _templateModel = null!; + private DataModelTestBase _dataModelTestBase = null!; + private DataModelTestBase _emptyDataModel = null!; + + + [GlobalSetup] + public void GlobalSetup() + { + _templateModel = new DataModelTestBase(); + for (var i = 0; i < StartingSnapshots; i++) + { + _ = _templateModel.WriteNextChange(_templateModel.SetWord(Guid.NewGuid(), $"entity {i}")).Result; + } + } + + [Params(0, 1000)] + public int StartingSnapshots { get; set; } + + [IterationSetup] + public void IterationSetup() + { + _emptyDataModel = new(); + _dataModelTestBase = _templateModel.ForkDatabase(); + } + + [Benchmark(Baseline = true)] + public Commit AddSingleChangePerformance() + { + return _emptyDataModel.WriteNextChange(_emptyDataModel.SetWord(Guid.NewGuid(), "entity1")).Result; + } + + [Benchmark] + public Commit AddSingleChangeWithManySnapshots() + { + return _dataModelTestBase.WriteNextChange(_dataModelTestBase.SetWord(Guid.NewGuid(), "entity1")).Result; + } + + [IterationCleanup] + public void IterationCleanup() + { + _emptyDataModel.DisposeAsync().GetAwaiter().GetResult(); + _dataModelTestBase.DisposeAsync().GetAwaiter().GetResult(); + } + + [GlobalCleanup] + public void GlobalCleanup() + { + _templateModel.DisposeAsync().GetAwaiter().GetResult(); + } +} \ No newline at end of file diff --git a/src/SIL.Harmony.Tests/DataModelTestBase.cs b/src/SIL.Harmony.Tests/DataModelTestBase.cs index 8ad5a89..ce3b50b 100644 --- a/src/SIL.Harmony.Tests/DataModelTestBase.cs +++ b/src/SIL.Harmony.Tests/DataModelTestBase.cs @@ -1,3 +1,4 @@ +using Microsoft.Data.Sqlite; using SIL.Harmony.Changes; using SIL.Harmony.Core; using SIL.Harmony.Sample; @@ -7,6 +8,7 @@ using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection.Extensions; +using SIL.Harmony.Db; namespace SIL.Harmony.Tests; @@ -16,27 +18,44 @@ public class DataModelTestBase : IAsyncLifetime protected readonly Guid _localClientId = Guid.NewGuid(); public readonly DataModel DataModel; public readonly SampleDbContext DbContext; + internal readonly CrdtRepository CrdtRepository; protected readonly MockTimeProvider MockTimeProvider = new(); - public DataModelTestBase() + public DataModelTestBase() : this(new SqliteConnection("Data Source=:memory:")) + { + } + + public DataModelTestBase(SqliteConnection connection) { _services = new ServiceCollection() - .AddCrdtDataSample(":memory:") + .AddCrdtDataSample(connection) .Replace(ServiceDescriptor.Singleton(MockTimeProvider)) .BuildServiceProvider(); DbContext = _services.GetRequiredService(); DbContext.Database.OpenConnection(); DbContext.Database.EnsureCreated(); DataModel = _services.GetRequiredService(); + CrdtRepository = _services.GetRequiredService(); + } + + public DataModelTestBase ForkDatabase() + { + var connection = new SqliteConnection("Data Source=:memory:"); + connection.Open(); + var existingConnection = DbContext.Database.GetDbConnection() as SqliteConnection; + if (existingConnection is null) throw new InvalidOperationException("Database is not SQLite"); + existingConnection.BackupDatabase(connection); + return new DataModelTestBase(connection); } public void SetCurrentDate(DateTime dateTime) { currentDate = dateTime; } + private static int _instanceCount = 0; private DateTimeOffset currentDate = new(new DateTime(2000, 1, 1, 0, 0, 0).AddHours(_instanceCount++)); - private DateTimeOffset NextDate() => currentDate = currentDate.AddDays(1); + public DateTimeOffset NextDate() => currentDate = currentDate.AddDays(1); public async ValueTask WriteNextChange(IChange change, bool add = true) { @@ -130,4 +149,4 @@ protected IEnumerable AllData() .OfType() .Concat(DbContext.Set().OrderBy(w => w.Text)); } -} +} \ No newline at end of file diff --git a/src/SIL.Harmony.Tests/RepositoryTests.cs b/src/SIL.Harmony.Tests/RepositoryTests.cs index b5a7609..310e745 100644 --- a/src/SIL.Harmony.Tests/RepositoryTests.cs +++ b/src/SIL.Harmony.Tests/RepositoryTests.cs @@ -11,8 +11,8 @@ namespace SIL.Harmony.Tests; public class RepositoryTests : IAsyncLifetime { private readonly ServiceProvider _services; - private CrdtRepository _repository; - private SampleDbContext _crdtDbContext; + private readonly CrdtRepository _repository; + private readonly SampleDbContext _crdtDbContext; public RepositoryTests() { diff --git a/src/SIL.Harmony.Tests/SIL.Harmony.Tests.csproj b/src/SIL.Harmony.Tests/SIL.Harmony.Tests.csproj index 615050f..edd65bd 100644 --- a/src/SIL.Harmony.Tests/SIL.Harmony.Tests.csproj +++ b/src/SIL.Harmony.Tests/SIL.Harmony.Tests.csproj @@ -7,6 +7,8 @@ + + all diff --git a/src/SIL.Harmony/DataModel.cs b/src/SIL.Harmony/DataModel.cs index 1adc21c..48140da 100644 --- a/src/SIL.Harmony/DataModel.cs +++ b/src/SIL.Harmony/DataModel.cs @@ -190,7 +190,7 @@ public async Task GetLatestSnapshotByObjectId(Guid entityId) public async Task GetProjectSnapshot(bool includeDeleted = false) { - return new ModelSnapshot(await GetEntitySnapshots(includeDeleted)); + return new ModelSnapshot(await _crdtRepository.CurrenSimpleSnapshots(includeDeleted).ToArrayAsync()); } public IQueryable GetLatestObjects() where T : class, IObjectBase @@ -208,22 +208,6 @@ public async Task GetBySnapshotId(Guid snapshotId) return await _crdtRepository.GetObjectBySnapshotId(snapshotId); } - private async Task GetEntitySnapshots(bool includeDeleted = false) - { - var queryable = _crdtRepository.CurrentSnapshots(); - if (!includeDeleted) queryable = queryable.Where(s => !s.EntityIsDeleted); - var snapshots = await queryable.Select(s => - new SimpleSnapshot(s.Id, - s.TypeName, - s.EntityId, - s.CommitId, - s.IsRoot, - s.Commit.HybridDateTime, - s.Commit.Hash, - s.EntityIsDeleted)).AsNoTracking().ToArrayAsync(); - return snapshots; - } - public async Task> GetSnapshotsAt(DateTimeOffset dateTime) { var repository = _crdtRepository.GetScopedRepository(dateTime); diff --git a/src/SIL.Harmony/Db/CrdtRepository.cs b/src/SIL.Harmony/Db/CrdtRepository.cs index ef63447..5f01eaa 100644 --- a/src/SIL.Harmony/Db/CrdtRepository.cs +++ b/src/SIL.Harmony/Db/CrdtRepository.cs @@ -54,7 +54,38 @@ public IQueryable CurrentCommits() public IQueryable CurrentSnapshots() { - return _dbContext.Snapshots.Where(snapshot => CurrentSnapshotIds().Contains(snapshot.Id)); + //todo this does not respect ignoreChangesAfter + return _dbContext.Snapshots.FromSql( +$""" +WITH LatestSnapshots AS (SELECT first_value(s1.Id) OVER ( + PARTITION BY "s1"."EntityId" + ORDER BY "c"."DateTime" DESC, "c"."Counter" DESC, "c"."Id" DESC + ) AS "LatestSnapshotId" + FROM "Snapshots" AS "s1" + INNER JOIN "Commits" AS "c" ON "s1"."CommitId" = "c"."Id" + GROUP BY "s1"."EntityId") +SELECT * +FROM "Snapshots" AS "s" + INNER JOIN LatestSnapshots AS "ls" ON "s"."Id" = "ls"."LatestSnapshotId" +"""); + } + + public IAsyncEnumerable CurrenSimpleSnapshots(bool includeDeleted = false) + { + var queryable = CurrentSnapshots(); + if (!includeDeleted) queryable = queryable.Where(s => !s.EntityIsDeleted); + var snapshots = queryable.Select(s => + new SimpleSnapshot(s.Id, + s.TypeName, + s.EntityId, + s.CommitId, + s.IsRoot, + s.Commit.HybridDateTime, + s.Commit.Hash, + s.EntityIsDeleted)) + .AsNoTracking() + .AsAsyncEnumerable(); + return snapshots; } private IQueryable CurrentSnapshotIds() From b73e9a5ca4875c64e07efb25fbaed05f32ed0aea Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Mon, 26 Aug 2024 14:54:44 +0700 Subject: [PATCH 02/20] fix filtering issue with current snapshots query --- src/SIL.Harmony/Db/CrdtRepository.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/SIL.Harmony/Db/CrdtRepository.cs b/src/SIL.Harmony/Db/CrdtRepository.cs index 5f01eaa..7e5d895 100644 --- a/src/SIL.Harmony/Db/CrdtRepository.cs +++ b/src/SIL.Harmony/Db/CrdtRepository.cs @@ -57,16 +57,18 @@ public IQueryable CurrentSnapshots() //todo this does not respect ignoreChangesAfter return _dbContext.Snapshots.FromSql( $""" -WITH LatestSnapshots AS (SELECT first_value(s1.Id) OVER ( +WITH LatestSnapshots AS (SELECT first_value(s1.Id) + OVER ( PARTITION BY "s1"."EntityId" ORDER BY "c"."DateTime" DESC, "c"."Counter" DESC, "c"."Id" DESC ) AS "LatestSnapshotId" FROM "Snapshots" AS "s1" INNER JOIN "Commits" AS "c" ON "s1"."CommitId" = "c"."Id" - GROUP BY "s1"."EntityId") + WHERE "c"."DateTime" < {ignoreChangesAfter?.UtcDateTime} OR {ignoreChangesAfter} IS NULL) SELECT * FROM "Snapshots" AS "s" INNER JOIN LatestSnapshots AS "ls" ON "s"."Id" = "ls"."LatestSnapshotId" +GROUP BY s.EntityId """); } From 64c1bafe0eb53e995c8bc71261baa1ac0bfd684c Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Mon, 26 Aug 2024 14:56:23 +0700 Subject: [PATCH 03/20] use in memory db --- src/SIL.Harmony.Tests/DataModelPerformanceTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs b/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs index cb02bfb..1cc0ea0 100644 --- a/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs +++ b/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs @@ -38,7 +38,7 @@ public void AddingChangePerformance() [Fact] public async Task SimpleAddChangePerformanceTest() { - var dataModelTest = new DataModelTestBase(new SqliteConnection("Data Source=test.db")); + var dataModelTest = new DataModelTestBase(); // warmup the code, this causes jit to run and keeps our actual test below consistent var word1Id = Guid.NewGuid(); await dataModelTest.WriteNextChange(dataModelTest.SetWord(word1Id, "entity 0")); From 83170684285c392c335a9f37ffeb70cfac5dec28 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Mon, 26 Aug 2024 15:36:53 +0700 Subject: [PATCH 04/20] ensure the harmony table names are consistent now that we have a hardcoded sql query --- src/SIL.Harmony/Db/EntityConfig/ChangeEntityConfig.cs | 3 ++- src/SIL.Harmony/Db/EntityConfig/CommitEntityConfig.cs | 3 ++- src/SIL.Harmony/Db/EntityConfig/SnapshotEntityConfig.cs | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/SIL.Harmony/Db/EntityConfig/ChangeEntityConfig.cs b/src/SIL.Harmony/Db/EntityConfig/ChangeEntityConfig.cs index fadd671..1159d95 100644 --- a/src/SIL.Harmony/Db/EntityConfig/ChangeEntityConfig.cs +++ b/src/SIL.Harmony/Db/EntityConfig/ChangeEntityConfig.cs @@ -11,6 +11,7 @@ public class ChangeEntityConfig(JsonSerializerOptions jsonSerializerOptions) : I { public void Configure(EntityTypeBuilder> builder) { + builder.ToTable("ChangeEntities"); builder.HasKey(c => new { c.CommitId, c.Index }); builder.Property(c => c.Change) .HasColumnType("jsonb") @@ -25,4 +26,4 @@ private IChange DeserializeChange(string json) return JsonSerializer.Deserialize(json, jsonSerializerOptions) ?? throw new SerializationException("Could not deserialize Change: " + json); } -} \ No newline at end of file +} diff --git a/src/SIL.Harmony/Db/EntityConfig/CommitEntityConfig.cs b/src/SIL.Harmony/Db/EntityConfig/CommitEntityConfig.cs index 5fbcf41..77352c7 100644 --- a/src/SIL.Harmony/Db/EntityConfig/CommitEntityConfig.cs +++ b/src/SIL.Harmony/Db/EntityConfig/CommitEntityConfig.cs @@ -9,6 +9,7 @@ public class CommitEntityConfig : IEntityTypeConfiguration { public void Configure(EntityTypeBuilder builder) { + builder.ToTable("Commits"); builder.HasKey(c => c.Id); builder.ComplexProperty(c => c.HybridDateTime, hybridEntity => @@ -31,4 +32,4 @@ public void Configure(EntityTypeBuilder builder) .WithOne() .HasForeignKey(c => c.CommitId); } -} \ No newline at end of file +} diff --git a/src/SIL.Harmony/Db/EntityConfig/SnapshotEntityConfig.cs b/src/SIL.Harmony/Db/EntityConfig/SnapshotEntityConfig.cs index 47c65ce..fb15099 100644 --- a/src/SIL.Harmony/Db/EntityConfig/SnapshotEntityConfig.cs +++ b/src/SIL.Harmony/Db/EntityConfig/SnapshotEntityConfig.cs @@ -10,6 +10,7 @@ public class SnapshotEntityConfig(JsonSerializerOptions jsonSerializerOptions) : { public void Configure(EntityTypeBuilder builder) { + builder.ToTable("Snapshots"); builder.HasKey(s => s.Id); builder.HasIndex(s => new { s.CommitId, s.EntityId }).IsUnique(); builder @@ -29,4 +30,4 @@ private IObjectBase DeserializeObject(string json) return JsonSerializer.Deserialize(json, jsonSerializerOptions) ?? throw new SerializationException($"Could not deserialize Entry: {json}"); } -} \ No newline at end of file +} From 89857f8061fd751d25e69a7334e914a51ed0c74c Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Thu, 29 Aug 2024 11:03:21 +0700 Subject: [PATCH 05/20] make it easy to trace performance test and persist the database to disk --- .../DataModelPerformanceTests.cs | 29 ++++++++++++++++++- src/SIL.Harmony.Tests/DataModelTestBase.cs | 6 ++++ .../SIL.Harmony.Tests.csproj | 1 + 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs b/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs index 1cc0ea0..89411eb 100644 --- a/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs +++ b/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs @@ -6,6 +6,7 @@ using BenchmarkDotNet.Engines; using BenchmarkDotNet.Loggers; using BenchmarkDotNet.Running; +using JetBrains.Profiler.SelfApi; using Microsoft.Data.Sqlite; using SIL.Harmony.Changes; using SIL.Harmony.Core; @@ -34,11 +35,33 @@ public void AddingChangePerformance() ratio.Should().BeInRange(0, 2, "performance should not get worse, benchmark " + benchmarkCase.DisplayInfo); } } + + //enable this to profile tests + private static readonly bool trace = Environment.GetEnvironmentVariable("DOTNET_TRACE") != "false"; + private async Task StartTrace() + { + if (!trace) return; + await DotTrace.InitAsync(); + // config that sets the save directory + var config = new DotTrace.Config(); + var dirPath = Path.Combine(Path.GetTempPath(), "harmony-perf"); + Directory.CreateDirectory(dirPath); + config.SaveToDir(dirPath); + DotTrace.Attach(config); + DotTrace.StartCollectingData(); + } + + private void StopTrace() + { + if (!trace) return; + DotTrace.SaveData(); + DotTrace.Detach(); + } [Fact] public async Task SimpleAddChangePerformanceTest() { - var dataModelTest = new DataModelTestBase(); + var dataModelTest = new DataModelTestBase(true); // warmup the code, this causes jit to run and keeps our actual test below consistent var word1Id = Guid.NewGuid(); await dataModelTest.WriteNextChange(dataModelTest.SetWord(word1Id, "entity 0")); @@ -71,11 +94,15 @@ public async Task SimpleAddChangePerformanceTest() } await dataModelTest.DbContext.SaveChangesAsync(); + await dataModelTest.WriteNextChange(dataModelTest.SetWord(Guid.NewGuid(), "entity3")); + + await StartTrace(); start = Stopwatch.GetTimestamp(); await dataModelTest.WriteNextChange(dataModelTest.SetWord(Guid.NewGuid(), "entity1")); // var snapshots = await dataModelTest.CrdtRepository.CurrenSimpleSnapshots().ToArrayAsync(); var runtimeAddChange10000Snapshots = Stopwatch.GetElapsedTime(start); + StopTrace(); runtimeAddChange10000Snapshots.Should() .BeCloseTo(runtimeAddChange1Snapshot, runtimeAddChange1Snapshot / 10); // snapshots.Should().HaveCount(1002); diff --git a/src/SIL.Harmony.Tests/DataModelTestBase.cs b/src/SIL.Harmony.Tests/DataModelTestBase.cs index ce3b50b..190b296 100644 --- a/src/SIL.Harmony.Tests/DataModelTestBase.cs +++ b/src/SIL.Harmony.Tests/DataModelTestBase.cs @@ -21,6 +21,12 @@ public class DataModelTestBase : IAsyncLifetime internal readonly CrdtRepository CrdtRepository; protected readonly MockTimeProvider MockTimeProvider = new(); + public DataModelTestBase(bool saveToDisk) : this(saveToDisk + ? new SqliteConnection("Data Source=test.db") + : new SqliteConnection("Data Source=:memory:")) + { + } + public DataModelTestBase() : this(new SqliteConnection("Data Source=:memory:")) { } diff --git a/src/SIL.Harmony.Tests/SIL.Harmony.Tests.csproj b/src/SIL.Harmony.Tests/SIL.Harmony.Tests.csproj index edd65bd..eb1fa7c 100644 --- a/src/SIL.Harmony.Tests/SIL.Harmony.Tests.csproj +++ b/src/SIL.Harmony.Tests/SIL.Harmony.Tests.csproj @@ -14,6 +14,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive + From 4baa679f39dd3299625283e9f5e1797b48ed8ab1 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Thu, 29 Aug 2024 15:35:26 +0700 Subject: [PATCH 06/20] implement some performance improvements, tweak tests to be more realistic --- .../DataModelPerformanceTests.cs | 93 +++++++++++-------- src/SIL.Harmony.Tests/DataModelTestBase.cs | 15 ++- src/SIL.Harmony/CrdtConfig.cs | 4 + src/SIL.Harmony/CrdtKernel.cs | 3 +- src/SIL.Harmony/DataModel.cs | 27 ++++-- src/SIL.Harmony/Db/CrdtRepository.cs | 15 ++- src/SIL.Harmony/SnapshotWorker.cs | 22 +++-- 7 files changed, 121 insertions(+), 58 deletions(-) diff --git a/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs b/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs index 89411eb..eccfd90 100644 --- a/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs +++ b/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs @@ -21,23 +21,23 @@ public class DataModelPerformanceTests(ITestOutputHelper output) : DataModelTest [Fact] public void AddingChangePerformance() { - using var fileStream = File.Open("DataModelPerformanceTests.txt", FileMode.Create); var summary = BenchmarkRunner.Run( ManualConfig.CreateEmpty() .AddColumnProvider(DefaultColumnProviders.Instance) .AddLogger(new XUnitBenchmarkLogger(output)) - .AddLogger(new TextLogger(new StreamWriter(fileStream))) ); foreach (var benchmarkCase in summary.BenchmarksCases.Where(b => !summary.IsBaseline(b))) { var ratio = double.Parse(BaselineRatioColumn.RatioMean.GetValue(summary, benchmarkCase)); - ratio.Should().BeInRange(0, 2, "performance should not get worse, benchmark " + benchmarkCase.DisplayInfo); + //for now it just makes sure that no case is worse that 7x, this is based on the 10_000 test being 5 times worse. + //it would be better to have this scale off the number of changes + ratio.Should().BeInRange(0, 7, "performance should not get worse, benchmark " + benchmarkCase.DisplayInfo); } } //enable this to profile tests - private static readonly bool trace = Environment.GetEnvironmentVariable("DOTNET_TRACE") != "false"; + private static readonly bool trace = (Environment.GetEnvironmentVariable("DOTNET_TRACE") ?? "false") != "false"; private async Task StartTrace() { if (!trace) return; @@ -57,18 +57,47 @@ private void StopTrace() DotTrace.SaveData(); DotTrace.Detach(); } + + private static async Task MeasureTime(Func action, int iterations = 10) + { + TimeSpan total = TimeSpan.Zero; + for (var i = 0; i < iterations; i++) + { + var start = Stopwatch.GetTimestamp(); + await action(); + total += Stopwatch.GetElapsedTime(start); + } + return total / iterations; + } [Fact] public async Task SimpleAddChangePerformanceTest() { - var dataModelTest = new DataModelTestBase(true); + //disable validation because it's slow + var dataModelTest = new DataModelTestBase(false, alwaysValidate: false); // warmup the code, this causes jit to run and keeps our actual test below consistent - var word1Id = Guid.NewGuid(); - await dataModelTest.WriteNextChange(dataModelTest.SetWord(word1Id, "entity 0")); - var start = Stopwatch.GetTimestamp(); - var parentHash = (await dataModelTest.WriteNextChange(dataModelTest.SetWord(word1Id, "entity 1"))).Hash; - var runtimeAddChange1Snapshot = Stopwatch.GetElapsedTime(start); - for (var i = 0; i < 10_000; i++) + await dataModelTest.WriteNextChange(dataModelTest.SetWord(Guid.NewGuid(), "entity 0")); + var runtimeAddChange1Snapshot = await MeasureTime(() => dataModelTest.WriteNextChange(dataModelTest.SetWord(Guid.NewGuid(), "entity 1")).AsTask()); + + await BulkInsertChanges(dataModelTest); + //fork the database, this creates a new DbContext which does not have a cache of all the snapshots created above + //that cache causes DetectChanges (used by SaveChanges) to be slower than it should be + dataModelTest = dataModelTest.ForkDatabase(false); + + await StartTrace(); + var runtimeAddChange10000Snapshots = await MeasureTime(() => dataModelTest.WriteNextChange(dataModelTest.SetWord(Guid.NewGuid(), "entity1")).AsTask()); + StopTrace(); + output.WriteLine($"Runtime AddChange with 10,000 Snapshots: {runtimeAddChange10000Snapshots.TotalMilliseconds:N}ms"); + runtimeAddChange10000Snapshots.Should() + .BeCloseTo(runtimeAddChange1Snapshot, runtimeAddChange1Snapshot * 4); + // snapshots.Should().HaveCount(1002); + await dataModelTest.DisposeAsync(); + } + + internal static async Task BulkInsertChanges(DataModelTestBase dataModelTest, int count = 10_000) + { + var parentHash = (await dataModelTest.WriteNextChange(dataModelTest.SetWord(Guid.NewGuid(), "entity 1"))).Hash; + for (var i = 0; i < count; i++) { var change = dataModelTest.SetWord(Guid.NewGuid(), $"entity {i}"); var commitId = Guid.NewGuid(); @@ -94,19 +123,8 @@ public async Task SimpleAddChangePerformanceTest() } await dataModelTest.DbContext.SaveChangesAsync(); - await dataModelTest.WriteNextChange(dataModelTest.SetWord(Guid.NewGuid(), "entity3")); - - - await StartTrace(); - start = Stopwatch.GetTimestamp(); - await dataModelTest.WriteNextChange(dataModelTest.SetWord(Guid.NewGuid(), "entity1")); - // var snapshots = await dataModelTest.CrdtRepository.CurrenSimpleSnapshots().ToArrayAsync(); - var runtimeAddChange10000Snapshots = Stopwatch.GetElapsedTime(start); - StopTrace(); - runtimeAddChange10000Snapshots.Should() - .BeCloseTo(runtimeAddChange1Snapshot, runtimeAddChange1Snapshot / 10); - // snapshots.Should().HaveCount(1002); - await dataModelTest.DisposeAsync(); + //ensure changes were made correctly + await dataModelTest.WriteNextChange(dataModelTest.SetWord(Guid.NewGuid(), "entity after bulk insert")); } private class XUnitBenchmarkLogger(ITestOutputHelper output) : ILogger @@ -158,7 +176,7 @@ public void Flush() } } -[SimpleJob(RunStrategy.Monitoring, warmupCount: 2)] +[SimpleJob(RunStrategy.Throughput, warmupCount: 2)] public class DataModelPerformanceBenchmarks { private DataModelTestBase _templateModel = null!; @@ -169,32 +187,33 @@ public class DataModelPerformanceBenchmarks [GlobalSetup] public void GlobalSetup() { - _templateModel = new DataModelTestBase(); - for (var i = 0; i < StartingSnapshots; i++) - { - _ = _templateModel.WriteNextChange(_templateModel.SetWord(Guid.NewGuid(), $"entity {i}")).Result; - } + _templateModel = new DataModelTestBase(alwaysValidate: false); + DataModelPerformanceTests.BulkInsertChanges(_templateModel, StartingSnapshots).GetAwaiter().GetResult(); } - [Params(0, 1000)] + [Params(0, 1000, 10_000)] public int StartingSnapshots { get; set; } [IterationSetup] public void IterationSetup() { - _emptyDataModel = new(); - _dataModelTestBase = _templateModel.ForkDatabase(); + _emptyDataModel = new(alwaysValidate: false); + _ = _emptyDataModel.WriteNextChange(_emptyDataModel.SetWord(Guid.NewGuid(), "entity1")).Result; + _dataModelTestBase = _templateModel.ForkDatabase(false); } - - [Benchmark(Baseline = true)] + + [Benchmark(Baseline = true), BenchmarkCategory("WriteChange")] public Commit AddSingleChangePerformance() { return _emptyDataModel.WriteNextChange(_emptyDataModel.SetWord(Guid.NewGuid(), "entity1")).Result; } - - [Benchmark] + + [Benchmark, BenchmarkCategory("WriteChange")] public Commit AddSingleChangeWithManySnapshots() { + var count = _dataModelTestBase.DbContext.Snapshots.Count(); + // had a bug where there were no snapshots, this means the test was useless, this is slower, but it's better that then a useless test + if (count < (StartingSnapshots - 5)) throw new Exception($"Not enough snapshots, found {count}"); return _dataModelTestBase.WriteNextChange(_dataModelTestBase.SetWord(Guid.NewGuid(), "entity1")).Result; } diff --git a/src/SIL.Harmony.Tests/DataModelTestBase.cs b/src/SIL.Harmony.Tests/DataModelTestBase.cs index 190b296..de2329b 100644 --- a/src/SIL.Harmony.Tests/DataModelTestBase.cs +++ b/src/SIL.Harmony.Tests/DataModelTestBase.cs @@ -8,6 +8,7 @@ using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection.Extensions; +using Microsoft.Extensions.Options; using SIL.Harmony.Db; namespace SIL.Harmony.Tests; @@ -21,9 +22,9 @@ public class DataModelTestBase : IAsyncLifetime internal readonly CrdtRepository CrdtRepository; protected readonly MockTimeProvider MockTimeProvider = new(); - public DataModelTestBase(bool saveToDisk) : this(saveToDisk + public DataModelTestBase(bool saveToDisk = false, bool alwaysValidate = true) : this(saveToDisk ? new SqliteConnection("Data Source=test.db") - : new SqliteConnection("Data Source=:memory:")) + : new SqliteConnection("Data Source=:memory:"), alwaysValidate) { } @@ -31,10 +32,12 @@ public DataModelTestBase() : this(new SqliteConnection("Data Source=:memory:")) { } - public DataModelTestBase(SqliteConnection connection) + public DataModelTestBase(SqliteConnection connection, bool alwaysValidate = true) { _services = new ServiceCollection() .AddCrdtDataSample(connection) + .AddOptions().Configure(config => config.AlwaysValidateCommits = alwaysValidate) + .Services .Replace(ServiceDescriptor.Singleton(MockTimeProvider)) .BuildServiceProvider(); DbContext = _services.GetRequiredService(); @@ -44,14 +47,16 @@ public DataModelTestBase(SqliteConnection connection) CrdtRepository = _services.GetRequiredService(); } - public DataModelTestBase ForkDatabase() + public DataModelTestBase ForkDatabase(bool alwaysValidate = true) { var connection = new SqliteConnection("Data Source=:memory:"); connection.Open(); var existingConnection = DbContext.Database.GetDbConnection() as SqliteConnection; if (existingConnection is null) throw new InvalidOperationException("Database is not SQLite"); existingConnection.BackupDatabase(connection); - return new DataModelTestBase(connection); + var newTestBase = new DataModelTestBase(connection, alwaysValidate); + newTestBase.SetCurrentDate(currentDate.DateTime); + return newTestBase; } public void SetCurrentDate(DateTime dateTime) diff --git a/src/SIL.Harmony/CrdtConfig.cs b/src/SIL.Harmony/CrdtConfig.cs index 94159b9..849bdcc 100644 --- a/src/SIL.Harmony/CrdtConfig.cs +++ b/src/SIL.Harmony/CrdtConfig.cs @@ -15,6 +15,10 @@ public class CrdtConfig /// it does however increase database size as now objects are stored both in snapshots and in their projected tables /// public bool EnableProjectedTables { get; set; } = true; + /// + /// after adding any commit validate the commit history, not great for performance but good for testing. + /// + public bool AlwaysValidateCommits { get; set; } = true; public ChangeTypeListBuilder ChangeTypeListBuilder { get; } = new(); public ObjectTypeListBuilder ObjectTypeListBuilder { get; } = new(); public JsonSerializerOptions JsonSerializerOptions => _lazyJsonSerializerOptions.Value; diff --git a/src/SIL.Harmony/CrdtKernel.cs b/src/SIL.Harmony/CrdtKernel.cs index 7a4f85c..6818b72 100644 --- a/src/SIL.Harmony/CrdtKernel.cs +++ b/src/SIL.Harmony/CrdtKernel.cs @@ -23,7 +23,8 @@ public static IServiceCollection AddCrdtData(this IServiceCollection s services.AddScoped(provider => new DataModel( provider.GetRequiredService(), provider.GetRequiredService(), - provider.GetRequiredService() + provider.GetRequiredService(), + provider.GetRequiredService>() )); return services; } diff --git a/src/SIL.Harmony/DataModel.cs b/src/SIL.Harmony/DataModel.cs index 48140da..c8976f2 100644 --- a/src/SIL.Harmony/DataModel.cs +++ b/src/SIL.Harmony/DataModel.cs @@ -1,6 +1,7 @@ using System.Text.Json; using SIL.Harmony.Core; using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.Options; using SIL.Harmony.Changes; using SIL.Harmony.Db; using SIL.Harmony.Entities; @@ -14,18 +15,20 @@ public class DataModel : ISyncable, IAsyncDisposable /// /// after adding any commit validate the commit history, not great for performance but good for testing. /// - private readonly bool _autoValidate = true; + private bool AlwaysValidate => _crdtConfig.Value.AlwaysValidateCommits; private readonly CrdtRepository _crdtRepository; private readonly JsonSerializerOptions _serializerOptions; private readonly IHybridDateTimeProvider _timeProvider; + private readonly IOptions _crdtConfig; //constructor must be internal because CrdtRepository is internal - internal DataModel(CrdtRepository crdtRepository, JsonSerializerOptions serializerOptions, IHybridDateTimeProvider timeProvider) + internal DataModel(CrdtRepository crdtRepository, JsonSerializerOptions serializerOptions, IHybridDateTimeProvider timeProvider, IOptions crdtConfig) { _crdtRepository = crdtRepository; _serializerOptions = serializerOptions; _timeProvider = timeProvider; + _crdtConfig = crdtConfig; } @@ -88,7 +91,7 @@ private async Task Add(Commit commit, bool deferSnapshotUpdates) //if there are deferred commits, update snapshots with them first if (_deferredCommits is not []) await UpdateSnapshotsByDeferredCommits(); await UpdateSnapshots(commit, [commit]); - if (_autoValidate) await ValidateCommits(); + if (AlwaysValidate) await ValidateCommits(); } else { @@ -109,7 +112,7 @@ private async Task UpdateSnapshotsByDeferredCommits() var oldestChange = commits.MinBy(c => c.CompareKey); if (oldestChange is null) return; await UpdateSnapshots(oldestChange, commits.ToArray()); - if (_autoValidate) await ValidateCommits(); + if (AlwaysValidate) await ValidateCommits(); } @@ -147,8 +150,18 @@ ValueTask ISyncable.ShouldSync() private async Task UpdateSnapshots(Commit oldestAddedCommit, Commit[] newCommits) { await _crdtRepository.DeleteStaleSnapshots(oldestAddedCommit); - var modelSnapshot = await GetProjectSnapshot(true); - var snapshotWorker = new SnapshotWorker(modelSnapshot.Snapshots, _crdtRepository); + Dictionary snapshotLookup; + //this is a performance optimization to avoid loading all the snapshots, this number is somewhat arbitrary + if (newCommits.Length > 10) + { + snapshotLookup = await _crdtRepository.CurrentSnapshots().ToDictionaryAsync(s => s.EntityId, s => (Guid?) s.Id); + } + else + { + snapshotLookup = []; + } + + var snapshotWorker = new SnapshotWorker(snapshotLookup, _crdtRepository); await snapshotWorker.UpdateSnapshots(oldestAddedCommit, newCommits); } @@ -180,7 +193,7 @@ private async Task ValidateCommits() public async Task GetLatestSnapshotByObjectId(Guid entityId) { - return await _crdtRepository.GetCurrentSnapshotByObjectId(entityId); + return await _crdtRepository.GetCurrentSnapshotByObjectId(entityId) ?? throw new ArgumentException($"unable to find snapshot for entity {entityId}"); } public async Task GetLatest(Guid objectId) where T : class, IObjectBase diff --git a/src/SIL.Harmony/Db/CrdtRepository.cs b/src/SIL.Harmony/Db/CrdtRepository.cs index 7e5d895..ebfc20f 100644 --- a/src/SIL.Harmony/Db/CrdtRepository.cs +++ b/src/SIL.Harmony/Db/CrdtRepository.cs @@ -40,6 +40,9 @@ public async Task HasCommit(Guid commitId) public async Task DeleteStaleSnapshots(Commit oldestChange) { //use the oldest commit added to clear any snapshots that are based on a now incomplete history + //this is a performance optimization to avoid deleting snapshots where there are none to delete + var mostRecentCommit = await _dbContext.Snapshots.MaxAsync(s => (DateTimeOffset?)s.Commit.HybridDateTime.DateTime); + if (mostRecentCommit < oldestChange.HybridDateTime.DateTime) return; await _dbContext.Snapshots .WhereAfter(oldestChange) .ExecuteDeleteAsync(); @@ -141,14 +144,22 @@ public async Task GetCommitsAfter(Commit? commit) public async Task FindSnapshot(Guid id) { + //try to find the snapshot in the local cache first + var entry = _dbContext.Snapshots.Local.FindEntry(id); + if (entry is not null) + { + //ensure the commit is loaded, won't load if already loaded + await entry.Reference(s => s.Commit).LoadAsync(); + return entry.Entity; + } return await _dbContext.Snapshots.Include(s => s.Commit).SingleOrDefaultAsync(s => s.Id == id); } - public async Task GetCurrentSnapshotByObjectId(Guid objectId) + public async Task GetCurrentSnapshotByObjectId(Guid objectId) { return await _dbContext.Snapshots.Include(s => s.Commit) .DefaultOrder() - .LastAsync(s => s.EntityId == objectId && (ignoreChangesAfter == null || s.Commit.DateTime <= ignoreChangesAfter)); + .LastOrDefaultAsync(s => s.EntityId == objectId && (ignoreChangesAfter == null || s.Commit.DateTime <= ignoreChangesAfter)); } public async Task GetObjectBySnapshotId(Guid snapshotId) diff --git a/src/SIL.Harmony/SnapshotWorker.cs b/src/SIL.Harmony/SnapshotWorker.cs index 11507e5..7b2a970 100644 --- a/src/SIL.Harmony/SnapshotWorker.cs +++ b/src/SIL.Harmony/SnapshotWorker.cs @@ -12,7 +12,7 @@ namespace SIL.Harmony; /// internal class SnapshotWorker { - private readonly IReadOnlyDictionary? _snapshots; + private readonly Dictionary _snapshotLookup; private readonly CrdtRepository _crdtRepository; private readonly Dictionary _pendingSnapshots = []; private readonly List _newIntermediateSnapshots = []; @@ -21,6 +21,7 @@ private SnapshotWorker(Dictionary snapshots, CrdtRepositor { _pendingSnapshots = snapshots; _crdtRepository = crdtRepository; + _snapshotLookup = []; } internal static async Task> ApplyCommitsToSnapshots(Dictionary snapshots, @@ -33,9 +34,14 @@ internal static async Task> ApplyCommitsToSnaps return snapshots; } - internal SnapshotWorker(IReadOnlyDictionary snapshots, CrdtRepository crdtRepository) + /// + /// + /// + /// a dictionary of entity id to latest snapshot id + /// + internal SnapshotWorker(Dictionary snapshotLookup, CrdtRepository crdtRepository) { - _snapshots = snapshots; + _snapshotLookup = snapshotLookup; _crdtRepository = crdtRepository; } @@ -168,12 +174,16 @@ private async ValueTask MarkDeleted(Guid deletedEntityId, Commit commit) return snapshot; } - if (_snapshots?.TryGetValue(entityId, out var simpleSnapshot) == true) + if (_snapshotLookup.TryGetValue(entityId, out var snapshotId)) { - return await _crdtRepository.FindSnapshot(simpleSnapshot.Id); + if (snapshotId is null) return null; + return await _crdtRepository.FindSnapshot(snapshotId.Value); } - return null; + snapshot = await _crdtRepository.GetCurrentSnapshotByObjectId(entityId); + _snapshotLookup[entityId] = snapshot?.Id; + + return snapshot; } private void AddSnapshot(ObjectSnapshot snapshot) From 812eeeb55a0d3962010bd7804ddf00dcac73dc92 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Thu, 29 Aug 2024 15:45:38 +0700 Subject: [PATCH 07/20] disable task wait warnings in benchmark code --- .../DataModelPerformanceTests.cs | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs b/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs index eccfd90..48fb253 100644 --- a/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs +++ b/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs @@ -60,7 +60,7 @@ private void StopTrace() private static async Task MeasureTime(Func action, int iterations = 10) { - TimeSpan total = TimeSpan.Zero; + var total = TimeSpan.Zero; for (var i = 0; i < iterations; i++) { var start = Stopwatch.GetTimestamp(); @@ -74,7 +74,7 @@ private static async Task MeasureTime(Func action, int iteration public async Task SimpleAddChangePerformanceTest() { //disable validation because it's slow - var dataModelTest = new DataModelTestBase(false, alwaysValidate: false); + var dataModelTest = new DataModelTestBase(alwaysValidate: false); // warmup the code, this causes jit to run and keeps our actual test below consistent await dataModelTest.WriteNextChange(dataModelTest.SetWord(Guid.NewGuid(), "entity 0")); var runtimeAddChange1Snapshot = await MeasureTime(() => dataModelTest.WriteNextChange(dataModelTest.SetWord(Guid.NewGuid(), "entity 1")).AsTask()); @@ -131,24 +131,21 @@ private class XUnitBenchmarkLogger(ITestOutputHelper output) : ILogger { public string Id => nameof(XUnitBenchmarkLogger); public int Priority => 0; - private StringBuilder? sb; + private StringBuilder? _sb; public void Write(LogKind logKind, string text) { - if (sb == null) - { - sb = new StringBuilder(); - } + _sb ??= new StringBuilder(); - sb.Append(text); + _sb.Append(text); } public void WriteLine() { - if (sb is not null) + if (_sb is not null) { - output.WriteLine(sb.ToString()); - sb.Clear(); + output.WriteLine(_sb.ToString()); + _sb.Clear(); } else output.WriteLine(string.Empty); @@ -156,10 +153,10 @@ public void WriteLine() public void WriteLine(LogKind logKind, string text) { - if (sb is not null) + if (_sb is not null) { - output.WriteLine(sb.Append(text).ToString()); - sb.Clear(); + output.WriteLine(_sb.Append(text).ToString()); + _sb.Clear(); } else output.WriteLine(text); @@ -167,15 +164,17 @@ public void WriteLine(LogKind logKind, string text) public void Flush() { - if (sb is not null) + if (_sb is not null) { - output.WriteLine(sb.ToString()); - sb.Clear(); + output.WriteLine(_sb.ToString()); + _sb.Clear(); } } } } +// disable warning about waiting for sync code, benchmarkdotnet does not support async code, and it doesn't deadlock when waiting. +#pragma warning disable VSTHRD002 [SimpleJob(RunStrategy.Throughput, warmupCount: 2)] public class DataModelPerformanceBenchmarks { @@ -229,4 +228,5 @@ public void GlobalCleanup() { _templateModel.DisposeAsync().GetAwaiter().GetResult(); } -} \ No newline at end of file +} +#pragma warning restore VSTHRD002 \ No newline at end of file From 2690f71c75acd064330defb35da323cb6447cb10 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Thu, 29 Aug 2024 15:54:53 +0700 Subject: [PATCH 08/20] DataModelPerformanceTests doesn't need to inherit from DataModelTestBase --- src/SIL.Harmony.Tests/DataModelPerformanceTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs b/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs index 48fb253..b583447 100644 --- a/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs +++ b/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs @@ -16,7 +16,7 @@ namespace SIL.Harmony.Tests; -public class DataModelPerformanceTests(ITestOutputHelper output) : DataModelTestBase +public class DataModelPerformanceTests(ITestOutputHelper output) { [Fact] public void AddingChangePerformance() From fd59105adeba3645d4687eb074aec4deb34af0ec Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Thu, 29 Aug 2024 16:08:35 +0700 Subject: [PATCH 09/20] re-enable some tests that were disabled because they were slow --- src/SIL.Harmony.Tests/ModelSnapshotTests.cs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/SIL.Harmony.Tests/ModelSnapshotTests.cs b/src/SIL.Harmony.Tests/ModelSnapshotTests.cs index 04bbcf9..e1aca1a 100644 --- a/src/SIL.Harmony.Tests/ModelSnapshotTests.cs +++ b/src/SIL.Harmony.Tests/ModelSnapshotTests.cs @@ -36,9 +36,8 @@ public async Task ModelSnapshotShowsMultipleChanges() [Theory] [InlineData(10)] - // [InlineData(100)] - //not going higher because we run into insert performance issues - // [InlineData(1_000)] + [InlineData(100)] + [InlineData(1_000)] public async Task CanGetSnapshotFromEarlier(int changeCount) { var entityId = Guid.NewGuid(); @@ -65,14 +64,10 @@ public async Task CanGetSnapshotFromEarlier(int changeCount) } } - - /// - /// test isn't super useful as a perf test as 99% of the time is just inserting data - /// - [Fact(Skip = "Slow test")] + [Fact] public async Task WorstCaseSnapshotReApply() { - int changeCount = 10_000; + int changeCount = 1_000; var entityId = Guid.NewGuid(); await WriteNextChange(SetWord(entityId, "first")); //adding all in one AddRange means there's sparse snapshots From 93a14ff319bce9ca0f7502ac17e52f63c5299567 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Mon, 2 Sep 2024 09:52:27 +0700 Subject: [PATCH 10/20] remove benchmarkdotnet test adapter, we want to run the benchmarks manually and verify the performance as part of a test --- src/SIL.Harmony.Tests/SIL.Harmony.Tests.csproj | 1 - 1 file changed, 1 deletion(-) diff --git a/src/SIL.Harmony.Tests/SIL.Harmony.Tests.csproj b/src/SIL.Harmony.Tests/SIL.Harmony.Tests.csproj index eb1fa7c..c5b24ee 100644 --- a/src/SIL.Harmony.Tests/SIL.Harmony.Tests.csproj +++ b/src/SIL.Harmony.Tests/SIL.Harmony.Tests.csproj @@ -8,7 +8,6 @@ - all From fedce8a8f4946b977e93dad1ff5e41d091389b08 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Mon, 2 Sep 2024 14:48:59 +0700 Subject: [PATCH 11/20] add test to ensure changes are round tripped through the db properly --- src/SIL.Harmony.Tests/RepositoryTests.cs | 34 ++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/src/SIL.Harmony.Tests/RepositoryTests.cs b/src/SIL.Harmony.Tests/RepositoryTests.cs index 310e745..f55d936 100644 --- a/src/SIL.Harmony.Tests/RepositoryTests.cs +++ b/src/SIL.Harmony.Tests/RepositoryTests.cs @@ -5,6 +5,8 @@ using SIL.Harmony.Tests.Mocks; using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.DependencyInjection; +using SIL.Harmony.Changes; +using SIL.Harmony.Sample.Changes; namespace SIL.Harmony.Tests; @@ -36,8 +38,23 @@ public async Task DisposeAsync() await _services.DisposeAsync(); } - private Commit Commit(Guid id, HybridDateTime hybridDateTime) => - new(id) { ClientId = Guid.Empty, HybridDateTime = hybridDateTime }; + private Commit Commit(Guid id, HybridDateTime hybridDateTime) + { + var entityId = Guid.NewGuid(); + return new Commit(id) + { + ClientId = Guid.Empty, HybridDateTime = hybridDateTime, ChangeEntities = + [ + new ChangeEntity() + { + Change = new SetWordTextChange(entityId, "test"), + CommitId = id, + EntityId = entityId, + Index = 0 + } + ] + }; + } private ObjectSnapshot Snapshot(Guid entityId, Guid commitId, HybridDateTime time) { @@ -257,4 +274,17 @@ await _repository.AddCommits([ })); changes.MissingFromClient.Select(c => c.DateTime.ToUnixTimeMilliseconds()).Should().ContainSingle("because {0} is only before the last commit", commit2Time.DateTime.ToUnixTimeMilliseconds()); } + + [Fact] + public async Task AddCommit_RoundTripsData() + { + var commit = Commit(Guid.NewGuid(), Time(1, 0)); + await _repository.AddCommit(commit); + + var queriedCommit = _repository.CurrentCommits() + .AsNoTracking()//ensures that the commit which is tracked above is not returned + .Include(c => c.ChangeEntities) + .Should().ContainSingle().Subject; + queriedCommit.Should().NotBeSameAs(commit).And.BeEquivalentTo(commit); + } } From af44a4e6f678ebd6638d56306ab60b1b52754015 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Mon, 2 Sep 2024 16:18:13 +0700 Subject: [PATCH 12/20] Fix bug where changes outside the model were saved in snapshots, covered with `ChangesToSnapshotsAreNotSaved`. Fixing that uncovered tests that depend on snapshots being tracked between test phases. Also fixed tests which were over-specifying the expected time when there was a mismatch in time offset that was acceptable. --- .../DataModelSimpleChanges.cs | 16 ++++++++++ src/SIL.Harmony.Tests/ExampleSentenceTests.cs | 5 +-- src/SIL.Harmony.Tests/ModelSnapshotTests.cs | 2 +- src/SIL.Harmony.Tests/RepositoryTests.cs | 25 ++++++++------- src/SIL.Harmony/Db/CrdtRepository.cs | 32 ++++++++----------- 5 files changed, 47 insertions(+), 33 deletions(-) diff --git a/src/SIL.Harmony.Tests/DataModelSimpleChanges.cs b/src/SIL.Harmony.Tests/DataModelSimpleChanges.cs index cc389f1..a7af943 100644 --- a/src/SIL.Harmony.Tests/DataModelSimpleChanges.cs +++ b/src/SIL.Harmony.Tests/DataModelSimpleChanges.cs @@ -192,6 +192,22 @@ public async Task CanModifyAnEntryAfterDelete() word.DeletedAt.Should().Be(deleteCommit.DateTime); } + [Fact] + public async Task ChangesToSnapshotsAreNotSaved() + { + await WriteNextChange(SetWord(_entity1Id, "test-value")); + var word = await DataModel.GetLatest(_entity1Id); + word!.Text.Should().Be("test-value"); + + //change made outside the model, should not be saved when writing the next change + word.Note = "a note"; + + var commit = await WriteNextChange(SetWord(_entity1Id, "after-change")); + var objectSnapshot = commit.Snapshots.Should().ContainSingle().Subject; + objectSnapshot.Entity.Is().Text.Should().Be("after-change"); + objectSnapshot.Entity.Is().Note.Should().BeNull(); + } + // [Fact] // public async Task CanGetEntryLinq2Db() diff --git a/src/SIL.Harmony.Tests/ExampleSentenceTests.cs b/src/SIL.Harmony.Tests/ExampleSentenceTests.cs index 259c6c9..fa6128c 100644 --- a/src/SIL.Harmony.Tests/ExampleSentenceTests.cs +++ b/src/SIL.Harmony.Tests/ExampleSentenceTests.cs @@ -65,7 +65,8 @@ public async Task CanEditExampleText() example.Should().NotBeNull(); await WriteNextChange(EditExampleChange.EditExample(example!, text => text.Insert(3, "What's up "))); - example = await DataModel.GetLatest(exampleId); - example!.YText.ToString().Should().Be("Yo What's up Bob"); + var actualExample = await DataModel.GetLatest(exampleId); + actualExample.Should().NotBeSameAs(example); + actualExample!.YText.ToString().Should().Be("Yo What's up Bob"); } } \ No newline at end of file diff --git a/src/SIL.Harmony.Tests/ModelSnapshotTests.cs b/src/SIL.Harmony.Tests/ModelSnapshotTests.cs index e1aca1a..e3fa08e 100644 --- a/src/SIL.Harmony.Tests/ModelSnapshotTests.cs +++ b/src/SIL.Harmony.Tests/ModelSnapshotTests.cs @@ -81,7 +81,7 @@ await AddCommitsViaSync(Enumerable.Range(0, changeCount) var computedModelSnapshots = await DataModel.GetSnapshotsAt(latestSnapshot.Commit.DateTime); var entitySnapshot = computedModelSnapshots.Should().ContainSingle().Subject.Value; - entitySnapshot.Should().BeEquivalentTo(latestSnapshot, options => options.Excluding(snapshot => snapshot.Id)); + entitySnapshot.Should().BeEquivalentTo(latestSnapshot, options => options.Excluding(snapshot => snapshot.Id).Excluding(snapshot => snapshot.Commit)); var latestSnapshotEntry = latestSnapshot.Entity.Is(); var entitySnapshotEntry = entitySnapshot.Entity.Is(); entitySnapshotEntry.Text.Should().Be(latestSnapshotEntry.Text); diff --git a/src/SIL.Harmony.Tests/RepositoryTests.cs b/src/SIL.Harmony.Tests/RepositoryTests.cs index f55d936..fcfc972 100644 --- a/src/SIL.Harmony.Tests/RepositoryTests.cs +++ b/src/SIL.Harmony.Tests/RepositoryTests.cs @@ -157,12 +157,13 @@ public async Task CurrentSnapshots_Works() public async Task CurrentSnapshots_GroupsByEntityIdSortedByTime() { var entityId = Guid.NewGuid(); + var expectedTime = Time(2, 0); await _repository.AddSnapshots([ Snapshot(entityId, Guid.NewGuid(), Time(1, 0)), - Snapshot(entityId, Guid.NewGuid(), Time(2, 0)), + Snapshot(entityId, Guid.NewGuid(), expectedTime), ]); - var snapshots = await _repository.CurrentSnapshots().ToArrayAsync(); - snapshots.Should().ContainSingle().Which.Commit.HybridDateTime.DateTime.Hour.Should().Be(2); + var snapshots = await _repository.CurrentSnapshots().Include(s => s.Commit).ToArrayAsync(); + snapshots.Should().ContainSingle().Which.Commit.HybridDateTime.Should().BeEquivalentTo(expectedTime); } [Fact] @@ -173,7 +174,7 @@ await _repository.AddSnapshots([ Snapshot(entityId, Guid.NewGuid(), Time(1, 0)), Snapshot(entityId, Guid.NewGuid(), Time(1, 1)), ]); - var snapshots = await _repository.CurrentSnapshots().ToArrayAsync(); + var snapshots = await _repository.CurrentSnapshots().Include(s => s.Commit).ToArrayAsync(); snapshots.Should().ContainSingle().Which.Commit.HybridDateTime.Counter.Should().Be(1); } @@ -187,7 +188,7 @@ await _repository.AddSnapshots([ Snapshot(entityId, ids[0], time), Snapshot(entityId, ids[1], time), ]); - var snapshots = await _repository.CurrentSnapshots().ToArrayAsync(); + var snapshots = await _repository.CurrentSnapshots().Include(s => s.Commit).ToArrayAsync(); snapshots.Should().ContainSingle().Which.Commit.Id.Should().Be(ids[1]); } @@ -195,17 +196,19 @@ await _repository.AddSnapshots([ public async Task CurrentSnapshots_FiltersByDate() { var entityId = Guid.NewGuid(); + var commit1Time = Time(1, 0); + var commit2Time = Time(3, 0); await _repository.AddSnapshots([ - Snapshot(entityId, Guid.NewGuid(), Time(1, 0)), - Snapshot(entityId, Guid.NewGuid(), Time(3, 0)), + Snapshot(entityId, Guid.NewGuid(), commit1Time), + Snapshot(entityId, Guid.NewGuid(), commit2Time), ]); - var snapshots = await _repository.CurrentSnapshots().ToArrayAsync(); - snapshots.Should().ContainSingle().Which.Commit.HybridDateTime.DateTime.Hour.Should().Be(3); + var snapshots = await _repository.CurrentSnapshots().Include(s => s.Commit).ToArrayAsync(); + snapshots.Should().ContainSingle().Which.Commit.HybridDateTime.Should().BeEquivalentTo(commit2Time); var newCurrentTime = Time(2, 0).DateTime; - snapshots = await _repository.GetScopedRepository(newCurrentTime).CurrentSnapshots().ToArrayAsync(); - snapshots.Should().ContainSingle().Which.Commit.HybridDateTime.DateTime.Hour.Should().Be(1); + snapshots = await _repository.GetScopedRepository(newCurrentTime).CurrentSnapshots().Include(s => s.Commit).ToArrayAsync(); + snapshots.Should().ContainSingle().Which.Commit.HybridDateTime.Should().BeEquivalentTo(commit1Time); } [Fact] diff --git a/src/SIL.Harmony/Db/CrdtRepository.cs b/src/SIL.Harmony/Db/CrdtRepository.cs index ebfc20f..f0ba970 100644 --- a/src/SIL.Harmony/Db/CrdtRepository.cs +++ b/src/SIL.Harmony/Db/CrdtRepository.cs @@ -11,6 +11,7 @@ namespace SIL.Harmony.Db; internal class CrdtRepository(ICrdtDbContext _dbContext, IOptions crdtConfig, DateTimeOffset? ignoreChangesAfter = null) { + private IQueryable Snapshots => _dbContext.Snapshots.AsNoTracking(); public Task BeginTransactionAsync() { return _dbContext.Database.BeginTransactionAsync(); @@ -41,9 +42,9 @@ public async Task DeleteStaleSnapshots(Commit oldestChange) { //use the oldest commit added to clear any snapshots that are based on a now incomplete history //this is a performance optimization to avoid deleting snapshots where there are none to delete - var mostRecentCommit = await _dbContext.Snapshots.MaxAsync(s => (DateTimeOffset?)s.Commit.HybridDateTime.DateTime); + var mostRecentCommit = await Snapshots.MaxAsync(s => (DateTimeOffset?)s.Commit.HybridDateTime.DateTime); if (mostRecentCommit < oldestChange.HybridDateTime.DateTime) return; - await _dbContext.Snapshots + await Snapshots .WhereAfter(oldestChange) .ExecuteDeleteAsync(); } @@ -72,7 +73,7 @@ PARTITION BY "s1"."EntityId" FROM "Snapshots" AS "s" INNER JOIN LatestSnapshots AS "ls" ON "s"."Id" = "ls"."LatestSnapshotId" GROUP BY s.EntityId -"""); +""").AsNoTracking(); } public IAsyncEnumerable CurrenSimpleSnapshots(bool includeDeleted = false) @@ -95,7 +96,7 @@ public IAsyncEnumerable CurrenSimpleSnapshots(bool includeDelete private IQueryable CurrentSnapshotIds() { - return _dbContext.Snapshots.GroupBy(s => s.EntityId, + return Snapshots.GroupBy(s => s.EntityId, (entityId, snapshots) => snapshots //unfortunately this can not be extracted into a helper because the whole thing is part of an expression .OrderByDescending(c => c.Commit.HybridDateTime.DateTime) @@ -143,28 +144,20 @@ public async Task GetCommitsAfter(Commit? commit) } public async Task FindSnapshot(Guid id) - { - //try to find the snapshot in the local cache first - var entry = _dbContext.Snapshots.Local.FindEntry(id); - if (entry is not null) - { - //ensure the commit is loaded, won't load if already loaded - await entry.Reference(s => s.Commit).LoadAsync(); - return entry.Entity; - } - return await _dbContext.Snapshots.Include(s => s.Commit).SingleOrDefaultAsync(s => s.Id == id); + { + return await Snapshots.Include(s => s.Commit).SingleOrDefaultAsync(s => s.Id == id); } public async Task GetCurrentSnapshotByObjectId(Guid objectId) { - return await _dbContext.Snapshots.Include(s => s.Commit) + return await Snapshots.Include(s => s.Commit) .DefaultOrder() .LastOrDefaultAsync(s => s.EntityId == objectId && (ignoreChangesAfter == null || s.Commit.DateTime <= ignoreChangesAfter)); } public async Task GetObjectBySnapshotId(Guid snapshotId) { - var entity = await _dbContext.Snapshots + var entity = await Snapshots .Where(s => s.Id == snapshotId) .Select(s => s.Entity) .SingleOrDefaultAsync() @@ -174,7 +167,7 @@ public async Task GetObjectBySnapshotId(Guid snapshotId) public async Task GetCurrent(Guid objectId) where T: class, IObjectBase { - var snapshot = await _dbContext.Snapshots + var snapshot = await Snapshots .DefaultOrder() .LastOrDefaultAsync(s => s.EntityId == objectId && (ignoreChangesAfter == null || s.Commit.DateTime <= ignoreChangesAfter)); return snapshot?.Entity.Is(); @@ -220,8 +213,9 @@ public async ValueTask AddIfNew(IEnumerable snapshots) { foreach (var snapshot in snapshots) { - if (_dbContext.Snapshots.Local.Contains(snapshot)) continue; - _dbContext.Snapshots.Add(snapshot); + + if (_dbContext.Snapshots.Local.FindEntry(snapshot.Id) is not null) continue; + _dbContext.Add(snapshot); await SnapshotAdded(snapshot); } From de6236b9ea322f6f961e99b7555c087e51986aca Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Mon, 2 Sep 2024 16:28:49 +0700 Subject: [PATCH 13/20] add HybridDateTimeTests.cs --- .../Core/HybridDateTimeTests.cs | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 src/SIL.Harmony.Tests/Core/HybridDateTimeTests.cs diff --git a/src/SIL.Harmony.Tests/Core/HybridDateTimeTests.cs b/src/SIL.Harmony.Tests/Core/HybridDateTimeTests.cs new file mode 100644 index 0000000..9c5a16a --- /dev/null +++ b/src/SIL.Harmony.Tests/Core/HybridDateTimeTests.cs @@ -0,0 +1,77 @@ +using SIL.Harmony.Core; + +namespace SIL.Harmony.Tests.Core; + +public class HybridDateTimeTests +{ + [Fact] + public void Equals_TrueWhenTheSame() + { + var dateTime = new HybridDateTime(new DateTimeOffset(2000, 1, 1, 0, 0, 0, TimeSpan.Zero), 0); + var otherDateTime = new HybridDateTime(new DateTimeOffset(2000, 1, 1, 0, 0, 0, TimeSpan.Zero), 0); + + (dateTime == otherDateTime).Should().BeTrue(); + } + + [Fact] + public void Equals_FalseWhenDifferentDateTime() + { + var dateTime = new HybridDateTime(new DateTimeOffset(2000, 1, 1, 0, 0, 0, TimeSpan.Zero), 0); + var otherDateTime = new HybridDateTime(new DateTimeOffset(2001, 1, 1, 0, 0, 0, TimeSpan.Zero), 0); + + (dateTime != otherDateTime).Should().BeTrue(); + } + + [Fact] + public void Equals_FalseWhenDifferentCounter() + { + var dateTime = new HybridDateTime(new DateTimeOffset(2000, 1, 1, 0, 0, 0, TimeSpan.Zero), 0); + var otherDateTime = new HybridDateTime(new DateTimeOffset(2000, 1, 1, 0, 0, 0, TimeSpan.Zero), 1); + + dateTime.Should().NotBe(otherDateTime); + } + + [Fact] + public void Constructor_ThrowsArgumentOutOfRangeExceptionWhenCounterIsNegative() + { + Action action = () => new HybridDateTime(new DateTimeOffset(2000, 1, 1, 0, 0, 0, TimeSpan.Zero), -1); + action.Should().Throw(); + } + + [Fact] + public void CompareTo_ReturnsOneWhenOtherIsNull() + { + var dateTime = new HybridDateTime(new DateTimeOffset(2000, 1, 1, 0, 0, 0, TimeSpan.Zero), 0); + dateTime.CompareTo(null).Should().Be(1); + } + + [Fact] + public void CompareTo_ReturnsNegativeOneWhenThisIsLessThanOther() + { + var dateTime = new HybridDateTime(new DateTimeOffset(2000, 1, 1, 0, 0, 0, TimeSpan.Zero), 0); + var otherDateTime = new HybridDateTime(new DateTimeOffset(2000, 1, 2, 0, 0, 0, TimeSpan.Zero), 0); + + var result = dateTime.CompareTo(otherDateTime); + result.Should().BeLessThan(0); + } + + [Fact] + public void CompareTo_ReturnsZeroWhenThisIsEqualToOther() + { + var dateTime = new HybridDateTime(new DateTimeOffset(2000, 1, 1, 0, 0, 0, TimeSpan.Zero), 0); + var otherDateTime = new HybridDateTime(new DateTimeOffset(2000, 1, 1, 0, 0, 0, TimeSpan.Zero), 0); + + var result = dateTime.CompareTo(otherDateTime); + result.Should().Be(0); + } + + [Fact] + public void CompareTo_ReturnsOneWhenThisIsGreaterThanOther() + { + var dateTime = new HybridDateTime(new DateTimeOffset(2000, 1, 2, 0, 0, 0, TimeSpan.Zero), 0); + var otherDateTime = new HybridDateTime(new DateTimeOffset(2000, 1, 1, 0, 0, 0, TimeSpan.Zero), 0); + + var result = dateTime.CompareTo(otherDateTime); + result.Should().Be(1); + } +} \ No newline at end of file From 819cf2a0b2c1a63ad7fbb9ebc019ff24bb2a70d7 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Wed, 4 Sep 2024 11:00:50 +0700 Subject: [PATCH 14/20] Define interface to describe Changes return value, should help APIs implement the matching return type without restricting the collection type. Split `GetChanges` helper to `GetMissingCommits` which returns an IAsyncEnumerable, this allows the server to use less memory when responding to requests with a large number of commits --- src/SIL.Harmony.Core/QueryHelpers.cs | 43 +++++++++++++++++++--------- src/SIL.Harmony.Core/SyncState.cs | 9 ++++-- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/src/SIL.Harmony.Core/QueryHelpers.cs b/src/SIL.Harmony.Core/QueryHelpers.cs index 14f65b0..6f4d565 100644 --- a/src/SIL.Harmony.Core/QueryHelpers.cs +++ b/src/SIL.Harmony.Core/QueryHelpers.cs @@ -13,34 +13,49 @@ public static async Task GetSyncState(this IQueryable com return new SyncState(dict); } - public static async Task> GetChanges(this IQueryable commits, SyncState remoteState) where TCommit : CommitBase + public static async Task> GetChanges(this IQueryable commits, + SyncState remoteState) where TCommit : CommitBase { - var newHistory = new List(); - var localSyncState = await commits.GetSyncState(); - foreach (var (clientId, localTimestamp) in localSyncState.ClientHeads) + var localState = await commits.AsNoTracking().GetSyncState(); + return new ChangesResult( + await GetMissingCommits(commits, localState, remoteState).ToArrayAsync(), + localState); + } + + public static async IAsyncEnumerable GetMissingCommits( + this IQueryable commits, + SyncState localState, + SyncState remoteState) where TCommit : CommitBase + { + commits = commits.AsNoTracking(); + foreach (var (clientId, localTimestamp) in localState.ClientHeads) { //client is new to the other history if (!remoteState.ClientHeads.TryGetValue(clientId, out var otherTimestamp)) { //todo slow, it would be better if we could query on client id and get latest changes per client - newHistory.AddRange(await commits.Include(c => c.ChangeEntities).DefaultOrder() - .Where(c => c.ClientId == clientId) - .ToArrayAsync()); + await foreach (var commit in commits.Include(c => c.ChangeEntities).DefaultOrder() + .Where(c => c.ClientId == clientId) + .AsAsyncEnumerable()) + { + yield return commit; + } } //client has newer history than the other history else if (localTimestamp > otherTimestamp) { var otherDt = DateTimeOffset.FromUnixTimeMilliseconds(otherTimestamp); //todo even slower because we need to filter out changes that are already in the other history - newHistory.AddRange((await commits.Include(c => c.ChangeEntities).DefaultOrder() - .Where(c => c.ClientId == clientId && c.HybridDateTime.DateTime > otherDt) - .ToArrayAsync()) - //fixes an issue where the query would include commits that are already in the other history - .Where(c => c.DateTime.ToUnixTimeMilliseconds() > otherTimestamp)); + await foreach (var commit in commits.Include(c => c.ChangeEntities) + .DefaultOrder() + .Where(c => c.ClientId == clientId && c.HybridDateTime.DateTime > otherDt) + .AsAsyncEnumerable()) + { + if (commit.DateTime.ToUnixTimeMilliseconds() > otherTimestamp) + yield return commit; + } } } - - return new(newHistory.ToArray(), localSyncState); } public static IQueryable DefaultOrder(this IQueryable queryable) where T: CommitBase diff --git a/src/SIL.Harmony.Core/SyncState.cs b/src/SIL.Harmony.Core/SyncState.cs index b8ac0d4..57e3e07 100644 --- a/src/SIL.Harmony.Core/SyncState.cs +++ b/src/SIL.Harmony.Core/SyncState.cs @@ -1,8 +1,13 @@ namespace SIL.Harmony.Core; public record SyncState(Dictionary ClientHeads); - -public record ChangesResult(TCommit[] MissingFromClient, SyncState ServerSyncState) where TCommit : CommitBase +public interface IChangesResult +{ + IEnumerable MissingFromClient { get; } + SyncState ServerSyncState { get; } +} +public record ChangesResult(TCommit[] MissingFromClient, SyncState ServerSyncState): IChangesResult where TCommit : CommitBase { + IEnumerable IChangesResult.MissingFromClient => MissingFromClient; public static ChangesResult Empty => new([], new SyncState([])); } From 5508102db01a3e9b28f26fc024c86f5667212de0 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Wed, 4 Sep 2024 12:58:27 +0700 Subject: [PATCH 15/20] add a performance category to our performance tests --- src/SIL.Harmony.Tests/DataModelPerformanceTests.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs b/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs index b583447..65a4bd6 100644 --- a/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs +++ b/src/SIL.Harmony.Tests/DataModelPerformanceTests.cs @@ -16,6 +16,7 @@ namespace SIL.Harmony.Tests; +[Trait("Category", "Performance")] public class DataModelPerformanceTests(ITestOutputHelper output) { [Fact] From 59a1700b8060f0ebd2a54057155d53a5da0f8a69 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Thu, 12 Sep 2024 10:51:58 -1000 Subject: [PATCH 16/20] introduce test to ensure that changes can still be written after an application restart --- src/SIL.Harmony.Tests/DataModelSimpleChanges.cs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/SIL.Harmony.Tests/DataModelSimpleChanges.cs b/src/SIL.Harmony.Tests/DataModelSimpleChanges.cs index a7af943..f909da6 100644 --- a/src/SIL.Harmony.Tests/DataModelSimpleChanges.cs +++ b/src/SIL.Harmony.Tests/DataModelSimpleChanges.cs @@ -33,6 +33,17 @@ public async Task CanUpdateTheNoteField() word.Note.Should().Be("a word note"); } + [Fact] + public async Task CanUpdateAWordAfterRestarting() + { + await WriteNextChange(SetWord(_entity1Id, "test-value")); + var instance2 = ForkDatabase();//creates new services, but copies database. Simulates restarting the application + await instance2.WriteNextChange(new SetWordNoteChange(_entity1Id, "a word note")); + var word = await instance2.DataModel.GetLatest(_entity1Id); + word!.Text.Should().Be("test-value"); + word.Note.Should().Be("a word note"); + } + [Fact] public async Task WritingA2ndChangeDoesNotEffectTheFirstSnapshot() { From 0063f050e8ef3d79f64c81d89f36c8a5dceff489 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Fri, 20 Sep 2024 12:05:24 +0700 Subject: [PATCH 17/20] fix bug where you couldn't submit updates to an existing entity after creating a new data model instance. --- src/SIL.Harmony/Db/CrdtRepository.cs | 11 +++++++---- src/SIL.Harmony/Db/DbSetExtensions.cs | 7 +++++++ src/SIL.Harmony/SnapshotWorker.cs | 4 ++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/SIL.Harmony/Db/CrdtRepository.cs b/src/SIL.Harmony/Db/CrdtRepository.cs index f0ba970..0636008 100644 --- a/src/SIL.Harmony/Db/CrdtRepository.cs +++ b/src/SIL.Harmony/Db/CrdtRepository.cs @@ -143,14 +143,17 @@ public async Task GetCommitsAfter(Commit? commit) .ToArrayAsync(); } - public async Task FindSnapshot(Guid id) + public async Task FindSnapshot(Guid id, bool tracking = false) { - return await Snapshots.Include(s => s.Commit).SingleOrDefaultAsync(s => s.Id == id); + return await Snapshots + .AsTracking(tracking) + .Include(s => s.Commit) + .SingleOrDefaultAsync(s => s.Id == id); } - public async Task GetCurrentSnapshotByObjectId(Guid objectId) + public async Task GetCurrentSnapshotByObjectId(Guid objectId, bool tracking = false) { - return await Snapshots.Include(s => s.Commit) + return await Snapshots.AsTracking(tracking).Include(s => s.Commit) .DefaultOrder() .LastOrDefaultAsync(s => s.EntityId == objectId && (ignoreChangesAfter == null || s.Commit.DateTime <= ignoreChangesAfter)); } diff --git a/src/SIL.Harmony/Db/DbSetExtensions.cs b/src/SIL.Harmony/Db/DbSetExtensions.cs index 73e2e4b..ece85de 100644 --- a/src/SIL.Harmony/Db/DbSetExtensions.cs +++ b/src/SIL.Harmony/Db/DbSetExtensions.cs @@ -1,3 +1,5 @@ +using Microsoft.EntityFrameworkCore; + namespace SIL.Harmony.Db; //todo, I would like to move these extensions into QueryHelperTests but that's in Core and ObjectSnapshot is not part of core @@ -26,4 +28,9 @@ public static IQueryable WhereAfter(this IQueryable AsTracking(this IQueryable queryable, bool tracking = true) where T : class + { + return queryable.AsTracking(tracking ? QueryTrackingBehavior.TrackAll : QueryTrackingBehavior.NoTracking); + } } \ No newline at end of file diff --git a/src/SIL.Harmony/SnapshotWorker.cs b/src/SIL.Harmony/SnapshotWorker.cs index 7b2a970..60ab1dd 100644 --- a/src/SIL.Harmony/SnapshotWorker.cs +++ b/src/SIL.Harmony/SnapshotWorker.cs @@ -177,10 +177,10 @@ private async ValueTask MarkDeleted(Guid deletedEntityId, Commit commit) if (_snapshotLookup.TryGetValue(entityId, out var snapshotId)) { if (snapshotId is null) return null; - return await _crdtRepository.FindSnapshot(snapshotId.Value); + return await _crdtRepository.FindSnapshot(snapshotId.Value, true); } - snapshot = await _crdtRepository.GetCurrentSnapshotByObjectId(entityId); + snapshot = await _crdtRepository.GetCurrentSnapshotByObjectId(entityId, true); _snapshotLookup[entityId] = snapshot?.Id; return snapshot; From 74e5b2eba2d0e87b43ea00a1cad7a7db8037f0df Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Mon, 7 Oct 2024 13:21:37 +0700 Subject: [PATCH 18/20] add node check to prove that it's null before --- src/SIL.Harmony.Tests/DataModelSimpleChanges.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/SIL.Harmony.Tests/DataModelSimpleChanges.cs b/src/SIL.Harmony.Tests/DataModelSimpleChanges.cs index f909da6..661b8fd 100644 --- a/src/SIL.Harmony.Tests/DataModelSimpleChanges.cs +++ b/src/SIL.Harmony.Tests/DataModelSimpleChanges.cs @@ -209,6 +209,7 @@ public async Task ChangesToSnapshotsAreNotSaved() await WriteNextChange(SetWord(_entity1Id, "test-value")); var word = await DataModel.GetLatest(_entity1Id); word!.Text.Should().Be("test-value"); + word.Note.Should().BeNull(); //change made outside the model, should not be saved when writing the next change word.Note = "a note"; From 39204dbc4a2d9a2117756b74416b6ab0740d5000 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Mon, 7 Oct 2024 13:24:32 +0700 Subject: [PATCH 19/20] make it more obvious that the ignore data params are the same in the query --- src/SIL.Harmony/Db/CrdtRepository.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SIL.Harmony/Db/CrdtRepository.cs b/src/SIL.Harmony/Db/CrdtRepository.cs index 0636008..8358e72 100644 --- a/src/SIL.Harmony/Db/CrdtRepository.cs +++ b/src/SIL.Harmony/Db/CrdtRepository.cs @@ -58,7 +58,7 @@ public IQueryable CurrentCommits() public IQueryable CurrentSnapshots() { - //todo this does not respect ignoreChangesAfter + var ignoreDate = ignoreChangesAfter?.UtcDateTime; return _dbContext.Snapshots.FromSql( $""" WITH LatestSnapshots AS (SELECT first_value(s1.Id) @@ -68,7 +68,7 @@ PARTITION BY "s1"."EntityId" ) AS "LatestSnapshotId" FROM "Snapshots" AS "s1" INNER JOIN "Commits" AS "c" ON "s1"."CommitId" = "c"."Id" - WHERE "c"."DateTime" < {ignoreChangesAfter?.UtcDateTime} OR {ignoreChangesAfter} IS NULL) + WHERE "c"."DateTime" < {ignoreDate} OR {ignoreDate} IS NULL) SELECT * FROM "Snapshots" AS "s" INNER JOIN LatestSnapshots AS "ls" ON "s"."Id" = "ls"."LatestSnapshotId" From a76721994412d03f478356ce95de17814dfbb082 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Tue, 8 Oct 2024 00:10:39 +0700 Subject: [PATCH 20/20] instead of looking up all snapshot ids when there's more than 10 commits, lookup just the ids for entities changed --- src/SIL.Harmony/DataModel.cs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/SIL.Harmony/DataModel.cs b/src/SIL.Harmony/DataModel.cs index c8976f2..89be491 100644 --- a/src/SIL.Harmony/DataModel.cs +++ b/src/SIL.Harmony/DataModel.cs @@ -151,10 +151,13 @@ private async Task UpdateSnapshots(Commit oldestAddedCommit, Commit[] newCommits { await _crdtRepository.DeleteStaleSnapshots(oldestAddedCommit); Dictionary snapshotLookup; - //this is a performance optimization to avoid loading all the snapshots, this number is somewhat arbitrary if (newCommits.Length > 10) { - snapshotLookup = await _crdtRepository.CurrentSnapshots().ToDictionaryAsync(s => s.EntityId, s => (Guid?) s.Id); + var entityIds = newCommits.SelectMany(c => c.ChangeEntities.Select(ce => ce.EntityId)); + snapshotLookup = await _crdtRepository.CurrentSnapshots() + .Where(s => entityIds.Contains(s.EntityId)) + .Select(s => new KeyValuePair(s.EntityId, s.Id)) + .ToDictionaryAsync(s => s.Key, s => s.Value); } else {