Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/build_summary_gprc' into build_summary_gprc
Browse files Browse the repository at this point in the history
  • Loading branch information
mudiagaobrikisil committed Dec 2, 2024
2 parents 3dcedf4 + 2d79791 commit 9e18ec5
Show file tree
Hide file tree
Showing 46 changed files with 1,072 additions and 692 deletions.
10 changes: 10 additions & 0 deletions Serval.sln
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{C3A14577-A65
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SIL.ServiceToolkit", "src\ServiceToolkit\src\SIL.ServiceToolkit\SIL.ServiceToolkit.csproj", "{0E40F959-C641-40A2-9750-B17A4F9F9E55}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{1DB5E6D1-17A8-4FF2-B90A-C5DFBEF63126}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SIL.ServiceToolkit.Tests", "src\ServiceToolkit\test\SIL.ServiceToolkit.Tests\SIL.ServiceToolkit.Tests.csproj", "{C50ED15A-876D-42BF-980A-388E8C49C78D}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -180,6 +184,10 @@ Global
{0E40F959-C641-40A2-9750-B17A4F9F9E55}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0E40F959-C641-40A2-9750-B17A4F9F9E55}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0E40F959-C641-40A2-9750-B17A4F9F9E55}.Release|Any CPU.Build.0 = Release|Any CPU
{C50ED15A-876D-42BF-980A-388E8C49C78D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{C50ED15A-876D-42BF-980A-388E8C49C78D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C50ED15A-876D-42BF-980A-388E8C49C78D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C50ED15A-876D-42BF-980A-388E8C49C78D}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -215,6 +223,8 @@ Global
{10657805-48F1-4205-B8F5-79447F6EF620} = {25CDB05B-4E24-4A6E-933E-1E0BEC97D74D}
{C3A14577-A654-4604-818C-4E683DD45A51} = {EA69B41C-49EF-4017-A687-44B9DF37FF98}
{0E40F959-C641-40A2-9750-B17A4F9F9E55} = {C3A14577-A654-4604-818C-4E683DD45A51}
{1DB5E6D1-17A8-4FF2-B90A-C5DFBEF63126} = {EA69B41C-49EF-4017-A687-44B9DF37FF98}
{C50ED15A-876D-42BF-980A-388E8C49C78D} = {1DB5E6D1-17A8-4FF2-B90A-C5DFBEF63126}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {9F18C25E-E140-43C3-B177-D562E1628370}
Expand Down
6 changes: 3 additions & 3 deletions deploy/qa-ext-values.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
externalHost: qa.serval-api.org
environment: Production
deploymentVersion: '1.7.QA7'
deploymentVersion: '1.8.QA1'
alertEmail: [email protected]
emailsToAlert: [email protected]
enableTls: true
namespace: serval
auth0Domain: dev-sillsdev.auth0.com
lokiTenent: serval-tenant
lokiUrl: http://loki-distributed-gateway.loki.svc.cluster.local
servalImage: ghcr.io/sillsdev/serval:1.7.7
ClearMLDockerImage: ghcr.io/sillsdev/machine.py:1.7.2
servalImage: ghcr.io/sillsdev/serval:1.8.1
ClearMLDockerImage: ghcr.io/sillsdev/machine.py:1.8.1
ClearMLQueue: production
MongoConnectionPrefix: qa_
SharedFileLocation: s3://silnlp/ext-qa/
Expand Down
2 changes: 1 addition & 1 deletion samples/ApiExample/ApiExample.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
<PackageReference Include="Microsoft.Extensions.Caching.Memory" Version="8.0.1" />
<PackageReference Include="Microsoft.Extensions.Configuration.UserSecrets" Version="8.0.1" />
<PackageReference Include="Microsoft.Extensions.DependencyInjection" Version="8.0.1" />
<PackageReference Include="Serval.Client" Version="1.7.3" />
<PackageReference Include="Serval.Client" Version="1.8.0" />
</ItemGroup>

</Project>
2 changes: 2 additions & 0 deletions src/Echo/src/EchoTranslationEngine/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
builder.Services.AddHostedService<BackgroundTaskService>();
builder.Services.AddSingleton<BackgroundTaskQueue>();

builder.Services.AddParallelCorpusPreprocessor();

builder.Services.AddHealthChecks().AddCheck("Live", () => HealthCheckResult.Healthy());

builder.Services.Configure<Bugsnag.Configuration>(builder.Configuration.GetSection("Bugsnag"));
Expand Down
250 changes: 103 additions & 147 deletions src/Echo/src/EchoTranslationEngine/TranslationEngineServiceV1.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
namespace EchoTranslationEngine;

public class TranslationEngineServiceV1(BackgroundTaskQueue taskQueue) : TranslationEngineApi.TranslationEngineApiBase
public class TranslationEngineServiceV1(
BackgroundTaskQueue taskQueue,
IParallelCorpusPreprocessingService parallelCorpusPreprocessingService
) : TranslationEngineApi.TranslationEngineApiBase
{
private static readonly Empty Empty = new();
private readonly BackgroundTaskQueue _taskQueue = taskQueue;

private readonly IParallelCorpusPreprocessingService _parallelCorpusPreprocessingService =
parallelCorpusPreprocessingService;

public override Task<CreateResponse> Create(CreateRequest request, ServerCallContext context)
{
if (request.SourceLanguage != request.TargetLanguage)
Expand Down Expand Up @@ -75,159 +81,35 @@ await client.BuildStartedAsync(

try
{
List<InsertPretranslationsRequest> pretranslationsRequests = [];
await _parallelCorpusPreprocessingService.Preprocess(
request.Corpora.Select(Map).ToList(),
row => Task.CompletedTask,
(row, corpus) =>
{
pretranslationsRequests.Add(
new InsertPretranslationsRequest
{
EngineId = request.EngineId,
CorpusId = corpus.Id,
TextId = row.TextId,
Refs = { row.Refs.Select(r => r.ToString()) },
Translation = row.SourceSegment
}
);
return Task.CompletedTask;
},
false
);
using (
AsyncClientStreamingCall<InsertPretranslationsRequest, Empty> call =
client.InsertPretranslations(cancellationToken: cancellationToken)
)
{
foreach (ParallelCorpus corpus in request.Corpora)
foreach (InsertPretranslationsRequest request in pretranslationsRequests)
{
var sourceFiles = corpus
.SourceCorpora.SelectMany(sc =>
sc.Files.Where(f =>
(
sc.PretranslateAll
|| sc.PretranslateTextIds is null
|| sc.PretranslateTextIds.Contains(f.TextId)
)
&& f.Format == FileFormat.Text
)
)
.ToDictionary(f => f.TextId, f => f.Location);
var targetFiles = corpus
.TargetCorpora.SelectMany(tc =>
tc.Files.Where(f =>
(
tc.PretranslateAll
|| tc.PretranslateTextIds is null
|| tc.PretranslateTextIds.Contains(f.TextId)
)
&& f.Format == FileFormat.Text
)
)
.ToDictionary(f => f.TextId, f => f.Location);

foreach (KeyValuePair<string, string> sourceFile in sourceFiles)
{
string[] sourceLines = await File.ReadAllLinesAsync(
sourceFile.Value,
cancellationToken
);

if (targetFiles.TryGetValue(sourceFile.Key, out string? targetPath))
{
string[] targetLines = await File.ReadAllLinesAsync(targetPath, cancellationToken);
bool isTabSeparated = (sourceLines.Length > 0) && sourceLines[0].Contains('/');
if (!isTabSeparated)
{
int lineNum = 1;
foreach (
(string sourceLine, string targetLine) in sourceLines
.Select(l => l.Trim())
.Zip(targetLines.Select(l => l.Trim()))
)
{
if (sourceLine.Length > 0 && targetLine.Length == 0)
{
await call.RequestStream.WriteAsync(
new InsertPretranslationsRequest
{
EngineId = request.EngineId,
CorpusId = corpus.Id,
TextId = sourceFile.Key,
Refs = { $"{sourceFile.Key}:{lineNum}" },
Translation = sourceLine
},
cancellationToken
);
}
lineNum++;
}
}
else
{
var sourceLinesDict = sourceLines.ToDictionary(
l => l.Split('\t')[0].Trim(),
l => l.Split('\t')[1].Trim()
);
var targetLinesDict = targetLines.ToDictionary(
l => l.Split('\t')[0].Trim(),
l => l.Contains('\t') ? l.Split('\t')[1].Trim() : string.Empty
);
foreach (KeyValuePair<string, string> targetLineKVPair in targetLinesDict)
{
string? sourceLine = null;
sourceLinesDict.TryGetValue(targetLineKVPair.Key, out sourceLine);
sourceLine ??= string.Empty;
string? targetLine = targetLineKVPair.Value;
if (sourceLine.Length > 0 && targetLine.Length == 0)
{
await call.RequestStream.WriteAsync(
new InsertPretranslationsRequest
{
EngineId = request.EngineId,
CorpusId = corpus.Id,
TextId = sourceFile.Key,
Refs = { $"{sourceFile.Key}:{targetLineKVPair.Key}" },
Translation = sourceLine
},
cancellationToken
);
}
}
}
}
else
{
bool isTabSeparated = (sourceLines.Length > 0) && sourceLines[0].Contains('/');
if (!isTabSeparated)
{
int lineNum = 1;
foreach (string sourceLine in sourceLines.Select(l => l.Trim()))
{
if (sourceLine.Length > 0)
{
await call.RequestStream.WriteAsync(
new InsertPretranslationsRequest
{
EngineId = request.EngineId,
CorpusId = corpus.Id,
TextId = sourceFile.Key,
Refs = { $"{sourceFile.Key}:{lineNum}" },
Translation = sourceLine
},
cancellationToken
);
}
lineNum++;
}
}
else
{
foreach (string sourceLine in sourceLines.Select(l => l.Trim()))
{
if (sourceLine.Length > 0)
{
await call.RequestStream.WriteAsync(
new InsertPretranslationsRequest
{
EngineId = request.EngineId,
CorpusId = corpus.Id,
TextId = sourceFile.Key,
Refs = { $"{sourceFile.Key}:{sourceLine.Split('\t')[0]}" },
Translation = sourceLine.Contains('\t')
? sourceLine.Split('\t')[1].Trim()
: string.Empty
},
cancellationToken
);
}
}
}
}
}
await call.RequestStream.WriteAsync(request, cancellationToken);
}

await call.RequestStream.CompleteAsync();
await call;
}
Expand Down Expand Up @@ -325,4 +207,78 @@ ServerCallContext context
new GetLanguageInfoResponse { InternalCode = request.Language + "_echo", IsNative = true, }
);
}

/// <summary>
/// Converts a request <c>ParallelCorpus</c> into the <c>SIL.ServiceToolkit.Models.ParallelCorpus</c> model.
/// </summary>
/// <param name="source">The parallel corpus to convert.</param>
/// <returns>
/// A model with the same id whose source and target corpora are the mapped monolingual corpora of
/// <paramref name="source"/>.
/// </returns>
private static SIL.ServiceToolkit.Models.ParallelCorpus Map(ParallelCorpus source)
{
    // Each side is converted with the MonolingualCorpus overload of Map.
    var sourceSide = source.SourceCorpora.Select(corpus => Map(corpus)).ToList();
    var targetSide = source.TargetCorpora.Select(corpus => Map(corpus)).ToList();

    return new SIL.ServiceToolkit.Models.ParallelCorpus
    {
        Id = source.Id,
        SourceCorpora = sourceSide,
        TargetCorpora = targetSide
    };
}

/// <summary>
/// Converts a request <c>MonolingualCorpus</c> into the <c>SIL.ServiceToolkit.Models.MonolingualCorpus</c> model.
/// </summary>
/// <remarks>
/// For training and for pretranslation separately, <c>GetFilterChoice</c> selects the chapter map, the text id
/// set, or neither. Whatever is not selected is set to <c>null</c> on the returned model.
/// </remarks>
/// <param name="source">The monolingual corpus to convert.</param>
/// <returns>The converted corpus, with its files mapped through the <c>CorpusFile</c> overload of Map.</returns>
private static SIL.ServiceToolkit.Models.MonolingualCorpus Map(MonolingualCorpus source)
{
    // Materialize the candidate filters for both purposes up front.
    Dictionary<string, HashSet<int>> chaptersForTraining = source.TrainOnChapters.ToDictionary(
        entry => entry.Key,
        entry => entry.Value.Chapters.ToHashSet()
    );
    HashSet<string> textIdsForTraining = source.TrainOnTextIds.ToHashSet();
    Dictionary<string, HashSet<int>> chaptersForPretranslation = source.PretranslateChapters.ToDictionary(
        entry => entry.Key,
        entry => entry.Value.Chapters.ToHashSet()
    );
    HashSet<string> textIdsForPretranslation = source.PretranslateTextIds.ToHashSet();

    // Decide, per purpose, which single filter (if any) survives.
    FilterChoice trainingChoice = GetFilterChoice(chaptersForTraining, textIdsForTraining, source.TrainOnAll);
    FilterChoice pretranslationChoice = GetFilterChoice(
        chaptersForPretranslation,
        textIdsForPretranslation,
        source.PretranslateAll
    );

    return new SIL.ServiceToolkit.Models.MonolingualCorpus
    {
        Id = source.Id,
        Language = source.Language,
        Files = source.Files.Select(file => Map(file)).ToList(),
        TrainOnChapters = trainingChoice is FilterChoice.Chapters ? chaptersForTraining : null,
        TrainOnTextIds = trainingChoice is FilterChoice.TextIds ? textIdsForTraining : null,
        PretranslateChapters = pretranslationChoice is FilterChoice.Chapters ? chaptersForPretranslation : null,
        PretranslateTextIds = pretranslationChoice is FilterChoice.TextIds ? textIdsForPretranslation : null
    };
}

/// <summary>
/// Converts a request <c>CorpusFile</c> into the <c>SIL.ServiceToolkit.Models.CorpusFile</c> model.
/// </summary>
/// <param name="source">The corpus file to convert.</param>
/// <returns>A model carrying the same text id, location and format.</returns>
private static SIL.ServiceToolkit.Models.CorpusFile Map(CorpusFile source) =>
    new()
    {
        TextId = source.TextId,
        // NOTE(review): the direct cast assumes both FileFormat enums share underlying values; confirm.
        Format = (SIL.ServiceToolkit.Models.FileFormat)source.Format,
        Location = source.Location
    };

/// <summary>
/// Names which of a corpus's two candidate filters is kept when it is mapped.
/// </summary>
private enum FilterChoice
{
    /// <summary>Keep the chapter map.</summary>
    Chapters = 0,

    /// <summary>Keep the text id set.</summary>
    TextIds = 1,

    /// <summary>Keep neither filter.</summary>
    None = 2
}

/// <summary>
/// Picks which filter to keep: the chapter map, the text id set, or neither.
/// </summary>
/// <param name="chapters">Candidate chapter map; a <c>null</c> value is tolerated.</param>
/// <param name="textIds">Candidate text id set; a <c>null</c> value is tolerated.</param>
/// <param name="noFilter">When <c>true</c>, no filter is kept regardless of the other arguments.</param>
/// <returns>
/// <see cref="FilterChoice.None"/> when <paramref name="noFilter"/> is set or both candidates are <c>null</c>;
/// <see cref="FilterChoice.Chapters"/> when the chapter map has at least one entry; otherwise
/// <see cref="FilterChoice.TextIds"/>.
/// </returns>
private static FilterChoice GetFilterChoice(
    IReadOnlyDictionary<string, HashSet<int>> chapters,
    HashSet<string> textIds,
    bool noFilter
)
{
    // Only one of the text ids or the scripture range (chapters) is used at a time.
    if (noFilter)
        return FilterChoice.None;

    // Without a chapter map, the text ids are the only candidate left - if they exist at all.
    if (chapters is null)
        return textIds is null ? FilterChoice.None : FilterChoice.TextIds;

    // An empty chapter map falls back to the text ids, which may themselves be an empty set
    // (that applies to both scripture and text corpora).
    return chapters.Count > 0 ? FilterChoice.Chapters : FilterChoice.TextIds;
}
}
1 change: 1 addition & 0 deletions src/Echo/src/EchoTranslationEngine/Usings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
global using Grpc.Core;
global using Microsoft.Extensions.Diagnostics.HealthChecks;
global using Serval.Translation.V1;
global using SIL.ServiceToolkit.Utils;
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ public static IMachineBuilder AddBuildJobOptions(this IMachineBuilder builder, I
return builder;
}

/// <summary>
/// Calls <c>AddParallelCorpusPreprocessor</c> on the builder's service collection.
/// </summary>
/// <param name="builder">The machine builder whose services are extended.</param>
/// <returns>The same <paramref name="builder"/> instance that was passed in.</returns>
public static IMachineBuilder AddServiceToolkitServices(this IMachineBuilder builder)
{
    var services = builder.Services;
    services.AddParallelCorpusPreprocessor();
    return builder;
}

public static IMachineBuilder AddThotSmtModel(this IMachineBuilder builder)
{
return builder.AddThotSmtModel(builder.Configuration.GetSection(ThotSmtModelOptions.Key));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ public static IMachineBuilder AddMachine(this IServiceCollection services, IConf
services.AddTransient<IFileSystem, FileSystem>();

services.AddScoped<IDistributedReaderWriterLockFactory, DistributedReaderWriterLockFactory>();
services.AddSingleton<ICorpusService, CorpusService>();
services.AddStartupTask(
(sp, cancellationToken) =>
sp.GetRequiredService<IDistributedReaderWriterLockFactory>().InitAsync(cancellationToken)
);
services.AddParallelCorpusPreprocessor();

var builder = new MachineBuilder(services, configuration);
builder.AddServiceOptions(configuration.GetSection(ServiceOptions.Key));
Expand Down
Loading

0 comments on commit 9e18ec5

Please sign in to comment.