diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/machine-api/build-dto.ts b/src/SIL.XForge.Scripture/ClientApp/src/app/machine-api/build-dto.ts
index 6884f7fc4d..c506b2c9ed 100644
--- a/src/SIL.XForge.Scripture/ClientApp/src/app/machine-api/build-dto.ts
+++ b/src/SIL.XForge.Scripture/ClientApp/src/app/machine-api/build-dto.ts
@@ -14,6 +14,7 @@ export interface ServalBuildAdditionalInfo {
buildId: string;
corporaIds?: string[];
dateFinished?: string;
+ parallelCorporaIds?: string[];
step: number;
translationEngineId: string;
}
diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.spec.ts b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.spec.ts
index 1e3343a952..3d9bb62f32 100644
--- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.spec.ts
+++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.spec.ts
@@ -287,9 +287,11 @@ describe('DraftGenerationStepsComponent', () => {
fixture.detectChanges();
expect(component.done.emit).toHaveBeenCalledWith({
- translationBooks,
- trainingDataFiles,
trainingBooks: trainingBooks.filter(book => !translationBooks.includes(book)),
+ trainingDataFiles,
+ trainingScriptureRanges: [],
+ translationBooks,
+ translationScriptureRanges: [],
fastTraining: false
} as DraftGenerationStepsResult);
expect(component.isStepsCompleted).toBe(true);
@@ -402,7 +404,9 @@ describe('DraftGenerationStepsComponent', () => {
expect(component.done.emit).toHaveBeenCalledWith({
trainingBooks,
trainingDataFiles,
+ trainingScriptureRanges: [],
translationBooks,
+ translationScriptureRanges: [],
fastTraining: true
} as DraftGenerationStepsResult);
expect(generateDraftButton['disabled']).toBe(true);
diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.ts b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.ts
index a1da09a380..f549871f50 100644
--- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.ts
+++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.ts
@@ -20,6 +20,7 @@ import { BookMultiSelectComponent } from '../../../shared/book-multi-select/book
import { SharedModule } from '../../../shared/shared.module';
import { NllbLanguageService } from '../../nllb-language.service';
import { ConfirmSourcesComponent } from '../confirm-sources/confirm-sources.component';
+import { ProjectScriptureRange } from '../draft-generation';
import { DraftSource, DraftSourcesService } from '../draft-sources.service';
import { TrainingDataMultiSelectComponent } from '../training-data/training-data-multi-select.component';
import { TrainingDataUploadDialogComponent } from '../training-data/training-data-upload-dialog.component';
@@ -29,8 +30,10 @@ export interface DraftGenerationStepsResult {
trainingBooks: number[];
trainingDataFiles: string[];
trainingScriptureRange?: string;
+ trainingScriptureRanges: ProjectScriptureRange[];
translationBooks: number[];
translationScriptureRange?: string;
+ translationScriptureRanges: ProjectScriptureRange[];
fastTraining: boolean;
}
@@ -263,8 +266,10 @@ export class DraftGenerationStepsComponent extends SubscriptionDisposable implem
this.isStepsCompleted = true;
this.done.emit({
trainingBooks: this.userSelectedTrainingBooks,
+ trainingScriptureRanges: [],
trainingDataFiles: this.selectedTrainingDataIds,
translationBooks: this.userSelectedTranslateBooks,
+ translationScriptureRanges: [],
fastTraining: this.fastTraining
});
}
diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.html b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.html
index 907b400c18..2bf640f292 100644
--- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.html
+++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.html
@@ -254,6 +254,10 @@
Corpora Ids:
{{ draftJob?.additionalInfo?.corporaIds?.join(", ") ?? "unknown" }}
+
+ Parallel Corpora Ids:
+ {{ draftJob?.additionalInfo?.parallelCorporaIds?.join(", ") ?? "unknown" }}
+
diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.spec.ts b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.spec.ts
index 1f58da4d25..c5b1e1efc4 100644
--- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.spec.ts
+++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.spec.ts
@@ -1976,7 +1976,9 @@ describe('DraftGenerationComponent', () => {
env.component.startBuild({
trainingBooks: [],
trainingDataFiles: [],
+ trainingScriptureRanges: [],
translationBooks: [],
+ translationScriptureRanges: [],
fastTraining: false,
projectId: projectId
});
@@ -1986,7 +1988,9 @@ describe('DraftGenerationComponent', () => {
projectId: projectId,
trainingBooks: [],
trainingDataFiles: [],
+ trainingScriptureRanges: [],
translationBooks: [],
+ translationScriptureRanges: [],
fastTraining: false
});
env.startedOrActiveBuild$.next(buildDto);
@@ -2003,7 +2007,9 @@ describe('DraftGenerationComponent', () => {
env.component.startBuild({
trainingBooks: [],
trainingDataFiles: [],
+ trainingScriptureRanges: [],
translationBooks: [],
+ translationScriptureRanges: [],
fastTraining: false,
projectId: projectId
});
@@ -2012,7 +2018,9 @@ describe('DraftGenerationComponent', () => {
projectId: projectId,
trainingBooks: [],
trainingDataFiles: [],
+ trainingScriptureRanges: [],
translationBooks: [],
+ translationScriptureRanges: [],
fastTraining: false
});
verify(mockDialogRef.getState()).never();
@@ -2028,7 +2036,9 @@ describe('DraftGenerationComponent', () => {
env.component.startBuild({
trainingBooks: [],
trainingDataFiles: [],
+ trainingScriptureRanges: [],
translationBooks: [],
+ translationScriptureRanges: [],
fastTraining: false,
projectId: projectId
});
@@ -2037,7 +2047,9 @@ describe('DraftGenerationComponent', () => {
projectId: projectId,
trainingBooks: [],
trainingDataFiles: [],
+ trainingScriptureRanges: [],
translationBooks: [],
+ translationScriptureRanges: [],
fastTraining: false
});
verify(mockDialogRef.getState()).never();
@@ -2053,7 +2065,9 @@ describe('DraftGenerationComponent', () => {
env.component.startBuild({
trainingBooks: [],
trainingDataFiles: [],
+ trainingScriptureRanges: [],
translationBooks: [],
+ translationScriptureRanges: [],
fastTraining: false,
projectId: projectId
});
@@ -2062,7 +2076,9 @@ describe('DraftGenerationComponent', () => {
projectId: projectId,
trainingBooks: [],
trainingDataFiles: [],
+ trainingScriptureRanges: [],
translationBooks: [],
+ translationScriptureRanges: [],
fastTraining: false
});
verify(mockDialogRef.getState()).never();
@@ -2079,7 +2095,9 @@ describe('DraftGenerationComponent', () => {
env.component.startBuild({
trainingBooks: [],
trainingDataFiles: [],
+ trainingScriptureRanges: [],
translationBooks: [],
+ translationScriptureRanges: [],
fastTraining: false,
projectId: projectId
});
@@ -2088,7 +2106,9 @@ describe('DraftGenerationComponent', () => {
projectId: projectId,
trainingBooks: [],
trainingDataFiles: [],
+ trainingScriptureRanges: [],
translationBooks: [],
+ translationScriptureRanges: [],
fastTraining: false
});
verify(mockDialogRef.close()).once();
@@ -2104,7 +2124,9 @@ describe('DraftGenerationComponent', () => {
env.component.startBuild({
trainingBooks: [],
trainingDataFiles: [],
+ trainingScriptureRanges: [],
translationBooks: [],
+ translationScriptureRanges: [],
fastTraining: false,
projectId: projectId
});
@@ -2114,7 +2136,9 @@ describe('DraftGenerationComponent', () => {
projectId: projectId,
trainingBooks: [],
trainingDataFiles: [],
+ trainingScriptureRanges: [],
translationBooks: [],
+ translationScriptureRanges: [],
fastTraining: false
});
expect(mockAuthService.requestParatextCredentialUpdate).toHaveBeenCalled();
diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.ts b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.ts
index a8d0fd3cb0..ac43d299db 100644
--- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.ts
+++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.ts
@@ -455,8 +455,10 @@ export class DraftGenerationComponent extends DataLoadingComponent implements On
trainingBooks: result.trainingBooks,
trainingDataFiles: result.trainingDataFiles,
trainingScriptureRange: result.trainingScriptureRange,
+ trainingScriptureRanges: result.trainingScriptureRanges,
translationBooks: result.translationBooks,
translationScriptureRange: result.translationScriptureRange,
+ translationScriptureRanges: result.translationScriptureRanges,
fastTraining: result.fastTraining
});
}
diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.service.spec.ts b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.service.spec.ts
index a6fa6a1088..1599ff8542 100644
--- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.service.spec.ts
+++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.service.spec.ts
@@ -43,7 +43,9 @@ describe('DraftGenerationService', () => {
projectId,
trainingBooks: [],
trainingDataFiles: [],
+ translationScriptureRanges: [],
translationBooks: [],
+ trainingScriptureRanges: [],
fastTraining: false
};
const buildDto: BuildDto = {
diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.ts b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.ts
index 615b18e306..8e08feafdb 100644
--- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.ts
+++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.ts
@@ -1,7 +1,7 @@
import { InjectionToken } from '@angular/core';
import { BuildStates } from '../../machine-api/build-states';
-/*
+/**
* The build configuration for a pre-translation build.
*/
export interface BuildConfig {
@@ -9,11 +9,21 @@ export interface BuildConfig {
trainingBooks: number[];
trainingDataFiles: string[];
trainingScriptureRange?: string;
+ trainingScriptureRanges: ProjectScriptureRange[];
translationBooks: number[];
translationScriptureRange?: string;
+ translationScriptureRanges: ProjectScriptureRange[];
fastTraining: boolean;
}
+/**
+ * A per-project scripture range.
+ */
+export interface ProjectScriptureRange {
+ projectId: string;
+ scriptureRange: string;
+}
+
/**
* Dictionary of 'segmentRef -> segment text'.
*/
diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-information/draft-information.component.html b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-information/draft-information.component.html
index bd325e4d1b..bc11884ca9 100644
--- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-information/draft-information.component.html
+++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-information/draft-information.component.html
@@ -1,6 +1,7 @@
@if (canShowAdditionalInfo) {
Build Id: {{ draftJob?.additionalInfo?.buildId }}
Corpora Ids: {{ draftJob?.additionalInfo?.corporaIds?.join(", ") }}
+ Parallel Corpora Ids: {{ draftJob?.additionalInfo?.parallelCorporaIds?.join(", ") }}
Date Finished: {{ draftJob?.additionalInfo?.dateFinished?.toLocaleString() }}
Message: {{ draftJob?.message }}
Percent Completed: {{ draftJob?.percentCompleted }}
diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/editor/editor-draft/editor-draft.component.spec.ts b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/editor/editor-draft/editor-draft.component.spec.ts
index 777e8e42af..81b7100eba 100644
--- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/editor/editor-draft/editor-draft.component.spec.ts
+++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/editor/editor-draft/editor-draft.component.spec.ts
@@ -16,7 +16,6 @@ import { TestRealtimeModule } from 'xforge-common/test-realtime.module';
import { configureTestingModule, TestTranslocoModule } from 'xforge-common/test-utils';
import { SFProjectProfileDoc } from '../../../core/models/sf-project-profile-doc';
import { SF_TYPE_REGISTRY } from '../../../core/models/sf-type-registry';
-import { TextDocService } from '../../../core/text-doc.service';
import { SharedModule } from '../../../shared/shared.module';
import { EDITOR_READY_TIMEOUT } from '../../../shared/text/text.component';
import { DraftSegmentMap } from '../../draft-generation/draft-generation';
@@ -29,7 +28,6 @@ const mockActivatedProjectService = mock(ActivatedProjectService);
const mockDraftHandlingService = mock(DraftHandlingService);
const mockI18nService = mock(I18nService);
const mockDialogService = mock(DialogService);
-const mockTextDocService = mock(TextDocService);
describe('EditorDraftComponent', () => {
let fixture: ComponentFixture;
@@ -52,8 +50,7 @@ describe('EditorDraftComponent', () => {
{ provide: DraftHandlingService, useMock: mockDraftHandlingService },
{ provide: I18nService, useMock: mockI18nService },
{ provide: OnlineStatusService, useClass: TestOnlineStatusService },
- { provide: DialogService, useMock: mockDialogService },
- { provide: TextDocService, useMock: mockTextDocService }
+ { provide: DialogService, useMock: mockDialogService }
]
}));
diff --git a/src/SIL.XForge.Scripture/ClientApp/src/assets/i18n/non_checking_en.json b/src/SIL.XForge.Scripture/ClientApp/src/assets/i18n/non_checking_en.json
index 3d835bea00..a802f9a6d4 100644
--- a/src/SIL.XForge.Scripture/ClientApp/src/assets/i18n/non_checking_en.json
+++ b/src/SIL.XForge.Scripture/ClientApp/src/assets/i18n/non_checking_en.json
@@ -194,7 +194,6 @@
"generate_draft_button": "Generate draft",
"generate_forward_translation_drafts_header": "Generate translation drafts",
"generate_new_draft": "New draft",
- "go_to_draft_viewer": "Go to [link:draftViewerUrl]draft viewer[/link] to preview and apply the generated draft to chapters of your choice.",
"improved_learning_rate_notice": "Drafting is now much faster! Most drafts will now take about {1}{2} hours to complete instead of {3}{4}. Draft quality should be about the same as before, but please {5}contact us{6} if you notice any issues.",
"info_alert_different_additional_training_and_source_language": "The language for your additional training text ({{ additionalTrainingSourceLanguageDisplayName }}) must be the same as the training source language ({{ alternateTrainingSourceLanguageDisplayName }}). Select a different additional training text on the [link:projectSettingsUrl]settings page[/link].",
"info_alert_different_training_and_source_language": "The language for your alternate training text ({{ alternateTrainingSourceLanguageDisplayName }}) must be the same as the source language ({{ sourceLanguageDisplayName }}). Select a different alternate training text on the [link:projectSettingsUrl]settings page[/link].",
diff --git a/src/SIL.XForge.Scripture/Models/BuildConfig.cs b/src/SIL.XForge.Scripture/Models/BuildConfig.cs
index c8c22a65f7..3fd3e85100 100644
--- a/src/SIL.XForge.Scripture/Models/BuildConfig.cs
+++ b/src/SIL.XForge.Scripture/Models/BuildConfig.cs
@@ -19,6 +19,7 @@ public class BuildConfig
///
/// The numbers of the books to use as the source texts for training.
///
+ /// This property is for legacy client use.
/// You should not set this property and at the same time.
///
public HashSet TrainingBooks { get; set; } = [];
@@ -34,16 +35,26 @@ public class BuildConfig
///
/// The book ids and chapter numbers separated by semicolons.
///
+ /// This property is for legacy client use.
/// See https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range for syntax.
/// You should not set this property and at the same time.
///
public string? TrainingScriptureRange { get; set; }
+ ///
+ /// Gets or sets the per-project books and chapters to use for training.
+ ///
+ ///
+ /// A list containing the project identifiers and scripture ranges.
+ ///
+ public HashSet TrainingScriptureRanges { get; set; } = [];
+
///
/// Gets or sets the books to use for translation.
///
/// The numbers of the books to use as the source texts for training.
///
+ /// This property is for legacy client use.
/// You should not set this property and at the same time.
///
public HashSet TranslationBooks { get; set; } = [];
@@ -53,11 +64,20 @@ public class BuildConfig
///
/// The book ids and chapter numbers separated by semicolons.
///
+ /// This property is for legacy client use.
/// See https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range for syntax.
/// You should not set this property and at the same time.
///
public string? TranslationScriptureRange { get; set; }
+ ///
+ /// Gets or sets the per-project books and chapters to use for translation.
+ ///
+ ///
+ /// A list containing the project identifiers and scripture ranges.
+ ///
+ public HashSet TranslationScriptureRanges { get; set; } = [];
+
///
/// Gets or sets the project identifier.
///
diff --git a/src/SIL.XForge.Scripture/Models/DraftConfig.cs b/src/SIL.XForge.Scripture/Models/DraftConfig.cs
index a241d601fe..ba1937f5a1 100644
--- a/src/SIL.XForge.Scripture/Models/DraftConfig.cs
+++ b/src/SIL.XForge.Scripture/Models/DraftConfig.cs
@@ -13,8 +13,10 @@ public class DraftConfig
public TranslateSource? AlternateTrainingSource { get; set; }
public IList LastSelectedTrainingBooks { get; set; } = [];
public string? LastSelectedTrainingScriptureRange { get; set; }
+ public IList LastSelectedTrainingScriptureRanges { get; set; } = [];
public IList LastSelectedTrainingDataFiles { get; set; } = [];
public IList LastSelectedTranslationBooks { get; set; } = [];
public string? LastSelectedTranslationScriptureRange { get; set; }
+ public IList LastSelectedTranslationScriptureRanges { get; set; } = [];
public string? ServalConfig { get; set; }
}
diff --git a/src/SIL.XForge.Scripture/Models/MachineApi.cs b/src/SIL.XForge.Scripture/Models/MachineApi.cs
index c645133518..f083e4701f 100644
--- a/src/SIL.XForge.Scripture/Models/MachineApi.cs
+++ b/src/SIL.XForge.Scripture/Models/MachineApi.cs
@@ -9,6 +9,7 @@ namespace SIL.XForge.Scripture.Models;
public static class MachineApi
{
public const string HttpClientName = "machine_api";
+ public const string TokenClientName = "machine_api_token";
public const string Namespace = "machine-api/v3";
public const string StartBuild = "translation/builds";
public const string GetBuild = "translation/builds/id:{sfProjectId}.{buildId?}";
diff --git a/src/SIL.XForge.Scripture/Models/ProjectScriptureRange.cs b/src/SIL.XForge.Scripture/Models/ProjectScriptureRange.cs
new file mode 100644
index 0000000000..82e986452c
--- /dev/null
+++ b/src/SIL.XForge.Scripture/Models/ProjectScriptureRange.cs
@@ -0,0 +1,22 @@
+namespace SIL.XForge.Scripture.Models;
+
+///
+/// A scripture range for a specific project.
+/// This is used by BuildConfig.
+///
+public record ProjectScriptureRange
+{
+ ///
+ /// The project identifier.
+ ///
+ public string ProjectId { get; set; } = string.Empty;
+
+ ///
+ /// The scripture range.
+ ///
+ /// The book ids and chapter numbers separated by semicolons.
+ ///
+ /// See https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range for syntax.
+ ///
+ public string ScriptureRange { get; set; } = string.Empty;
+}
diff --git a/src/SIL.XForge.Scripture/Models/ServalAdditionalTrainingData.cs b/src/SIL.XForge.Scripture/Models/ServalAdditionalTrainingData.cs
new file mode 100644
index 0000000000..96ec24911f
--- /dev/null
+++ b/src/SIL.XForge.Scripture/Models/ServalAdditionalTrainingData.cs
@@ -0,0 +1,32 @@
+using System.Collections.Generic;
+
+namespace SIL.XForge.Scripture.Models;
+
+///
+/// Configuration for uploading Additional Training Data to Serval.
+///
+public class ServalAdditionalTrainingData
+{
+ ///
+ /// Gets or sets the Parallel Corpus identifier.
+ ///
+ public string ParallelCorpusId { get; set; } = string.Empty;
+
+ ///
+ /// Gets or sets the identifier of the corpus to be used as the source in the Parallel Corpus.
+ ///
+ public string SourceCorpusId { get; set; } = string.Empty;
+
+ ///
+ /// Gets or sets the identifier of the corpus to be used as the target in the Parallel Corpus.
+ ///
+ public string TargetCorpusId { get; set; } = string.Empty;
+
+ ///
+ /// Gets or sets the corpus files uploaded to Serval.
+ ///
+ ///
+ /// The files in both the source and target corpora.
+ ///
+ public List CorpusFiles { get; set; } = [];
+}
diff --git a/src/SIL.XForge.Scripture/Models/ServalBuildAdditionalInfo.cs b/src/SIL.XForge.Scripture/Models/ServalBuildAdditionalInfo.cs
index b01c7d95ef..2a1d7a248a 100644
--- a/src/SIL.XForge.Scripture/Models/ServalBuildAdditionalInfo.cs
+++ b/src/SIL.XForge.Scripture/Models/ServalBuildAdditionalInfo.cs
@@ -5,9 +5,10 @@ namespace SIL.XForge.Scripture.Models;
public class ServalBuildAdditionalInfo
{
- public string BuildId { get; set; } = string.Empty;
- public IEnumerable? CorporaIds { get; set; }
- public DateTimeOffset? DateFinished { get; set; }
- public int Step { get; set; }
- public string TranslationEngineId { get; set; } = string.Empty;
+ public string BuildId { get; init; } = string.Empty;
+ public IEnumerable? CorporaIds { get; init; }
+ public DateTimeOffset? DateFinished { get; init; }
+ public IEnumerable? ParallelCorporaIds { get; init; }
+ public int Step { get; init; }
+ public string TranslationEngineId { get; init; } = string.Empty;
}
diff --git a/src/SIL.XForge.Scripture/Models/ServalCorpus.cs b/src/SIL.XForge.Scripture/Models/ServalCorpus.cs
index dae8a48dac..7bb189abce 100644
--- a/src/SIL.XForge.Scripture/Models/ServalCorpus.cs
+++ b/src/SIL.XForge.Scripture/Models/ServalCorpus.cs
@@ -50,7 +50,7 @@ public class ServalCorpus
///
/// The source corpus files.
///
- public List SourceFiles { get; set; } = new List();
+ public List SourceFiles { get; set; } = [];
///
/// Gets or sets the target files uploaded to Serval.
@@ -58,5 +58,5 @@ public class ServalCorpus
///
/// The target corpus files.
///
- public List TargetFiles { get; set; } = new List();
+ public List TargetFiles { get; set; } = [];
}
diff --git a/src/SIL.XForge.Scripture/Models/ServalCorpusFile.cs b/src/SIL.XForge.Scripture/Models/ServalCorpusFile.cs
index 12ec6ea9c3..557a68a2d4 100644
--- a/src/SIL.XForge.Scripture/Models/ServalCorpusFile.cs
+++ b/src/SIL.XForge.Scripture/Models/ServalCorpusFile.cs
@@ -5,6 +5,11 @@ namespace SIL.XForge.Scripture.Models;
///
public class ServalCorpusFile
{
+ ///
+ /// Gets or sets the corpus this file is associated with.
+ ///
+ public string CorpusId { get; set; } = string.Empty;
+
///
/// The MD5 Hash of the corpus file's contents.
/// This is used to see if the file has changed since its last upload to Serval.
@@ -16,6 +21,11 @@ public class ServalCorpusFile
///
public string FileId { get; set; } = string.Empty;
+ ///
+ /// Gets or sets the language of the file and corpus.
+ ///
+ public string LanguageCode { get; set; } = string.Empty;
+
///
/// Gets or sets the project this file is from. For example, the mixed source project.
///
@@ -25,8 +35,10 @@ public class ServalCorpusFile
/// Gets or sets the text identifier this file represents.
///
///
- /// For text files, this will be in format bookNum_chapterNum.
- /// For Paratext files, this will be the target project id.
+ /// Notes:
+ /// - For text files, this will be in format bookNum_chapterNum.
+ /// - For Paratext files, this will be the target project id.
+ /// - When using mixed sources, TextId must be the same for each file to mix the sources together.
///
public string TextId { get; set; } = string.Empty;
}
diff --git a/src/SIL.XForge.Scripture/Models/ServalCorpusSync.cs b/src/SIL.XForge.Scripture/Models/ServalCorpusSync.cs
new file mode 100644
index 0000000000..9940f09fff
--- /dev/null
+++ b/src/SIL.XForge.Scripture/Models/ServalCorpusSync.cs
@@ -0,0 +1,33 @@
+namespace SIL.XForge.Scripture.Models;
+
+///
+/// Serval Corpus Synchronization Information.
+///
+///
+/// This class is used by
+/// to determine the pre-translate and train on corpus configuration.
+///
+public class ServalCorpusSyncInfo
+{
+ ///
+ /// Gets or sets the corpus that was synchronized.
+ ///
+ public string CorpusId { get; init; } = string.Empty;
+
+ ///
+ /// Gets or sets whether this corpus is a source corpus.
+ ///
+ /// true if a source corpus; otherwise, false.
+ public bool IsSource { get; init; }
+
+ ///
+ /// Gets or sets the identifier of the parallel corpus
+ /// that was synchronized for this corpus.
+ ///
+ public string ParallelCorpusId { get; init; } = string.Empty;
+
+ ///
+ /// Gets or sets the project that was synchronized for this corpus.
+ ///
+ public string ProjectId { get; init; } = string.Empty;
+}
diff --git a/src/SIL.XForge.Scripture/Models/ServalData.cs b/src/SIL.XForge.Scripture/Models/ServalData.cs
index 60ab2e4a89..94ce977375 100644
--- a/src/SIL.XForge.Scripture/Models/ServalData.cs
+++ b/src/SIL.XForge.Scripture/Models/ServalData.cs
@@ -4,15 +4,15 @@
namespace SIL.XForge.Scripture.Models;
///
-/// Serval Data.
+/// Serval Configuration Data.
///
public class ServalData
{
///
- /// Gets or sets the SMT Translation Engine Id for the project.
+ /// Gets or sets the SMT Translation Engine identifier for the project.
///
///
- /// The SMT Translation Engine Id.
+ /// The SMT Translation Engine identifier.
///
///
/// The user should not interact with the translation engine directly by ID.
@@ -32,7 +32,7 @@ public class ServalData
public string? TranslationErrorMessage { get; set; }
///
- /// Gets or sets the Hangfire Job Id for the Translation job.
+ /// Gets or sets the Hangfire Job identifier for the Translation job.
///
public string? TranslationJobId { get; set; }
@@ -50,10 +50,10 @@ public class ServalData
public DateTime? TranslationQueuedAt { get; set; }
///
- /// Gets or sets the NMT Translation Engine Id for the project.
+ /// Gets or sets the NMT Translation Engine identifier for the project.
///
///
- /// The NMT Translation Engine Id.
+ /// The NMT Translation Engine identifier.
///
public string? PreTranslationEngineId { get; set; }
@@ -70,10 +70,27 @@ public class ServalData
public string? PreTranslationErrorMessage { get; set; }
///
- /// Gets or sets the Hangfire Job Id for the Pre-Translation job.
+ /// Gets or sets the Hangfire Job identifier for the Pre-Translation job.
///
public string? PreTranslationJobId { get; set; }
+ ///
+ /// Gets or sets the Identifier of the Parallel Corpus to be used in the PreTranslate section of the
+ /// for pre-translation (NMT) builds.
+ ///
+ public string? ParallelCorpusIdForPreTranslate { get; set; }
+
+ ///
+ /// Gets or sets the Identifier of the Parallel Corpus to be used for translation (SMT) builds.
+ ///
+ public string? ParallelCorpusIdForSmt { get; set; }
+
+ ///
+ /// Gets or sets the Identifier of the Parallel Corpus to be used in the TrainOn section of the
+ /// for pre-translation (NMT) builds.
+ ///
+ public string? ParallelCorpusIdForTrainOn { get; set; }
+
///
/// Gets or sets the date and time that the pre-translation build was queued.
///
@@ -110,5 +127,18 @@ public class ServalData
///
/// The dictionary key is the corpus ID.
///
- public Dictionary Corpora { get; set; } = new Dictionary();
+ public Dictionary? Corpora { get; set; }
+
+ ///
+ /// Gets or sets the additional training data configuration for pre-translation (NMT) builds.
+ ///
+ public ServalAdditionalTrainingData? AdditionalTrainingData { get; set; }
+
+ ///
+ /// Gets or sets the corpus and data files configuration.
+ ///
+ ///
+ /// These are shared by translation (SMT) and pre-translation (NMT) translation engines.
+ ///
+ public List CorpusFiles { get; set; } = [];
}
diff --git a/src/SIL.XForge.Scripture/SIL.XForge.Scripture.csproj b/src/SIL.XForge.Scripture/SIL.XForge.Scripture.csproj
index 8b0617a599..d4c49194f2 100644
--- a/src/SIL.XForge.Scripture/SIL.XForge.Scripture.csproj
+++ b/src/SIL.XForge.Scripture/SIL.XForge.Scripture.csproj
@@ -28,8 +28,9 @@
-
+
+
@@ -39,7 +40,7 @@
-
+
diff --git a/src/SIL.XForge.Scripture/Services/BuildConfigJsonConverter.cs b/src/SIL.XForge.Scripture/Services/BuildConfigJsonConverter.cs
index 1957826e1d..968b0d5d0f 100644
--- a/src/SIL.XForge.Scripture/Services/BuildConfigJsonConverter.cs
+++ b/src/SIL.XForge.Scripture/Services/BuildConfigJsonConverter.cs
@@ -42,6 +42,12 @@ public override void WriteJson(JsonWriter writer, BuildConfig? value, JsonSerial
serializer.Serialize(writer, value.TrainingScriptureRange);
}
+ if (value.TrainingScriptureRanges.Count > 0)
+ {
+ writer.WritePropertyName(nameof(value.TrainingScriptureRanges));
+ serializer.Serialize(writer, value.TrainingScriptureRanges);
+ }
+
if (value.TranslationBooks.Count > 0)
{
writer.WritePropertyName(nameof(value.TranslationBooks));
@@ -54,6 +60,12 @@ public override void WriteJson(JsonWriter writer, BuildConfig? value, JsonSerial
serializer.Serialize(writer, value.TranslationScriptureRange);
}
+ if (value.TranslationScriptureRanges.Count > 0)
+ {
+ writer.WritePropertyName(nameof(value.TranslationScriptureRanges));
+ serializer.Serialize(writer, value.TranslationScriptureRanges);
+ }
+
if (value.FastTraining)
{
writer.WritePropertyName(nameof(value.FastTraining));
diff --git a/src/SIL.XForge.Scripture/Services/IMachineProjectService.cs b/src/SIL.XForge.Scripture/Services/IMachineProjectService.cs
index cca7cf0548..0653a54b1b 100644
--- a/src/SIL.XForge.Scripture/Services/IMachineProjectService.cs
+++ b/src/SIL.XForge.Scripture/Services/IMachineProjectService.cs
@@ -1,50 +1,13 @@
using System.IO;
using System.Threading;
using System.Threading.Tasks;
-using Serval.Client;
-using SIL.XForge.Scripture.Models;
namespace SIL.XForge.Scripture.Services;
public interface IMachineProjectService
{
- Task AddProjectAsync(
- string curUserId,
- string sfProjectId,
- bool preTranslate,
- CancellationToken cancellationToken
- );
- Task BuildProjectAsync(
- string curUserId,
- BuildConfig buildConfig,
- bool preTranslate,
- CancellationToken cancellationToken
- );
- Task BuildProjectForBackgroundJobAsync(
- string curUserId,
- BuildConfig buildConfig,
- bool preTranslate,
- CancellationToken cancellationToken
- );
+ Task AddProjectAsync(string sfProjectId, bool preTranslate, CancellationToken cancellationToken);
Task GetProjectZipAsync(string sfProjectId, Stream outputStream, CancellationToken cancellationToken);
Task GetTranslationEngineTypeAsync(bool preTranslate);
- Task RemoveProjectAsync(
- string curUserId,
- string sfProjectId,
- bool preTranslate,
- CancellationToken cancellationToken
- );
- Task SyncProjectCorporaAsync(
- string curUserId,
- BuildConfig buildConfig,
- bool preTranslate,
- CancellationToken cancellationToken
- );
- Task TranslationEngineExistsAsync(
- string projectId,
- string translationEngineId,
- bool preTranslate,
- CancellationToken cancellationToken
- );
- Task UpdateTranslationSourcesAsync(string curUserId, string sfProjectId);
+ Task RemoveProjectAsync(string sfProjectId, bool preTranslate, CancellationToken cancellationToken);
}
diff --git a/src/SIL.XForge.Scripture/Services/MachineApiService.cs b/src/SIL.XForge.Scripture/Services/MachineApiService.cs
index 0b123e7a18..5d2ad3c56d 100644
--- a/src/SIL.XForge.Scripture/Services/MachineApiService.cs
+++ b/src/SIL.XForge.Scripture/Services/MachineApiService.cs
@@ -62,6 +62,8 @@ ITranslationEngineTypesClient translationEngineTypesClient
private static readonly IEqualityComparer> _listStringComparer = SequenceEqualityComparer.Create(
EqualityComparer.Default
);
+ private static readonly IEqualityComparer> _listProjectScriptureRangeComparer =
+ SequenceEqualityComparer.Create(EqualityComparer.Default);
public async Task CancelPreTranslationBuildAsync(
string curUserId,
@@ -221,7 +223,7 @@ CancellationToken cancellationToken
// Make sure the DTO conforms to the machine-api V2 URLs
if (buildDto is not null)
{
- UpdateDto(buildDto, sfProjectId);
+ buildDto = UpdateDto(buildDto, sfProjectId);
}
return buildDto;
@@ -266,7 +268,7 @@ await translationEnginesClient.GetAllBuildsAsync(translationEngineId, cancellati
// Make sure the DTO conforms to the machine-api V2 URLs
if (buildDto is not null)
{
- UpdateDto(buildDto, sfProjectId);
+ buildDto = UpdateDto(buildDto, sfProjectId);
}
return buildDto;
@@ -314,7 +316,7 @@ CancellationToken cancellationToken
}
buildDto = CreateDto(translationBuild);
- UpdateDto(buildDto, sfProjectId);
+ buildDto = UpdateDto(buildDto, sfProjectId);
}
catch (ServalApiException e)
{
@@ -474,7 +476,7 @@ CancellationToken cancellationToken
{
State = BuildStateFaulted,
Message = errorMessage,
- AdditionalInfo = new ServalBuildAdditionalInfo { TranslationEngineId = engineId ?? string.Empty, },
+ AdditionalInfo = new ServalBuildAdditionalInfo { TranslationEngineId = engineId ?? string.Empty },
};
}
else
@@ -517,7 +519,7 @@ CancellationToken cancellationToken
// Make sure the DTO conforms to the machine-api V2 URLs
if (buildDto is not null)
{
- UpdateDto(buildDto, sfProjectId);
+ buildDto = UpdateDto(buildDto, sfProjectId);
}
return buildDto;
@@ -697,7 +699,7 @@ public async Task StartPreTranslationBuildAsync(
CancellationToken cancellationToken
)
{
- // Ensure that there are no errors in the build configuration
+ // Ensure that there are no errors in the build configuration for training
if (!string.IsNullOrWhiteSpace(buildConfig.TrainingScriptureRange) && buildConfig.TrainingBooks.Count > 0)
{
throw new DataNotFoundException(
@@ -706,6 +708,26 @@ CancellationToken cancellationToken
);
}
+ if (
+ !string.IsNullOrWhiteSpace(buildConfig.TrainingScriptureRange)
+ && buildConfig.TrainingScriptureRanges.Count > 0
+ )
+ {
+ throw new DataNotFoundException(
+ $"You cannot specify both {nameof(buildConfig.TrainingScriptureRange)}"
+ + $" and {nameof(buildConfig.TrainingScriptureRanges)}."
+ );
+ }
+
+ if (buildConfig.TrainingScriptureRanges.Count > 0 && buildConfig.TrainingBooks.Count > 0)
+ {
+ throw new DataNotFoundException(
+ $"You cannot specify both {nameof(buildConfig.TrainingScriptureRanges)}"
+ + $" and {nameof(buildConfig.TrainingBooks)}."
+ );
+ }
+
+ // Ensure that there are no errors in the build configuration for translation
if (!string.IsNullOrWhiteSpace(buildConfig.TranslationScriptureRange) && buildConfig.TranslationBooks.Count > 0)
{
throw new DataNotFoundException(
@@ -714,6 +736,25 @@ CancellationToken cancellationToken
);
}
+ if (
+ !string.IsNullOrWhiteSpace(buildConfig.TranslationScriptureRange)
+ && buildConfig.TranslationScriptureRanges.Count > 0
+ )
+ {
+ throw new DataNotFoundException(
+ $"You cannot specify both {nameof(buildConfig.TranslationScriptureRange)}"
+ + $" and {nameof(buildConfig.TranslationScriptureRanges)}."
+ );
+ }
+
+ if (buildConfig.TranslationScriptureRanges.Count > 0 && buildConfig.TranslationBooks.Count > 0)
+ {
+ throw new DataNotFoundException(
+ $"You cannot specify both {nameof(buildConfig.TranslationScriptureRanges)}"
+ + $" and {nameof(buildConfig.TranslationBooks)}."
+ );
+ }
+
// Load the project from the realtime service
await using IConnection conn = await realtimeService.ConnectAsync(curUserId);
IDocument projectDoc = await conn.FetchAsync(buildConfig.ProjectId);
@@ -730,29 +771,41 @@ await projectDoc.SubmitJson0OpAsync(op =>
{
op.Set(
p => p.TranslateConfig.DraftConfig.LastSelectedTrainingBooks,
- buildConfig.TrainingBooks.ToList(),
+ [.. buildConfig.TrainingBooks],
_listIntComparer
);
op.Set(
p => p.TranslateConfig.DraftConfig.LastSelectedTrainingDataFiles,
- buildConfig.TrainingDataFiles.ToList(),
+ [.. buildConfig.TrainingDataFiles],
_listStringComparer
);
op.Set(
p => p.TranslateConfig.DraftConfig.LastSelectedTrainingScriptureRange,
buildConfig.TrainingScriptureRange
);
+ op.Set(
+ p => p.TranslateConfig.DraftConfig.LastSelectedTrainingScriptureRanges,
+ [.. buildConfig.TrainingScriptureRanges],
+ _listProjectScriptureRangeComparer
+ );
op.Set(
p => p.TranslateConfig.DraftConfig.LastSelectedTranslationBooks,
- buildConfig.TranslationBooks.ToList(),
+ [.. buildConfig.TranslationBooks],
_listIntComparer
);
op.Set(
p => p.TranslateConfig.DraftConfig.LastSelectedTranslationScriptureRange,
buildConfig.TranslationScriptureRange
);
+ op.Set(
+ p => p.TranslateConfig.DraftConfig.LastSelectedTranslationScriptureRanges,
+ [.. buildConfig.TranslationScriptureRanges],
+ _listProjectScriptureRangeComparer
+ );
if (!projectDoc.Data.TranslateConfig.PreTranslate)
+ {
op.Set(p => p.TranslateConfig.PreTranslate, true);
+ }
});
// Sync the source and target before running the build
@@ -935,7 +988,29 @@ private static ServalBuildDto CreateDto(TranslationBuild translationBuild) =>
AdditionalInfo = new ServalBuildAdditionalInfo
{
BuildId = translationBuild.Id,
- CorporaIds = translationBuild.Pretranslate?.Select(p => p.Corpus.Id),
+ CorporaIds = new HashSet(
+ // Use a HashSet to ensure there are no duplicate corpus ids
+ [
+ .. translationBuild
+ .Pretranslate?.SelectMany(t => t.SourceFilters ?? [])
+ .Select(f => f.Corpus.Id) ?? [],
+ .. translationBuild.TrainOn?.SelectMany(t => t.SourceFilters ?? []).Select(f => f.Corpus.Id)
+ ?? [],
+ .. translationBuild.TrainOn?.SelectMany(t => t.TargetFilters ?? []).Select(f => f.Corpus.Id)
+ ?? [],
+ ]
+ ),
+ ParallelCorporaIds = new HashSet(
+ // Use a HashSet to ensure there are no duplicate parallel corpus ids
+ [
+ .. translationBuild
+ .Pretranslate?.Select(t => t.ParallelCorpus?.Id)
+ .Where(id => !string.IsNullOrEmpty(id)) ?? [],
+ .. translationBuild
+ .TrainOn?.Select(t => t.ParallelCorpus?.Id)
+ .Where(id => !string.IsNullOrEmpty(id)) ?? [],
+ ]
+ ),
DateFinished = translationBuild.DateFinished,
Step = translationBuild.Step,
TranslationEngineId = translationBuild.Engine.Id,
@@ -961,7 +1036,11 @@ private static ServalEngineDto CreateDto(TranslationEngine translationEngine) =>
///
/// Method not allowed or not supported for the specified translation engine.
///
- /// If this method returns, it is expected that the DTO will be null.
+ ///
+ /// If this method returns, it is expected that the DTO will be null.
+ /// The following status codes may be thrown by Serval, and are not handled by this method:
+ /// - 499: Operation Cancelled
+ ///
private static void ProcessServalApiException(ServalApiException e)
{
switch (e)
diff --git a/src/SIL.XForge.Scripture/Services/MachineProjectService.cs b/src/SIL.XForge.Scripture/Services/MachineProjectService.cs
index 36495e96d4..99e5f608b6 100644
--- a/src/SIL.XForge.Scripture/Services/MachineProjectService.cs
+++ b/src/SIL.XForge.Scripture/Services/MachineProjectService.cs
@@ -13,7 +13,6 @@
using Microsoft.FeatureManagement;
using Newtonsoft.Json.Linq;
using Serval.Client;
-using SIL.Extensions;
using SIL.Scripture;
using SIL.XForge.Configuration;
using SIL.XForge.DataAccess;
@@ -33,6 +32,7 @@ namespace SIL.XForge.Scripture.Services;
/// Provides functionality to add, remove, and build Machine projects.
///
public class MachineProjectService(
+ ICorporaClient corporaClient,
IDataFilesClient dataFilesClient,
IExceptionHandler exceptionHandler,
IFeatureManager featureManager,
@@ -53,8 +53,15 @@ IRepository userSecrets
internal const string Nmt = "nmt";
internal const string SmtTransfer = "smt-transfer";
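+ /// <summary>
+ /// Adds the project to Serval, if the required data is present.
+ /// </summary>
+ /// <param name="sfProjectId">The Scripture Forge project identifier.</param>
+ /// <param name="preTranslate">If true use NMT; otherwise if false use SMT.</param>
+ /// <param name="cancellationToken">The cancellation token.</param>
+ /// <returns>The translation engine identifier.</returns>
+ /// <exception cref="DataNotFoundException">The project does not exist.</exception>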
public async Task AddProjectAsync(
- string curUserId,
string sfProjectId,
bool preTranslate,
CancellationToken cancellationToken
@@ -82,274 +89,17 @@ CancellationToken cancellationToken
return string.Empty;
}
- public async Task BuildProjectAsync(
- string curUserId,
- BuildConfig buildConfig,
- bool preTranslate,
- CancellationToken cancellationToken
- )
- {
- // Load the target project secrets, so we can get the translation engine ID
- if (!(await projectSecrets.TryGetAsync(buildConfig.ProjectId)).TryResult(out SFProjectSecret projectSecret))
- {
- throw new DataNotFoundException("The project secret cannot be found.");
- }
-
- // Load the project from the realtime service
- await using IConnection conn = await realtimeService.ConnectAsync(curUserId);
- IDocument projectDoc = await conn.FetchAsync(buildConfig.ProjectId);
- if (!projectDoc.IsLoaded)
- {
- throw new DataNotFoundException("The project does not exist.");
- }
-
- // Ensure we have a translation engine id or a pre-translation engine id, and that it exists
- string translationEngineId = preTranslate
- ? projectSecret.ServalData?.PreTranslationEngineId
- : projectSecret.ServalData?.TranslationEngineId;
- if (
- !await TranslationEngineExistsAsync(
- buildConfig.ProjectId,
- translationEngineId,
- preTranslate,
- cancellationToken
- )
- )
- {
- // We do not have one, likely because the translation is a back translation
- // We can only get the language tags for back translations from the ScrText,
- // which is not present until after the first sync (not from the Registry).
-
- // If the source or target writing system tag is missing, get them from the ScrText
- // We do not need to do this for the alternate source as this would have been populated correctly
- if (
- string.IsNullOrWhiteSpace(projectDoc.Data.WritingSystem.Tag)
- || string.IsNullOrWhiteSpace(projectDoc.Data.TranslateConfig.Source?.WritingSystem.Tag)
- )
- {
- // Get the user secret
- Attempt userSecretAttempt = await userSecrets.TryGetAsync(curUserId);
- if (!userSecretAttempt.TryResult(out UserSecret userSecret))
- throw new DataNotFoundException("The user does not exist.");
-
- // This error can occur if the project is deleted while the build is running
- if (projectDoc.Data is null)
- {
- throw new DataNotFoundException("The project does not exist.");
- }
-
- // Update the target writing system tag
- if (string.IsNullOrWhiteSpace(projectDoc.Data.WritingSystem.Tag))
- {
- WritingSystem writingSystem = paratextService.GetWritingSystem(
- userSecret,
- projectDoc.Data.ParatextId
- );
- if (!string.IsNullOrEmpty(writingSystem.Tag))
- {
- await projectDoc.SubmitJson0OpAsync(op =>
- {
- op.Set(p => p.WritingSystem.Region, writingSystem.Region);
- op.Set(p => p.WritingSystem.Script, writingSystem.Script);
- op.Set(p => p.WritingSystem.Tag, writingSystem.Tag);
- });
- }
- }
-
- // This error can occur if the project is deleted while the build is running
- if (projectDoc.Data is null)
- {
- throw new DataNotFoundException("The project does not exist.");
- }
-
- // This error can occur if the project source is cleared while the build is running
- if (projectDoc.Data.TranslateConfig.Source is null)
- {
- throw new DataNotFoundException("The project source is not specified.");
- }
-
- // Update the source writing system tag
- if (string.IsNullOrWhiteSpace(projectDoc.Data.TranslateConfig.Source.WritingSystem.Tag))
- {
- WritingSystem writingSystem = paratextService.GetWritingSystem(
- userSecret,
- projectDoc.Data.TranslateConfig.Source.ParatextId
- );
- if (!string.IsNullOrEmpty(writingSystem.Tag))
- {
- await projectDoc.SubmitJson0OpAsync(op =>
- op.Set(p => p.TranslateConfig.Source.WritingSystem.Tag, writingSystem.Tag)
- );
- }
- }
- }
-
- // Clear the existing translation engine id and corpora, based on whether this is pre-translation or not
- string[] corporaIds =
- projectSecret
- .ServalData?.Corpora.Where(c => preTranslate ? c.Value.PreTranslate : !c.Value.PreTranslate)
- .Select(c => c.Key)
- .ToArray() ?? [];
- await projectSecrets.UpdateAsync(
- projectDoc.Id,
- u =>
- {
- if (preTranslate)
- {
- u.Unset(p => p.ServalData.PreTranslationEngineId);
- }
- else
- {
- u.Unset(p => p.ServalData.TranslationEngineId);
- }
-
- foreach (string corporaId in corporaIds)
- {
- u.Unset(p => p.ServalData.Corpora[corporaId]);
- }
- }
- );
-
- // Create the Serval project, and get the translation engine id
- translationEngineId = await CreateServalProjectAsync(projectDoc.Data, preTranslate, cancellationToken);
- }
-
- // Ensure a translation engine id is present
- if (string.IsNullOrWhiteSpace(translationEngineId))
- {
- throw new DataNotFoundException("The translation engine is not specified.");
- }
-
- // Get the translation engine from Serval
- try
- {
- TranslationEngine translationEngine = await translationEnginesClient.GetAsync(
- translationEngineId,
- cancellationToken
- );
- bool recreateTranslationEngine = false;
-
- // See if the target language has changed
- string projectTargetLanguage = await GetTargetLanguageAsync(projectDoc.Data);
- if (translationEngine.TargetLanguage != projectTargetLanguage)
- {
- string message =
- $"Target language has changed from {translationEngine.TargetLanguage} to {projectTargetLanguage}.";
- logger.LogInformation(message);
- recreateTranslationEngine = true;
- }
-
- // See if the source language has changed
- string projectSourceLanguage = GetSourceLanguage(projectDoc.Data, useAlternateTrainingSource: false);
- if (translationEngine.SourceLanguage != projectSourceLanguage)
- {
- string message =
- $"Source language has changed from {translationEngine.SourceLanguage} to {projectSourceLanguage}.";
- logger.LogInformation(message);
- recreateTranslationEngine = true;
- }
-
- // Delete then recreate the translation engine if they have changed
- if (recreateTranslationEngine)
- {
- // Removal can be a slow process
- await RemoveProjectAsync(curUserId, buildConfig.ProjectId, preTranslate, cancellationToken);
- await AddProjectAsync(curUserId, buildConfig.ProjectId, preTranslate, cancellationToken);
- }
- }
- catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound)
- {
- // A 404 means that the translation engine does not exist
- logger.LogInformation($"Translation Engine {translationEngineId} does not exist.");
- string? corporaId = projectSecret
- .ServalData?.Corpora.FirstOrDefault(c => preTranslate ? c.Value.PreTranslate : !c.Value.PreTranslate)
- .Key;
- // Clear the existing translation engine id and corpora
- await projectSecrets.UpdateAsync(
- projectDoc.Id,
- u =>
- {
- if (preTranslate)
- {
- u.Unset(p => p.ServalData.PreTranslationEngineId);
- }
- else
- {
- u.Unset(p => p.ServalData.TranslationEngineId);
- }
-
- if (!string.IsNullOrWhiteSpace(corporaId))
- {
- u.Unset(p => p.ServalData.Corpora[corporaId]);
- }
- }
- );
-
- // Create the new translation engine id
- translationEngineId = await CreateServalProjectAsync(projectDoc.Data, preTranslate, cancellationToken);
- logger.LogInformation($"Created Translation Engine {translationEngineId}.");
- }
-
- // Sync the corpus
- if ((await SyncProjectCorporaAsync(curUserId, buildConfig, preTranslate, cancellationToken)) || preTranslate)
- {
- // If the corpus was updated (or this is a pre-translation engine), start the build
- // We do not need the build ID for tracking as we use GetCurrentBuildAsync for that
-
- // Get the updated project secrets
- projectSecret = await projectSecrets.GetAsync(buildConfig.ProjectId);
-
- // Get the appropriate translation engine
- TranslationBuildConfig translationBuildConfig;
- if (preTranslate)
- {
- translationEngineId = projectSecret.ServalData!.PreTranslationEngineId!;
-
- // Execute a complete pre-translation
- translationBuildConfig = await GetTranslationBuildConfigAsync(
- projectSecret.ServalData,
- projectDoc.Data.TranslateConfig.DraftConfig,
- buildConfig
- );
- }
- else
- {
- translationEngineId = projectSecret.ServalData!.TranslationEngineId!;
- translationBuildConfig = new TranslationBuildConfig();
- }
-
- // Start the build
- TranslationBuild translationBuild = await translationEnginesClient.StartBuildAsync(
- translationEngineId,
- translationBuildConfig,
- cancellationToken
- );
-
- // Clear the queued status and job id
- await projectSecrets.UpdateAsync(
- buildConfig.ProjectId,
- u =>
- {
- if (preTranslate)
- {
- u.Unset(p => p.ServalData.PreTranslationJobId);
- u.Unset(p => p.ServalData.PreTranslationQueuedAt);
- }
- else
- {
- u.Unset(p => p.ServalData.TranslationJobId);
- u.Unset(p => p.ServalData.TranslationQueuedAt);
- }
- }
- );
-
- return translationBuild;
- }
-
- // No build started
- return null;
- }
-
+ ///
+ /// Executes , and traps any errors during execution.
+ ///
+ /// The current user identifier.
+ /// The build configuration.
+ /// If true use NMT; otherwise if false use SMT.
+ /// The cancellation token.
+ /// An asynchronous task.
+ ///
+ /// This cannot be run multiple times in different threads.
+ ///
[Mutex]
public async Task BuildProjectForBackgroundJobAsync(
string curUserId,
@@ -407,13 +157,16 @@ await projectSecrets.UpdateAsync(
{
// This will occur if the project is deleted while the job is running
string message =
- $"Build DataNotFoundException occurred for project {buildConfig.ProjectId} running in background job.";
+ $"Build DataNotFoundException occurred for project {buildConfig.ProjectId.Sanitize()}"
+ + " running in background job.";
logger.LogWarning(e, message);
}
catch (Exception e)
{
// Log the error and report to bugsnag
- string message = $"Build exception occurred for project {buildConfig.ProjectId} running in background job.";
+ string message =
+ $"Build exception occurred for project {buildConfig.ProjectId.Sanitize()}"
+ + " running in background job.";
logger.LogError(e, message);
exceptionHandler.ReportException(e);
@@ -446,7 +199,9 @@ await projectSecrets.UpdateAsync(
/// The output stream.
/// The cancellation token.
/// The name of the zip file, e.g. ABC.zip.
- /// The project does not exist, is a resource, or could not be found on disk.
+ ///
+ /// The project does not exist, is a resource, or could not be found on disk.
+ ///
public async Task GetProjectZipAsync(
string sfProjectId,
Stream outputStream,
@@ -466,24 +221,8 @@ CancellationToken cancellationToken
throw new DataNotFoundException("You cannot download a resource.");
}
- // Get the path to the Paratext directory
- string path = Path.Combine(siteOptions.Value.SiteDir, "sync", project.ParatextId, "target");
-
- // Ensure that the path exists
- if (!fileSystemService.DirectoryExists(path))
- {
- throw new DataNotFoundException($"The directory could not be found for {project.ParatextId}");
- }
-
// Create the zip file from the directory in memory
- using var archive = new ZipArchive(outputStream, ZipArchiveMode.Create, true);
- foreach (string filePath in fileSystemService.EnumerateFiles(path))
- {
- await using Stream fileStream = fileSystemService.OpenFile(filePath, FileMode.Open);
- ZipArchiveEntry entry = archive.CreateEntry(Path.GetFileName(filePath));
- await using Stream entryStream = entry.Open();
- await fileStream.CopyToAsync(entryStream, cancellationToken);
- }
+ await CreateZipFileFromParatextDirectoryAsync(project.ParatextId, outputStream, cancellationToken);
// Strip invalid characters from the file name
string fileName = Path.GetInvalidFileNameChars()
@@ -494,7 +233,7 @@ CancellationToken cancellationToken
/// <summary>
/// Gets the translation engine type string for Serval.
/// </summary>
- /// <param name="preTranslate">If true, then the translation engine is for pre-translation.</param>
+ /// <param name="preTranslate">If true use NMT; otherwise if false use SMT.</param>
/// <returns>The translation engine type string for Serval.</returns>
public async Task GetTranslationEngineTypeAsync(bool preTranslate)
{
@@ -507,8 +246,16 @@ public async Task GetTranslationEngineTypeAsync(bool preTranslate)
};
}
- public async Task RemoveProjectAsync(
- string curUserId,
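+ /// <summary>
+ /// Removes a project from Serval.
+ /// </summary>
+ /// <param name="sfProjectId">The Scripture Forge project identifier.</param>
+ /// <param name="preTranslate">If true use NMT; otherwise if false use SMT.</param>
+ /// <param name="cancellationToken">The cancellation token.</param>
+ /// <returns>An asynchronous task.</returns>
+ /// <exception cref="DataNotFoundException">The project secret cannot be found.</exception>
+ /// <remarks>This can be mocked in unit tests.</remarks>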
+ public virtual async Task RemoveProjectAsync(
string sfProjectId,
bool preTranslate,
CancellationToken cancellationToken
@@ -521,485 +268,117 @@ CancellationToken cancellationToken
}
// Ensure we have a translation engine id
- string translationEngineId = preTranslate
- ? projectSecret.ServalData?.PreTranslationEngineId
- : projectSecret.ServalData?.TranslationEngineId;
+ string translationEngineId = GetTranslationEngineId(projectSecret, preTranslate);
if (string.IsNullOrWhiteSpace(translationEngineId))
{
- logger.LogInformation($"No Translation Engine Id specified for project {sfProjectId}");
+ logger.LogInformation($"No Translation Engine Id specified for project {sfProjectId.Sanitize()}");
return;
}
- // Remove the corpus files
- foreach (
- (string corpusId, _) in projectSecret.ServalData.Corpora.Where(c => c.Value.PreTranslate == preTranslate)
- )
+ // Remove the legacy serval data
+ await RemoveLegacyServalDataAsync(sfProjectId, preTranslate, cancellationToken);
+
+ // Build the list of files, corpora, and parallel corpora to remove
+ List fileIdsToRemove = [];
+ List corpusIdsToRemove = [];
+ if (preTranslate)
{
- foreach (
- string fileId in projectSecret
- .ServalData.Corpora[corpusId]
- .SourceFiles.Concat(projectSecret.ServalData.Corpora[corpusId].TargetFiles)
- .Select(f => f.FileId)
- )
+ // Remove the additional training data
+ if (projectSecret.ServalData?.AdditionalTrainingData is not null)
{
- try
- {
- await dataFilesClient.DeleteAsync(fileId, cancellationToken);
- }
- catch (ServalApiException e)
- {
- // A 404 means that the file does not exist
- string message;
- if (e.StatusCode == StatusCodes.Status404NotFound)
- {
- message =
- $"Corpora file {fileId} in corpus {corpusId} for project {sfProjectId}"
- + " was missing or already deleted.";
- logger.LogInformation(message);
- }
- else
- {
- message =
- $"Ignored exception while deleting file {fileId} in corpus {corpusId}"
- + $" for project {sfProjectId}.";
- logger.LogError(e, message);
- }
- }
+ corpusIdsToRemove.Add(projectSecret.ServalData.AdditionalTrainingData.SourceCorpusId);
+ corpusIdsToRemove.Add(projectSecret.ServalData.AdditionalTrainingData.TargetCorpusId);
+ fileIdsToRemove.AddRange(
+ projectSecret.ServalData.AdditionalTrainingData.CorpusFiles.Select(f => f.FileId)
+ );
}
- // Delete the corpus
- try
- {
- await translationEnginesClient.DeleteCorpusAsync(translationEngineId, corpusId, cancellationToken);
- }
- catch (ServalApiException e)
+ // If there is no SMT training engine, remove all files and corpora
+ if (
+ projectSecret.ServalData is not null
+ && string.IsNullOrWhiteSpace(projectSecret.ServalData.TranslationEngineId)
+ )
{
- // A 404 means that the translation engine does not exist
- string message;
- if (e.StatusCode == StatusCodes.Status404NotFound)
- {
- message =
- $"Translation Engine {translationEngineId} for project {sfProjectId}"
- + " was missing or already deleted.";
- logger.LogInformation(message);
- }
- else
- {
- message =
- $"Ignored exception while deleting translation engine {translationEngineId}"
- + $" for project {sfProjectId}.";
- logger.LogError(e, message);
- }
+ corpusIdsToRemove.AddRange(projectSecret.ServalData.CorpusFiles.Select(f => f.CorpusId));
+ fileIdsToRemove.AddRange(projectSecret.ServalData.CorpusFiles.Select(f => f.FileId));
}
-
- // Remove our record of the corpus
- await projectSecrets.UpdateAsync(sfProjectId, u => u.Unset(p => p.ServalData.Corpora[corpusId]));
- }
-
- // Remove the project from Serval
- await translationEnginesClient.DeleteAsync(translationEngineId, cancellationToken);
-
- // Remove the Serval Data
- if (preTranslate)
- {
- await projectSecrets.UpdateAsync(sfProjectId, u => u.Unset(p => p.ServalData.PreTranslationEngineId));
- }
- else
- {
- await projectSecrets.UpdateAsync(sfProjectId, u => u.Unset(p => p.ServalData.TranslationEngineId));
- }
- }
-
- ///
- /// Syncs the project corpora from the file system to Serval.
- ///
- /// The current user identifier.
- /// The build configuration.
- /// The project is for pre-translation.
- /// The cancellation token.
- /// true if the project corpora and its files were updated; otherwise, false.
- /// The project does not exist.
- ///
- /// Notes:
- /// - If the corpus was updated, then you should start the Build with .
- /// - If a corpus is not configured on Serval, one is created and recorded in the project secret.
- /// - Any corpus files without project ids will be deleted and recreated with project ids.
- ///
- public async Task SyncProjectCorporaAsync(
- string curUserId,
- BuildConfig buildConfig,
- bool preTranslate,
- CancellationToken cancellationToken
- )
- {
- // Used to return whether the corpus was updated
- bool corpusUpdated = false;
-
- // Load the project from the realtime service
- Attempt attempt = await realtimeService.TryGetSnapshotAsync(buildConfig.ProjectId);
- if (!attempt.TryResult(out SFProject project))
- {
- throw new DataNotFoundException("The project does not exist.");
- }
-
- // Ensure we have a source
- if (project.TranslateConfig.Source is null)
- {
- throw new DataNotFoundException("The project source is not specified.");
- }
-
- // Load the project secrets, so we can get the corpus files
- if (!(await projectSecrets.TryGetAsync(project.Id)).TryResult(out SFProjectSecret projectSecret))
- {
- throw new DataNotFoundException("The project secret cannot be found.");
- }
-
- // Ensure we have serval data
- if (projectSecret.ServalData is null)
- {
- throw new DataNotFoundException("The Serval data cannot be found.");
- }
-
- // Ensure we have a translation engine ID
- string translationEngineId = preTranslate
- ? projectSecret.ServalData?.PreTranslationEngineId
- : projectSecret.ServalData?.TranslationEngineId;
- if (string.IsNullOrWhiteSpace(translationEngineId))
- {
- throw new DataNotFoundException("The translation engine ID cannot be found.");
}
-
- // See if there is a translation corpus
- string? corpusId = projectSecret
- .ServalData.Corpora.FirstOrDefault(c =>
- c.Value.PreTranslate == preTranslate && !c.Value.AlternateTrainingSource
- )
- .Key;
-
- // See if there is an alternate source to use for drafting
- bool useAlternateSource =
- project.TranslateConfig.DraftConfig.AlternateSourceEnabled
- && project.TranslateConfig.DraftConfig.AlternateSource is not null
- && preTranslate;
-
- // See if there is an alternate training source corpus
- bool useAlternateTrainingSource =
- project.TranslateConfig.DraftConfig.AlternateTrainingSourceEnabled
- && project.TranslateConfig.DraftConfig.AlternateTrainingSource is not null
- && preTranslate;
-
- // See if there is an additional training source
- bool useAdditionalTrainingSource =
- project.TranslateConfig.DraftConfig.AdditionalTrainingSourceEnabled
- && project.TranslateConfig.DraftConfig.AdditionalTrainingSource is not null
- && preTranslate;
-
- // Get the alternate training source corpus id, if present
- string? alternateTrainingSourceCorpusId = projectSecret
- .ServalData.Corpora.FirstOrDefault(c => c.Value.PreTranslate && c.Value.AlternateTrainingSource)
- .Key;
-
- // If we are to use the alternate source, only use it for drafting
- bool useSourceAsAlternateTrainingSource = false;
- string sourceProjectId = project.TranslateConfig.Source.ProjectRef;
- string sourceParatextId = project.TranslateConfig.Source.ParatextId;
- if (useAlternateSource)
- {
- sourceProjectId = project.TranslateConfig.DraftConfig.AlternateSource.ProjectRef;
- sourceParatextId = project.TranslateConfig.DraftConfig.AlternateSource.ParatextId;
-
- // If we do not have an alternate training source, use the reference source for training
- useSourceAsAlternateTrainingSource = !useAlternateTrainingSource;
- }
-
- // Get the files we have already synced
- List oldSourceCorpusFiles = [];
- List oldTargetCorpusFiles = [];
- List newTargetCorpusFiles = [];
- List newSourceCorpusFiles = [];
- if (!string.IsNullOrWhiteSpace(corpusId))
- {
- oldSourceCorpusFiles = projectSecret.ServalData.Corpora[corpusId].SourceFiles;
- oldTargetCorpusFiles = projectSecret.ServalData.Corpora[corpusId].TargetFiles;
- }
-
- // Upload the translation source
- corpusUpdated |= await UploadNewCorpusFilesAsync(
- targetProjectId: project.Id,
- sourceProjectId,
- paratextId: sourceParatextId,
- uploadParatextZipFile: true,
- texts: [],
- oldSourceCorpusFiles,
- newSourceCorpusFiles,
- cancellationToken
- );
-
- // Upload the translation target
- corpusUpdated |= await UploadNewCorpusFilesAsync(
- targetProjectId: project.Id,
- sourceProjectId: project.Id,
- project.ParatextId,
- uploadParatextZipFile: true,
- texts: [],
- oldTargetCorpusFiles,
- newTargetCorpusFiles,
- cancellationToken
- );
-
- // Update the translation corpus
- corpusUpdated |= await UpdateCorpusConfigAsync(
- project,
- translationEngineId,
- corpusId,
- preTranslate,
- additionalTrainingData: false,
- useAlternateTrainingSource: false,
- uploadParatextZipFile: true,
- corpusUpdated,
- newSourceCorpusFiles,
- newTargetCorpusFiles,
- cancellationToken
- );
-
- // Get the files we have already synced for the alternate training source
- List oldAlternateTrainingSourceCorpusFiles = [];
- List newAlternateTrainingSourceCorpusFiles = [];
- if (!string.IsNullOrWhiteSpace(alternateTrainingSourceCorpusId))
+ else if (
+ projectSecret.ServalData is not null
+ && string.IsNullOrWhiteSpace(projectSecret.ServalData.PreTranslationEngineId)
+ )
{
- oldAlternateTrainingSourceCorpusFiles = projectSecret
- .ServalData
- .Corpora[alternateTrainingSourceCorpusId]
- .SourceFiles;
+ // If there is no NMT training engine, remove all files and corpora
+ corpusIdsToRemove.AddRange(projectSecret.ServalData.CorpusFiles.Select(f => f.CorpusId));
+ fileIdsToRemove.AddRange(projectSecret.ServalData.CorpusFiles.Select(f => f.FileId));
}
- // Upload the training corpus, or remove it if no longer used
- if (useAlternateTrainingSource || useSourceAsAlternateTrainingSource || useAdditionalTrainingSource)
+ // Remove the specified corpora
+ foreach (string corpusId in corpusIdsToRemove.Where(s => !string.IsNullOrWhiteSpace(s)))
{
- // Determine which project to use for training
- string paratextId = useAlternateTrainingSource
- ? project.TranslateConfig.DraftConfig.AlternateTrainingSource.ParatextId
- : project.TranslateConfig.Source.ParatextId;
- string projectId = useAlternateTrainingSource
- ? project.TranslateConfig.DraftConfig.AlternateTrainingSource.ProjectRef
- : project.TranslateConfig.Source.ProjectRef;
-
- // Upload the training corpus
- corpusUpdated |= await UploadNewCorpusFilesAsync(
- targetProjectId: project.Id,
- sourceProjectId: projectId,
- paratextId,
- uploadParatextZipFile: true,
- texts: [],
- oldAlternateTrainingSourceCorpusFiles,
- newAlternateTrainingSourceCorpusFiles,
- cancellationToken
- );
-
- // Upload the additional training source
- if (useAdditionalTrainingSource)
+ try
{
- corpusUpdated |= await UploadNewCorpusFilesAsync(
- targetProjectId: project.Id,
- sourceProjectId: project.TranslateConfig.DraftConfig.AdditionalTrainingSource.ProjectRef,
- paratextId: project.TranslateConfig.DraftConfig.AdditionalTrainingSource.ParatextId,
- uploadParatextZipFile: true,
- texts: [],
- oldAlternateTrainingSourceCorpusFiles,
- newAlternateTrainingSourceCorpusFiles,
- cancellationToken
- );
+ await corporaClient.DeleteAsync(corpusId, cancellationToken);
+ }
+ catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound)
+ {
+ // If the corpus was already deleted, just log a message
+ string message =
+ $"Corpus {corpusId.Sanitize()} in project {sfProjectId.Sanitize()}"
+ + " was missing or already deleted.";
+ logger.LogInformation(e, message);
}
-
- // Update the training corpus
- corpusUpdated |= await UpdateCorpusConfigAsync(
- project,
- translationEngineId,
- corpusId: alternateTrainingSourceCorpusId,
- preTranslate: true,
- additionalTrainingData: false,
- useAlternateTrainingSource: true,
- uploadParatextZipFile: true,
- corpusUpdated,
- sourceCorpusFiles: newAlternateTrainingSourceCorpusFiles,
- targetCorpusFiles: newAlternateTrainingSourceCorpusFiles.Count > 0 ? newTargetCorpusFiles : [],
- cancellationToken
- );
}
- else if (preTranslate && !string.IsNullOrWhiteSpace(alternateTrainingSourceCorpusId))
- {
- // If there is an existing alternate training source, remove it
- // Remove the corpus from Serval
+ // Remove the specified files
+ foreach (string fileId in fileIdsToRemove.Where(s => !string.IsNullOrWhiteSpace(s)))
+ {
try
{
- await translationEnginesClient.DeleteCorpusAsync(
- translationEngineId,
- alternateTrainingSourceCorpusId,
- cancellationToken
- );
+ await dataFilesClient.DeleteAsync(fileId, cancellationToken);
}
catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound)
{
// If the file was already deleted, just log a message
string message =
- $"Corpus {alternateTrainingSourceCorpusId} in project {buildConfig.ProjectId}"
+ $"File {fileId.Sanitize()} in project {sfProjectId.Sanitize()}"
+ " was missing or already deleted.";
logger.LogInformation(e, message);
}
-
- // Remove the files from Serval
- foreach (ServalCorpusFile corpusFile in oldAlternateTrainingSourceCorpusFiles)
- {
- try
- {
- await dataFilesClient.DeleteAsync(corpusFile.FileId, cancellationToken);
- }
- catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound)
- {
- // If the file was already deleted, just log a message
- string message =
- $"Corpora file {corpusFile.FileId} for text {corpusFile.TextId} in project {buildConfig.ProjectId}"
- + " was missing or already deleted.";
- logger.LogInformation(e, message);
- }
- }
-
- // Remove the reference to the corpus from the project secret
- await projectSecrets.UpdateAsync(
- project.Id,
- u => u.Unset(p => p.ServalData.Corpora[alternateTrainingSourceCorpusId])
- );
}
- // See if we have an additional training data
- if (preTranslate)
+ // Remove the project from Serval
+ try
{
- // Get the training data corpus id
- string trainingDataCorpusId = projectSecret
- .ServalData.Corpora.FirstOrDefault(c => c.Value.PreTranslate && c.Value.AdditionalTrainingData)
- .Key;
-
- // If there are training data files, or they were removed (i.e. we have a corpus record for it)
- if (buildConfig.TrainingDataFiles.Count > 0 || !string.IsNullOrWhiteSpace(trainingDataCorpusId))
- {
- // Set up the collections required to upload the corpus data files
- List newTrainingDataSourceTexts = [];
- List newTrainingDataTargetTexts = [];
- List newTrainingDataSourceCorpusFiles = [];
- List newTrainingDataTargetCorpusFiles = [];
- List oldTrainingDataSourceCorpusFiles = [];
- List oldTrainingDataTargetCorpusFiles = [];
-
- // Get the training data texts
- await trainingDataService.GetTextsAsync(
- curUserId,
- buildConfig.ProjectId,
- buildConfig.TrainingDataFiles,
- newTrainingDataSourceTexts,
- newTrainingDataTargetTexts
- );
-
- // Get the training data files we have already synced
- if (!string.IsNullOrWhiteSpace(trainingDataCorpusId))
- {
- oldTrainingDataSourceCorpusFiles = projectSecret
- .ServalData
- .Corpora[trainingDataCorpusId]
- .SourceFiles;
- oldTrainingDataTargetCorpusFiles = projectSecret
- .ServalData
- .Corpora[trainingDataCorpusId]
- .TargetFiles;
- }
-
- // Upload the source files for the training data
- corpusUpdated |= await UploadNewCorpusFilesAsync(
- targetProjectId: project.Id,
- sourceProjectId: project.Id,
- project.ParatextId,
- uploadParatextZipFile: false,
- newTrainingDataSourceTexts,
- oldTrainingDataSourceCorpusFiles,
- newTrainingDataSourceCorpusFiles,
- cancellationToken
- );
-
- // Upload the target files for the training data
- corpusUpdated |= await UploadNewCorpusFilesAsync(
- targetProjectId: project.Id,
- sourceProjectId: project.Id,
- project.ParatextId,
- uploadParatextZipFile: false,
- newTrainingDataTargetTexts,
- oldTrainingDataTargetCorpusFiles,
- newTrainingDataTargetCorpusFiles,
- cancellationToken
- );
-
- // Update the training data corpus
- corpusUpdated |= await UpdateCorpusConfigAsync(
- project,
- translationEngineId,
- corpusId: trainingDataCorpusId,
- preTranslate: true,
- additionalTrainingData: true,
- useAlternateTrainingSource: false,
- uploadParatextZipFile: false,
- corpusUpdated,
- sourceCorpusFiles: newTrainingDataSourceCorpusFiles,
- targetCorpusFiles: newTrainingDataTargetCorpusFiles,
- cancellationToken
- );
- }
+ await translationEnginesClient.DeleteAsync(translationEngineId, cancellationToken);
}
-
- return corpusUpdated;
- }
-
- ///
- /// Determines whether a translation engine exists for the specified project.
- ///
- /// The Scripture Forge project identifier.
- /// The Serval translation engine identifier.
- /// The Serval translation engine identifier.
- /// The cancellation token.
- ///
- public async Task TranslationEngineExistsAsync(
- string projectId,
- string? translationEngineId,
- bool preTranslate,
- CancellationToken cancellationToken
- )
- {
- if (string.IsNullOrWhiteSpace(translationEngineId))
+ catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound)
{
- return false;
+ // If the translation engine was already deleted, just log a message
+ string message =
+ $"Translation Engine {translationEngineId.Sanitize()} in project {sfProjectId.Sanitize()}"
+ + " was missing or already deleted.";
+ logger.LogInformation(e, message);
}
- try
+ // Remove the translation engine identifier
+ if (preTranslate)
{
- TranslationEngine translationEngine = await translationEnginesClient.GetAsync(
- translationEngineId,
- cancellationToken
- );
- string type = await GetTranslationEngineTypeAsync(preTranslate);
-
- // We check for the type, taking account of Pascal Case (Serval 1.1) and Kebab Case (Serval 1.2)
- return translationEngine.Name == projectId
- && string.Equals(
- translationEngine.Type.Replace("-", string.Empty),
- type.Replace("-", string.Empty),
- StringComparison.InvariantCultureIgnoreCase
- );
+ await projectSecrets.UpdateAsync(sfProjectId, u => u.Unset(p => p.ServalData.PreTranslationEngineId));
}
- catch (ServalApiException e)
- when (e.StatusCode is StatusCodes.Status403Forbidden or StatusCodes.Status404NotFound)
+ else
{
- return false;
+ await projectSecrets.UpdateAsync(sfProjectId, u => u.Unset(p => p.ServalData.TranslationEngineId));
}
}
+ ///
+ /// Updates the name and language configuration for the additional and alternate sources.
+ ///
+ /// The current user identifier.
+ /// The Scripture Forge project identifier.
+ ///
+ /// The project or user secret does not exist.
[Mutex]
public async Task UpdateTranslationSourcesAsync(string curUserId, string sfProjectId)
{
@@ -1017,7 +396,7 @@ public async Task UpdateTranslationSourcesAsync(string curUserId, string sfProje
throw new DataNotFoundException("The project does not exist.");
}
- // If there is an alternate source, ensure that writing system and RTL is correct
+ // If there is an alternate source, ensure that name, writing system and RTL is correct
if (projectDoc.Data.TranslateConfig.DraftConfig.AlternateSource is not null)
{
ParatextSettings? alternateSourceSettings = paratextService.GetParatextSettings(
@@ -1033,15 +412,24 @@ await projectDoc.SubmitJson0OpAsync(op =>
alternateSourceSettings.IsRightToLeft
);
if (alternateSourceSettings.LanguageTag is not null)
+ {
op.Set(
pd => pd.TranslateConfig.DraftConfig.AlternateSource.WritingSystem.Tag,
alternateSourceSettings.LanguageTag
);
+ }
+ if (alternateSourceSettings.FullName is not null)
+ {
+ op.Set(
+ pd => pd.TranslateConfig.DraftConfig.AlternateSource.Name,
+ alternateSourceSettings.FullName
+ );
+ }
});
}
}
- // If there is an alternate training source, ensure that writing system and RTL is correct
+ // If there is an alternate training source, ensure that name, writing system and RTL is correct
if (projectDoc.Data.TranslateConfig.DraftConfig.AlternateTrainingSource is not null)
{
ParatextSettings? alternateSourceSettings = paratextService.GetParatextSettings(
@@ -1057,15 +445,24 @@ await projectDoc.SubmitJson0OpAsync(op =>
alternateSourceSettings.IsRightToLeft
);
if (alternateSourceSettings.LanguageTag is not null)
+ {
op.Set(
pd => pd.TranslateConfig.DraftConfig.AlternateTrainingSource.WritingSystem.Tag,
alternateSourceSettings.LanguageTag
);
+ }
+ if (alternateSourceSettings.FullName is not null)
+ {
+ op.Set(
+ pd => pd.TranslateConfig.DraftConfig.AlternateTrainingSource.Name,
+ alternateSourceSettings.FullName
+ );
+ }
});
}
}
- // If there is an additional training source, ensure that writing system and RTL is correct
+ // If there is an additional training source, ensure that name, writing system and RTL is correct
if (projectDoc.Data.TranslateConfig.DraftConfig.AdditionalTrainingSource is not null)
{
ParatextSettings? additionalTrainingSourceSettings = paratextService.GetParatextSettings(
@@ -1081,121 +478,103 @@ await projectDoc.SubmitJson0OpAsync(op =>
additionalTrainingSourceSettings.IsRightToLeft
);
if (additionalTrainingSourceSettings.LanguageTag is not null)
+ {
op.Set(
pd => pd.TranslateConfig.DraftConfig.AdditionalTrainingSource.WritingSystem.Tag,
additionalTrainingSourceSettings.LanguageTag
);
+ }
+ if (additionalTrainingSourceSettings.FullName is not null)
+ {
+ op.Set(
+ pd => pd.TranslateConfig.DraftConfig.AdditionalTrainingSource.Name,
+ additionalTrainingSourceSettings.FullName
+ );
+ }
});
}
}
}
///
- /// Gets the source language for the project.
+ /// Builds a project on Serval, including syncing and any required setup.
///
- /// The project.
- /// If true, use the alternate training source.
- /// The source language.
- ///
- private static string GetSourceLanguage(SFProject? project, bool useAlternateTrainingSource)
+ /// The current user identifier.
+ /// The build configuration.
+ /// If true use NMT; otherwise if false use SMT.
+ /// The cancellation token.
+ /// An asynchronous task.
+ /// The project or project secret could not be found.
+ /// This can be mocked in unit tests.
+ protected internal virtual async Task BuildProjectAsync(
+ string curUserId,
+ BuildConfig buildConfig,
+ bool preTranslate,
+ CancellationToken cancellationToken
+ )
{
- // This error can occur if the project is deleted while the build is running
- if (project is null)
+ // Load the target project secrets, so we can get the translation engine ID
+ if (!(await projectSecrets.TryGetAsync(buildConfig.ProjectId)).TryResult(out SFProjectSecret projectSecret))
{
- throw new DataNotFoundException("The project does not exist.");
+ throw new DataNotFoundException("The project secret cannot be found.");
}
- // This error can occur if the project source is cleared while the build is running
- if (project.TranslateConfig.Source is null)
- {
- throw new DataNotFoundException("The project source is not specified.");
- }
-
- if (useAlternateTrainingSource)
+ // Load the project from the realtime service
+ await using IConnection conn = await realtimeService.ConnectAsync(curUserId);
+ IDocument projectDoc = await conn.FetchAsync(buildConfig.ProjectId);
+ if (!projectDoc.IsLoaded)
{
- return project.TranslateConfig.DraftConfig.AlternateTrainingSource?.WritingSystem.Tag
- ?? project.TranslateConfig.Source?.WritingSystem.Tag
- ?? project.TranslateConfig.DraftConfig.AlternateSource?.WritingSystem.Tag
- ?? throw new ArgumentNullException(nameof(project));
+ throw new DataNotFoundException("The project does not exist.");
}
- string alternateSourceLanguage = project.TranslateConfig.DraftConfig.AlternateSource?.WritingSystem.Tag;
- bool useAlternateSourceLanguage =
- project.TranslateConfig.DraftConfig.AlternateSourceEnabled
- && !string.IsNullOrWhiteSpace(alternateSourceLanguage);
- return useAlternateSourceLanguage
- ? alternateSourceLanguage
- : project.TranslateConfig.Source?.WritingSystem.Tag ?? throw new ArgumentNullException(nameof(project));
- }
-
- ///
- /// Gets the segments from the text with Unix/Linux line endings.
- ///
- /// The .
- /// The text file data to be uploaded to Serval.
- private static string GetTextFileData(ISFText text)
- {
- var sb = new StringBuilder();
+ // Remove the legacy serval data, if present
+ await RemoveLegacyServalDataAsync(buildConfig.ProjectId, preTranslate, cancellationToken);
- // For pre-translation, we must upload empty lines with segment refs for the correct references to be returned
- foreach (SFTextSegment segment in text.Segments.Where(s => !s.IsEmpty))
- {
- sb.Append(segment.SegmentRef);
- sb.Append('\t');
- sb.Append(segment.SegmentText);
- sb.Append('\t');
- if (segment.IsSentenceStart)
- {
- sb.Append("ss,");
- }
+ // Ensure we have a translation engine id or a pre-translation engine id, and that it exists
+ string translationEngineId = await EnsureTranslationEngineExistsAsync(
+ curUserId,
+ projectDoc,
+ projectSecret,
+ preTranslate,
+ cancellationToken
+ );
- if (segment.IsInRange)
- {
- sb.Append("ir,");
- }
+ // Recreate the translation engine if it is missing, or the language has changed
+ await RecreateTranslationEngineIfRequiredAsync(
+ translationEngineId,
+ projectDoc.Data,
+ preTranslate,
+ cancellationToken
+ );
- if (segment.IsRangeStart)
- {
- sb.Append("rs,");
- }
+ // Perform the file and corpora sync with Serval
+ IList corporaSyncInfo = await SyncProjectCorporaAsync(
+ curUserId,
+ buildConfig,
+ preTranslate,
+ cancellationToken
+ );
- // Strip the last comma, or the tab if there are no flags
- sb.Length--;
+ // Get the updated project secret
+ projectSecret = await projectSecrets.GetAsync(buildConfig.ProjectId);
- // Append the Unix EOL to ensure consistency as this text data is uploaded to Serval
- sb.Append('\n');
+ // Ensure we have the ServalData
+ if (projectSecret.ServalData is null)
+ {
+ throw new DataNotFoundException("The project secret does not contain Serval data.");
}
- return sb.ToString();
- }
-
- ///
- /// Gets the TranslationBuildConfig for the specified ServalData object.
- ///
- /// The Serval data from .
- ///
- /// The Draft configuration from ..
- ///
- /// The build configuration from the user, specified on the front end.
- /// The TranslationBuildConfig for a Pre-Translate build.
- /// Do not use with SMT builds.
- private async Task GetTranslationBuildConfigAsync(
- ServalData servalData,
- DraftConfig draftConfig,
- BuildConfig buildConfig
- )
- {
- JObject? servalConfig = null;
- if (draftConfig.ServalConfig is not null)
+ // Set up the Serval Configuration
+ string? servalConfig = null;
+ if (projectDoc.Data.TranslateConfig.DraftConfig.ServalConfig is not null)
{
// Load the Serval Config from the Draft Config
- servalConfig = JObject.Parse(draftConfig.ServalConfig);
+ servalConfig = projectDoc.Data.TranslateConfig.DraftConfig.ServalConfig;
}
else if (await featureManager.IsEnabledAsync(FeatureFlags.UpdatedLearningRateForServal))
{
// Specify the updated learning rate
- servalConfig = JObject.Parse(
- """
+ servalConfig = """
{
"train_params":
{
@@ -1205,149 +584,123 @@ BuildConfig buildConfig
"max_steps": 5000
}
}
- """
- );
+ """;
}
- // If Fast Training is enabled, override the max_steps
- if (buildConfig.FastTraining)
+ // Get the appropriate translation engine
+ TranslationBuildConfig translationBuildConfig;
+ if (preTranslate)
{
- // Ensure that there is a servalConfig JSON object
- servalConfig ??= new JObject();
+ translationEngineId = projectSecret.ServalData.PreTranslationEngineId!;
- // 20 is the number of steps used on Serval QA by default
- servalConfig["max_steps"] = 20;
+ // Build the configuration for a complete pre-translation
+ translationBuildConfig = GetTranslationBuildConfig(
+ projectSecret.ServalData,
+ servalConfig,
+ buildConfig,
+ corporaSyncInfo
+ );
}
-
- // See if there is an alternate training source or alternate drafting source corpus
- bool useAlternateTrainingCorpus =
- (draftConfig.AlternateTrainingSourceEnabled && draftConfig.AlternateTrainingSource is not null)
- || draftConfig.AlternateSourceEnabled && draftConfig.AlternateSource is not null;
-
- // Set up the pre-translation and training corpora
- List preTranslate = [];
- List? trainOn = null;
-
- // Add the pre-translation books
- foreach (
- KeyValuePair corpus in servalData.Corpora.Where(s =>
- s.Value.PreTranslate && !s.Value.AlternateTrainingSource && !s.Value.AdditionalTrainingData
- )
- )
+ else
{
- var preTranslateCorpusConfig = new PretranslateCorpusConfig { CorpusId = corpus.Key };
+ translationEngineId = projectSecret.ServalData.TranslationEngineId!;
+ translationBuildConfig = new TranslationBuildConfig();
+ }
- // If this is a Paratext zip file corpus
- if (corpus.Value.UploadParatextZipFile)
- {
- // Since all books are uploaded via the zip file, we need to specify the target books to translate
- preTranslateCorpusConfig.ScriptureRange = !string.IsNullOrWhiteSpace(
- buildConfig.TranslationScriptureRange
- )
- ? buildConfig.TranslationScriptureRange
- : string.Join(';', buildConfig.TranslationBooks.Select(Canon.BookNumberToId));
+ // Start the build
+ await translationEnginesClient.StartBuildAsync(translationEngineId, translationBuildConfig, cancellationToken);
- // Ensure that the pre-translate scripture range is null if it is blank
- if (string.IsNullOrWhiteSpace(preTranslateCorpusConfig.ScriptureRange))
+ // Clear the queued status and job id
+ await projectSecrets.UpdateAsync(
+ buildConfig.ProjectId,
+ u =>
+ {
+ if (preTranslate)
{
- preTranslateCorpusConfig.ScriptureRange = null;
+ u.Unset(p => p.ServalData.PreTranslationJobId);
+ u.Unset(p => p.ServalData.PreTranslationQueuedAt);
}
-
- if (!useAlternateTrainingCorpus)
+ else
{
- string? scriptureRange = !string.IsNullOrWhiteSpace(buildConfig.TrainingScriptureRange)
- ? buildConfig.TrainingScriptureRange
- : string.Join(';', buildConfig.TrainingBooks.Select(Canon.BookNumberToId));
- string[]? textIds = null;
-
- // Ensure that the trainOn scripture range is null if it is blank,
- // and that the textIds array is empty so no books are trained on.
- if (string.IsNullOrWhiteSpace(scriptureRange))
- {
- scriptureRange = null;
- textIds = [];
- }
-
- // As we do not have an alternate train on source specified, use the source texts to train on
- trainOn ??= [];
- trainOn.Add(
- new TrainingCorpusConfig
- {
- CorpusId = corpus.Key,
- ScriptureRange = scriptureRange,
- TextIds = textIds
- }
- );
+ u.Unset(p => p.ServalData.TranslationJobId);
+ u.Unset(p => p.ServalData.TranslationQueuedAt);
}
}
+ );
+ }
- preTranslate.Add(preTranslateCorpusConfig);
- }
-
- // Add the alternate training corpus, if enabled
- // This will be the reference source if we are using an alternate drafting source
- if (useAlternateTrainingCorpus)
+ ///
+ /// Creates or Updates a Parallel Corpus on Serval.
+ ///
+ /// The translation engine identifier.
+ ///
+ /// The identifier of the parallel corpus to be updated. If null or whitespace, a new parallel corpus will be created.
+ ///
+ ///
+ /// The name of the parallel corpus. This will only be used if the parallel corpus is being created.
+ ///
+ /// The source corpus identifiers.
+ /// The target corpus identifiers.
+ /// The cancellation token.
+ ///
+ /// The new or updated parallel corpus identifier. If is not null,
+ /// this will be the same value as . If
+ /// is null, this will be the identifier of the new parallel corpus.
+ ///
+ /// This can be mocked in unit tests.
+ protected internal virtual async Task CreateOrUpdateParallelCorpusAsync(
+ string translationEngineId,
+ string? parallelCorpusId,
+ string? name,
+ IList sourceCorpusIds,
+ IList targetCorpusIds,
+ CancellationToken cancellationToken
+ )
+ {
+ if (string.IsNullOrWhiteSpace(parallelCorpusId))
{
- trainOn = [];
- foreach (
- KeyValuePair corpus in servalData.Corpora.Where(s =>
- s.Value.PreTranslate && s.Value.AlternateTrainingSource
- )
- )
- {
- var trainingCorpusConfig = new TrainingCorpusConfig { CorpusId = corpus.Key };
- if (corpus.Value.UploadParatextZipFile)
+ // Create a new parallel corpus
+ TranslationParallelCorpus parallelCorpus = await translationEnginesClient.AddParallelCorpusAsync(
+ translationEngineId,
+ new TranslationParallelCorpusConfig
{
- // As all books are uploaded via the zip file, specify the source books to train on
- trainingCorpusConfig.ScriptureRange = !string.IsNullOrWhiteSpace(buildConfig.TrainingScriptureRange)
- ? buildConfig.TrainingScriptureRange
- : string.Join(';', buildConfig.TrainingBooks.Select(Canon.BookNumberToId));
-
- // Ensure that the alternate training corpus scripture range is null if it is blank,
- // and that the textIds array is empty so no books are trained on.
- if (string.IsNullOrWhiteSpace(trainingCorpusConfig.ScriptureRange))
- {
- trainingCorpusConfig.ScriptureRange = null;
- trainingCorpusConfig.TextIds = [];
- }
- }
-
- trainOn.Add(trainingCorpusConfig);
- }
+ Name = name,
+ SourceCorpusIds = sourceCorpusIds,
+ TargetCorpusIds = targetCorpusIds,
+ },
+ cancellationToken
+ );
+ parallelCorpusId = parallelCorpus.Id;
}
-
- var translationBuildConfig = new TranslationBuildConfig
- {
- Options = servalConfig,
- Pretranslate = preTranslate,
- TrainOn = trainOn,
- };
-
- // If we have an alternate training source, we need to add the additional files
- // If not, Serval will use the additional files corpus automatically, so we do not need to do anything
- if (buildConfig.TrainingDataFiles.Count > 0 && useAlternateTrainingCorpus)
+ else
{
- // Include the additional training data with the alternate training corpora
- translationBuildConfig.TrainOn.AddRange(
- servalData
- .Corpora.Where(s => s.Value.PreTranslate && s.Value.AdditionalTrainingData)
- .Select(c => new TrainingCorpusConfig { CorpusId = c.Key })
- .ToList()
+ // Update the specified parallel corpus
+ await translationEnginesClient.UpdateParallelCorpusAsync(
+ translationEngineId,
+ parallelCorpusId,
+ new TranslationParallelCorpusUpdateConfig
+ {
+ SourceCorpusIds = sourceCorpusIds,
+ TargetCorpusIds = targetCorpusIds,
+ },
+ cancellationToken
);
}
- return translationBuildConfig;
+ return parallelCorpusId;
}
///
- /// Creates a project in Serval.
+ /// Creates the translation engine for a project in Serval,
+ /// and updates the project secret with the translation engine identifier.
///
/// The Scripture Forge project
- /// The project is for pre-translation.
+ /// If true use NMT; otherwise if false use SMT.
/// The cancellation token.
/// The translation engine id.
/// The translation engine could not be created.
- private async Task CreateServalProjectAsync(
+ /// This can be mocked in unit tests.
+ protected internal virtual async Task CreateServalProjectAsync(
SFProject sfProject,
bool preTranslate,
CancellationToken cancellationToken
@@ -1355,15 +708,13 @@ CancellationToken cancellationToken
{
// Get the existing project secret, so we can see how to create the engine and update the Serval data
SFProjectSecret projectSecret = await projectSecrets.GetAsync(sfProject.Id);
- string translationEngineId = preTranslate
- ? projectSecret.ServalData?.PreTranslationEngineId
- : projectSecret.ServalData?.TranslationEngineId;
+ string translationEngineId = GetTranslationEngineId(projectSecret, preTranslate);
if (string.IsNullOrWhiteSpace(translationEngineId))
{
TranslationEngineConfig engineConfig = new TranslationEngineConfig
{
Name = sfProject.Id,
- SourceLanguage = GetSourceLanguage(sfProject, useAlternateTrainingSource: false),
+ SourceLanguage = GetSourceLanguage(sfProject),
TargetLanguage = await GetTargetLanguageAsync(sfProject),
Type = await GetTranslationEngineTypeAsync(preTranslate),
};
@@ -1386,7 +737,7 @@ CancellationToken cancellationToken
// Store the Pre-Translation Engine ID
await projectSecrets.UpdateAsync(
sfProject.Id,
- u => u.Set(p => p.ServalData.PreTranslationEngineId, translationEngine.Id)
+ u => u.Set(p => p.ServalData.PreTranslationEngineId, translationEngineId)
);
}
else if (projectSecret.ServalData is not null)
@@ -1394,7 +745,7 @@ await projectSecrets.UpdateAsync(
// Store the Translation Engine ID
await projectSecrets.UpdateAsync(
sfProject.Id,
- u => u.Set(p => p.ServalData.TranslationEngineId, translationEngine.Id)
+ u => u.Set(p => p.ServalData.TranslationEngineId, translationEngineId)
);
}
else if (preTranslate)
@@ -1402,7 +753,11 @@ await projectSecrets.UpdateAsync(
// Store the Pre-Translation Engine ID
await projectSecrets.UpdateAsync(
sfProject.Id,
- u => u.Set(p => p.ServalData, new ServalData { PreTranslationEngineId = translationEngine.Id })
+ u =>
+ u.Set(
+ p => p.ServalData,
+ new ServalData { PreTranslationEngineId = translationEngineId, CorpusFiles = [] }
+ )
);
}
else
@@ -1410,7 +765,11 @@ await projectSecrets.UpdateAsync(
// Store the Translation Engine ID
await projectSecrets.UpdateAsync(
sfProject.Id,
- u => u.Set(p => p.ServalData, new ServalData { TranslationEngineId = translationEngine.Id })
+ u =>
+ u.Set(
+ p => p.ServalData,
+ new ServalData { TranslationEngineId = translationEngineId, CorpusFiles = [] }
+ )
);
}
}
@@ -1418,393 +777,1280 @@ await projectSecrets.UpdateAsync(
return translationEngineId;
}
- private async Task UploadFileAsync(
- string textId,
- string projectId,
- string textFileData,
- FileFormat fileFormat,
- ICollection? oldCorpusFiles,
- ICollection newCorpusFiles,
- CancellationToken cancellationToken
- )
- {
- byte[] buffer = Encoding.UTF8.GetBytes(textFileData);
- await using Stream stream = new MemoryStream(buffer, false);
- return await UploadFileAsync(
- textId,
- projectId,
- stream,
- fileFormat,
- oldCorpusFiles,
- newCorpusFiles,
- cancellationToken
- );
- }
-
- private async Task UploadFileAsync(
- string textId,
- string projectId,
- Stream stream,
- FileFormat fileFormat,
- ICollection? oldCorpusFiles,
- ICollection newCorpusFiles,
+ ///
+ /// Creates a zip file from the contents of a directory.
+ ///
+ /// The Paratext identifier for the project.
+ /// The output stream.
+ /// The cancellation token.
+ /// An asynchronous task.
+ /// This can be mocked in unit tests.
+ protected internal virtual async Task CreateZipFileFromParatextDirectoryAsync(
+ string paratextId,
+ Stream outputStream,
CancellationToken cancellationToken
)
{
- // See if the corpus exists and update it if it is missing, or if the checksum has changed
- bool uploadText = false;
-
- // Reset the stream to the start
- stream.Seek(0, SeekOrigin.Begin);
-
- // Calculate the checksum from the stream
- using MD5 md5 = MD5.Create();
- StringBuilder sb = new StringBuilder();
- foreach (var hashByte in await md5.ComputeHashAsync(stream, cancellationToken))
- {
- sb.Append(hashByte.ToString("X2").ToLower());
- }
+ // Get the path to the Paratext directory
+ string path = Path.Combine(siteOptions.Value.SiteDir, "sync", paratextId, "target");
- // Upload the file if it is not there or has changed
- string checksum = sb.ToString();
- ServalCorpusFile? previousCorpusFile = oldCorpusFiles?.FirstOrDefault(c =>
- c.TextId == textId && c.ProjectId == projectId
- );
- if (previousCorpusFile is null || previousCorpusFile.FileChecksum != checksum)
+ // Ensure that the path exists
+ if (!fileSystemService.DirectoryExists(path))
{
- uploadText = true;
+ throw new DataNotFoundException($"The directory could not be found for {paratextId}");
}
- // No update, so do not upload
- if (!uploadText)
+ using var archive = new ZipArchive(outputStream, ZipArchiveMode.Create, leaveOpen: true);
+ foreach (string filePath in fileSystemService.EnumerateFiles(path))
{
- newCorpusFiles.Add(previousCorpusFile);
- return false;
+ await using Stream fileStream = fileSystemService.OpenFile(filePath, FileMode.Open);
+ ZipArchiveEntry entry = archive.CreateEntry(Path.GetFileName(filePath));
+ await using Stream entryStream = entry.Open();
+ await fileStream.CopyToAsync(entryStream, cancellationToken);
}
+ }
- // Reset the stream to the start
- stream.Seek(0, SeekOrigin.Begin);
-
- // Upload the file
- DataFile dataFile;
- if (previousCorpusFile is null)
- {
- dataFile = await dataFilesClient.CreateAsync(
- new FileParameter(stream),
- fileFormat,
- textId,
- cancellationToken
- );
- }
- else
+ ///
+ /// Deletes all corpora and files for the specified collection.
+ ///
+ /// The Serval Corpus Files.
+ /// The project identifier
+ /// The cancellation token.
+ /// This can be mocked in unit tests.
+ protected internal virtual async Task DeleteAllCorporaAndFilesAsync(
+ IEnumerable servalCorpusFiles,
+ string projectId,
+ CancellationToken cancellationToken
+ )
+ {
+ foreach (ServalCorpusFile servalCorpusFile in servalCorpusFiles)
{
- // See if the file exists, and it is the same format
- bool dataFileExists;
try
{
- dataFile = await dataFilesClient.GetAsync(previousCorpusFile.FileId, cancellationToken);
- dataFileExists = dataFile.Format == fileFormat;
+ await corporaClient.DeleteAsync(servalCorpusFile.CorpusId, cancellationToken);
}
catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound)
{
- logger.LogInformation($"File {previousCorpusFile.FileId} does not exist - creating.");
- dataFileExists = false;
+ // If the corpus was already deleted, just log a message
+ string message =
+ $"Corpus {servalCorpusFile.CorpusId.Sanitize()} in project {projectId.Sanitize()}"
+ + " was missing or already deleted.";
+ logger.LogInformation(e, message);
}
- // Update the file if it exists, otherwise create it
- dataFile = dataFileExists
- ? await dataFilesClient.UpdateAsync(
- previousCorpusFile.FileId,
- new FileParameter(stream),
- cancellationToken
- )
- : await dataFilesClient.CreateAsync(new FileParameter(stream), fileFormat, textId, cancellationToken);
- }
-
- newCorpusFiles.Add(
- new ServalCorpusFile
+ try
{
- FileChecksum = checksum,
- FileId = dataFile.Id,
- ProjectId = projectId,
- TextId = textId,
+ await dataFilesClient.DeleteAsync(servalCorpusFile.FileId, cancellationToken);
}
- );
-
- return true;
- }
-
- ///
- /// Gets the target language for the project
- ///
- /// The project.
- /// The target language.
- ///
- private async Task GetTargetLanguageAsync(SFProject project)
- {
- // Echo requires the target and source language to be the same, as it outputs your source texts
- bool useEcho = await featureManager.IsEnabledAsync(FeatureFlags.UseEchoForPreTranslation);
- return useEcho ? GetSourceLanguage(project, useAlternateTrainingSource: false) : project.WritingSystem.Tag;
+ catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound)
+ {
+ // If the file was already deleted, just log a message
+ string message =
+ $"File {servalCorpusFile.FileId.Sanitize()} in project {projectId.Sanitize()}"
+ + " was missing or already deleted.";
+ logger.LogInformation(e, message);
+ }
+ }
}
///
- /// Updates the corpus configuration in the project secrets.
+ /// Ensures that the translation engine exists, and that the Scripture Forge project is in a compatible state.
///
- /// The project.
- /// The translation engine identifier.
- /// The corpus identifier. If null, a new corpus is created.
- /// The project is for pre-translation.
- /// If true, this is the additional training data corpus.
- /// If true, use the alternate training source.
- /// A Paratext zip file was used for the upload.
- /// The files in the corpus have been updated.
- /// The source corpus files.
- /// The target corpus files.
+ /// The current user identifier.
+ /// The project document.
+ /// The project secret.
+ /// If true use NMT; otherwise if false use SMT.
/// The cancellation token.
- /// true if the corpus was updated; otherwise, false.
- private async Task UpdateCorpusConfigAsync(
- SFProject project,
- string translationEngineId,
- string? corpusId,
+ /// The translation engine identifier.
+ /// The project, user, or translation engine does not exist.
+ /// This can be mocked in unit tests.
+ protected internal virtual async Task EnsureTranslationEngineExistsAsync(
+ string curUserId,
+ IDocument projectDoc,
+ SFProjectSecret projectSecret,
bool preTranslate,
- bool additionalTrainingData,
- bool useAlternateTrainingSource,
- bool uploadParatextZipFile,
- bool corpusUpdated,
- List sourceCorpusFiles,
- List targetCorpusFiles,
CancellationToken cancellationToken
)
{
- // Create or update the corpus
- TranslationCorpus corpus;
- TranslationCorpusConfig corpusConfig = new TranslationCorpusConfig
- {
- Name = project.Id,
- SourceFiles = sourceCorpusFiles
- .Select(f => new TranslationCorpusFileConfig { FileId = f.FileId, TextId = f.TextId })
- .ToList(),
- SourceLanguage = GetSourceLanguage(project, useAlternateTrainingSource),
- TargetFiles = targetCorpusFiles
- .Select(f => new TranslationCorpusFileConfig { FileId = f.FileId, TextId = f.TextId })
- .ToList(),
- TargetLanguage = await GetTargetLanguageAsync(project),
- };
-
- // See if we need to create or update the corpus
- if (string.IsNullOrEmpty(corpusId))
+ // Start with the translation engine id recorded in the project secret for this build type;
+ // it is replaced below if that engine cannot be found
+ string translationEngineId = GetTranslationEngineId(projectSecret, preTranslate);
+ if (!await TranslationEngineExistsAsync(projectDoc.Id, translationEngineId, preTranslate, cancellationToken))
{
- corpus = await translationEnginesClient.AddCorpusAsync(
- translationEngineId,
- corpusConfig,
- cancellationToken
- );
- }
- else
- {
- // Get the corpus to see if the language has changed
- bool createCorpus;
- bool deleteCorpus;
- try
- {
- corpus = await translationEnginesClient.GetCorpusAsync(
- translationEngineId,
- corpusId,
- cancellationToken
- );
- createCorpus =
- corpus.SourceLanguage != corpusConfig.SourceLanguage
- || corpus.TargetLanguage != corpusConfig.TargetLanguage;
- deleteCorpus = createCorpus;
- }
- catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound)
- {
- // A 404 means that the translation engine does not exist
- logger.LogInformation($"Corpus {corpusId} in Translation Engine {translationEngineId} does not exist.");
- createCorpus = true;
- deleteCorpus = false;
- }
+ // We do not have one, likely because the translation is a back translation
+ // We can only get the language tags for back translations from the ScrText,
+ // which is not present until after the first sync (not from the Registry).
- // The language has changed, or the corpus is missing
- if (createCorpus)
+ // If the source or target writing system tag is missing, get them from the ScrText
+ // We do not need to do this for the alternate source as this would have been populated correctly
+ // A null projectDoc.Data also enters this branch, and is rejected by the null check further down
+ if (
+ string.IsNullOrWhiteSpace(projectDoc.Data?.WritingSystem.Tag)
+ || string.IsNullOrWhiteSpace(projectDoc.Data?.TranslateConfig.Source?.WritingSystem.Tag)
+ )
{
- // Delete the old corpus
- if (deleteCorpus)
+ // Get the user secret
+ Attempt userSecretAttempt = await userSecrets.TryGetAsync(curUserId);
+ if (!userSecretAttempt.TryResult(out UserSecret userSecret))
+ {
+ throw new DataNotFoundException("The user does not exist.");
+ }
+
+ // This error can occur if the project is deleted while the build is running
+ if (projectDoc.Data is null)
+ {
+ throw new DataNotFoundException("The project does not exist.");
+ }
+
+ // Update the target writing system tag
+ if (string.IsNullOrWhiteSpace(projectDoc.Data.WritingSystem.Tag))
+ {
+ WritingSystem writingSystem = paratextService.GetWritingSystem(
+ userSecret,
+ projectDoc.Data.ParatextId
+ );
+ // NOTE(review): this guard uses IsNullOrEmpty, while the equivalent source guard below uses
+ // IsNullOrWhiteSpace - confirm the difference is intentional
+ if (!string.IsNullOrEmpty(writingSystem.Tag))
+ {
+ // The target project has its region, script and tag updated together
+ await projectDoc.SubmitJson0OpAsync(op =>
+ {
+ op.Set(p => p.WritingSystem.Region, writingSystem.Region);
+ op.Set(p => p.WritingSystem.Script, writingSystem.Script);
+ op.Set(p => p.WritingSystem.Tag, writingSystem.Tag);
+ });
+ }
+ }
+
+ // This error can occur if the project is deleted while the build is running
+ // (checked a second time because the awaited update above may have run in between)
+ if (projectDoc.Data is null)
+ {
+ throw new DataNotFoundException("The project does not exist.");
+ }
+
+ // This error can occur if the project source is cleared while the build is running
+ if (projectDoc.Data.TranslateConfig.Source is null)
+ {
+ throw new DataNotFoundException("The project source is not specified.");
+ }
+
+ // Update the source writing system tag
+ if (string.IsNullOrWhiteSpace(projectDoc.Data.TranslateConfig.Source.WritingSystem.Tag))
+ {
+ WritingSystem writingSystem = paratextService.GetWritingSystem(
+ userSecret,
+ projectDoc.Data.TranslateConfig.Source.ParatextId
+ );
+ if (!string.IsNullOrWhiteSpace(writingSystem.Tag))
+ {
+ // Only the tag is stored for the source; region and script are not updated here
+ await projectDoc.SubmitJson0OpAsync(op =>
+ op.Set(p => p.TranslateConfig.Source.WritingSystem.Tag, writingSystem.Tag)
+ );
+ }
+ }
+ }
+
+ // Clear the existing translation engine id, based on whether this is pre-translation or not
+ await projectSecrets.UpdateAsync(
+ projectDoc.Id,
+ u =>
+ {
+ if (preTranslate)
+ {
+ u.Unset(p => p.ServalData.PreTranslationEngineId);
+ }
+ else
+ {
+ u.Unset(p => p.ServalData.TranslationEngineId);
+ }
+ }
+ );
+
+ // Create the Serval project, and get the translation engine id
+ translationEngineId = await CreateServalProjectAsync(projectDoc.Data, preTranslate, cancellationToken);
+ }
+
+ // Ensure a translation engine id is present
+ if (string.IsNullOrWhiteSpace(translationEngineId))
+ {
+ throw new DataNotFoundException("Failed to create a translation engine.");
+ }
+
+ return translationEngineId;
+ }
+
+ /// <summary>
+ /// Resolves the language tag of the source that drafting will use for a project.
+ /// </summary>
+ /// <param name="project">The project. A <c>null</c> value is rejected.</param>
+ /// <returns>
+ /// The alternate source's writing system tag when the alternate source is enabled and its tag is not blank;
+ /// otherwise the writing system tag of the project's source.
+ /// </returns>
+ /// <exception cref="ArgumentNullException">
+ /// The writing system tag was not specified for the source project.
+ /// </exception>
+ /// <exception cref="DataNotFoundException">
+ /// The source was not specified for the project, or the project does not exist.
+ /// </exception>
+ /// <remarks>This can be mocked in unit tests.</remarks>
+ protected internal virtual string GetSourceLanguage(SFProject? project)
+ {
+     // A project may be deleted while its build is still running
+     if (project is null)
+     {
+         throw new DataNotFoundException("The project does not exist.");
+     }
+
+     // Likewise, the source may be cleared while the build is still running
+     var translateConfig = project.TranslateConfig;
+     if (translateConfig.Source is null)
+     {
+         throw new DataNotFoundException("The project source is not specified.");
+     }
+
+     // The alternate source wins only when it is switched on and actually has a tag
+     string alternateTag = translateConfig.DraftConfig.AlternateSource?.WritingSystem.Tag;
+     if (translateConfig.DraftConfig.AlternateSourceEnabled && !string.IsNullOrWhiteSpace(alternateTag))
+     {
+         return alternateTag;
+     }
+
+     return translateConfig.Source.WritingSystem.Tag ?? throw new ArgumentNullException(nameof(project));
+ }
+
+ /// <summary>
+ /// Gets the target language for the project.
+ /// </summary>
+ /// <param name="project">The project.</param>
+ /// <returns>
+ /// The project's writing system tag, or the source language when Echo is enabled for pre-translation.
+ /// </returns>
+ /// <exception cref="ArgumentNullException">
+ /// Echo is enabled and the writing system tag was not specified for the source project.
+ /// </exception>
+ /// <exception cref="DataNotFoundException">
+ /// Echo is enabled and the source was not specified for the project, or the project does not exist.
+ /// </exception>
+ /// <remarks>
+ /// Echo outputs the source texts, so it needs the target and source language to be the same.
+ /// This can be mocked in unit tests.
+ /// </remarks>
+ protected internal virtual async Task GetTargetLanguageAsync(SFProject project)
+ {
+     // With Echo switched on, report the source language as the target language
+     if (await featureManager.IsEnabledAsync(FeatureFlags.UseEchoForPreTranslation))
+     {
+         return GetSourceLanguage(project);
+     }
+
+     return project.WritingSystem.Tag!;
+ }
+
+ /// <summary>
+ /// Serializes the non-empty segments of a text, one per line, using Unix/Linux line endings.
+ /// </summary>
+ /// <param name="text">The text whose segments are written out.</param>
+ /// <returns>The text file data to be uploaded to Serval.</returns>
+ /// <remarks>
+ /// Each line is the segment reference, a tab, the segment text and, when any flags apply, a tab followed by
+ /// the comma separated flags <c>ss</c> (sentence start), <c>ir</c> (in range) and <c>rs</c> (range start).
+ /// This can be mocked in unit tests.
+ /// </remarks>
+ protected internal virtual string GetTextFileData(ISFText text)
+ {
+     var builder = new StringBuilder();
+
+     // NOTE(review): segments flagged as empty are skipped here; confirm that pre-translation does not
+     // need them uploaded as empty lines with their segment refs
+     foreach (SFTextSegment segment in text.Segments)
+     {
+         if (segment.IsEmpty)
+         {
+             continue;
+         }
+
+         builder.Append(segment.SegmentRef);
+         builder.Append('\t');
+         builder.Append(segment.SegmentText);
+
+         // Collect the flags, each followed by a comma
+         string flags = string.Empty;
+         if (segment.IsSentenceStart)
+         {
+             flags += "ss,";
+         }
+
+         if (segment.IsInRange)
+         {
+             flags += "ir,";
+         }
+
+         if (segment.IsRangeStart)
+         {
+             flags += "rs,";
+         }
+
+         // Only write the flag column when there are flags, leaving off the trailing comma
+         if (flags.Length > 0)
+         {
+             builder.Append('\t');
+             builder.Append(flags, 0, flags.Length - 1);
+         }
+
+         // Always terminate with the Unix EOL, as this text data is uploaded to Serval
+         builder.Append('\n');
+     }
+
+     return builder.ToString();
+ }
+
+ /// <summary>
+ /// Builds the <see cref="TranslationBuildConfig"/> for a pre-translation build.
+ /// </summary>
+ /// <param name="servalData">The Serval data holding the parallel corpus identifiers.</param>
+ /// <param name="servalConfig">
+ /// The Serval JSON configuration; may be <c>null</c> or blank.
+ /// </param>
+ /// <param name="buildConfig">The build configuration from the user, specified on the front end.</param>
+ /// <param name="corporaSyncInfo">The synchronization information for the corpora.</param>
+ /// <returns>The TranslationBuildConfig for a Pre-Translate build.</returns>
+ /// <remarks>
+ /// Do not use with SMT builds.
+ /// This can be mocked in unit tests.
+ /// </remarks>
+ protected internal virtual TranslationBuildConfig GetTranslationBuildConfig(
+     ServalData servalData,
+     string? servalConfig,
+     BuildConfig buildConfig,
+     IList corporaSyncInfo
+ )
+ {
+     // Parse the Serval configuration, when one was supplied
+     JObject? options = string.IsNullOrWhiteSpace(servalConfig) ? null : JObject.Parse(servalConfig);
+
+     // Fast training overrides max_steps, creating the options object if there is none yet
+     if (buildConfig.FastTraining)
+     {
+         options ??= [];
+
+         // 20 is the number of steps used on Serval QA by default
+         options["max_steps"] = 20;
+     }
+
+     // Fallback training range: the explicit range, else the selected books, else null.
+     // It is only used for a corpus that has no per project range configured.
+     string? trainOnScriptureRange = buildConfig.TrainingScriptureRange;
+     if (string.IsNullOrWhiteSpace(trainOnScriptureRange))
+     {
+         trainOnScriptureRange = string.Join(';', buildConfig.TrainingBooks.Select(Canon.BookNumberToId));
+         if (string.IsNullOrWhiteSpace(trainOnScriptureRange))
+         {
+             trainOnScriptureRange = null;
+         }
+     }
+
+     // Fallback translation range, resolved the same way
+     string? preTranslateScriptureRange = buildConfig.TranslationScriptureRange;
+     if (string.IsNullOrWhiteSpace(preTranslateScriptureRange))
+     {
+         preTranslateScriptureRange = string.Join(';', buildConfig.TranslationBooks.Select(Canon.BookNumberToId));
+         if (string.IsNullOrWhiteSpace(preTranslateScriptureRange))
+         {
+             preTranslateScriptureRange = null;
+         }
+     }
+
+     // Source corpora to pre-translate, each with its per project range or the fallback
+     var preTranslateSourceFilters =
+         from syncInfo in corporaSyncInfo
+         where syncInfo.ParallelCorpusId == servalData.ParallelCorpusIdForPreTranslate && syncInfo.IsSource
+         select new ParallelCorpusFilterConfig
+         {
+             CorpusId = syncInfo.CorpusId,
+             ScriptureRange =
+                 buildConfig
+                     .TranslationScriptureRanges.FirstOrDefault(r => r.ProjectId == syncInfo.ProjectId)
+                     ?.ScriptureRange ?? preTranslateScriptureRange,
+         };
+
+     // Source corpora to train on
+     var trainOnSourceFilters =
+         from syncInfo in corporaSyncInfo
+         where syncInfo.ParallelCorpusId == servalData.ParallelCorpusIdForTrainOn && syncInfo.IsSource
+         select new ParallelCorpusFilterConfig
+         {
+             CorpusId = syncInfo.CorpusId,
+             ScriptureRange =
+                 buildConfig
+                     .TrainingScriptureRanges.FirstOrDefault(r => r.ProjectId == syncInfo.ProjectId)
+                     ?.ScriptureRange ?? trainOnScriptureRange,
+         };
+
+     // Target corpora to train on
+     var trainOnTargetFilters =
+         from syncInfo in corporaSyncInfo
+         where syncInfo.ParallelCorpusId == servalData.ParallelCorpusIdForTrainOn && !syncInfo.IsSource
+         select new ParallelCorpusFilterConfig
+         {
+             CorpusId = syncInfo.CorpusId,
+             ScriptureRange =
+                 buildConfig
+                     .TrainingScriptureRanges.FirstOrDefault(r => r.ProjectId == syncInfo.ProjectId)
+                     ?.ScriptureRange ?? trainOnScriptureRange,
+         };
+
+     // Assemble the build configuration; the queries above are enumerated here, in this order
+     var translationBuildConfig = new TranslationBuildConfig
+     {
+         Options = options,
+         Pretranslate =
+         [
+             new PretranslateCorpusConfig
+             {
+                 ParallelCorpusId = servalData.ParallelCorpusIdForPreTranslate,
+                 SourceFilters = [.. preTranslateSourceFilters],
+             },
+         ],
+         TrainOn =
+         [
+             new TrainingCorpusConfig
+             {
+                 ParallelCorpusId = servalData.ParallelCorpusIdForTrainOn,
+                 SourceFilters = [.. trainOnSourceFilters],
+                 TargetFilters = [.. trainOnTargetFilters],
+             },
+         ],
+     };
+
+     // The additional training data corpus is only trained on when files were selected for this build
+     bool hasAdditionalTrainingCorpus = !string.IsNullOrWhiteSpace(
+         servalData.AdditionalTrainingData?.ParallelCorpusId
+     );
+     if (hasAdditionalTrainingCorpus && buildConfig.TrainingDataFiles.Count > 0)
+     {
+         translationBuildConfig.TrainOn.Add(
+             new TrainingCorpusConfig { ParallelCorpusId = servalData.AdditionalTrainingData.ParallelCorpusId }
+         );
+     }
+
+     return translationBuildConfig;
+ }
+
+ /// <summary>
+ /// Recreates the translation engine if the source or target language has changed.
+ /// </summary>
+ /// <param name="translationEngineId">The translation engine identifier.</param>
+ /// <param name="project">The project.</param>
+ /// <param name="preTranslate">If <c>true</c> use NMT; otherwise if <c>false</c> use SMT.</param>
+ /// <param name="cancellationToken">The cancellation token.</param>
+ /// <returns>An asynchronous task.</returns>
+ /// <remarks>
+ /// If Serval reports a 404 at any point, the stored translation engine id is cleared and a new
+ /// translation engine is created. This can be mocked in unit tests.
+ /// </remarks>
+ protected internal virtual async Task RecreateTranslationEngineIfRequiredAsync(
+     string translationEngineId,
+     SFProject project,
+     bool preTranslate,
+     CancellationToken cancellationToken
+ )
+ {
+     try
+     {
+         // Fetch the engine as Serval currently knows it
+         TranslationEngine engine = await translationEnginesClient.GetAsync(
+             translationEngineId,
+             cancellationToken
+         );
+
+         // Compare the target language
+         string targetLanguage = await GetTargetLanguageAsync(project);
+         bool targetChanged = engine.TargetLanguage != targetLanguage;
+         if (targetChanged)
+         {
+             logger.LogInformation(
+                 $"Target language has changed from {engine.TargetLanguage} to {targetLanguage}."
+             );
+         }
+
+         // Compare the source language
+         string sourceLanguage = GetSourceLanguage(project);
+         bool sourceChanged = engine.SourceLanguage != sourceLanguage;
+         if (sourceChanged)
+         {
+             logger.LogInformation(
+                 $"Source language has changed from {engine.SourceLanguage} to {sourceLanguage}."
+             );
+         }
+
+         // Nothing to do when both languages still match
+         if (!targetChanged && !sourceChanged)
+         {
+             return;
+         }
+
+         // Delete then recreate the translation engine; removal can be a slow process
+         await RemoveProjectAsync(project.Id, preTranslate, cancellationToken);
+         await CreateServalProjectAsync(project, preTranslate, cancellationToken);
+     }
+     catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound)
+     {
+         // A 404 means that the translation engine does not exist
+         logger.LogInformation(e, $"Translation Engine {translationEngineId} does not exist.");
+
+         // Forget the stale translation engine id for this build type
+         await projectSecrets.UpdateAsync(
+             project.Id,
+             u =>
+             {
+                 if (preTranslate)
+                 {
+                     u.Unset(p => p.ServalData.PreTranslationEngineId);
+                 }
+                 else
+                 {
+                     u.Unset(p => p.ServalData.TranslationEngineId);
+                 }
+             }
+         );
+
+         // Create the replacement translation engine
+         string newTranslationEngineId = await CreateServalProjectAsync(project, preTranslate, cancellationToken);
+         logger.LogInformation($"Created Translation Engine {newTranslationEngineId}.");
+     }
+ }
+
+ /// <summary>
+ /// Removes the legacy files and corpora from Serval.
+ /// </summary>
+ /// <param name="sfProjectId">The Scripture Forge project identifier.</param>
+ /// <param name="preTranslate">If <c>true</c> use NMT; otherwise if <c>false</c> use SMT.</param>
+ /// <param name="cancellationToken">The Cancellation token</param>
+ /// <returns>An asynchronous task.</returns>
+ /// <exception cref="DataNotFoundException">The project secret cannot be found.</exception>
+ /// <remarks>
+ /// Errors reported by Serval while deleting a corpus are logged and ignored, and the project secret's
+ /// record of that corpus is removed either way. This can be mocked in unit tests.
+ /// </remarks>
+ protected internal virtual async Task RemoveLegacyServalDataAsync(
+ string sfProjectId,
+ bool preTranslate,
+ CancellationToken cancellationToken
+ )
+ {
+ // Load the target project secrets, so we can get the translation engine ID
+ if (!(await projectSecrets.TryGetAsync(sfProjectId)).TryResult(out SFProjectSecret projectSecret))
+ {
+ throw new DataNotFoundException("The project secret cannot be found.");
+ }
+
+ // Ensure we have a translation engine id
+ string translationEngineId = GetTranslationEngineId(projectSecret, preTranslate);
+ if (string.IsNullOrWhiteSpace(translationEngineId))
+ {
+ logger.LogInformation($"No Translation Engine Id specified for project {sfProjectId.Sanitize()}");
+ return;
+ }
+
+ // Remove the corpora and files
+ // Only the legacy corpora recorded for this build type (NMT or SMT) are removed
+ string[] corpusIds =
+ projectSecret
+ .ServalData?.Corpora?.Where(c => c.Value.PreTranslate == preTranslate)
+ .Select(c => c.Key)
+ .ToArray() ?? [];
+ foreach (string corpusId in corpusIds)
+ {
+ // Delete the corpus
+ try
+ {
+ await translationEnginesClient.DeleteCorpusAsync(
+ translationEngineId,
+ corpusId,
+ deleteFiles: true,
+ cancellationToken
+ );
+ }
+ catch (ServalApiException e)
+ {
+ // A 404 means that the translation engine does not exist
+ string message;
+ if (e.StatusCode == StatusCodes.Status404NotFound)
+ {
+ message =
+ $"Translation Engine {translationEngineId.Sanitize()} for project {sfProjectId.Sanitize()}"
+ + " was missing or already deleted.";
+ logger.LogInformation(message);
+ }
+ else
{
- await translationEnginesClient.DeleteCorpusAsync(translationEngineId, corpusId, cancellationToken);
+ // Both halves of the message must be interpolated strings, otherwise the project id
+ // placeholder is written to the log verbatim
+ message =
+ $"Ignored exception while deleting translation engine {translationEngineId.Sanitize()}"
+ + $" for project {sfProjectId.Sanitize()}.";
+ logger.LogError(e, message);
}
+ }
+
+ // Remove our record of the corpus
+ await projectSecrets.UpdateAsync(sfProjectId, u => u.Unset(p => p.ServalData.Corpora[corpusId]));
+ }
+
+ // Remove the corpora property if it is empty
+ // (the snapshot loaded above held no corpora for the other build type, so none remain)
+ if (projectSecret.ServalData?.Corpora?.Any(c => c.Value.PreTranslate != preTranslate) == false)
+ {
+ await projectSecrets.UpdateAsync(sfProjectId, u => u.Unset(p => p.ServalData.Corpora));
+ }
+ }
+
+ /// <summary>
+ /// Synchronizes the additional training data for a pre-translation project.
+ /// </summary>
+ /// <param name="curUserId">The current user identifier</param>
+ /// <param name="project">The project.</param>
+ /// <param name="translationEngineId">The translation engine identifier.</param>
+ /// <param name="buildConfig">The build configuration from the user.</param>
+ /// <param name="additionalTrainingData">The additional training data.</param>
+ /// <param name="cancellationToken">The cancellation token.</param>
+ /// <returns>
+ /// The updated additional training data, or <c>null</c> when no training data files are specified.
+ /// </returns>
+ /// <remarks>
+ /// If there are no TrainingDataFiles specified in <paramref name="buildConfig"/>, then the additional
+ /// training data corpora will be removed from Serval. Otherwise, the corpora will be created or updated
+ /// as required.
+ /// This can be mocked in unit tests.
+ /// </remarks>
+ protected internal virtual async Task SyncAdditionalTrainingData(
+     string curUserId,
+     SFProject project,
+     string translationEngineId,
+     BuildConfig buildConfig,
+     ServalAdditionalTrainingData? additionalTrainingData,
+     CancellationToken cancellationToken
+ )
+ {
+     // No training data files selected: remove anything synchronized previously
+     if (buildConfig.TrainingDataFiles.Count <= 0)
+     {
+         if (additionalTrainingData is null)
+         {
+             return null;
+         }
+
+         // Remove the parallel corpus first
+         if (!string.IsNullOrWhiteSpace(additionalTrainingData.ParallelCorpusId))
+         {
+             await translationEnginesClient.DeleteParallelCorpusAsync(
+                 translationEngineId,
+                 additionalTrainingData.ParallelCorpusId,
+                 cancellationToken
+             );
+         }
+
+         // Then the corpora and files
+         await DeleteAllCorporaAndFilesAsync(additionalTrainingData.CorpusFiles, project.Id, cancellationToken);
+
+         // Returning null drops the additional training data from the project secrets
+         return null;
+     }
+
+     // Load the source and target texts of the selected training data files
+     List sourceTexts = [];
+     List targetTexts = [];
+     await trainingDataService.GetTextsAsync(
+         curUserId,
+         project.Id,
+         buildConfig.TrainingDataFiles,
+         sourceTexts,
+         targetTexts
+     );
+
+     // First time through there is nothing recorded yet
+     additionalTrainingData ??= new ServalAdditionalTrainingData();
+
+     // Upload the target side, working on a copy of the recorded corpus files
+     List targetCorpusFiles = [.. additionalTrainingData.CorpusFiles];
+     string targetLanguage = await GetTargetLanguageAsync(project);
+     additionalTrainingData.TargetCorpusId = await UploadAdditionalTrainingDataAsync(
+         project.Id,
+         additionalTrainingData.TargetCorpusId,
+         targetLanguage,
+         targetCorpusFiles,
+         targetTexts,
+         cancellationToken
+     );
+
+     // Upload the source side, again working on a copy of the recorded corpus files
+     List sourceCorpusFiles = [.. additionalTrainingData.CorpusFiles];
+     string sourceLanguage = GetSourceLanguage(project);
+     additionalTrainingData.SourceCorpusId = await UploadAdditionalTrainingDataAsync(
+         project.Id,
+         additionalTrainingData.SourceCorpusId,
+         sourceLanguage,
+         sourceCorpusFiles,
+         sourceTexts,
+         cancellationToken
+     );
+
+     // Record the combined files, then tell Serval which files each corpus holds
+     additionalTrainingData.CorpusFiles = [.. targetCorpusFiles.Union(sourceCorpusFiles)];
+     foreach (var filesOfCorpus in additionalTrainingData.CorpusFiles.GroupBy(file => file.CorpusId))
+     {
+         await corporaClient.UpdateAsync(
+             filesOfCorpus.Key,
+             files:
+             [
+                 .. filesOfCorpus.Select(file => new CorpusFileConfig
+                 {
+                     FileId = file.FileId,
+                     TextId = file.TextId,
+                 }),
+             ],
+             cancellationToken
+         );
+     }
+
+     // Create or update the parallel corpus that pairs the source corpus with the target corpus
+     additionalTrainingData.ParallelCorpusId = await CreateOrUpdateParallelCorpusAsync(
+         translationEngineId,
+         additionalTrainingData.ParallelCorpusId,
+         name: "AdditionalTrainingData",
+         [additionalTrainingData.SourceCorpusId],
+         [additionalTrainingData.TargetCorpusId],
+         cancellationToken
+     );
+
+     return additionalTrainingData;
+ }
+
+ /// <summary>
+ /// Synchronizes the corpora and files with Serval.
+ /// </summary>
+ /// <param name="curUserId">The current user identifier.</param>
+ /// <param name="buildConfig">The build configuration from the user.</param>
+ /// <param name="preTranslate">If <c>true</c> use NMT; otherwise if <c>false</c> use SMT.</param>
+ /// <param name="cancellationToken">The cancellation token.</param>
+ /// <returns>
+ /// The sync information for all source and target corpora that were synchronised,
+ /// excluding the additional data corpora.
+ /// </returns>
+ /// <exception cref="DataNotFoundException">
+ /// The project, project source, project secret, Serval data, or translation engine ID could not be found.
+ /// </exception>
+ /// <remarks>This can be mocked in unit tests.</remarks>
+ protected internal virtual async Task> SyncProjectCorporaAsync(
+ string curUserId,
+ BuildConfig buildConfig,
+ bool preTranslate,
+ CancellationToken cancellationToken
+ )
+ {
+ // Load the project from the realtime service
+ Attempt attempt = await realtimeService.TryGetSnapshotAsync(buildConfig.ProjectId);
+ if (!attempt.TryResult(out SFProject project))
+ {
+ throw new DataNotFoundException("The project does not exist.");
+ }
+
+ // Ensure we have a source
+ if (project.TranslateConfig.Source is null)
+ {
+ throw new DataNotFoundException("The project source is not specified.");
+ }
+
+ // Load the project secrets, so we can get the corpus files
+ if (!(await projectSecrets.TryGetAsync(project.Id)).TryResult(out SFProjectSecret projectSecret))
+ {
+ throw new DataNotFoundException("The project secret cannot be found.");
+ }
- // Recreate the corpus
- corpus = await translationEnginesClient.AddCorpusAsync(
- translationEngineId,
- corpusConfig,
- cancellationToken
- );
+ // Ensure we have serval data
+ if (projectSecret.ServalData is null)
+ {
+ throw new DataNotFoundException("The Serval data cannot be found.");
+ }
+
+ // Return sync information so the translation build configuration can be generated
+ List corporaSyncInfo = [];
+
+ // Ensure we have a translation engine ID
+ string translationEngineId = GetTranslationEngineId(projectSecret, preTranslate);
+ if (string.IsNullOrWhiteSpace(translationEngineId))
+ {
+ throw new DataNotFoundException("The translation engine ID cannot be found.");
+ }
+
+ // Each of the three optional sources below only counts when it is enabled, configured,
+ // and the project itself has pre-translation switched on
+ // See if there is an alternate source to use for drafting
+ bool hasAlternateSource =
+ project.TranslateConfig.DraftConfig.AlternateSourceEnabled
+ && project.TranslateConfig.DraftConfig.AlternateSource is not null
+ && project.TranslateConfig.PreTranslate;
+
+ // See if there is an alternate training source corpus
+ bool hasAlternateTrainingSource =
+ project.TranslateConfig.DraftConfig.AlternateTrainingSourceEnabled
+ && project.TranslateConfig.DraftConfig.AlternateTrainingSource is not null
+ && project.TranslateConfig.PreTranslate;
+
+ // See if there is an additional training source
+ bool hasAdditionalTrainingSource =
+ project.TranslateConfig.DraftConfig.AdditionalTrainingSourceEnabled
+ && project.TranslateConfig.DraftConfig.AdditionalTrainingSource is not null
+ && project.TranslateConfig.PreTranslate;
+
+ // Build the list of corpora and files to upload
+ List<(string projectId, string paratextId, string writingSystemTag)> projects =
+ [
+ // Target Project
+ (project.Id, project.ParatextId, project.WritingSystem.Tag),
+ // Source Project
+ (
+ project.TranslateConfig.Source.ProjectRef,
+ project.TranslateConfig.Source.ParatextId,
+ project.TranslateConfig.Source.WritingSystem.Tag
+ ),
+ ];
+ if (hasAlternateSource)
+ {
+ projects.Add(
+ (
+ project.TranslateConfig.DraftConfig.AlternateSource.ProjectRef,
+ project.TranslateConfig.DraftConfig.AlternateSource.ParatextId,
+ project.TranslateConfig.DraftConfig.AlternateSource.WritingSystem.Tag
+ )
+ );
+ }
+
+ if (hasAlternateTrainingSource)
+ {
+ projects.Add(
+ (
+ project.TranslateConfig.DraftConfig.AlternateTrainingSource.ProjectRef,
+ project.TranslateConfig.DraftConfig.AlternateTrainingSource.ParatextId,
+ project.TranslateConfig.DraftConfig.AlternateTrainingSource.WritingSystem.Tag
+ )
+ );
+ }
+
+ if (hasAdditionalTrainingSource)
+ {
+ projects.Add(
+ (
+ project.TranslateConfig.DraftConfig.AdditionalTrainingSource.ProjectRef,
+ project.TranslateConfig.DraftConfig.AdditionalTrainingSource.ParatextId,
+ project.TranslateConfig.DraftConfig.AdditionalTrainingSource.WritingSystem.Tag
+ )
+ );
+ }
+
+ // Create and upload the Serval Corpus Files
+ List servalCorpusFiles = [];
+ foreach ((string projectId, string paratextId, string languageCode) in projects)
+ {
+ if (servalCorpusFiles.Any(f => f.ProjectId == projectId))
+ {
+ // Do not allow duplicate corpora for the same project
+ continue;
}
- else if (corpusUpdated)
+
+ // Reuse the corpus file already recorded in the project secret for this project, if any
+ ServalCorpusFile servalCorpusFile = projectSecret.ServalData.CorpusFiles.SingleOrDefault(f =>
+ f.ProjectId == projectId
+ );
+ if (servalCorpusFile is null || servalCorpusFile.LanguageCode != languageCode)
{
- // Update the corpus
- TranslationCorpusUpdateConfig corpusUpdateConfig = new TranslationCorpusUpdateConfig
- {
- SourceFiles = corpusConfig.SourceFiles,
- TargetFiles = corpusConfig.TargetFiles,
- };
- corpus = await translationEnginesClient.UpdateCorpusAsync(
- translationEngineId,
- corpusId,
- corpusUpdateConfig,
+ // Create the corpus if it does not exist or the language code has changed
+ Corpus corpus = await corporaClient.CreateAsync(
+ new CorpusConfig { Name = $"{project.Id}_{projectId}", Language = languageCode },
cancellationToken
);
+ // NOTE(review): TextId is the target project's id for every corpus, not the id of the project
+ // being uploaded - presumably so that source and target texts line up; confirm
+ servalCorpusFile = new ServalCorpusFile
+ {
+ CorpusId = corpus.Id,
+ LanguageCode = languageCode,
+ ProjectId = projectId,
+ TextId = project.Id,
+ };
}
- else
+
+ // Upload the file
+ await UploadParatextFileAsync(servalCorpusFile, paratextId, cancellationToken);
+ servalCorpusFiles.Add(servalCorpusFile);
+ }
+
+ // Update the project corpora with the files
+ foreach (ServalCorpusFile servalCorpusFile in servalCorpusFiles)
+ {
+ await corporaClient.UpdateAsync(
+ servalCorpusFile.CorpusId,
+ files: [new CorpusFileConfig { FileId = servalCorpusFile.FileId, TextId = servalCorpusFile.TextId }],
+ cancellationToken
+ );
+ }
+
+ // Get the source project for the NMT/SMT translation corpus
+ string sourceProjectId =
+ hasAlternateSource && preTranslate
+ ? project.TranslateConfig.DraftConfig.AlternateSource.ProjectRef
+ : project.TranslateConfig.Source.ProjectRef;
+
+ // Set up the parallel corpus for NMT/SMT translation
+ // Single() throws if the expected project was not uploaded in the loop above
+ List sourceCorpora = [servalCorpusFiles.Single(f => f.ProjectId == sourceProjectId)];
+ List targetCorpora = [servalCorpusFiles.Single(f => f.ProjectId == project.Id)];
+ List sourceCorpusIds = [.. sourceCorpora.Select(f => f.CorpusId)];
+ List targetCorpusIds = [.. targetCorpora.Select(f => f.CorpusId)];
+
+ // Get the NMT/SMT translation parallel corpus id (might be null)
+ string translationParallelCorpusId = preTranslate
+ ? projectSecret.ServalData.ParallelCorpusIdForPreTranslate
+ : projectSecret.ServalData.ParallelCorpusIdForSmt;
+
+ // Create or update the NMT/SMT translation parallel corpora
+ translationParallelCorpusId = await CreateOrUpdateParallelCorpusAsync(
+ translationEngineId,
+ translationParallelCorpusId,
+ name: preTranslate ? "PreTranslation" : "SmtTranslation",
+ sourceCorpusIds,
+ targetCorpusIds,
+ cancellationToken
+ );
+
+ // Record the corpus sync info for the pre-translate corpora
+ corporaSyncInfo = RecordServalCorpusSyncInfo(
+ corporaSyncInfo,
+ sourceCorpora,
+ targetCorpora,
+ translationParallelCorpusId
+ );
+
+ // If we are NMT pre-translating, add the training parallel corpus
+ // For SMT builds the train on id stays null and the additional training data is left as recorded
+ string trainOnParallelCorpusId = null;
+ ServalAdditionalTrainingData? additionalTrainingData = projectSecret.ServalData.AdditionalTrainingData;
+ if (preTranslate)
+ {
+ // Build the source corpus ids for training
+ sourceProjectId = hasAlternateTrainingSource
+ ? project.TranslateConfig.DraftConfig.AlternateTrainingSource.ProjectRef
+ : project.TranslateConfig.Source.ProjectRef;
+
+ sourceCorpora = [servalCorpusFiles.Single(f => f.ProjectId == sourceProjectId)];
+
+ // Add the additional training source, if present and we are pre-translating
+ if (hasAdditionalTrainingSource)
{
- // The corpus was not updated
- return false;
+ string additionalTrainingSourceProjectId = project
+ .TranslateConfig
+ .DraftConfig
+ .AdditionalTrainingSource
+ .ProjectRef;
+ sourceCorpora.Add(servalCorpusFiles.Single(f => f.ProjectId == additionalTrainingSourceProjectId));
}
+
+ sourceCorpusIds = [.. sourceCorpora.Select(f => f.CorpusId)];
+
+ // Build the target corpus ids for training
+ targetCorpora = [servalCorpusFiles.Single(f => f.ProjectId == project.Id)];
+ targetCorpusIds = [.. targetCorpora.Select(f => f.CorpusId)];
+
+ // Get the train on parallel corpus id (might be null)
+ trainOnParallelCorpusId = projectSecret.ServalData.ParallelCorpusIdForTrainOn;
+
+ // Create or update the train on parallel corpora
+ trainOnParallelCorpusId = await CreateOrUpdateParallelCorpusAsync(
+ translationEngineId,
+ trainOnParallelCorpusId,
+ name: "TrainOn",
+ sourceCorpusIds,
+ targetCorpusIds,
+ cancellationToken
+ );
+
+ // Record the corpus sync info for the train on corpora
+ corporaSyncInfo = RecordServalCorpusSyncInfo(
+ corporaSyncInfo,
+ sourceCorpora,
+ targetCorpora,
+ trainOnParallelCorpusId
+ );
+
+ // Sync the additional training data
+ // NOTE: We do not record the corpus sync info for the additional training data
+ // You can get that information from ServalData.AdditionalTrainingData
+ additionalTrainingData = await SyncAdditionalTrainingData(
+ curUserId,
+ project,
+ translationEngineId,
+ buildConfig,
+ additionalTrainingData,
+ cancellationToken
+ );
}
- // Update the project secret with the new corpus information
- await projectSecrets.UpdateAsync(
+ // Delete any project corpora and files that are no longer used
+ // NOTE(review): entries reused above are the same instances as in the project secret, so Except()
+ // should leave only stale entries - confirm against ServalCorpusFile's equality semantics
+ await DeleteAllCorporaAndFilesAsync(
+ projectSecret.ServalData.CorpusFiles.Except(servalCorpusFiles),
project.Id,
+ cancellationToken
+ );
+
+ // Update the project secret
+ await projectSecrets.UpdateAsync(
+ projectSecret,
u =>
- u.Set(
- p => p.ServalData.Corpora[corpus.Id],
- new ServalCorpus
- {
- SourceFiles = sourceCorpusFiles,
- TargetFiles = targetCorpusFiles,
- PreTranslate = preTranslate,
- AdditionalTrainingData = additionalTrainingData,
- AlternateTrainingSource = useAlternateTrainingSource,
- UploadParatextZipFile = uploadParatextZipFile,
- }
- )
+ {
+ u.Set(p => p.ServalData.CorpusFiles, servalCorpusFiles);
+ if (preTranslate)
+ {
+ u.Set(p => p.ServalData.ParallelCorpusIdForPreTranslate, translationParallelCorpusId);
+ u.Set(p => p.ServalData.ParallelCorpusIdForTrainOn, trainOnParallelCorpusId);
+ u.Set(p => p.ServalData.AdditionalTrainingData, additionalTrainingData);
+ }
+ else
+ {
+ u.Set(p => p.ServalData.ParallelCorpusIdForSmt, translationParallelCorpusId);
+ }
+ }
);
- return true;
+ return corporaSyncInfo;
}
/// <summary>
- /// Syncs a collection of to Serval, creating files on Serval as necessary.
+ /// Determines whether a translation engine exists for the specified project.
/// </summary>
- /// The target project identifier.
- /// The source project identifier (this may be a training source).
- /// The Paratext identifier.
- ///
- /// true if we are uploading a Paratext zip file; otherwise false.
- ///
- /// The texts created by .
- /// The existing corpus files (optional).
- /// The updated list of corpus files.
- ///
- /// true if the corpus was created or updated; otherwise, false.
- ///
- /// The project secret is updated with the corpus file details added to or removed from Serval.
- ///
- private async Task UploadNewCorpusFilesAsync(
- string targetProjectId,
- string sourceProjectId,
- string paratextId,
- bool uploadParatextZipFile,
- IEnumerable texts,
- ICollection? oldCorpusFiles,
- ICollection newCorpusFiles,
+ /// <param name="projectId">The Scripture Forge project identifier. The engine name must equal this value.</param>
+ /// <param name="translationEngineId">The Serval translation engine identifier. May be null or blank.</param>
+ /// <param name="preTranslate">If true use NMT; otherwise if false use SMT.</param>
+ /// <param name="cancellationToken">The cancellation token.</param>
+ /// <returns><c>true</c> if the engine exists with a matching name and type; otherwise <c>false</c> (also for a blank identifier, or a 403/404 from Serval).</returns>
+ /// <remarks>This can be mocked in unit tests.</remarks>
+ protected internal virtual async Task TranslationEngineExistsAsync(
+ string projectId,
+ string? translationEngineId,
+ bool preTranslate,
CancellationToken cancellationToken
)
{
- // Used to return whether the corpus files were created or updated
- bool corpusUpdated = false;
+ if (string.IsNullOrWhiteSpace(translationEngineId))
+ {
+ return false;
+ }
+
+ try
+ {
+ TranslationEngine translationEngine = await translationEnginesClient.GetAsync(
+ translationEngineId,
+ cancellationToken
+ );
+ string type = await GetTranslationEngineTypeAsync(preTranslate);
- // Upload the Paratext zip file, if we are supposed to
- if (uploadParatextZipFile)
+ // We compare the types ignoring hyphens and case, to allow for Pascal Case (Serval 1.1) and Kebab Case (Serval 1.2)
+ return translationEngine.Name == projectId
+ && string.Equals(
+ translationEngine.Type.Replace("-", string.Empty, StringComparison.OrdinalIgnoreCase),
+ type.Replace("-", string.Empty, StringComparison.OrdinalIgnoreCase),
+ StringComparison.InvariantCultureIgnoreCase
+ );
+ }
+ catch (ServalApiException e)
+ when (e.StatusCode is StatusCodes.Status403Forbidden or StatusCodes.Status404NotFound)
{
- // Get the path to the Paratext directory
- string path = Path.Combine(siteOptions.Value.SiteDir, "sync", paratextId, "target");
+ return false;
+ }
+ }
- // Ensure that the path exists
- if (!fileSystemService.DirectoryExists(path))
- {
- throw new DirectoryNotFoundException($"The directory could not be found for {paratextId}");
- }
+ /// <summary>
+ /// Uploads the additional training data for a project.
+ /// </summary>
+ /// <param name="projectId">The project identifier.</param>
+ /// <param name="corpusId">The corpus identifier. If blank, a new corpus is created.</param>
+ /// <param name="languageCode">The language for the corpus.</param>
+ /// <param name="corpusFiles">The existing corpus files. These will be replaced with the new corpus files.</param>
+ /// <param name="texts">The texts to upload.</param>
+ /// <param name="cancellationToken">The cancellation token.</param>
+ /// <returns>The updated corpus identifier.</returns>
+ /// <remarks>This can be mocked in unit tests.</remarks>
+ protected internal virtual async Task UploadAdditionalTrainingDataAsync(
+ string projectId,
+ string? corpusId,
+ string languageCode,
+ List corpusFiles,
+ List texts,
+ CancellationToken cancellationToken
+ )
+ {
+ // Make a local copy of the previous corpus files, as the passed-in list is cleared and rebuilt below
+ List previousCorpusFiles = [.. corpusFiles];
+ corpusFiles.Clear();
- // Create the zip file from the directory in memory
- await using var memoryStream = new MemoryStream();
- using (var archive = new ZipArchive(memoryStream, ZipArchiveMode.Create, true))
- {
- // Do not convert the ZipArchive using statement above into a using declaration,
- // otherwise the ZipArchive disposal will crash after the MemoryStream disposal.
- foreach (string filePath in fileSystemService.EnumerateFiles(path))
- {
- await using Stream fileStream = fileSystemService.OpenFile(filePath, FileMode.Open);
- ZipArchiveEntry entry = archive.CreateEntry(Path.GetFileName(filePath));
- await using Stream entryStream = entry.Open();
- await fileStream.CopyToAsync(entryStream, cancellationToken);
- }
- }
+ // Delete the old corpus if the language has changed, so that a new corpus is created below
+ string corpusLanguageCode = previousCorpusFiles.FirstOrDefault(f => f.CorpusId == corpusId)?.LanguageCode;
+ if (
+ !string.IsNullOrWhiteSpace(corpusLanguageCode)
+ && languageCode != corpusLanguageCode
+ && !string.IsNullOrWhiteSpace(corpusId)
+ )
+ {
+ await corporaClient.DeleteAsync(corpusId, cancellationToken);
+ corpusId = null;
+ }
- // Upload the zip file
- corpusUpdated = await UploadFileAsync(
- textId: targetProjectId,
- projectId: sourceProjectId,
- memoryStream,
- FileFormat.Paratext,
- oldCorpusFiles,
- newCorpusFiles,
+ // If there is no corpus, create it
+ if (string.IsNullOrWhiteSpace(corpusId))
+ {
+ Corpus corpus = await corporaClient.CreateAsync(
+ new CorpusConfig
+ {
+ Name = $"{projectId}_additionalTrainingData_{languageCode}",
+ Language = languageCode,
+ },
cancellationToken
);
+ corpusId = corpus.Id;
}
- else
+
+ foreach (ISFText text in texts)
{
- // Sync each text
- foreach (ISFText text in texts)
- {
- string textFileData = GetTextFileData(text);
- if (!string.IsNullOrWhiteSpace(textFileData))
+ // The text ids are in the format projectId_dataId; keep only the part after the last underscore
+ string textId = text.Id.Split('_').Last();
+
+ // Get the existing Serval Corpus File for this text and corpus, or create a new one
+ ServalCorpusFile servalCorpusFile =
+ previousCorpusFiles.SingleOrDefault(f => f.TextId == textId && f.CorpusId == corpusId)
+ ?? new ServalCorpusFile
{
- // Remove the target project id from the start of the text id (if present)
- string textId = text.Id.StartsWith($"{targetProjectId}_")
- ? text.Id[(targetProjectId.Length + 1)..]
- : text.Id;
-
- // Remove the source project id from the start of the text id (if present)
- textId = textId.StartsWith($"{sourceProjectId}_") ? textId[(sourceProjectId.Length + 1)..] : textId;
-
- // Upload the text file
- corpusUpdated |= await UploadFileAsync(
- textId,
- sourceProjectId,
- textFileData,
- FileFormat.Text,
- oldCorpusFiles,
- newCorpusFiles,
- cancellationToken
- );
- }
+ CorpusId = corpusId,
+ LanguageCode = languageCode,
+ ProjectId = projectId,
+ TextId = textId,
+ };
+
+ // Upload the text, and keep the corpus file only when UploadTextFileAsync returns true
+ if (await UploadTextFileAsync(servalCorpusFile, text, cancellationToken))
+ {
+ corpusFiles.Add(servalCorpusFile);
}
}
- // Delete corpus files for removed texts
- if (oldCorpusFiles is not null)
+ return corpusId;
+ }
+
+ /// <summary>
+ /// Uploads a file to Serval, unless its checksum matches the checksum already stored on the corpus file.
+ /// </summary>
+ /// <param name="servalCorpusFile">The Serval corpus file. Its FileChecksum and FileId are updated after an upload.</param>
+ /// <param name="stream">The stream of file data. It is rewound to the start before it is read.</param>
+ /// <param name="fileFormat">The Serval file format.</param>
+ /// <param name="cancellationToken">The cancellation token.</param>
+ /// <returns>An asynchronous task.</returns>
+ /// <remarks>This can be mocked in unit tests.</remarks>
+ protected internal virtual async Task UploadFileAsync(
+ ServalCorpusFile servalCorpusFile,
+ Stream stream,
+ FileFormat fileFormat,
+ CancellationToken cancellationToken
+ )
+ {
+ // Reset the stream to the start
+ stream.Seek(0, SeekOrigin.Begin);
+
+ // Calculate the MD5 checksum of the stream, as a lowercase hexadecimal string
+ using MD5 md5 = MD5.Create();
+ StringBuilder sb = new StringBuilder();
+ foreach (var hashByte in await md5.ComputeHashAsync(stream, cancellationToken))
+ {
+ sb.Append(hashByte.ToString("X2").ToLower());
+ }
+
+ // See if the file has changed, by comparing against the stored checksum
+ string checksum = sb.ToString();
+ if (servalCorpusFile.FileChecksum == checksum)
+ {
+ // No update, so do not upload
+ return;
+ }
+
+ // Reset the stream to the start again, as it was read to compute the checksum
+ stream.Seek(0, SeekOrigin.Begin);
+
+ // See if the file exists, and it is the same format
+ bool dataFileExists = false;
+ if (!string.IsNullOrWhiteSpace(servalCorpusFile.FileId))
{
- foreach (var corpusFile in oldCorpusFiles.Where(c => newCorpusFiles.All(n => n.FileId != c.FileId)))
+ try
{
- try
- {
- await dataFilesClient.DeleteAsync(corpusFile.FileId, cancellationToken);
- }
- catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound)
+ DataFile existingDataFile = await dataFilesClient.GetAsync(servalCorpusFile.FileId, cancellationToken);
+ dataFileExists = existingDataFile.Format == fileFormat;
+
+ // Delete the file if we are changing the format; a new file is then created below
+ if (!dataFileExists)
{
- // If the file was already deleted, just log a message
- string message =
- $"Corpora file {corpusFile.FileId} for text {corpusFile.TextId} in project {targetProjectId}"
- + " was missing or already deleted.";
- logger.LogInformation(e, message);
+ logger.LogInformation($"File {servalCorpusFile.FileId} has the wrong format - deleting.");
+ await dataFilesClient.DeleteAsync(servalCorpusFile.FileId, cancellationToken);
}
-
- corpusUpdated = true;
}
+ catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound)
+ {
+ logger.LogInformation(e, $"File {servalCorpusFile.FileId} does not exist - creating.");
+ }
+ }
+
+ // Update the file if it exists with the same format, otherwise create it
+ DataFile dataFile = dataFileExists
+ ? await dataFilesClient.UpdateAsync(servalCorpusFile.FileId, new FileParameter(stream), cancellationToken)
+ : await dataFilesClient.CreateAsync(
+ new FileParameter(stream),
+ fileFormat,
+ servalCorpusFile.TextId,
+ cancellationToken
+ );
+
+ // Update the Serval Corpus File with the new checksum and the file identifier returned by Serval
+ servalCorpusFile.FileChecksum = checksum;
+ servalCorpusFile.FileId = dataFile.Id;
+ }
+
+ /// <summary>
+ /// Uploads a Paratext zip file to Serval.
+ /// </summary>
+ /// <param name="servalCorpusFile">The Serval corpus file.</param>
+ /// <param name="paratextId">The Paratext identifier.</param>
+ /// <param name="cancellationToken">The cancellation token.</param>
+ /// <returns>An asynchronous task.</returns>
+ /// <remarks>This can be mocked in unit tests.</remarks>
+ protected internal virtual async Task UploadParatextFileAsync(
+ ServalCorpusFile servalCorpusFile,
+ string paratextId,
+ CancellationToken cancellationToken
+ )
+ {
+ // Create the zip file from the directory in memory
+ await using var stream = new MemoryStream();
+ await CreateZipFileFromParatextDirectoryAsync(paratextId, stream, cancellationToken);
+
+ // Upload the zip file in the Paratext file format
+ await UploadFileAsync(servalCorpusFile, stream, FileFormat.Paratext, cancellationToken);
+ }
+
+ /// <summary>
+ /// Uploads a text file to Serval.
+ /// </summary>
+ /// <param name="servalCorpusFile">The Serval corpus file.</param>
+ /// <param name="text">The text.</param>
+ /// <param name="cancellationToken">The cancellation token.</param>
+ /// <returns><c>true</c> if the text had data and was passed to UploadFileAsync; <c>false</c> if the text had no data.</returns>
+ /// <remarks>This can be mocked in unit tests.</remarks>
+ protected internal virtual async Task UploadTextFileAsync(
+ ServalCorpusFile servalCorpusFile,
+ ISFText text,
+ CancellationToken cancellationToken
+ )
+ {
+ string textFileData = GetTextFileData(text);
+
+ // Ensure that there is file data
+ if (string.IsNullOrWhiteSpace(textFileData))
+ {
+ return false;
}
- return corpusUpdated;
+ // Upload the text file as UTF-8 bytes, via a read-only memory stream
+ byte[] buffer = Encoding.UTF8.GetBytes(textFileData);
+ await using Stream stream = new MemoryStream(buffer, false);
+ await UploadFileAsync(servalCorpusFile, stream, FileFormat.Text, cancellationToken);
+ return true;
+ }
+
+ /// <summary>
+ /// Gets the translation engine identifier from the project secret,
+ /// depending on whether we are pre-translating or not.
+ /// </summary>
+ /// <param name="projectSecret">The project secret.</param>
+ /// <param name="preTranslate">If true, we are pre-translating.</param>
+ /// <returns>The translation engine identifier, or null if the project secret has no Serval data.</returns>
+ private static string? GetTranslationEngineId(SFProjectSecret projectSecret, bool preTranslate) =>
+ preTranslate ? projectSecret.ServalData?.PreTranslationEngineId : projectSecret.ServalData?.TranslationEngineId;
+
+ /// <summary>
+ /// Records the Corpus Synchronization information.
+ /// </summary>
+ /// <param name="corpusSyncInfo">The list of corpus synchronization information. Entries are appended to it.</param>
+ /// <param name="sourceCorpora">The list of source corpora.</param>
+ /// <param name="targetCorpora">The list of target corpora.</param>
+ /// <param name="parallelCorpusId">The parallel corpus identifier.</param>
+ /// <returns>The same <paramref name="corpusSyncInfo"/> list, with the source and target entries appended.</returns>
+ /// <remarks>Used by the project corpora synchronization code.</remarks>
+ private static List RecordServalCorpusSyncInfo(
+ List corpusSyncInfo,
+ IList sourceCorpora,
+ IList targetCorpora,
+ string parallelCorpusId
+ )
+ {
+ corpusSyncInfo.AddRange(
+ sourceCorpora.Select(f => new ServalCorpusSyncInfo
+ {
+ CorpusId = f.CorpusId,
+ ParallelCorpusId = parallelCorpusId,
+ IsSource = true,
+ ProjectId = f.ProjectId,
+ })
+ );
+ corpusSyncInfo.AddRange(
+ targetCorpora.Select(f => new ServalCorpusSyncInfo
+ {
+ CorpusId = f.CorpusId,
+ ParallelCorpusId = parallelCorpusId,
+ IsSource = false,
+ ProjectId = f.ProjectId,
+ })
+ );
+ return corpusSyncInfo;
}
}
diff --git a/src/SIL.XForge.Scripture/Services/MachineServiceCollectionExtensions.cs b/src/SIL.XForge.Scripture/Services/MachineServiceCollectionExtensions.cs
index 2f20a41c06..08242daada 100644
--- a/src/SIL.XForge.Scripture/Services/MachineServiceCollectionExtensions.cs
+++ b/src/SIL.XForge.Scripture/Services/MachineServiceCollectionExtensions.cs
@@ -1,4 +1,5 @@
using System;
+using System.Diagnostics.CodeAnalysis;
using System.Net;
using System.Net.Http;
using IdentityModel.Client;
@@ -13,6 +14,7 @@
namespace Microsoft.Extensions.DependencyInjection;
+[ExcludeFromCodeCoverage(Justification = "This logic will only work in a valid ASP.NET Core Context")]
public static class MachineServiceCollectionExtensions
{
public static IServiceCollection AddSFMachine(
@@ -21,24 +23,25 @@ public static IServiceCollection AddSFMachine(
IWebHostEnvironment env
)
{
- // Setup the Machine API
+ // Set up the Machine API
var servalOptions = configuration.GetOptions();
- services.AddAccessTokenManagement(options =>
- {
- options.Client.Clients.Add(
- MachineApi.HttpClientName,
- new ClientCredentialsTokenRequest
+ services.AddDistributedMemoryCache();
+ services
+ .AddClientCredentialsTokenManagement()
+ .AddClient(
+ MachineApi.TokenClientName,
+ client =>
{
- Address = servalOptions.TokenUrl,
- ClientId = servalOptions.ClientId,
- ClientSecret = servalOptions.ClientSecret,
- Parameters = new Parameters { { "audience", servalOptions.Audience } },
+ client.TokenEndpoint = servalOptions.TokenUrl;
+ client.ClientId = servalOptions.ClientId;
+ client.ClientSecret = servalOptions.ClientSecret;
+ client.Parameters = new Parameters { { "audience", servalOptions.Audience } };
}
);
- });
services
- .AddClientAccessTokenHttpClient(
+ .AddClientCredentialsHttpClient(
MachineApi.HttpClientName,
+ MachineApi.TokenClientName,
configureClient: client => client.BaseAddress = new Uri(servalOptions.ApiServer)
)
.ConfigurePrimaryHttpMessageHandler(() =>
@@ -78,6 +81,13 @@ IWebHostEnvironment env
var httpClient = factory.CreateClient(MachineApi.HttpClientName);
return new DataFilesClient(httpClient);
});
+ services.AddSingleton(sp =>
+ {
+ // Instantiate the corpora client with our named HTTP client
+ var factory = sp.GetService();
+ var httpClient = factory.CreateClient(MachineApi.HttpClientName);
+ return new CorporaClient(httpClient);
+ });
services.AddSingleton();
services.AddSingleton();
services.AddSingleton();
diff --git a/src/SIL.XForge.Scripture/Services/PreTranslationService.cs b/src/SIL.XForge.Scripture/Services/PreTranslationService.cs
index a3f7a53165..eb9c67adec 100644
--- a/src/SIL.XForge.Scripture/Services/PreTranslationService.cs
+++ b/src/SIL.XForge.Scripture/Services/PreTranslationService.cs
@@ -34,24 +34,11 @@ CancellationToken cancellationToken
{
List preTranslations = [];
- // Load the target project secrets, so we can get the translation engine ID and corpus ID
- if (!(await projectSecrets.TryGetAsync(sfProjectId)).TryResult(out SFProjectSecret projectSecret))
- {
- throw new DataNotFoundException("The project secret cannot be found.");
- }
-
// Ensure we have the parameters to retrieve the pre-translation
- string translationEngineId = projectSecret.ServalData?.PreTranslationEngineId;
- string corpusId = projectSecret
- .ServalData?.Corpora.FirstOrDefault(c => c.Value.PreTranslate && !c.Value.AlternateTrainingSource)
- .Key;
- if (string.IsNullOrWhiteSpace(translationEngineId) || string.IsNullOrWhiteSpace(corpusId))
- {
- throw new DataNotFoundException("The pre-translation engine is not configured.");
- }
+ (string? translationEngineId, string corpusId, bool useParatextVerseRef) =
+ await GetPreTranslationParametersAsync(sfProjectId);
// Get the pre-translation data from Serval
- bool useParatextVerseRef = projectSecret.ServalData.Corpora[corpusId].UploadParatextZipFile;
string textId = useParatextVerseRef ? GetTextId(bookNum) : GetTextId(bookNum, chapterNum);
foreach (
Pretranslation preTranslation in await translationEnginesClient.GetAllPretranslationsAsync(
@@ -195,21 +182,8 @@ public async Task GetPreTranslationUsfmAsync(
CancellationToken cancellationToken
)
{
- // Load the project secrets, so we can get the translation engine ID and corpus ID
- if (!(await projectSecrets.TryGetAsync(sfProjectId)).TryResult(out SFProjectSecret projectSecret))
- {
- throw new DataNotFoundException("The project secret cannot be found.");
- }
-
// Ensure we have the parameters to retrieve the pre-translation
- string translationEngineId = projectSecret.ServalData?.PreTranslationEngineId;
- string? corpusId = projectSecret
- .ServalData?.Corpora.FirstOrDefault(c => c.Value.PreTranslate && !c.Value.AlternateTrainingSource)
- .Key;
- if (string.IsNullOrWhiteSpace(translationEngineId) || string.IsNullOrWhiteSpace(corpusId))
- {
- throw new DataNotFoundException("The pre-translation engine is not configured.");
- }
+ (string? translationEngineId, string corpusId, bool _) = await GetPreTranslationParametersAsync(sfProjectId);
// Get the USFM
string usfm = await translationEnginesClient.GetPretranslatedUsfmAsync(
@@ -243,12 +217,6 @@ CancellationToken cancellationToken
public async Task UpdatePreTranslationStatusAsync(string sfProjectId, CancellationToken cancellationToken)
{
- // Load the target project secrets, so we can get the translation engine ID and corpus ID
- if (!(await projectSecrets.TryGetAsync(sfProjectId)).TryResult(out SFProjectSecret projectSecret))
- {
- throw new DataNotFoundException("The project secret cannot be found.");
- }
-
// Load the project from the realtime service
await using IConnection conn = await realtimeService.ConnectAsync();
IDocument projectDoc = await conn.FetchAsync(sfProjectId);
@@ -258,18 +226,11 @@ public async Task UpdatePreTranslationStatusAsync(string sfProjectId, Cancellati
}
// Ensure we have the parameters to retrieve the pre-translation
- string translationEngineId = projectSecret.ServalData?.PreTranslationEngineId;
- string corpusId = projectSecret
- .ServalData?.Corpora.FirstOrDefault(c => c.Value.PreTranslate && !c.Value.AlternateTrainingSource)
- .Key;
- if (string.IsNullOrWhiteSpace(translationEngineId) || string.IsNullOrWhiteSpace(corpusId))
- {
- throw new DataNotFoundException("The pre-translation engine is not configured.");
- }
+ (string? translationEngineId, string corpusId, bool useParatextVerseRef) =
+ await GetPreTranslationParametersAsync(sfProjectId);
// Get all the pre-translations and update the chapters
Dictionary> bookChapters = [];
- bool useParatextVerseRef = projectSecret.ServalData.Corpora[corpusId].UploadParatextZipFile;
foreach (
Pretranslation preTranslation in await translationEnginesClient.GetAllPretranslationsAsync(
translationEngineId,
@@ -351,4 +312,53 @@ await projectDoc.SubmitJson0OpAsync(op =>
}
});
}
+
+ /// <summary>
+ /// Gets the required parameters from the project secret to retrieve the pre-translations.
+ /// </summary>
+ /// <param name="sfProjectId">The Scripture Forge project identifier.</param>
+ /// <returns>
+ /// The translation engine identifier, the corpus identifier, and whether to use Paratext verse references.
+ /// </returns>
+ /// <remarks>This can be mocked in unit tests.</remarks>
+ /// <exception cref="DataNotFoundException">The pre-translation engine is not configured, or the project secret cannot be found.</exception>
+ protected internal virtual async Task<(
+ string translationEngineId,
+ string corpusId,
+ bool useParatextVerseRef
+ )> GetPreTranslationParametersAsync(string sfProjectId)
+ {
+ // Load the target project secrets, so we can get the translation engine ID and corpus ID
+ if (!(await projectSecrets.TryGetAsync(sfProjectId)).TryResult(out SFProjectSecret projectSecret))
+ {
+ throw new DataNotFoundException("The project secret cannot be found.");
+ }
+
+ string translationEngineId = projectSecret.ServalData?.PreTranslationEngineId;
+ string corpusId;
+ bool useParatextVerseRef = false;
+ if (!string.IsNullOrWhiteSpace(projectSecret.ServalData?.ParallelCorpusIdForPreTranslate))
+ {
+ corpusId = projectSecret.ServalData.ParallelCorpusIdForPreTranslate;
+ useParatextVerseRef = true;
+ }
+ else
+ {
+ // Legacy Serval Project: use the first pre-translate corpus that is not an alternate training source
+ corpusId = projectSecret
+ .ServalData?.Corpora?.FirstOrDefault(c => c.Value.PreTranslate && !c.Value.AlternateTrainingSource)
+ .Key;
+ if (!string.IsNullOrWhiteSpace(corpusId))
+ {
+ useParatextVerseRef = projectSecret.ServalData.Corpora[corpusId].UploadParatextZipFile;
+ }
+ }
+
+ if (string.IsNullOrWhiteSpace(translationEngineId) || string.IsNullOrWhiteSpace(corpusId))
+ {
+ throw new DataNotFoundException("The pre-translation engine is not configured.");
+ }
+
+ return (translationEngineId, corpusId, useParatextVerseRef);
+ }
}
diff --git a/src/SIL.XForge.Scripture/Services/SFBiblicalTermsText.cs b/src/SIL.XForge.Scripture/Services/SFBiblicalTermsText.cs
deleted file mode 100644
index be5daec431..0000000000
--- a/src/SIL.XForge.Scripture/Services/SFBiblicalTermsText.cs
+++ /dev/null
@@ -1,108 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text.RegularExpressions;
-using System.Xml.Linq;
-using SIL.XForge.Scripture.Models;
-
-namespace SIL.XForge.Scripture.Services;
-
-public class SFBiblicalTermsText : ISFText
-{
- private static readonly Regex BracketedTextRegex = new Regex(@"\([^)]*\)", RegexOptions.Compiled);
- private static readonly Regex WhitespaceRegex = new Regex(@"\s+", RegexOptions.Compiled);
-
- public SFBiblicalTermsText(string projectId, IList biblicalTerms)
- {
- Id = $"{projectId}_biblical_terms";
- Segments = GetSegments(biblicalTerms).OrderBy(s => s.SegmentRef).ToArray();
- }
-
- public SFBiblicalTermsText(string projectId, XDocument termRenderingsDoc)
- {
- Id = $"{projectId}_biblical_terms";
- Segments = GetSegments(termRenderingsDoc).OrderBy(s => s.SegmentRef).ToArray();
- }
-
- public string Id { get; }
-
- public IEnumerable Segments { get; }
-
- ///
- /// Removes Paratext specific codes from the Biblical Term Rendering.
- ///
- /// The BT rendering.
- /// The cleaned rendering.
- ///
- /// This method removes text in brackets, asterisks, forward slashes, and normalizes the whitespace.
- /// See the Guide in the Edit Biblical Term Rendering dialog in Paratext for details on these codes.
- ///
- private static string RemoveParatextSyntaxFromRendering(string rendering)
- {
- rendering = rendering.Replace("*", string.Empty);
- rendering = BracketedTextRegex.Replace(rendering, string.Empty);
- rendering = rendering.Replace("/", " ");
- rendering = WhitespaceRegex.Replace(rendering, " ");
- return rendering.Trim();
- }
-
- private static IEnumerable GetSegments(IList biblicalTerms)
- {
- if (!biblicalTerms.Any())
- {
- yield break;
- }
-
- foreach (BiblicalTerm biblicalTerm in biblicalTerms.OrderBy(t => t.TermId))
- {
- foreach (string rendering in biblicalTerm.Renderings.Select(RemoveParatextSyntaxFromRendering))
- {
- // Do not add blank renderings
- if (string.IsNullOrWhiteSpace(rendering))
- {
- continue;
- }
-
- // Sentence placement is not essential for biblical terms. Set all to false
- yield return new SFTextSegment([biblicalTerm.TermId], rendering, false, false, false);
- }
- }
- }
-
- private static IEnumerable GetSegments(XDocument termRenderingsDoc)
- {
- if (termRenderingsDoc.Root is null)
- {
- yield break;
- }
-
- foreach (
- XElement termRenderingElem in termRenderingsDoc
- .Root.Elements("TermRendering")
- .Where(t => !(bool)t.Attribute("Guess"))
- .OrderBy(t => t.Attribute("Id")?.Value)
- )
- {
- string id = termRenderingElem.Attribute("Id")?.Value;
- if (string.IsNullOrWhiteSpace(id))
- {
- continue;
- }
-
- var renderingsStr = (string?)termRenderingElem.Element("Renderings");
- string[] renderings = renderingsStr?.Trim().Split("||", StringSplitOptions.RemoveEmptyEntries) ?? [];
-
- foreach (string rendering in renderings.Select(RemoveParatextSyntaxFromRendering))
- {
- // Do not add blank renderings
- if (string.IsNullOrWhiteSpace(rendering))
- {
- continue;
- }
-
- // Sentence placement is not essential for biblical terms. Set all to false
- yield return new SFTextSegment([id], rendering, false, false, false);
- }
- }
- }
-}
diff --git a/src/SIL.XForge.Scripture/Services/SFProjectService.cs b/src/SIL.XForge.Scripture/Services/SFProjectService.cs
index fb485e6b19..0ef95e8dbd 100644
--- a/src/SIL.XForge.Scripture/Services/SFProjectService.cs
+++ b/src/SIL.XForge.Scripture/Services/SFProjectService.cs
@@ -299,18 +299,8 @@ async Task removeSourceReference(string projectId)
await RealtimeService.DeleteProjectAsync(projectId);
// The machine service requires the project secrets, so call it before removing them
- await _machineProjectService.RemoveProjectAsync(
- curUserId,
- projectId,
- preTranslate: false,
- CancellationToken.None
- );
- await _machineProjectService.RemoveProjectAsync(
- curUserId,
- projectId,
- preTranslate: true,
- CancellationToken.None
- );
+ await _machineProjectService.RemoveProjectAsync(projectId, preTranslate: false, CancellationToken.None);
+ await _machineProjectService.RemoveProjectAsync(projectId, preTranslate: true, CancellationToken.None);
await ProjectSecrets.DeleteAsync(projectId);
}
@@ -507,7 +497,6 @@ await projectDoc.SubmitJson0OpAsync(op =>
if (hasExistingMachineProject)
{
await _machineProjectService.RemoveProjectAsync(
- curUserId,
projectId,
preTranslate: false,
CancellationToken.None
@@ -516,7 +505,6 @@ await _machineProjectService.RemoveProjectAsync(
await EnsureWritingSystemTagIsSetAsync(curUserId, projectDoc, ptProjects);
await _machineProjectService.AddProjectAsync(
- curUserId,
projectId,
preTranslate: false,
CancellationToken.None
@@ -527,7 +515,6 @@ await _machineProjectService.AddProjectAsync(
{
// translation suggestions was disabled or source project set to null
await _machineProjectService.RemoveProjectAsync(
- curUserId,
projectId,
preTranslate: false,
CancellationToken.None
diff --git a/src/SIL.XForge/Utils/StringUtils.cs b/src/SIL.XForge/Utils/StringUtils.cs
index ad1a2438f5..6e2d30cf6c 100644
--- a/src/SIL.XForge/Utils/StringUtils.cs
+++ b/src/SIL.XForge/Utils/StringUtils.cs
@@ -23,6 +23,14 @@ public static string ComputeMd5Hash(string message)
return sb.ToString().ToLower();
}
+ /// <summary>
+ /// Sanitizes a string for logging by removing all line endings.
+ /// </summary>
+ /// <param name="value">The string value.</param>
+ /// <returns>The string sanitized for logging.</returns>
+ /// <remarks>This extension method resolves CodeQL cs/log-forging.</remarks>
+ public static string Sanitize(this string value) => value.ReplaceLineEndings(string.Empty);
+
public static string ToCamelCase(this string str) => CamelCaseNamingStrategy.GetPropertyName(str, false);
public static bool ValidateId(string id) => ObjectId.TryParse(id, out _);
diff --git a/test/SIL.XForge.Scripture.Tests/Services/BuildConfigJsonConverterTests.cs b/test/SIL.XForge.Scripture.Tests/Services/BuildConfigJsonConverterTests.cs
index ba1747679f..0f1656469c 100644
--- a/test/SIL.XForge.Scripture.Tests/Services/BuildConfigJsonConverterTests.cs
+++ b/test/SIL.XForge.Scripture.Tests/Services/BuildConfigJsonConverterTests.cs
@@ -94,7 +94,7 @@ public void WriteJson_Serializes_BuildConfig_WithoutTrainingBooks()
var converter = new BuildConfigJsonConverter();
var writer = Substitute.For();
var serializer = Substitute.For();
- var buildConfig = new BuildConfig { ProjectId = Project01, TranslationBooks = [1, 2, 3], };
+ var buildConfig = new BuildConfig { ProjectId = Project01, TranslationBooks = [1, 2, 3] };
// SUT
converter.WriteJson(writer, buildConfig, serializer);
@@ -114,7 +114,7 @@ public void WriteJson_Serializes_BuildConfig_WithoutTranslationBooks()
var converter = new BuildConfigJsonConverter();
var writer = Substitute.For();
var serializer = Substitute.For();
- var buildConfig = new BuildConfig { ProjectId = Project01, TrainingBooks = [1, 2, 3], };
+ var buildConfig = new BuildConfig { ProjectId = Project01, TrainingBooks = [1, 2, 3] };
// SUT
converter.WriteJson(writer, buildConfig, serializer);
@@ -134,7 +134,7 @@ public void WriteJson_Serializes_BuildConfig_TrainingDataFiles()
var converter = new BuildConfigJsonConverter();
var writer = Substitute.For();
var serializer = Substitute.For();
- var buildConfig = new BuildConfig { TrainingDataFiles = [Data01, Data02], };
+ var buildConfig = new BuildConfig { TrainingDataFiles = [Data01, Data02] };
// SUT
converter.WriteJson(writer, buildConfig, serializer);
@@ -151,7 +151,7 @@ public void WriteJson_Serializes_BuildConfig_TrainingScriptureRange()
var converter = new BuildConfigJsonConverter();
var writer = Substitute.For();
var serializer = Substitute.For();
- var buildConfig = new BuildConfig { TrainingScriptureRange = "MAT;MRK1-2,4", };
+ var buildConfig = new BuildConfig { TrainingScriptureRange = "MAT;MRK1-2,4" };
// SUT
converter.WriteJson(writer, buildConfig, serializer);
@@ -168,14 +168,57 @@ public void WriteJson_Serializes_BuildConfig_TranslationScriptureRange()
var converter = new BuildConfigJsonConverter();
var writer = Substitute.For();
var serializer = Substitute.For();
- var buildConfig = new BuildConfig { TrainingScriptureRange = "JHN", };
+ var buildConfig = new BuildConfig { TranslationScriptureRange = "JHN" };
// SUT
converter.WriteJson(writer, buildConfig, serializer);
writer.Received().WriteStartObject();
- writer.Received().WritePropertyName(nameof(buildConfig.TrainingScriptureRange));
- serializer.Received().Serialize(writer, buildConfig.TrainingScriptureRange);
+ writer.Received().WritePropertyName(nameof(buildConfig.TranslationScriptureRange));
+ serializer.Received().Serialize(writer, buildConfig.TranslationScriptureRange);
+ writer.Received().WriteEndObject();
+ }
+
+ [Test]
+ public void WriteJson_Serializes_BuildConfig_TrainingScriptureRanges()
+ {
+ var converter = new BuildConfigJsonConverter();
+ var writer = Substitute.For();
+ var serializer = Substitute.For();
+ var buildConfig = new BuildConfig
+ {
+ TrainingScriptureRanges = [new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = "MAT;MRK" }],
+ };
+
+ // SUT: write a build config that only sets TrainingScriptureRanges
+ converter.WriteJson(writer, buildConfig, serializer);
+
+ writer.Received().WriteStartObject();
+ writer.Received().WritePropertyName(nameof(buildConfig.TrainingScriptureRanges));
+ serializer.Received().Serialize(writer, buildConfig.TrainingScriptureRanges);
+ writer.Received().WriteEndObject();
+ }
+
+ [Test]
+ public void WriteJson_Serializes_BuildConfig_TranslationScriptureRanges()
+ {
+ var converter = new BuildConfigJsonConverter();
+ var writer = Substitute.For();
+ var serializer = Substitute.For();
+ var buildConfig = new BuildConfig
+ {
+ TranslationScriptureRanges =
+ [
+ new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = "MAT;MRK" },
+ ],
+ };
+
+ // SUT: write a build config that only sets TranslationScriptureRanges
+ converter.WriteJson(writer, buildConfig, serializer);
+
+ writer.Received().WriteStartObject();
+ writer.Received().WritePropertyName(nameof(buildConfig.TranslationScriptureRanges));
+ serializer.Received().Serialize(writer, buildConfig.TranslationScriptureRanges);
writer.Received().WriteEndObject();
}
@@ -213,8 +256,14 @@ public void ReadJson_Deserializes_JSON_String()
public void ReadJson_Deserializes_JSON_Object()
{
var converter = new BuildConfigJsonConverter();
- const string jsonString =
- $"{{\"{nameof(BuildConfig.ProjectId)}\":\"{Project01}\",\"{nameof(BuildConfig.TrainingBooks)}\":[1,2,3],\"{nameof(BuildConfig.TranslationBooks)}\":[4,5,6],\"{nameof(BuildConfig.FastTraining)}\":true}}";
+ const string jsonString = $$"""
+ {
+ "{{nameof(BuildConfig.ProjectId)}}":"{{Project01}}",
+ "{{nameof(BuildConfig.TrainingBooks)}}":[1,2,3],
+ "{{nameof(BuildConfig.TranslationBooks)}}":[4,5,6],
+ "{{nameof(BuildConfig.FastTraining)}}":true
+ }
+ """;
using var stringReader = new StringReader(jsonString);
using var reader = new JsonTextReader(stringReader);
var serializer = new JsonSerializer();
@@ -234,8 +283,13 @@ public void ReadJson_Deserializes_JSON_Object()
public void ReadJson_Deserializes_JSON_Object_WithoutFastConfig()
{
var converter = new BuildConfigJsonConverter();
- const string jsonString =
- $"{{\"{nameof(BuildConfig.ProjectId)}\":\"{Project01}\",\"{nameof(BuildConfig.TrainingBooks)}\":[1,2,3],\"{nameof(BuildConfig.TranslationBooks)}\":[4,5,6]}}";
+ const string jsonString = $$"""
+ {
+ "{{nameof(BuildConfig.ProjectId)}}":"{{Project01}}",
+ "{{nameof(BuildConfig.TrainingBooks)}}":[1,2,3],
+ "{{nameof(BuildConfig.TranslationBooks)}}":[4,5,6]
+ }
+ """;
using var stringReader = new StringReader(jsonString);
using var reader = new JsonTextReader(stringReader);
var serializer = new JsonSerializer();
@@ -255,8 +309,12 @@ public void ReadJson_Deserializes_JSON_Object_WithoutFastConfig()
public void ReadJson_Deserializes_JSON_Object_TrainingDataFiles()
{
var converter = new BuildConfigJsonConverter();
- const string jsonString =
- $"{{\"{nameof(BuildConfig.ProjectId)}\":\"{Project01}\",\"{nameof(BuildConfig.TrainingDataFiles)}\":[\"{Data01}\",\"{Data02}\"]}}";
+ const string jsonString = $$"""
+ {
+ "{{nameof(BuildConfig.ProjectId)}}":"{{Project01}}",
+ "{{nameof(BuildConfig.TrainingDataFiles)}}":["{{Data01}}","{{Data02}}"]
+ }
+ """;
using var stringReader = new StringReader(jsonString);
using var reader = new JsonTextReader(stringReader);
var serializer = new JsonSerializer();
@@ -276,8 +334,12 @@ public void ReadJson_Deserializes_JSON_Object_TrainingScriptureRange()
{
var converter = new BuildConfigJsonConverter();
const string scriptureRange = "MAT;MRK1-2,4";
- const string jsonString =
- $"{{\"{nameof(BuildConfig.ProjectId)}\":\"{Project01}\",\"{nameof(BuildConfig.TrainingScriptureRange)}\":\"{scriptureRange}\"}}";
+ const string jsonString = $$"""
+ {
+ "{{nameof(BuildConfig.ProjectId)}}":"{{Project01}}",
+ "{{nameof(BuildConfig.TrainingScriptureRange)}}":"{{scriptureRange}}"
+ }
+ """;
using var stringReader = new StringReader(jsonString);
using var reader = new JsonTextReader(stringReader);
var serializer = new JsonSerializer();
@@ -288,7 +350,7 @@ public void ReadJson_Deserializes_JSON_Object_TrainingScriptureRange()
Assert.IsNotNull(buildConfig);
Assert.IsInstanceOf(buildConfig);
Assert.IsFalse(buildConfig!.FastTraining);
- CollectionAssert.AreEqual(scriptureRange, buildConfig.TrainingScriptureRange);
+ Assert.AreEqual(scriptureRange, buildConfig.TrainingScriptureRange);
Assert.AreEqual(Project01, buildConfig.ProjectId);
}
@@ -297,8 +359,80 @@ public void ReadJson_Deserializes_JSON_Object_TranslationScriptureRange()
{
var converter = new BuildConfigJsonConverter();
const string scriptureRange = "JHN";
- const string jsonString =
- $"{{\"{nameof(BuildConfig.ProjectId)}\":\"{Project01}\",\"{nameof(BuildConfig.TranslationScriptureRange)}\":\"{scriptureRange}\"}}";
+ const string jsonString = $$"""
+ {
+ "{{nameof(BuildConfig.ProjectId)}}":"{{Project01}}",
+ "{{nameof(BuildConfig.TranslationScriptureRange)}}":"{{scriptureRange}}"
+ }
+ """;
+ using var stringReader = new StringReader(jsonString);
+ using var reader = new JsonTextReader(stringReader);
+ var serializer = new JsonSerializer();
+
+ // SUT
+ var buildConfig = converter.ReadJson(reader, typeof(BuildConfig), null, false, serializer);
+
+ Assert.IsNotNull(buildConfig);
+ Assert.IsInstanceOf(buildConfig);
+ Assert.IsFalse(buildConfig!.FastTraining);
+ Assert.AreEqual(scriptureRange, buildConfig.TranslationScriptureRange);
+ Assert.AreEqual(Project01, buildConfig.ProjectId);
+ }
+
+ [Test]
+ public void ReadJson_Deserializes_JSON_Object_TrainingScriptureRanges()
+ {
+ var converter = new BuildConfigJsonConverter();
+ const string scriptureRange = "JHN";
+ const string jsonString = $$"""
+ {
+ "{{nameof(BuildConfig.ProjectId)}}":"{{Project01}}",
+ "{{nameof(BuildConfig.TrainingScriptureRanges)}}":
+ [
+ {
+ "{{nameof(ProjectScriptureRange.ProjectId)}}":"{{Project01}}",
+ "{{nameof(ProjectScriptureRange.ScriptureRange)}}":"{{scriptureRange}}"
+ }
+ ]
+ }
+ """;
+ using var stringReader = new StringReader(jsonString);
+ using var reader = new JsonTextReader(stringReader);
+ var serializer = new JsonSerializer();
+
+ // SUT
+ var buildConfig = converter.ReadJson(reader, typeof(BuildConfig), null, false, serializer);
+
+ Assert.IsNotNull(buildConfig);
+ Assert.IsInstanceOf(buildConfig);
+ Assert.IsFalse(buildConfig!.FastTraining);
+ CollectionAssert.AreEqual(
+ new List
+ {
+ new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = scriptureRange },
+ },
+ buildConfig.TrainingScriptureRanges
+ );
+ Assert.AreEqual(Project01, buildConfig.ProjectId);
+ }
+
+ [Test]
+ public void ReadJson_Deserializes_JSON_Object_TranslationScriptureRanges()
+ {
+ var converter = new BuildConfigJsonConverter();
+ const string scriptureRange = "JHN";
+ const string jsonString = $$"""
+ {
+ "{{nameof(BuildConfig.ProjectId)}}":"{{Project01}}",
+ "{{nameof(BuildConfig.TranslationScriptureRanges)}}":
+ [
+ {
+ "{{nameof(ProjectScriptureRange.ProjectId)}}":"{{Project01}}",
+ "{{nameof(ProjectScriptureRange.ScriptureRange)}}":"{{scriptureRange}}"
+ }
+ ]
+ }
+ """;
using var stringReader = new StringReader(jsonString);
using var reader = new JsonTextReader(stringReader);
var serializer = new JsonSerializer();
@@ -309,7 +443,13 @@ public void ReadJson_Deserializes_JSON_Object_TranslationScriptureRange()
Assert.IsNotNull(buildConfig);
Assert.IsInstanceOf(buildConfig);
Assert.IsFalse(buildConfig!.FastTraining);
- CollectionAssert.AreEqual(scriptureRange, buildConfig.TranslationScriptureRange);
+ CollectionAssert.AreEqual(
+ new List
+ {
+ new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = scriptureRange },
+ },
+ buildConfig.TranslationScriptureRanges
+ );
Assert.AreEqual(Project01, buildConfig.ProjectId);
}
}
diff --git a/test/SIL.XForge.Scripture.Tests/Services/MachineApiServiceTests.cs b/test/SIL.XForge.Scripture.Tests/Services/MachineApiServiceTests.cs
index 7a606a431b..c69a96f3fb 100644
--- a/test/SIL.XForge.Scripture.Tests/Services/MachineApiServiceTests.cs
+++ b/test/SIL.XForge.Scripture.Tests/Services/MachineApiServiceTests.cs
@@ -409,6 +409,10 @@ public async Task GetBuildAsync_IncludesAdditionalInfo()
const string engineId = "engineId1";
const string corpusId1 = "corpusId1";
const string corpusId2 = "corpusId2";
+ const string corpusId3 = "corpusId3";
+ const string corpusId4 = "corpusId4";
+ const string parallelCorpusId1 = "parallelCorpusId1";
+ const string parallelCorpusId2 = "parallelCorpusId2";
const int step = 123;
env.TranslationEnginesClient.GetBuildAsync(
TranslationEngine01,
@@ -434,12 +438,62 @@ public async Task GetBuildAsync_IncludesAdditionalInfo()
[
new PretranslateCorpus
{
- Corpus = new ResourceLink { Id = corpusId1, Url = "https://example.com" },
+ ParallelCorpus = new ResourceLink
+ {
+ Id = parallelCorpusId1,
+ Url = "https://example.com",
+ },
},
new PretranslateCorpus
{
- Corpus = new ResourceLink { Id = corpusId2, Url = "https://example.com" },
+ ParallelCorpus = new ResourceLink
+ {
+ Id = parallelCorpusId2,
+ Url = "https://example.com",
+ },
},
+ new PretranslateCorpus
+ {
+ SourceFilters =
+ [
+ new ParallelCorpusFilter
+ {
+ Corpus = new ResourceLink { Id = corpusId1, Url = "https://example.com" },
+ },
+ new ParallelCorpusFilter
+ {
+ Corpus = new ResourceLink { Id = corpusId2, Url = "https://example.com" },
+ },
+ ],
+ },
+ // Invalid corpus format
+ new PretranslateCorpus(),
+ ],
+ TrainOn =
+ [
+ new TrainingCorpus
+ {
+ ParallelCorpus = new ResourceLink { Id = corpusId3, Url = "https://example.com" },
+ },
+ new TrainingCorpus
+ {
+ SourceFilters =
+ [
+ new ParallelCorpusFilter
+ {
+ Corpus = new ResourceLink { Id = corpusId3, Url = "https://example.com" },
+ },
+ ],
+ TargetFilters =
+ [
+ new ParallelCorpusFilter
+ {
+ Corpus = new ResourceLink { Id = corpusId4, Url = "https://example.com" },
+ },
+ ],
+ },
+ // Invalid corpus format
+ new TrainingCorpus(),
],
}
)
@@ -472,9 +526,14 @@ public async Task GetBuildAsync_IncludesAdditionalInfo()
Assert.AreEqual(step, actual.AdditionalInfo.Step);
Assert.AreEqual(engineId, actual.AdditionalInfo.TranslationEngineId);
Assert.IsNotNull(actual.AdditionalInfo.CorporaIds);
- Assert.AreEqual(2, actual.AdditionalInfo.CorporaIds.Count());
- Assert.AreEqual(corpusId1, actual.AdditionalInfo.CorporaIds.First());
- Assert.AreEqual(corpusId2, actual.AdditionalInfo.CorporaIds.Last());
+ Assert.AreEqual(4, actual.AdditionalInfo.CorporaIds.Count());
+ Assert.AreEqual(corpusId1, actual.AdditionalInfo.CorporaIds.ElementAt(0));
+ Assert.AreEqual(corpusId2, actual.AdditionalInfo.CorporaIds.ElementAt(1));
+ Assert.AreEqual(corpusId3, actual.AdditionalInfo.CorporaIds.ElementAt(2));
+ Assert.AreEqual(corpusId4, actual.AdditionalInfo.CorporaIds.ElementAt(3));
+ Assert.IsNotNull(actual.AdditionalInfo.ParallelCorporaIds);
+ Assert.AreEqual(parallelCorpusId1, actual.AdditionalInfo.ParallelCorporaIds.ElementAt(0));
+ Assert.AreEqual(parallelCorpusId2, actual.AdditionalInfo.ParallelCorporaIds.ElementAt(1));
}
[Test]
@@ -1080,7 +1139,7 @@ public async Task GetPreTranslationAsync_Success()
Task.FromResult(
new PreTranslation[]
{
- new PreTranslation { Reference = reference, Translation = translation, },
+ new PreTranslation { Reference = reference, Translation = translation },
}
)
);
@@ -1592,7 +1651,7 @@ public async Task IsLanguageSupportedAsync_LanguageSupported()
{
EngineType = MachineProjectService.Nmt,
InternalCode = internalCode,
- IsNative = true
+ IsNative = true,
}
)
);
@@ -1760,6 +1819,56 @@ public void StartPreTranslationBuildAsync_DoNotAllowTrainingScriptureRangeWithTr
);
}
+ [Test]
+ public void StartPreTranslationBuildAsync_DoNotAllowTrainingScriptureRangesWithTrainingScriptureRange()
+ {
+ // Set up test environment
+ var env = new TestEnvironment();
+
+ // SUT
+ Assert.ThrowsAsync(
+ () =>
+ env.Service.StartPreTranslationBuildAsync(
+ User01,
+ new BuildConfig
+ {
+ ProjectId = Project01,
+ TrainingScriptureRange = "GEN",
+ TrainingScriptureRanges =
+ [
+ new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = "GEN" },
+ ],
+ },
+ CancellationToken.None
+ )
+ );
+ }
+
+ [Test]
+ public void StartPreTranslationBuildAsync_DoNotAllowTrainingScriptureRangesWithTrainingBooks()
+ {
+ // Set up test environment
+ var env = new TestEnvironment();
+
+ // SUT
+ Assert.ThrowsAsync(
+ () =>
+ env.Service.StartPreTranslationBuildAsync(
+ User01,
+ new BuildConfig
+ {
+ ProjectId = Project01,
+ TrainingBooks = [1],
+ TrainingScriptureRanges =
+ [
+ new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = "GEN" },
+ ],
+ },
+ CancellationToken.None
+ )
+ );
+ }
+
[Test]
public void StartPreTranslationBuildAsync_DoNotAllowTranslationScriptureRangeWithTranslationBooks()
{
@@ -1782,6 +1891,56 @@ public void StartPreTranslationBuildAsync_DoNotAllowTranslationScriptureRangeWit
);
}
+ [Test]
+ public void StartPreTranslationBuildAsync_DoNotAllowTranslationScriptureRangesWithTranslationScriptureRange()
+ {
+ // Set up test environment
+ var env = new TestEnvironment();
+
+ // SUT
+ Assert.ThrowsAsync(
+ () =>
+ env.Service.StartPreTranslationBuildAsync(
+ User01,
+ new BuildConfig
+ {
+ ProjectId = Project01,
+ TranslationScriptureRange = "GEN",
+ TranslationScriptureRanges =
+ [
+ new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = "GEN" },
+ ],
+ },
+ CancellationToken.None
+ )
+ );
+ }
+
+ [Test]
+ public void StartPreTranslationBuildAsync_DoNotAllowTranslationScriptureRangesWithTranslationBooks()
+ {
+ // Set up test environment
+ var env = new TestEnvironment();
+
+ // SUT
+ Assert.ThrowsAsync(
+ () =>
+ env.Service.StartPreTranslationBuildAsync(
+ User01,
+ new BuildConfig
+ {
+ ProjectId = Project01,
+ TranslationBooks = [1],
+ TranslationScriptureRanges =
+ [
+ new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = "GEN" },
+ ],
+ },
+ CancellationToken.None
+ )
+ );
+ }
+
[Test]
public async Task StartPreTranslationBuildAsync_SuccessNoTrainingOrTranslationBooks()
{
@@ -1871,6 +2030,71 @@ await env.Service.StartPreTranslationBuildAsync(
);
}
+ [Test]
+ public async Task StartPreTranslationBuildAsync_SuccessWithTrainingAndTranslationScriptureRanges()
+ {
+ // Set up test environment
+ var env = new TestEnvironment();
+ const string scriptureRange1 = "GEN";
+ const string scriptureRange2 = "EXO";
+
+ // SUT
+ await env.Service.StartPreTranslationBuildAsync(
+ User01,
+ new BuildConfig
+ {
+ ProjectId = Project01,
+ TrainingScriptureRanges =
+ [
+ new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = scriptureRange1 },
+ ],
+ TranslationScriptureRanges =
+ [
+ new ProjectScriptureRange { ProjectId = Project02, ScriptureRange = scriptureRange2 },
+ ],
+ },
+ CancellationToken.None
+ );
+
+ await env.ProjectService.Received(1).SyncAsync(User01, Project01);
+ env.BackgroundJobClient.Received(1).Create(Arg.Any(), Arg.Any());
+ Assert.AreEqual(JobId, env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationJobId);
+ Assert.IsNotNull(env.ProjectSecrets.Get(Project01).ServalData?.PreTranslationQueuedAt);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData?.PreTranslationErrorMessage);
+ Assert.AreEqual(
+ 1,
+ env.Projects.Get(Project01).TranslateConfig.DraftConfig.LastSelectedTrainingScriptureRanges.Count
+ );
+ Assert.AreEqual(
+ Project01,
+ env.Projects.Get(Project01)
+ .TranslateConfig.DraftConfig.LastSelectedTrainingScriptureRanges.First()
+ .ProjectId
+ );
+ Assert.AreEqual(
+ scriptureRange1,
+ env.Projects.Get(Project01)
+ .TranslateConfig.DraftConfig.LastSelectedTrainingScriptureRanges.First()
+ .ScriptureRange
+ );
+ Assert.AreEqual(
+ 1,
+ env.Projects.Get(Project01).TranslateConfig.DraftConfig.LastSelectedTranslationScriptureRanges.Count
+ );
+ Assert.AreEqual(
+ Project02,
+ env.Projects.Get(Project01)
+ .TranslateConfig.DraftConfig.LastSelectedTranslationScriptureRanges.First()
+ .ProjectId
+ );
+ Assert.AreEqual(
+ scriptureRange2,
+ env.Projects.Get(Project01)
+ .TranslateConfig.DraftConfig.LastSelectedTranslationScriptureRanges.First()
+ .ScriptureRange
+ );
+ }
+
[Test]
public async Task StartPreTranslationBuildAsync_AlternateTrainingSource()
{
@@ -2207,14 +2431,6 @@ public TestEnvironment()
MachineProjectService
.GetTranslationEngineTypeAsync(preTranslate: true)
.Returns(Task.FromResult(Services.MachineProjectService.Nmt));
- MachineProjectService
- .TranslationEngineExistsAsync(
- Project01,
- TranslationEngine01,
- preTranslate: false,
- CancellationToken.None
- )
- .Returns(Task.FromResult(true));
MockLogger = new MockLogger();
ParatextService = Substitute.For();
PreTranslationService = Substitute.For();
diff --git a/test/SIL.XForge.Scripture.Tests/Services/MachineProjectServiceTests.cs b/test/SIL.XForge.Scripture.Tests/Services/MachineProjectServiceTests.cs
index 07cf3bd838..f57b763196 100644
--- a/test/SIL.XForge.Scripture.Tests/Services/MachineProjectServiceTests.cs
+++ b/test/SIL.XForge.Scripture.Tests/Services/MachineProjectServiceTests.cs
@@ -12,16 +12,18 @@
using Newtonsoft.Json.Linq;
using NSubstitute;
using NSubstitute.ExceptionExtensions;
+using NSubstitute.Extensions;
using NUnit.Framework;
-using Polly.CircuitBreaker;
using Serval.Client;
using SIL.XForge.Configuration;
using SIL.XForge.DataAccess;
using SIL.XForge.Models;
using SIL.XForge.Realtime;
+using SIL.XForge.Realtime.Json0;
using SIL.XForge.Scripture.Models;
using SIL.XForge.Scripture.Realtime;
using SIL.XForge.Services;
+using SIL.XForge.Utils;
namespace SIL.XForge.Scripture.Services;
@@ -31,20 +33,44 @@ public class MachineProjectServiceTests
private const string Paratext01 = "paratext01";
private const string Paratext02 = "paratext02";
private const string Paratext03 = "paratext03";
+ private const string Paratext04 = "paratext04";
+ private const string Paratext05 = "paratext05";
private const string Project01 = "project01";
private const string Project02 = "project02";
private const string Project03 = "project03";
+ private const string Project04 = "project04";
+ private const string Project05 = "project05";
private const string User01 = "user01";
private const string Corpus01 = "corpus01";
private const string Corpus02 = "corpus02";
private const string Corpus03 = "corpus03";
+ private const string Corpus04 = "corpus04";
private const string Data01 = "data01";
private const string File01 = "file01";
private const string File02 = "file02";
+ private const string File03 = "file03";
+ private const string File04 = "file04";
+ private const string File05 = "file05";
+ private const string File06 = "file06";
+ private const string Job01 = "job01";
+ private const string ParallelCorpus01 = "parallelCorpus01";
+ private const string ParallelCorpus02 = "parallelCorpus02";
+ private const string ParallelCorpus03 = "parallelCorpus03";
private const string TranslationEngine01 = "translationEngine01";
private const string TranslationEngine02 = "translationEngine02";
private const string LanguageTag = "he";
+ [Test]
+ public async Task AddProjectAsync_DoesNotCreateIfLanguageMissing()
+ {
+ // Set up test environment
+ var env = new TestEnvironment();
+
+ // SUT
+ string actual = await env.Service.AddProjectAsync(Project03, preTranslate: false, CancellationToken.None);
+ Assert.IsEmpty(actual);
+ }
+
[Test]
public void AddProjectAsync_ThrowsExceptionWhenProjectSecretMissing()
{
@@ -53,292 +79,464 @@ public void AddProjectAsync_ThrowsExceptionWhenProjectSecretMissing()
// SUT
Assert.ThrowsAsync(
- () => env.Service.AddProjectAsync(User01, "invalid_project_id", preTranslate: false, CancellationToken.None)
+ () => env.Service.AddProjectAsync("invalid_project_id", preTranslate: false, CancellationToken.None)
);
}
[Test]
- public async Task AddProjectAsync_DoesNotCreateIfLanguageMissing()
+ public async Task AddProjectAsync_Success()
{
// Set up test environment
var env = new TestEnvironment();
+ env.Service.Configure()
+ .CreateServalProjectAsync(Arg.Any(), preTranslate: true, CancellationToken.None)
+ .Returns(Task.FromResult(TranslationEngine01));
// SUT
- string actual = await env.Service.AddProjectAsync(
- User01,
- Project03,
- preTranslate: false,
- CancellationToken.None
- );
- Assert.IsEmpty(actual);
+ string actual = await env.Service.AddProjectAsync(Project01, preTranslate: true, CancellationToken.None);
+ Assert.AreEqual(TranslationEngine01, actual);
}
[Test]
- public async Task BuildProjectAsync_UsesTheUpdatedLearningRateForServal()
+ public async Task BuildProjectForBackgroundJobAsync_DoesNotRecordBuildInProgressErrors()
{
// Set up test environment
- var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false });
- env.FeatureManager.IsEnabledAsync(FeatureFlags.UpdatedLearningRateForServal).Returns(Task.FromResult(true));
+ var env = new TestEnvironment();
+ ServalApiException ex = ServalApiExceptions.BuildInProgress;
+ var buildConfig = new BuildConfig { ProjectId = Project01 };
+ env.Service.Configure()
+ .BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None)
+ .ThrowsAsync(ex);
+
+ // A pre-translation job has been queued
+ await env.SetupProjectSecretAsync(
+ Project01,
+ new ServalData { PreTranslationJobId = Job01, PreTranslationQueuedAt = DateTime.UtcNow }
+ );
// SUT
- await env.Service.BuildProjectAsync(
+ await env.Service.BuildProjectForBackgroundJobAsync(
User01,
- new BuildConfig { ProjectId = Project01, FastTraining = true },
+ buildConfig,
preTranslate: true,
CancellationToken.None
);
- await env
- .TranslationEnginesClient.Received()
- .StartBuildAsync(
- TranslationEngine01,
- Arg.Is(b => ((int)((JObject)b.Options)["train_params"]["max_steps"]) == 5000),
- CancellationToken.None
- );
+ env.MockLogger.AssertNoEvent(logEvent => logEvent.Exception == ex);
+ env.ExceptionHandler.DidNotReceiveWithAnyArgs().ReportException(ex);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationErrorMessage);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationJobId);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationQueuedAt);
}
[Test]
- public void BuildProjectAsync_ThrowsExceptionWhenProjectSecretMissing()
+ public async Task BuildProjectForBackgroundJobAsync_DoesNotRecordBuildInProgressErrorsForSmt()
{
// Set up test environment
var env = new TestEnvironment();
+ ServalApiException ex = ServalApiExceptions.BuildInProgress;
+ var buildConfig = new BuildConfig { ProjectId = Project01 };
+ env.Service.Configure()
+ .BuildProjectAsync(User01, buildConfig, preTranslate: false, CancellationToken.None)
+ .ThrowsAsync(ex);
+
+ // An SMT translation job has been queued
+ await env.SetupProjectSecretAsync(
+ Project01,
+ new ServalData { TranslationJobId = Job01, TranslationQueuedAt = DateTime.UtcNow }
+ );
// SUT
- Assert.ThrowsAsync(
- () =>
- env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = "invalid_project_id" },
- preTranslate: false,
- CancellationToken.None
- )
+ await env.Service.BuildProjectForBackgroundJobAsync(
+ User01,
+ buildConfig,
+ preTranslate: false,
+ CancellationToken.None
);
+
+ env.MockLogger.AssertNoEvent(logEvent => logEvent.Exception == ex);
+ env.ExceptionHandler.DidNotReceiveWithAnyArgs().ReportException(ex);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationErrorMessage);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationJobId);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationQueuedAt);
}
[Test]
- public async Task BuildProjectAsync_ThrowsExceptionWhenProjectMissing()
+ public async Task BuildProjectForBackgroundJobAsync_DoesNotRecordTaskCancellation()
{
// Set up test environment
var env = new TestEnvironment();
- await env.Projects.DeleteAllAsync(_ => true);
+ var ex = new TaskCanceledException();
+ var buildConfig = new BuildConfig { ProjectId = Project01 };
+ env.Service.Configure()
+ .BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None)
+ .ThrowsAsync(ex);
+
+ // A pre-translation job has been queued
+ await env.SetupProjectSecretAsync(Project01, new ServalData { PreTranslationQueuedAt = DateTime.UtcNow });
// SUT
- Assert.ThrowsAsync(
- () =>
- env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = Project01 },
- preTranslate: false,
- CancellationToken.None
- )
+ await env.Service.BuildProjectForBackgroundJobAsync(
+ User01,
+ buildConfig,
+ preTranslate: true,
+ CancellationToken.None
);
+
+ env.ExceptionHandler.DidNotReceive().ReportException(Arg.Any());
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationQueuedAt);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationErrorMessage);
}
[Test]
- public async Task BuildProjectAsync_ThrowsExceptionWhenSourceMissing()
+ public async Task BuildProjectForBackgroundJobAsync_DoesNotRecordTaskCancellationForSmt()
{
// Set up test environment
var env = new TestEnvironment();
- await env.Projects.UpdateAsync(Project01, op => op.Unset(p => p.TranslateConfig.Source));
+ var ex = new TaskCanceledException();
+ var buildConfig = new BuildConfig { ProjectId = Project01 };
+ env.Service.Configure()
+ .BuildProjectAsync(User01, buildConfig, preTranslate: false, CancellationToken.None)
+ .ThrowsAsync(ex);
+
+ // An SMT translation job has been queued
+ await env.SetupProjectSecretAsync(Project01, new ServalData { TranslationQueuedAt = DateTime.UtcNow });
// SUT
- Assert.ThrowsAsync(
- () =>
- env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = Project01 },
- preTranslate: false,
- CancellationToken.None
- )
+ await env.Service.BuildProjectForBackgroundJobAsync(
+ User01,
+ buildConfig,
+ preTranslate: false,
+ CancellationToken.None
);
+
+ env.ExceptionHandler.DidNotReceive().ReportException(Arg.Any());
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationQueuedAt);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationErrorMessage);
}
[Test]
- public async Task BuildProjectAsync_ThrowsExceptionWhenSourceRemoved()
+ public async Task BuildProjectForBackgroundJobAsync_RecordsDataNotFoundExceptionAsWarning()
{
// Set up test environment
- var env = new TestEnvironment(new TestEnvironmentOptions { HasTranslationEngineForSmt = true });
- await env.Projects.UpdateAsync(Project02, op => op.Unset(p => p.TranslateConfig.Source));
+ var env = new TestEnvironment();
+ var ex = new DataNotFoundException("Not Found");
+ var buildConfig = new BuildConfig { ProjectId = Project01 };
+ env.Service.Configure()
+ .BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None)
+ .ThrowsAsync(ex);
// SUT
- Assert.ThrowsAsync(
- () =>
- env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = Project02 },
- preTranslate: false,
- CancellationToken.None
- )
+ await env.Service.BuildProjectForBackgroundJobAsync(
+ User01,
+ buildConfig,
+ preTranslate: true,
+ CancellationToken.None
);
+
+ env.MockLogger.AssertHasEvent(logEvent => logEvent.Exception == ex && logEvent.LogLevel == LogLevel.Warning);
+ env.ExceptionHandler.DidNotReceive().ReportException(Arg.Any());
}
[Test]
- public async Task BuildProjectAsync_CallsServalIfTranslationEngineIdPresent()
+ public async Task BuildProjectForBackgroundJobAsync_RecordsErrors()
{
// Set up test environment
- var env = new TestEnvironment(new TestEnvironmentOptions { HasTranslationEngineForSmt = true });
+ var env = new TestEnvironment();
+ ServalApiException ex = ServalApiExceptions.Forbidden;
+ var buildConfig = new BuildConfig { ProjectId = Project01 };
+ env.Service.Configure()
+ .BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None)
+ .ThrowsAsync(ex);
+
+ // A pre-translation job has been queued
+ await env.SetupProjectSecretAsync(
+ Project01,
+ new ServalData { PreTranslationJobId = Job01, PreTranslationQueuedAt = DateTime.UtcNow }
+ );
// SUT
- await env.Service.BuildProjectAsync(
+ await env.Service.BuildProjectForBackgroundJobAsync(
User01,
- new BuildConfig { ProjectId = Project02 },
- preTranslate: false,
+ buildConfig,
+ preTranslate: true,
CancellationToken.None
);
- await env
- .TranslationEnginesClient.Received()
- .StartBuildAsync(TranslationEngine02, Arg.Any(), CancellationToken.None);
+ env.MockLogger.AssertHasEvent(logEvent => logEvent.Exception == ex && logEvent.LogLevel == LogLevel.Error);
+ env.ExceptionHandler.Received(1).ReportException(ex);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationJobId);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationQueuedAt);
+ Assert.AreEqual(ex.Message, env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationErrorMessage);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationErrorMessage);
}
[Test]
- public async Task BuildProjectAsync_BuildsPreTranslationProjects()
+ public async Task BuildProjectForBackgroundJobAsync_RecordsErrorsForSmt()
{
// Set up test environment
- var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false });
+ var env = new TestEnvironment();
+ ServalApiException ex = ServalApiExceptions.Forbidden;
+ var buildConfig = new BuildConfig { ProjectId = Project01 };
+ env.Service.Configure()
+ .BuildProjectAsync(User01, buildConfig, preTranslate: false, CancellationToken.None)
+ .ThrowsAsync(ex);
+
+ // An SMT translation job has been queued
+ await env.SetupProjectSecretAsync(
+ Project01,
+ new ServalData { TranslationJobId = Job01, TranslationQueuedAt = DateTime.UtcNow }
+ );
// SUT
- await env.Service.BuildProjectAsync(
+ await env.Service.BuildProjectForBackgroundJobAsync(
User01,
- new BuildConfig { ProjectId = Project01 },
- preTranslate: true,
+ buildConfig,
+ preTranslate: false,
CancellationToken.None
);
- await env
- .TranslationEnginesClient.Received()
- .StartBuildAsync(TranslationEngine01, Arg.Any(), CancellationToken.None);
+ env.MockLogger.AssertHasEvent(logEvent => logEvent.Exception == ex && logEvent.LogLevel == LogLevel.Error);
+ env.ExceptionHandler.Received(1).ReportException(ex);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationErrorMessage);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationJobId);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationQueuedAt);
+ Assert.AreEqual(ex.Message, env.ProjectSecrets.Get(Project01).ServalData!.TranslationErrorMessage);
}
[Test]
- public async Task BuildProjectAsync_SendsAdditionalTrainingData()
+ public async Task BuildProjectForBackgroundJobAsync_RunsBuildProjectAsync()
{
// Set up test environment
var env = new TestEnvironment();
- await env.SetupTrainingDataAsync(Project01);
+ var buildConfig = new BuildConfig { ProjectId = Project01 };
+ env.Service.Configure()
+ .BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None)
+ .Returns(Task.CompletedTask);
// SUT
- await env.Service.BuildProjectAsync(
+ await env.Service.BuildProjectForBackgroundJobAsync(
User01,
- new BuildConfig { ProjectId = Project01, TrainingDataFiles = { Data01 } },
+ buildConfig,
preTranslate: true,
CancellationToken.None
);
- // Ensure that the additional texts were retrieved
await env
- .TrainingDataService.Received()
- .GetTextsAsync(
+ .Service.Received(1)
+ .BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None);
+ }
+
+ [Test]
+ public async Task BuildProjectAsync_PreTranslationBuild()
+ {
+ // Set up test environment
+ var env = new TestEnvironment();
+ var servalData = new ServalData
+ {
+ PreTranslationEngineId = TranslationEngine01,
+ PreTranslationJobId = Job01,
+ PreTranslationQueuedAt = DateTime.UtcNow,
+ };
+ await env.SetupProjectSecretAsync(Project01, servalData);
+ var buildConfig = new BuildConfig { ProjectId = Project01 };
+ env.Service.Configure()
+ .RemoveLegacyServalDataAsync(Project01, preTranslate: true, CancellationToken.None)
+ .Returns(Task.CompletedTask);
+ env.Service.Configure()
+ .EnsureTranslationEngineExistsAsync(
User01,
- Project01,
- Arg.Is>(d => d.Contains(Data01)),
- Arg.Any>(),
- Arg.Any>()
- );
+ Arg.Any>(),
+ Arg.Any(),
+ preTranslate: true,
+ CancellationToken.None
+ )
+ .Returns(Task.FromResult(TranslationEngine01));
+ env.Service.Configure()
+ .RecreateTranslationEngineIfRequiredAsync(
+ TranslationEngine01,
+ Arg.Any(),
+ preTranslate: true,
+ CancellationToken.None
+ )
+ .Returns(Task.CompletedTask);
+ env.Service.Configure()
+ .SyncProjectCorporaAsync(User01, buildConfig, preTranslate: true, CancellationToken.None)
+ .Returns(Task.FromResult>([]));
+ var translationBuildConfig = new TranslationBuildConfig();
+ env.Service.Configure()
+ .GetTranslationBuildConfig(
+ Arg.Any(),
+ servalConfig: null,
+ buildConfig,
+ Arg.Any>()
+ )
+ .Returns(translationBuildConfig);
- // Ensure that the additional files corpus was synced, and the build started
- await env
- .TranslationEnginesClient.Received()
- .AddCorpusAsync(Arg.Any(), Arg.Any(), CancellationToken.None);
- Assert.IsNotEmpty(
- env.ProjectSecrets.Get(Project01)
- .ServalData!.Corpora.First(c => c.Value.PreTranslate && c.Value.AdditionalTrainingData)
- .Key
- );
+ // SUT
+ await env.Service.BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationJobId);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationQueuedAt);
await env
- .TranslationEnginesClient.Received()
- .StartBuildAsync(
- Arg.Any(),
- Arg.Is(b => b.TrainOn == null),
+ .TranslationEnginesClient.Received(1)
+ .StartBuildAsync(TranslationEngine01, translationBuildConfig, CancellationToken.None);
+ }
+
+ [Test]
+ public async Task BuildProjectAsync_SmtTranslationBuild()
+ {
+ // Set up test environment
+ var env = new TestEnvironment();
+ var servalData = new ServalData
+ {
+ TranslationEngineId = TranslationEngine01,
+ TranslationJobId = Job01,
+ TranslationQueuedAt = DateTime.UtcNow,
+ };
+ await env.SetupProjectSecretAsync(Project01, servalData);
+ var buildConfig = new BuildConfig { ProjectId = Project01 };
+ env.Service.Configure()
+ .RemoveLegacyServalDataAsync(Project01, preTranslate: false, CancellationToken.None)
+ .Returns(Task.CompletedTask);
+ env.Service.Configure()
+ .EnsureTranslationEngineExistsAsync(
+ User01,
+ Arg.Any>(),
+ Arg.Any(),
+ preTranslate: false,
CancellationToken.None
- );
+ )
+ .Returns(Task.FromResult(TranslationEngine01));
+ env.Service.Configure()
+ .RecreateTranslationEngineIfRequiredAsync(
+ TranslationEngine01,
+ Arg.Any(),
+ preTranslate: false,
+ CancellationToken.None
+ )
+ .Returns(Task.CompletedTask);
+ env.Service.Configure()
+ .SyncProjectCorporaAsync(User01, buildConfig, preTranslate: false, CancellationToken.None)
+ .Returns(Task.FromResult>([]));
+
+ // SUT
+ await env.Service.BuildProjectAsync(User01, buildConfig, preTranslate: false, CancellationToken.None);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationJobId);
+ Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationQueuedAt);
+ await env
+ .TranslationEnginesClient.Received(1)
+ .StartBuildAsync(TranslationEngine01, Arg.Any(), CancellationToken.None);
}
[Test]
- public async Task BuildProjectAsync_SendsAdditionalTrainingDataWhenFilesPreviouslyUploaded()
+ public async Task BuildProjectAsync_ThrowsExceptionWhenProjectMissing()
{
// Set up test environment
var env = new TestEnvironment();
- await env.SetupTrainingDataAsync(Project02, existingData: true);
+ await env.Projects.DeleteAllAsync(_ => true);
// SUT
- await env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = Project02, TrainingDataFiles = { Data01 } },
- preTranslate: true,
- CancellationToken.None
+ Assert.ThrowsAsync(
+ () =>
+ env.Service.BuildProjectAsync(
+ User01,
+ new BuildConfig { ProjectId = Project01 },
+ preTranslate: false,
+ CancellationToken.None
+ )
);
+ }
- // Ensure that the additional texts were retrieved
- await env
- .TrainingDataService.Received()
- .GetTextsAsync(
+ [Test]
+ public async Task BuildProjectAsync_ThrowsExceptionWhenProjectSecretMissing()
+ {
+ // Set up test environment
+ var env = new TestEnvironment();
+ await env.ProjectSecrets.DeleteAllAsync(_ => true);
+
+ // SUT
+ Assert.ThrowsAsync(
+ () =>
+ env.Service.BuildProjectAsync(
+ User01,
+ new BuildConfig { ProjectId = Project01 },
+ preTranslate: false,
+ CancellationToken.None
+ )
+ );
+ }
+
+ [Test]
+ public async Task BuildProjectAsync_ThrowsExceptionWhenServalDataMissing()
+ {
+ // Set up test environment
+ var env = new TestEnvironment();
+ await env.SetupProjectSecretAsync(Project01, null);
+ var buildConfig = new BuildConfig { ProjectId = Project01 };
+ env.Service.Configure()
+ .RemoveLegacyServalDataAsync(Project01, preTranslate: true, CancellationToken.None)
+ .Returns(Task.CompletedTask);
+ env.Service.Configure()
+ .EnsureTranslationEngineExistsAsync(
User01,
- Project02,
- Arg.Is>(d => d.Contains(Data01)),
- Arg.Any>(),
- Arg.Any>()
- );
+ Arg.Any>(),
+ Arg.Any(),
+ preTranslate: true,
+ CancellationToken.None
+ )
+ .Returns(Task.FromResult(TranslationEngine01));
+ env.Service.Configure()
+ .RecreateTranslationEngineIfRequiredAsync(
+ TranslationEngine01,
+ Arg.Any(),
+ preTranslate: true,
+ CancellationToken.None
+ )
+ .Returns(Task.CompletedTask);
+ env.Service.Configure()
+ .SyncProjectCorporaAsync(User01, buildConfig, preTranslate: true, CancellationToken.None)
+ .Returns(Task.FromResult>([]));
- // Ensure that the previous files with different IDs were deleted, and new ones added
- await env.DataFilesClient.Received(2).DeleteAsync(File02);
- await env
- .DataFilesClient.Received()
- .CreateAsync(Arg.Any(), Arg.Any(), Data01, CancellationToken.None);
+ // SUT
+ Assert.ThrowsAsync(
+ () => env.Service.BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None)
+ );
}
[Test]
- public async Task BuildProjectAsync_SendsAdditionalTrainingDataWithAlternateSource()
+ public async Task BuildProjectAsync_UsesTheServalConfigurationSpecifiedByTheServalAdmin()
{
// Set up test environment
- var env = new TestEnvironment(
- new TestEnvironmentOptions
- {
- AlternateTrainingSourceEnabled = true,
- AlternateTrainingSourceConfigured = true,
- }
+ var env = new TestEnvironment();
+ const string servalConfig = """{"max_steps":35}""";
+ await env.Projects.UpdateAsync(
+ Project01,
+ op => op.Set(p => p.TranslateConfig.DraftConfig.ServalConfig, servalConfig)
);
- await env.SetupTrainingDataAsync(Project02);
// SUT
await env.Service.BuildProjectAsync(
User01,
- new BuildConfig { ProjectId = Project02, TrainingDataFiles = { Data01 } },
+ new BuildConfig { ProjectId = Project01 },
preTranslate: true,
CancellationToken.None
);
-
- // Ensure that the additional texts were retrieved
- await env
- .TrainingDataService.Received()
- .GetTextsAsync(
- User01,
- Project02,
- Arg.Is>(d => d.Contains(Data01)),
- Arg.Any>(),
- Arg.Any>()
- );
-
- // Ensure that the build passed the additional files corpus in the train_on parameter
- string corpusId = env
- .ProjectSecrets.Get(Project02)
- .ServalData!.Corpora.First(c => c.Value.PreTranslate && c.Value.AdditionalTrainingData)
- .Key;
await env
.TranslationEnginesClient.Received()
.StartBuildAsync(
- Arg.Any(),
- Arg.Is(b => b.TrainOn.Any(c => c.CorpusId == corpusId)),
+ TranslationEngine01,
+ Arg.Is(b => ((int)((JObject)b.Options)["max_steps"]) == 35),
CancellationToken.None
);
+ await env.FeatureManager.DidNotReceive().IsEnabledAsync(FeatureFlags.UpdatedLearningRateForServal);
}
[Test]
- public async Task BuildProjectAsync_PassesFastTrainingConfiguration()
+ public async Task BuildProjectAsync_UsesTheUpdatedLearningRateForServal()
{
// Set up test environment
- var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false });
-
+ var env = new TestEnvironment();
+ env.FeatureManager.IsEnabledAsync(FeatureFlags.UpdatedLearningRateForServal).Returns(Task.FromResult(true));
// SUT
await env.Service.BuildProjectAsync(
User01,
@@ -346,831 +544,1172 @@ await env.Service.BuildProjectAsync(
preTranslate: true,
CancellationToken.None
);
-
await env
.TranslationEnginesClient.Received()
.StartBuildAsync(
TranslationEngine01,
- Arg.Is(b => ((int)((JObject)b.Options)["max_steps"]) == 20),
+ Arg.Is(b => ((int)((JObject)b.Options)["train_params"]["max_steps"]) == 5000),
CancellationToken.None
);
}
[Test]
- public async Task BuildProjectAsync_MergesFastTrainingConfiguration()
+ public async Task CreateOrUpdateParallelCorpusAsync_CreatesParallelCorpus()
{
// Set up test environment
- var env = new TestEnvironment(
- new TestEnvironmentOptions { BuildIsPending = false, ServalConfig = @"{""max_steps"":35}" }
- );
+ var env = new TestEnvironment();
// SUT
- await env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = Project01, FastTraining = true },
- preTranslate: true,
+ string actual = await env.Service.CreateOrUpdateParallelCorpusAsync(
+ TranslationEngine01,
+ null,
+ string.Empty,
+ [],
+ [],
CancellationToken.None
);
-
+ Assert.AreEqual(ParallelCorpus01, actual);
await env
- .TranslationEnginesClient.Received()
- .StartBuildAsync(
- TranslationEngine01,
- Arg.Is(b => ((int)((JObject)b.Options)["max_steps"]) == 20),
- CancellationToken.None
- );
+ .TranslationEnginesClient.Received(1)
+ .AddParallelCorpusAsync(TranslationEngine01, Arg.Any());
}
[Test]
- public async Task BuildProjectAsync_PassesServalConfig()
+ public async Task CreateOrUpdateParallelCorpusAsync_UpdatesParallelCorpus()
{
// Set up test environment
- var env = new TestEnvironment(
- new TestEnvironmentOptions { BuildIsPending = false, ServalConfig = @"{""max_steps"":35}" }
- );
+ var env = new TestEnvironment();
// SUT
- await env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = Project01 },
- preTranslate: true,
+ string actual = await env.Service.CreateOrUpdateParallelCorpusAsync(
+ TranslationEngine01,
+ ParallelCorpus01,
+ string.Empty,
+ [],
+ [],
CancellationToken.None
);
-
+ Assert.AreEqual(ParallelCorpus01, actual);
await env
- .TranslationEnginesClient.Received()
- .StartBuildAsync(
+ .TranslationEnginesClient.Received(1)
+ .UpdateParallelCorpusAsync(
TranslationEngine01,
- Arg.Is(b => ((int)((JObject)b.Options)["max_steps"]) == 35),
- CancellationToken.None
+ ParallelCorpus01,
+ Arg.Any()
);
}
[Test]
- public async Task BuildProjectAsync_CreatesServalProjectIfMissing()
+ public async Task CreateServalProjectAsync_ExistingPreTranslationProject()
{
// Set up test environment
var env = new TestEnvironment();
- string sourceLanguage = env.Projects.Get(Project01).TranslateConfig.Source!.WritingSystem.Tag;
- string targetLanguage = env.Projects.Get(Project01).WritingSystem.Tag;
- Assert.AreNotEqual(sourceLanguage, targetLanguage);
+ var project = new SFProject { Id = Project01 };
+ await env.SetupProjectSecretAsync(Project01, new ServalData { PreTranslationEngineId = TranslationEngine01 });
// SUT
- await env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = Project01 },
- preTranslate: true,
- CancellationToken.None
- );
+ string actual = await env.Service.CreateServalProjectAsync(project, preTranslate: true, CancellationToken.None);
+ Assert.AreEqual(TranslationEngine01, actual);
+ await env.TranslationEnginesClient.DidNotReceiveWithAnyArgs().CreateAsync(Arg.Any());
+ }
- await env
- .TranslationEnginesClient.Received()
- .CreateAsync(
- Arg.Is(t =>
- t.SourceLanguage == sourceLanguage && t.TargetLanguage == targetLanguage
- ),
- CancellationToken.None
- );
+ [Test]
+ public async Task CreateServalProjectAsync_ExistingServalDataInProjectSecretsForPreTranslation()
+ {
+ // Set up test environment
+ var env = new TestEnvironment();
+ await env.SetupProjectSecretAsync(Project01, new ServalData());
+ var project = new SFProject { Id = Project01 };
+ env.Service.Configure().GetSourceLanguage(project).Returns("en");
+ env.Service.Configure().GetTargetLanguageAsync(project).Returns(Task.FromResult("de"));
+ env.TranslationEnginesClient.CreateAsync(Arg.Any())
+ .Returns(Task.FromResult(new TranslationEngine { Id = TranslationEngine01 }));
+
+ // SUT
+ string actual = await env.Service.CreateServalProjectAsync(project, preTranslate: true, CancellationToken.None);
+ Assert.AreEqual(TranslationEngine01, actual);
+ Assert.AreEqual(TranslationEngine01, env.ProjectSecrets.Get(Project01).ServalData?.PreTranslationEngineId);
}
[Test]
- public async Task BuildProjectAsync_CreatesServalProjectIfRemoved()
+ public async Task CreateServalProjectAsync_ExistingServalDataInProjectSecretsForSmtTranslation()
{
// Set up test environment
var env = new TestEnvironment();
- env.TranslationEnginesClient.GetAsync(TranslationEngine02, CancellationToken.None)
- .Throws(ServalApiExceptions.NotFound);
- string sourceLanguage = env.Projects.Get(Project02).TranslateConfig.Source!.WritingSystem.Tag;
- string targetLanguage = env.Projects.Get(Project02).WritingSystem.Tag;
- Assert.AreNotEqual(sourceLanguage, targetLanguage);
+ await env.SetupProjectSecretAsync(Project01, new ServalData());
+ var project = new SFProject { Id = Project01 };
+ env.Service.Configure().GetSourceLanguage(project).Returns("en");
+ env.Service.Configure().GetTargetLanguageAsync(project).Returns(Task.FromResult("de"));
+ env.TranslationEnginesClient.CreateAsync(Arg.Any())
+ .Returns(Task.FromResult(new TranslationEngine { Id = TranslationEngine01 }));
// SUT
- await env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = Project02 },
+ string actual = await env.Service.CreateServalProjectAsync(
+ project,
preTranslate: false,
CancellationToken.None
);
-
- await env
- .TranslationEnginesClient.Received()
- .CreateAsync(
- Arg.Is(t =>
- t.SourceLanguage == sourceLanguage && t.TargetLanguage == targetLanguage
- ),
- CancellationToken.None
- );
+ Assert.AreEqual(TranslationEngine01, actual);
+ Assert.AreEqual(TranslationEngine01, env.ProjectSecrets.Get(Project01).ServalData?.TranslationEngineId);
}
[Test]
- public void BuildProjectAsync_DirectoryNotFound()
+ public async Task CreateServalProjectAsync_ExistingSmtTranslationProject()
{
// Set up test environment
- var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false });
- env.FileSystemService.DirectoryExists(Arg.Any()).Returns(false);
+ var env = new TestEnvironment();
+ var project = new SFProject { Id = Project01 };
+ await env.SetupProjectSecretAsync(Project01, new ServalData { TranslationEngineId = TranslationEngine01 });
// SUT
- Assert.ThrowsAsync(
- () =>
- env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = Project01 },
- preTranslate: true,
- CancellationToken.None
- )
+ string actual = await env.Service.CreateServalProjectAsync(
+ project,
+ preTranslate: false,
+ CancellationToken.None
);
+ Assert.AreEqual(TranslationEngine01, actual);
+ await env.TranslationEnginesClient.DidNotReceiveWithAnyArgs().CreateAsync(Arg.Any());
}
[Test]
- public async Task BuildProjectAsync_SpecifiesTheSameSourceAndTargetLanguageForEcho()
+ public async Task CreateServalProjectAsync_NoServalDataInProjectSecretsForPreTranslation()
{
// Set up test environment
- var env = new TestEnvironment(new TestEnvironmentOptions { UseEchoForPreTranslation = true });
- string sourceLanguage = env.Projects.Get(Project01).TranslateConfig.Source!.WritingSystem.Tag;
- string targetLanguage = env.Projects.Get(Project01).WritingSystem.Tag;
- Assert.AreNotEqual(sourceLanguage, targetLanguage);
+ var env = new TestEnvironment();
+ var project = new SFProject { Id = Project01 };
+ env.Service.Configure().GetSourceLanguage(project).Returns("en");
+ env.Service.Configure().GetTargetLanguageAsync(project).Returns(Task.FromResult("de"));
+ env.TranslationEnginesClient.CreateAsync(Arg.Any())
+ .Returns(Task.FromResult(new TranslationEngine { Id = TranslationEngine01 }));
// SUT
- await env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = Project01 },
- preTranslate: true,
- CancellationToken.None
- );
-
- await env
- .TranslationEnginesClient.Received()
- .CreateAsync(
- Arg.Is(t =>
- t.SourceLanguage == sourceLanguage && t.TargetLanguage == sourceLanguage
- ),
- CancellationToken.None
- );
+ string actual = await env.Service.CreateServalProjectAsync(project, preTranslate: true, CancellationToken.None);
+ Assert.AreEqual(TranslationEngine01, actual);
+ Assert.AreEqual(TranslationEngine01, env.ProjectSecrets.Get(Project01).ServalData?.PreTranslationEngineId);
}
[Test]
- public async Task BuildProjectAsync_CreatesTranslationEngineIfNoTranslationEngineId()
+ public async Task CreateServalProjectAsync_NoServalDataInProjectSecretsForSmtTranslation()
{
// Set up test environment
var env = new TestEnvironment();
+ var project = new SFProject { Id = Project01 };
+ env.Service.Configure().GetSourceLanguage(project).Returns("en");
+ env.Service.Configure().GetTargetLanguageAsync(project).Returns(Task.FromResult("de"));
+ env.TranslationEnginesClient.CreateAsync(Arg.Any())
+ .Returns(Task.FromResult(new TranslationEngine { Id = TranslationEngine01 }));
// SUT
- await env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = Project01 },
+ string actual = await env.Service.CreateServalProjectAsync(
+ project,
preTranslate: false,
CancellationToken.None
);
-
- await env
- .TranslationEnginesClient.Received()
- .StartBuildAsync(TranslationEngine01, Arg.Any(), CancellationToken.None);
- await env
- .TranslationEnginesClient.Received()
- .CreateAsync(Arg.Any(), CancellationToken.None);
+ Assert.AreEqual(TranslationEngine01, actual);
+ Assert.AreEqual(TranslationEngine01, env.ProjectSecrets.Get(Project01).ServalData?.TranslationEngineId);
}
[Test]
- public async Task BuildProjectAsync_CreatesTranslationEngineOnServalIfMissing()
+ public void CreateServalProjectAsync_NoTranslationEngineIdFromServal()
{
// Set up test environment
- var env = new TestEnvironment(
- new TestEnvironmentOptions { LocalSourceTextHasData = true, LocalTargetTextHasData = true }
- );
+ var env = new TestEnvironment();
+ var project = new SFProject { Id = Project01 };
+ env.Service.Configure().GetSourceLanguage(project).Returns("en");
+ env.Service.Configure().GetTargetLanguageAsync(project).Returns(Task.FromResult("de"));
+ env.TranslationEnginesClient.CreateAsync(Arg.Any())
+ .Returns(Task.FromResult(new TranslationEngine()));
- // Make the Serval API return the error code for a missing translation engine
- env.TranslationEnginesClient.GetAsync(TranslationEngine02, CancellationToken.None)
- .Throws(ServalApiExceptions.NotFound);
+ // SUT
+ Assert.ThrowsAsync(
+ () => env.Service.CreateServalProjectAsync(project, preTranslate: true, CancellationToken.None)
+ );
+ }
- // Return the correctly created corpus
- env.TranslationEnginesClient.GetCorpusAsync(TranslationEngine01, Arg.Any(), CancellationToken.None)
- .Returns(args =>
- Task.FromResult(
- new TranslationCorpus
- {
- Id = args.ArgAt(1),
- SourceLanguage = "en",
- TargetLanguage = "en_US",
- }
- )
- );
+ [Test]
+ public async Task CreateZipFileFromParatextDirectoryAsync_Success()
+ {
+ // Set up test environment
+ var env = new TestEnvironment();
+ MemoryStream outputStream = new MemoryStream();
// SUT
- await env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = Project02 },
- preTranslate: false,
- CancellationToken.None
- );
+ await env.Service.CreateZipFileFromParatextDirectoryAsync(Project01, outputStream, CancellationToken.None);
- await env
- .TranslationEnginesClient.Received()
- .StartBuildAsync(TranslationEngine01, Arg.Any(), CancellationToken.None);
- await env
- .TranslationEnginesClient.Received()
- .CreateAsync(Arg.Any(), CancellationToken.None);
+ // Validate the zip file
+ outputStream.Seek(0, SeekOrigin.Begin);
+ using var archive = new ZipArchive(outputStream, ZipArchiveMode.Read);
+ Assert.AreEqual(1, archive.Entries.Count);
+ Assert.AreEqual("file", archive.Entries[0].FullName);
}
[Test]
- public async Task BuildProjectAsync_CreatesDataFilesOnServalIfMissing_Paratext()
+ public void CreateZipFileFromParatextDirectoryAsync_ThrowsExceptionWhenProjectDirectoryMissing()
{
// Set up test environment
- var env = new TestEnvironment(
- new TestEnvironmentOptions { LocalSourceTextHasData = true, LocalTargetTextHasData = true }
+ var env = new TestEnvironment();
+ env.FileSystemService.DirectoryExists(Arg.Any()).Returns(false);
+ MemoryStream outputStream = new MemoryStream();
+
+ // SUT
+ Assert.ThrowsAsync(
+ () => env.Service.CreateZipFileFromParatextDirectoryAsync(Project01, outputStream, CancellationToken.None)
);
- await env.SetDataInSync(Project02, preTranslate: true, uploadParatextZipFile: true);
+ }
- // Make the Serval API return the error code for a missing data file
- env.DataFilesClient.UpdateAsync(Arg.Any(), Arg.Any(), CancellationToken.None)
- .Throws(ServalApiExceptions.NotFound);
+ [Test]
+ public async Task DeleteAllCorporaAndFilesAsync_DoesNotCrashWhenCorporaNotFound()
+ {
+ // Set up test environment
+ var env = new TestEnvironment();
+ ServalApiException ex = ServalApiExceptions.NotFound;
+ env.CorporaClient.DeleteAsync(Corpus01).ThrowsAsync(ex);
// SUT
- await env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = Project02 },
- preTranslate: true,
+ await env.Service.DeleteAllCorporaAndFilesAsync(
+ [new ServalCorpusFile { CorpusId = Corpus01, FileId = File01 }],
+ Project01,
CancellationToken.None
);
- await env
- .TranslationEnginesClient.Received()
- .StartBuildAsync(TranslationEngine02, Arg.Any(), CancellationToken.None);
- await env
- .DataFilesClient.Received()
- .CreateAsync(Arg.Any(), FileFormat.Paratext, Arg.Any(), CancellationToken.None);
+ env.MockLogger.AssertHasEvent(logEvent =>
+ logEvent.Exception == ex && logEvent.LogLevel == LogLevel.Information
+ );
}
[Test]
- public async Task BuildProjectAsync_CreatesDataFilesOnServalIfMissing_Text()
+ public async Task DeleteAllCorporaAndFilesAsync_DoesNotCrashWhenFileNotFound()
{
// Set up test environment
- var env = new TestEnvironment(
- new TestEnvironmentOptions { LocalSourceTextHasData = true, LocalTargetTextHasData = true }
- );
- await env.SetDataInSync(Project02, preTranslate: true);
-
- // Make the Serval API return the error code for a missing data file
- env.DataFilesClient.GetAsync(Arg.Any(), CancellationToken.None).Throws(ServalApiExceptions.NotFound);
+ var env = new TestEnvironment();
+ ServalApiException ex = ServalApiExceptions.NotFound;
+ env.DataFilesClient.DeleteAsync(File01).ThrowsAsync(ex);
// SUT
- await env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = Project02 },
- preTranslate: true,
+ await env.Service.DeleteAllCorporaAndFilesAsync(
+ [new ServalCorpusFile { CorpusId = Corpus01, FileId = File01 }],
+ Project01,
CancellationToken.None
);
- await env
- .TranslationEnginesClient.Received()
- .StartBuildAsync(TranslationEngine02, Arg.Any(), CancellationToken.None);
- await env
- .DataFilesClient.Received()
- .CreateAsync(Arg.Any(), FileFormat.Paratext, Arg.Any(), CancellationToken.None);
+ env.MockLogger.AssertHasEvent(logEvent =>
+ logEvent.Exception == ex && logEvent.LogLevel == LogLevel.Information
+ );
}
[Test]
- public async Task BuildProjectAsync_GetsTheSourceAndTargetLanguageIfMissing()
+ public async Task DeleteAllCorporaAndFilesAsync_Success()
{
// Set up test environment
- var env = new TestEnvironment(
- new TestEnvironmentOptions { LocalSourceTextHasData = true, LocalTargetTextHasData = true }
- );
- SFProject project = env.Projects.Get(Project03);
- Assert.IsNull(project.WritingSystem.Tag);
- Assert.IsNull(project.TranslateConfig.Source?.WritingSystem.Tag);
+ var env = new TestEnvironment();
// SUT
- await env.Service.BuildProjectAsync(
- User01,
- new BuildConfig { ProjectId = Project03 },
- preTranslate: false,
+ await env.Service.DeleteAllCorporaAndFilesAsync(
+ [new ServalCorpusFile { CorpusId = Corpus01, FileId = File01 }],
+ Project01,
CancellationToken.None
);
- await env
- .TranslationEnginesClient.Received()
- .StartBuildAsync(TranslationEngine01, Arg.Any