diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/machine-api/build-dto.ts b/src/SIL.XForge.Scripture/ClientApp/src/app/machine-api/build-dto.ts index 6884f7fc4d..c506b2c9ed 100644 --- a/src/SIL.XForge.Scripture/ClientApp/src/app/machine-api/build-dto.ts +++ b/src/SIL.XForge.Scripture/ClientApp/src/app/machine-api/build-dto.ts @@ -14,6 +14,7 @@ export interface ServalBuildAdditionalInfo { buildId: string; corporaIds?: string[]; dateFinished?: string; + parallelCorporaIds?: string[]; step: number; translationEngineId: string; } diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.spec.ts b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.spec.ts index 1e3343a952..3d9bb62f32 100644 --- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.spec.ts +++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.spec.ts @@ -287,9 +287,11 @@ describe('DraftGenerationStepsComponent', () => { fixture.detectChanges(); expect(component.done.emit).toHaveBeenCalledWith({ - translationBooks, - trainingDataFiles, trainingBooks: trainingBooks.filter(book => !translationBooks.includes(book)), + trainingDataFiles, + trainingScriptureRanges: [], + translationBooks, + translationScriptureRanges: [], fastTraining: false } as DraftGenerationStepsResult); expect(component.isStepsCompleted).toBe(true); @@ -402,7 +404,9 @@ describe('DraftGenerationStepsComponent', () => { expect(component.done.emit).toHaveBeenCalledWith({ trainingBooks, trainingDataFiles, + trainingScriptureRanges: [], translationBooks, + translationScriptureRanges: [], fastTraining: true } as DraftGenerationStepsResult); expect(generateDraftButton['disabled']).toBe(true); diff --git 
a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.ts b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.ts index a1da09a380..f549871f50 100644 --- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.ts +++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation-steps/draft-generation-steps.component.ts @@ -20,6 +20,7 @@ import { BookMultiSelectComponent } from '../../../shared/book-multi-select/book import { SharedModule } from '../../../shared/shared.module'; import { NllbLanguageService } from '../../nllb-language.service'; import { ConfirmSourcesComponent } from '../confirm-sources/confirm-sources.component'; +import { ProjectScriptureRange } from '../draft-generation'; import { DraftSource, DraftSourcesService } from '../draft-sources.service'; import { TrainingDataMultiSelectComponent } from '../training-data/training-data-multi-select.component'; import { TrainingDataUploadDialogComponent } from '../training-data/training-data-upload-dialog.component'; @@ -29,8 +30,10 @@ export interface DraftGenerationStepsResult { trainingBooks: number[]; trainingDataFiles: string[]; trainingScriptureRange?: string; + trainingScriptureRanges: ProjectScriptureRange[]; translationBooks: number[]; translationScriptureRange?: string; + translationScriptureRanges: ProjectScriptureRange[]; fastTraining: boolean; } @@ -263,8 +266,10 @@ export class DraftGenerationStepsComponent extends SubscriptionDisposable implem this.isStepsCompleted = true; this.done.emit({ trainingBooks: this.userSelectedTrainingBooks, + trainingScriptureRanges: [], trainingDataFiles: this.selectedTrainingDataIds, translationBooks: this.userSelectedTranslateBooks, + translationScriptureRanges: [], fastTraining: this.fastTraining }); } diff --git 
a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.html b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.html index 907b400c18..2bf640f292 100644 --- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.html +++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.html @@ -254,6 +254,10 @@

Corpora Ids: {{ draftJob?.additionalInfo?.corporaIds?.join(", ") ?? "unknown" }} +
+ Parallel Corpora Ids: + {{ draftJob?.additionalInfo?.parallelCorporaIds?.join(", ") ?? "unknown" }} +
diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.spec.ts b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.spec.ts index 1f58da4d25..c5b1e1efc4 100644 --- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.spec.ts +++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.spec.ts @@ -1976,7 +1976,9 @@ describe('DraftGenerationComponent', () => { env.component.startBuild({ trainingBooks: [], trainingDataFiles: [], + trainingScriptureRanges: [], translationBooks: [], + translationScriptureRanges: [], fastTraining: false, projectId: projectId }); @@ -1986,7 +1988,9 @@ describe('DraftGenerationComponent', () => { projectId: projectId, trainingBooks: [], trainingDataFiles: [], + trainingScriptureRanges: [], translationBooks: [], + translationScriptureRanges: [], fastTraining: false }); env.startedOrActiveBuild$.next(buildDto); @@ -2003,7 +2007,9 @@ describe('DraftGenerationComponent', () => { env.component.startBuild({ trainingBooks: [], trainingDataFiles: [], + trainingScriptureRanges: [], translationBooks: [], + translationScriptureRanges: [], fastTraining: false, projectId: projectId }); @@ -2012,7 +2018,9 @@ describe('DraftGenerationComponent', () => { projectId: projectId, trainingBooks: [], trainingDataFiles: [], + trainingScriptureRanges: [], translationBooks: [], + translationScriptureRanges: [], fastTraining: false }); verify(mockDialogRef.getState()).never(); @@ -2028,7 +2036,9 @@ describe('DraftGenerationComponent', () => { env.component.startBuild({ trainingBooks: [], trainingDataFiles: [], + trainingScriptureRanges: [], translationBooks: [], + translationScriptureRanges: [], fastTraining: false, projectId: projectId }); @@ -2037,7 +2047,9 @@ describe('DraftGenerationComponent', () => { projectId: projectId, trainingBooks: [], trainingDataFiles: [], + 
trainingScriptureRanges: [], translationBooks: [], + translationScriptureRanges: [], fastTraining: false }); verify(mockDialogRef.getState()).never(); @@ -2053,7 +2065,9 @@ describe('DraftGenerationComponent', () => { env.component.startBuild({ trainingBooks: [], trainingDataFiles: [], + trainingScriptureRanges: [], translationBooks: [], + translationScriptureRanges: [], fastTraining: false, projectId: projectId }); @@ -2062,7 +2076,9 @@ describe('DraftGenerationComponent', () => { projectId: projectId, trainingBooks: [], trainingDataFiles: [], + trainingScriptureRanges: [], translationBooks: [], + translationScriptureRanges: [], fastTraining: false }); verify(mockDialogRef.getState()).never(); @@ -2079,7 +2095,9 @@ describe('DraftGenerationComponent', () => { env.component.startBuild({ trainingBooks: [], trainingDataFiles: [], + trainingScriptureRanges: [], translationBooks: [], + translationScriptureRanges: [], fastTraining: false, projectId: projectId }); @@ -2088,7 +2106,9 @@ describe('DraftGenerationComponent', () => { projectId: projectId, trainingBooks: [], trainingDataFiles: [], + trainingScriptureRanges: [], translationBooks: [], + translationScriptureRanges: [], fastTraining: false }); verify(mockDialogRef.close()).once(); @@ -2104,7 +2124,9 @@ describe('DraftGenerationComponent', () => { env.component.startBuild({ trainingBooks: [], trainingDataFiles: [], + trainingScriptureRanges: [], translationBooks: [], + translationScriptureRanges: [], fastTraining: false, projectId: projectId }); @@ -2114,7 +2136,9 @@ describe('DraftGenerationComponent', () => { projectId: projectId, trainingBooks: [], trainingDataFiles: [], + trainingScriptureRanges: [], translationBooks: [], + translationScriptureRanges: [], fastTraining: false }); expect(mockAuthService.requestParatextCredentialUpdate).toHaveBeenCalled(); diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.ts 
b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.ts index a8d0fd3cb0..ac43d299db 100644 --- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.ts +++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.component.ts @@ -455,8 +455,10 @@ export class DraftGenerationComponent extends DataLoadingComponent implements On trainingBooks: result.trainingBooks, trainingDataFiles: result.trainingDataFiles, trainingScriptureRange: result.trainingScriptureRange, + trainingScriptureRanges: result.trainingScriptureRanges, translationBooks: result.translationBooks, translationScriptureRange: result.translationScriptureRange, + translationScriptureRanges: result.translationScriptureRanges, fastTraining: result.fastTraining }); } diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.service.spec.ts b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.service.spec.ts index a6fa6a1088..1599ff8542 100644 --- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.service.spec.ts +++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.service.spec.ts @@ -43,7 +43,9 @@ describe('DraftGenerationService', () => { projectId, trainingBooks: [], trainingDataFiles: [], + translationScriptureRanges: [], translationBooks: [], + trainingScriptureRanges: [], fastTraining: false }; const buildDto: BuildDto = { diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.ts b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.ts index 615b18e306..8e08feafdb 100644 --- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.ts +++ 
b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-generation.ts @@ -1,7 +1,7 @@ import { InjectionToken } from '@angular/core'; import { BuildStates } from '../../machine-api/build-states'; -/* +/** * The build configuration for a pre-translation build. */ export interface BuildConfig { @@ -9,11 +9,21 @@ export interface BuildConfig { trainingBooks: number[]; trainingDataFiles: string[]; trainingScriptureRange?: string; + trainingScriptureRanges: ProjectScriptureRange[]; translationBooks: number[]; translationScriptureRange?: string; + translationScriptureRanges: ProjectScriptureRange[]; fastTraining: boolean; } +/** + * A per-project scripture range. + */ +export interface ProjectScriptureRange { + projectId: string; + scriptureRange: string; +} + /** * Dictionary of 'segmentRef -> segment text'. */ diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-information/draft-information.component.html b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-information/draft-information.component.html index bd325e4d1b..bc11884ca9 100644 --- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-information/draft-information.component.html +++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/draft-generation/draft-information/draft-information.component.html @@ -1,6 +1,7 @@ @if (canShowAdditionalInfo) {
Build Id: {{ draftJob?.additionalInfo?.buildId }}
Corpora Ids: {{ draftJob?.additionalInfo?.corporaIds?.join(", ") }}
+
Parallel Corpora Ids: {{ draftJob?.additionalInfo?.parallelCorporaIds?.join(", ") }}
Date Finished: {{ draftJob?.additionalInfo?.dateFinished?.toLocaleString() }}
Message: {{ draftJob?.message }}
Percent Completed: {{ draftJob?.percentCompleted }}
diff --git a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/editor/editor-draft/editor-draft.component.spec.ts b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/editor/editor-draft/editor-draft.component.spec.ts index 777e8e42af..81b7100eba 100644 --- a/src/SIL.XForge.Scripture/ClientApp/src/app/translate/editor/editor-draft/editor-draft.component.spec.ts +++ b/src/SIL.XForge.Scripture/ClientApp/src/app/translate/editor/editor-draft/editor-draft.component.spec.ts @@ -16,7 +16,6 @@ import { TestRealtimeModule } from 'xforge-common/test-realtime.module'; import { configureTestingModule, TestTranslocoModule } from 'xforge-common/test-utils'; import { SFProjectProfileDoc } from '../../../core/models/sf-project-profile-doc'; import { SF_TYPE_REGISTRY } from '../../../core/models/sf-type-registry'; -import { TextDocService } from '../../../core/text-doc.service'; import { SharedModule } from '../../../shared/shared.module'; import { EDITOR_READY_TIMEOUT } from '../../../shared/text/text.component'; import { DraftSegmentMap } from '../../draft-generation/draft-generation'; @@ -29,7 +28,6 @@ const mockActivatedProjectService = mock(ActivatedProjectService); const mockDraftHandlingService = mock(DraftHandlingService); const mockI18nService = mock(I18nService); const mockDialogService = mock(DialogService); -const mockTextDocService = mock(TextDocService); describe('EditorDraftComponent', () => { let fixture: ComponentFixture; @@ -52,8 +50,7 @@ describe('EditorDraftComponent', () => { { provide: DraftHandlingService, useMock: mockDraftHandlingService }, { provide: I18nService, useMock: mockI18nService }, { provide: OnlineStatusService, useClass: TestOnlineStatusService }, - { provide: DialogService, useMock: mockDialogService }, - { provide: TextDocService, useMock: mockTextDocService } + { provide: DialogService, useMock: mockDialogService } ] })); diff --git a/src/SIL.XForge.Scripture/ClientApp/src/assets/i18n/non_checking_en.json 
b/src/SIL.XForge.Scripture/ClientApp/src/assets/i18n/non_checking_en.json index 3d835bea00..a802f9a6d4 100644 --- a/src/SIL.XForge.Scripture/ClientApp/src/assets/i18n/non_checking_en.json +++ b/src/SIL.XForge.Scripture/ClientApp/src/assets/i18n/non_checking_en.json @@ -194,7 +194,6 @@ "generate_draft_button": "Generate draft", "generate_forward_translation_drafts_header": "Generate translation drafts", "generate_new_draft": "New draft", - "go_to_draft_viewer": "Go to [link:draftViewerUrl]draft viewer[/link] to preview and apply the generated draft to chapters of your choice.", "improved_learning_rate_notice": "Drafting is now much faster! Most drafts will now take about {1}{2} hours to complete instead of {3}{4}. Draft quality should be about the same as before, but please {5}contact us{6} if you notice any issues.", "info_alert_different_additional_training_and_source_language": "The language for your additional training text ({{ additionalTrainingSourceLanguageDisplayName }}) must be the same as the training source language ({{ alternateTrainingSourceLanguageDisplayName }}). Select a different additional training text on the [link:projectSettingsUrl]settings page[/link].", "info_alert_different_training_and_source_language": "The language for your alternate training text ({{ alternateTrainingSourceLanguageDisplayName }}) must be the same as the source language ({{ sourceLanguageDisplayName }}). Select a different alternate training text on the [link:projectSettingsUrl]settings page[/link].", diff --git a/src/SIL.XForge.Scripture/Models/BuildConfig.cs b/src/SIL.XForge.Scripture/Models/BuildConfig.cs index c8c22a65f7..3fd3e85100 100644 --- a/src/SIL.XForge.Scripture/Models/BuildConfig.cs +++ b/src/SIL.XForge.Scripture/Models/BuildConfig.cs @@ -19,6 +19,7 @@ public class BuildConfig /// /// The numbers of the books to use as the source texts for training. /// + /// This property is for legacy client use. /// You should not set this property and at the same time. 
/// public HashSet TrainingBooks { get; set; } = []; @@ -34,16 +35,26 @@ public class BuildConfig /// /// The book ids and chapter numbers separated by semicolons. /// + /// This property is for legacy client use. /// See https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range for syntax. /// You should not set this property and at the same time. /// public string? TrainingScriptureRange { get; set; } + /// + /// Gets or sets the per-project books and chapters to use for training. + /// + /// + /// A list containing the project identifiers and scripture ranges. + /// + public HashSet TrainingScriptureRanges { get; set; } = []; + /// /// Gets or sets the books to use for translation. /// /// The numbers of the books to use as the source texts for training. /// + /// This property is for legacy client use. /// You should not set this property and at the same time. /// public HashSet TranslationBooks { get; set; } = []; @@ -53,11 +64,20 @@ public class BuildConfig /// /// The book ids and chapter numbers separated by semicolons. /// + /// This property is for legacy client use. /// See https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range for syntax. /// You should not set this property and at the same time. /// public string? TranslationScriptureRange { get; set; } + /// + /// Gets or sets the per-project books and chapters to use for translation. + /// + /// + /// A list containing the project identifiers and scripture ranges. + /// + public HashSet TranslationScriptureRanges { get; set; } = []; + /// /// Gets or sets the project identifier. /// diff --git a/src/SIL.XForge.Scripture/Models/DraftConfig.cs b/src/SIL.XForge.Scripture/Models/DraftConfig.cs index a241d601fe..ba1937f5a1 100644 --- a/src/SIL.XForge.Scripture/Models/DraftConfig.cs +++ b/src/SIL.XForge.Scripture/Models/DraftConfig.cs @@ -13,8 +13,10 @@ public class DraftConfig public TranslateSource? 
AlternateTrainingSource { get; set; } public IList LastSelectedTrainingBooks { get; set; } = []; public string? LastSelectedTrainingScriptureRange { get; set; } + public IList LastSelectedTrainingScriptureRanges { get; set; } = []; public IList LastSelectedTrainingDataFiles { get; set; } = []; public IList LastSelectedTranslationBooks { get; set; } = []; public string? LastSelectedTranslationScriptureRange { get; set; } + public IList LastSelectedTranslationScriptureRanges { get; set; } = []; public string? ServalConfig { get; set; } } diff --git a/src/SIL.XForge.Scripture/Models/MachineApi.cs b/src/SIL.XForge.Scripture/Models/MachineApi.cs index c645133518..f083e4701f 100644 --- a/src/SIL.XForge.Scripture/Models/MachineApi.cs +++ b/src/SIL.XForge.Scripture/Models/MachineApi.cs @@ -9,6 +9,7 @@ namespace SIL.XForge.Scripture.Models; public static class MachineApi { public const string HttpClientName = "machine_api"; + public const string TokenClientName = "machine_api_token"; public const string Namespace = "machine-api/v3"; public const string StartBuild = "translation/builds"; public const string GetBuild = "translation/builds/id:{sfProjectId}.{buildId?}"; diff --git a/src/SIL.XForge.Scripture/Models/ProjectScriptureRange.cs b/src/SIL.XForge.Scripture/Models/ProjectScriptureRange.cs new file mode 100644 index 0000000000..82e986452c --- /dev/null +++ b/src/SIL.XForge.Scripture/Models/ProjectScriptureRange.cs @@ -0,0 +1,22 @@ +namespace SIL.XForge.Scripture.Models; + +/// +/// A scripture range for a specific project. +/// This is used by . +/// +public record ProjectScriptureRange +{ + /// + /// The project identifier. + /// + public string ProjectId { get; set; } = string.Empty; + + /// + /// The scripture range. + /// + /// The book ids and chapter numbers separated by semicolons. + /// + /// See https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range for syntax. 
+ /// + public string ScriptureRange { get; set; } = string.Empty; +} diff --git a/src/SIL.XForge.Scripture/Models/ServalAdditionalTrainingData.cs b/src/SIL.XForge.Scripture/Models/ServalAdditionalTrainingData.cs new file mode 100644 index 0000000000..96ec24911f --- /dev/null +++ b/src/SIL.XForge.Scripture/Models/ServalAdditionalTrainingData.cs @@ -0,0 +1,32 @@ +using System.Collections.Generic; + +namespace SIL.XForge.Scripture.Models; + +/// +/// Configuration for uploading Additional Training Data to Serval. +/// +public class ServalAdditionalTrainingData +{ + /// + /// Gets or sets the Parallel Corpus identifier. + /// + public string ParallelCorpusId { get; set; } = string.Empty; + + /// + /// Gets or sets the identifier of the corpus to be used as the source in the Parallel Corpus. + /// + public string SourceCorpusId { get; set; } = string.Empty; + + /// + /// Gets or sets the identifier of the corpus to be used as the target in the Parallel Corpus. + /// + public string TargetCorpusId { get; set; } = string.Empty; + + /// + /// Gets or sets the corpus files uploaded to Serval. + /// + /// + /// The files in both the source and target corpora. + /// + public List CorpusFiles { get; set; } = []; +} diff --git a/src/SIL.XForge.Scripture/Models/ServalBuildAdditionalInfo.cs b/src/SIL.XForge.Scripture/Models/ServalBuildAdditionalInfo.cs index b01c7d95ef..2a1d7a248a 100644 --- a/src/SIL.XForge.Scripture/Models/ServalBuildAdditionalInfo.cs +++ b/src/SIL.XForge.Scripture/Models/ServalBuildAdditionalInfo.cs @@ -5,9 +5,10 @@ namespace SIL.XForge.Scripture.Models; public class ServalBuildAdditionalInfo { - public string BuildId { get; set; } = string.Empty; - public IEnumerable? CorporaIds { get; set; } - public DateTimeOffset? DateFinished { get; set; } - public int Step { get; set; } - public string TranslationEngineId { get; set; } = string.Empty; + public string BuildId { get; init; } = string.Empty; + public IEnumerable? 
CorporaIds { get; init; } + public DateTimeOffset? DateFinished { get; init; } + public IEnumerable? ParallelCorporaIds { get; init; } + public int Step { get; init; } + public string TranslationEngineId { get; init; } = string.Empty; } diff --git a/src/SIL.XForge.Scripture/Models/ServalCorpus.cs b/src/SIL.XForge.Scripture/Models/ServalCorpus.cs index dae8a48dac..7bb189abce 100644 --- a/src/SIL.XForge.Scripture/Models/ServalCorpus.cs +++ b/src/SIL.XForge.Scripture/Models/ServalCorpus.cs @@ -50,7 +50,7 @@ public class ServalCorpus /// /// The source corpus files. /// - public List SourceFiles { get; set; } = new List(); + public List SourceFiles { get; set; } = []; /// /// Gets or sets the target files uploaded to Serval. @@ -58,5 +58,5 @@ public class ServalCorpus /// /// The target corpus files. /// - public List TargetFiles { get; set; } = new List(); + public List TargetFiles { get; set; } = []; } diff --git a/src/SIL.XForge.Scripture/Models/ServalCorpusFile.cs b/src/SIL.XForge.Scripture/Models/ServalCorpusFile.cs index 12ec6ea9c3..557a68a2d4 100644 --- a/src/SIL.XForge.Scripture/Models/ServalCorpusFile.cs +++ b/src/SIL.XForge.Scripture/Models/ServalCorpusFile.cs @@ -5,6 +5,11 @@ namespace SIL.XForge.Scripture.Models; /// public class ServalCorpusFile { + /// + /// Gets or sets the corpus this file is associated with. + /// + public string CorpusId { get; set; } = string.Empty; + /// /// The MD5 Hash of the corpus file's contents. /// This is used to see if the file has changed since its last upload to Serval. @@ -16,6 +21,11 @@ public class ServalCorpusFile /// public string FileId { get; set; } = string.Empty; + /// + /// Gets or sets the language of the file and corpus. + /// + public string LanguageCode { get; set; } = string.Empty; + /// /// Gets or sets the project this file is from. For example, the mixed source project. /// @@ -25,8 +35,10 @@ public class ServalCorpusFile /// Gets or sets the text identifier this file represents. 
/// /// - /// For text files, this will be in format bookNum_chapterNum. - /// For Paratext files, this will be the target project id. + /// Notes: + /// - For text files, this will be in format bookNum_chapterNum. + /// - For Paratext files, this will be the target project id. + /// - When using mixed sources, TextId must be the same for each file to mix the sources together. /// public string TextId { get; set; } = string.Empty; } diff --git a/src/SIL.XForge.Scripture/Models/ServalCorpusSync.cs b/src/SIL.XForge.Scripture/Models/ServalCorpusSync.cs new file mode 100644 index 0000000000..9940f09fff --- /dev/null +++ b/src/SIL.XForge.Scripture/Models/ServalCorpusSync.cs @@ -0,0 +1,33 @@ +namespace SIL.XForge.Scripture.Models; + +/// +/// Serval Corpus Synchronization Information. +/// +/// +/// This class is used by +/// to determine the pre-translate and train on corpus configuration. +/// +public class ServalCorpusSyncInfo +{ + /// + /// Gets or sets the corpus that was synchronized. + /// + public string CorpusId { get; init; } = string.Empty; + + /// + /// Gets or sets whether this corpus is a source corpus. + /// + /// true if a source corpus; otherwise, + public bool IsSource { get; init; } + + /// + /// Gets or sets the identifier of the parallel corpus + /// that was synchronized for this corpus. + /// + public string ParallelCorpusId { get; init; } = string.Empty; + + /// + /// Gets or sets the project that was synchronized for this corpus. + /// + public string ProjectId { get; init; } = string.Empty; +} diff --git a/src/SIL.XForge.Scripture/Models/ServalData.cs b/src/SIL.XForge.Scripture/Models/ServalData.cs index 60ab2e4a89..94ce977375 100644 --- a/src/SIL.XForge.Scripture/Models/ServalData.cs +++ b/src/SIL.XForge.Scripture/Models/ServalData.cs @@ -4,15 +4,15 @@ namespace SIL.XForge.Scripture.Models; /// -/// Serval Data. +/// Serval Configuration Data. /// public class ServalData { /// - /// Gets or sets the SMT Translation Engine Id for the project. 
+ /// Gets or sets the SMT Translation Engine identifier for the project. /// /// - /// The SMT Translation Engine Id. + /// The SMT Translation Engine identifier. /// /// /// The user should not interact with the translation engine directly by ID. @@ -32,7 +32,7 @@ public class ServalData public string? TranslationErrorMessage { get; set; } /// - /// Gets or sets the Hangfire Job Id for the Translation job. + /// Gets or sets the Hangfire Job identifier for the Translation job. /// public string? TranslationJobId { get; set; } @@ -50,10 +50,10 @@ public class ServalData public DateTime? TranslationQueuedAt { get; set; } /// - /// Gets or sets the NMT Translation Engine Id for the project. + /// Gets or sets the NMT Translation Engine identifier for the project. /// /// - /// The NMT Translation Engine Id. + /// The NMT Translation Engine identifier. /// public string? PreTranslationEngineId { get; set; } @@ -70,10 +70,27 @@ public class ServalData public string? PreTranslationErrorMessage { get; set; } /// - /// Gets or sets the Hangfire Job Id for the Pre-Translation job. + /// Gets or sets the Hangfire Job identifier for the Pre-Translation job. /// public string? PreTranslationJobId { get; set; } + /// + /// Gets or sets the Identifier of the Parallel Corpus to be used in the PreTranslate section of the + /// for pre-translation (NMT) builds. + /// + public string? ParallelCorpusIdForPreTranslate { get; set; } + + /// + /// Gets or sets the Identifier of the Parallel Corpus to be used for translation (SMT) builds. + /// + public string? ParallelCorpusIdForSmt { get; set; } + + /// + /// Gets or sets the Identifier of the Parallel Corpus to be used in the TrainOn section of the + /// for pre-translation (NMT) builds. + /// + public string? ParallelCorpusIdForTrainOn { get; set; } + /// /// Gets or sets the date and time that the pre-translation build was queued. /// @@ -110,5 +127,18 @@ public class ServalData /// /// The dictionary key is the corpus ID. 
/// - public Dictionary Corpora { get; set; } = new Dictionary(); + public Dictionary? Corpora { get; set; } + + /// + /// Gets or sets the additional training data configuration for pre-translation (NMT) builds. + /// + public ServalAdditionalTrainingData? AdditionalTrainingData { get; set; } + + /// + /// Gets or sets the corpus and data files configuration. + /// + /// + /// These are shared by translation (SMT) and pre-translation (NMT) translation engines. + /// + public List CorpusFiles { get; set; } = []; } diff --git a/src/SIL.XForge.Scripture/SIL.XForge.Scripture.csproj b/src/SIL.XForge.Scripture/SIL.XForge.Scripture.csproj index 8b0617a599..d4c49194f2 100644 --- a/src/SIL.XForge.Scripture/SIL.XForge.Scripture.csproj +++ b/src/SIL.XForge.Scripture/SIL.XForge.Scripture.csproj @@ -28,8 +28,9 @@ - + + @@ -39,7 +40,7 @@ - + diff --git a/src/SIL.XForge.Scripture/Services/BuildConfigJsonConverter.cs b/src/SIL.XForge.Scripture/Services/BuildConfigJsonConverter.cs index 1957826e1d..968b0d5d0f 100644 --- a/src/SIL.XForge.Scripture/Services/BuildConfigJsonConverter.cs +++ b/src/SIL.XForge.Scripture/Services/BuildConfigJsonConverter.cs @@ -42,6 +42,12 @@ public override void WriteJson(JsonWriter writer, BuildConfig? value, JsonSerial serializer.Serialize(writer, value.TrainingScriptureRange); } + if (value.TrainingScriptureRanges.Count > 0) + { + writer.WritePropertyName(nameof(value.TrainingScriptureRanges)); + serializer.Serialize(writer, value.TrainingScriptureRanges); + } + if (value.TranslationBooks.Count > 0) { writer.WritePropertyName(nameof(value.TranslationBooks)); @@ -54,6 +60,12 @@ public override void WriteJson(JsonWriter writer, BuildConfig? 
value, JsonSerial serializer.Serialize(writer, value.TranslationScriptureRange); } + if (value.TranslationScriptureRanges.Count > 0) + { + writer.WritePropertyName(nameof(value.TranslationScriptureRanges)); + serializer.Serialize(writer, value.TranslationScriptureRanges); + } + if (value.FastTraining) { writer.WritePropertyName(nameof(value.FastTraining)); diff --git a/src/SIL.XForge.Scripture/Services/IMachineProjectService.cs b/src/SIL.XForge.Scripture/Services/IMachineProjectService.cs index cca7cf0548..0653a54b1b 100644 --- a/src/SIL.XForge.Scripture/Services/IMachineProjectService.cs +++ b/src/SIL.XForge.Scripture/Services/IMachineProjectService.cs @@ -1,50 +1,13 @@ using System.IO; using System.Threading; using System.Threading.Tasks; -using Serval.Client; -using SIL.XForge.Scripture.Models; namespace SIL.XForge.Scripture.Services; public interface IMachineProjectService { - Task AddProjectAsync( - string curUserId, - string sfProjectId, - bool preTranslate, - CancellationToken cancellationToken - ); - Task BuildProjectAsync( - string curUserId, - BuildConfig buildConfig, - bool preTranslate, - CancellationToken cancellationToken - ); - Task BuildProjectForBackgroundJobAsync( - string curUserId, - BuildConfig buildConfig, - bool preTranslate, - CancellationToken cancellationToken - ); + Task AddProjectAsync(string sfProjectId, bool preTranslate, CancellationToken cancellationToken); Task GetProjectZipAsync(string sfProjectId, Stream outputStream, CancellationToken cancellationToken); Task GetTranslationEngineTypeAsync(bool preTranslate); - Task RemoveProjectAsync( - string curUserId, - string sfProjectId, - bool preTranslate, - CancellationToken cancellationToken - ); - Task SyncProjectCorporaAsync( - string curUserId, - BuildConfig buildConfig, - bool preTranslate, - CancellationToken cancellationToken - ); - Task TranslationEngineExistsAsync( - string projectId, - string translationEngineId, - bool preTranslate, - CancellationToken cancellationToken - ); - 
Task UpdateTranslationSourcesAsync(string curUserId, string sfProjectId); + Task RemoveProjectAsync(string sfProjectId, bool preTranslate, CancellationToken cancellationToken); } diff --git a/src/SIL.XForge.Scripture/Services/MachineApiService.cs b/src/SIL.XForge.Scripture/Services/MachineApiService.cs index 0b123e7a18..5d2ad3c56d 100644 --- a/src/SIL.XForge.Scripture/Services/MachineApiService.cs +++ b/src/SIL.XForge.Scripture/Services/MachineApiService.cs @@ -62,6 +62,8 @@ ITranslationEngineTypesClient translationEngineTypesClient private static readonly IEqualityComparer> _listStringComparer = SequenceEqualityComparer.Create( EqualityComparer.Default ); + private static readonly IEqualityComparer> _listProjectScriptureRangeComparer = + SequenceEqualityComparer.Create(EqualityComparer.Default); public async Task CancelPreTranslationBuildAsync( string curUserId, @@ -221,7 +223,7 @@ CancellationToken cancellationToken // Make sure the DTO conforms to the machine-api V2 URLs if (buildDto is not null) { - UpdateDto(buildDto, sfProjectId); + buildDto = UpdateDto(buildDto, sfProjectId); } return buildDto; @@ -266,7 +268,7 @@ await translationEnginesClient.GetAllBuildsAsync(translationEngineId, cancellati // Make sure the DTO conforms to the machine-api V2 URLs if (buildDto is not null) { - UpdateDto(buildDto, sfProjectId); + buildDto = UpdateDto(buildDto, sfProjectId); } return buildDto; @@ -314,7 +316,7 @@ CancellationToken cancellationToken } buildDto = CreateDto(translationBuild); - UpdateDto(buildDto, sfProjectId); + buildDto = UpdateDto(buildDto, sfProjectId); } catch (ServalApiException e) { @@ -474,7 +476,7 @@ CancellationToken cancellationToken { State = BuildStateFaulted, Message = errorMessage, - AdditionalInfo = new ServalBuildAdditionalInfo { TranslationEngineId = engineId ?? string.Empty, }, + AdditionalInfo = new ServalBuildAdditionalInfo { TranslationEngineId = engineId ?? 
string.Empty }, }; } else @@ -517,7 +519,7 @@ CancellationToken cancellationToken // Make sure the DTO conforms to the machine-api V2 URLs if (buildDto is not null) { - UpdateDto(buildDto, sfProjectId); + buildDto = UpdateDto(buildDto, sfProjectId); } return buildDto; @@ -697,7 +699,7 @@ public async Task StartPreTranslationBuildAsync( CancellationToken cancellationToken ) { - // Ensure that there are no errors in the build configuration + // Ensure that there are no errors in the build configuration for training if (!string.IsNullOrWhiteSpace(buildConfig.TrainingScriptureRange) && buildConfig.TrainingBooks.Count > 0) { throw new DataNotFoundException( @@ -706,6 +708,26 @@ CancellationToken cancellationToken ); } + if ( + !string.IsNullOrWhiteSpace(buildConfig.TrainingScriptureRange) + && buildConfig.TrainingScriptureRanges.Count > 0 + ) + { + throw new DataNotFoundException( + $"You cannot specify both {nameof(buildConfig.TrainingScriptureRange)}" + + $" and {nameof(buildConfig.TrainingScriptureRanges)}." + ); + } + + if (buildConfig.TrainingScriptureRanges.Count > 0 && buildConfig.TrainingBooks.Count > 0) + { + throw new DataNotFoundException( + $"You cannot specify both {nameof(buildConfig.TrainingScriptureRanges)}" + + $" and {nameof(buildConfig.TrainingBooks)}." + ); + } + + // Ensure that there are no errors in the build configuration for translation if (!string.IsNullOrWhiteSpace(buildConfig.TranslationScriptureRange) && buildConfig.TranslationBooks.Count > 0) { throw new DataNotFoundException( @@ -714,6 +736,25 @@ CancellationToken cancellationToken ); } + if ( + !string.IsNullOrWhiteSpace(buildConfig.TranslationScriptureRange) + && buildConfig.TranslationScriptureRanges.Count > 0 + ) + { + throw new DataNotFoundException( + $"You cannot specify both {nameof(buildConfig.TranslationScriptureRange)}" + + $" and {nameof(buildConfig.TranslationScriptureRanges)}." 
+ ); + } + + if (buildConfig.TranslationScriptureRanges.Count > 0 && buildConfig.TranslationBooks.Count > 0) + { + throw new DataNotFoundException( + $"You cannot specify both {nameof(buildConfig.TranslationScriptureRanges)}" + + $" and {nameof(buildConfig.TranslationBooks)}." + ); + } + // Load the project from the realtime service await using IConnection conn = await realtimeService.ConnectAsync(curUserId); IDocument projectDoc = await conn.FetchAsync(buildConfig.ProjectId); @@ -730,29 +771,41 @@ await projectDoc.SubmitJson0OpAsync(op => { op.Set( p => p.TranslateConfig.DraftConfig.LastSelectedTrainingBooks, - buildConfig.TrainingBooks.ToList(), + [.. buildConfig.TrainingBooks], _listIntComparer ); op.Set( p => p.TranslateConfig.DraftConfig.LastSelectedTrainingDataFiles, - buildConfig.TrainingDataFiles.ToList(), + [.. buildConfig.TrainingDataFiles], _listStringComparer ); op.Set( p => p.TranslateConfig.DraftConfig.LastSelectedTrainingScriptureRange, buildConfig.TrainingScriptureRange ); + op.Set( + p => p.TranslateConfig.DraftConfig.LastSelectedTrainingScriptureRanges, + [.. buildConfig.TrainingScriptureRanges], + _listProjectScriptureRangeComparer + ); op.Set( p => p.TranslateConfig.DraftConfig.LastSelectedTranslationBooks, - buildConfig.TranslationBooks.ToList(), + [.. buildConfig.TranslationBooks], _listIntComparer ); op.Set( p => p.TranslateConfig.DraftConfig.LastSelectedTranslationScriptureRange, buildConfig.TranslationScriptureRange ); + op.Set( + p => p.TranslateConfig.DraftConfig.LastSelectedTranslationScriptureRanges, + [.. 
buildConfig.TranslationScriptureRanges], + _listProjectScriptureRangeComparer + ); if (!projectDoc.Data.TranslateConfig.PreTranslate) + { op.Set(p => p.TranslateConfig.PreTranslate, true); + } }); // Sync the source and target before running the build @@ -935,7 +988,29 @@ private static ServalBuildDto CreateDto(TranslationBuild translationBuild) => AdditionalInfo = new ServalBuildAdditionalInfo { BuildId = translationBuild.Id, - CorporaIds = translationBuild.Pretranslate?.Select(p => p.Corpus.Id), + CorporaIds = new HashSet( + // Use a HashSet to ensure there are no duplicate corpus ids + [ + .. translationBuild + .Pretranslate?.SelectMany(t => t.SourceFilters ?? []) + .Select(f => f.Corpus.Id) ?? [], + .. translationBuild.TrainOn?.SelectMany(t => t.SourceFilters ?? []).Select(f => f.Corpus.Id) + ?? [], + .. translationBuild.TrainOn?.SelectMany(t => t.TargetFilters ?? []).Select(f => f.Corpus.Id) + ?? [], + ] + ), + ParallelCorporaIds = new HashSet( + // Use a HashSet to ensure there are no duplicate parallel corpus ids + [ + .. translationBuild + .Pretranslate?.Select(t => t.ParallelCorpus?.Id) + .Where(id => !string.IsNullOrEmpty(id)) ?? [], + .. translationBuild + .TrainOn?.Select(t => t.ParallelCorpus?.Id) + .Where(id => !string.IsNullOrEmpty(id)) ?? [], + ] + ), DateFinished = translationBuild.DateFinished, Step = translationBuild.Step, TranslationEngineId = translationBuild.Engine.Id, @@ -961,7 +1036,11 @@ private static ServalEngineDto CreateDto(TranslationEngine translationEngine) => /// /// Method not allowed or not supported for the specified translation engine. /// - /// If this method returns, it is expected that the DTO will be null. + /// + /// If this method returns, it is expected that the DTO will be null. 
+ /// The following status codes may be thrown by Serval, and are not handled by this method: + /// - 499: Operation Cancelled + /// private static void ProcessServalApiException(ServalApiException e) { switch (e) diff --git a/src/SIL.XForge.Scripture/Services/MachineProjectService.cs b/src/SIL.XForge.Scripture/Services/MachineProjectService.cs index 36495e96d4..99e5f608b6 100644 --- a/src/SIL.XForge.Scripture/Services/MachineProjectService.cs +++ b/src/SIL.XForge.Scripture/Services/MachineProjectService.cs @@ -13,7 +13,6 @@ using Microsoft.FeatureManagement; using Newtonsoft.Json.Linq; using Serval.Client; -using SIL.Extensions; using SIL.Scripture; using SIL.XForge.Configuration; using SIL.XForge.DataAccess; @@ -33,6 +32,7 @@ namespace SIL.XForge.Scripture.Services; /// Provides functionality to add, remove, and build Machine projects. /// public class MachineProjectService( + ICorporaClient corporaClient, IDataFilesClient dataFilesClient, IExceptionHandler exceptionHandler, IFeatureManager featureManager, @@ -53,8 +53,15 @@ IRepository userSecrets internal const string Nmt = "nmt"; internal const string SmtTransfer = "smt-transfer"; + /// + /// Adds the project to Serval, if the required data is present. + /// + /// The Scripture Forge project identifier. + /// If true use NMT; otherwise if false use SMT. + /// The cancellation token. + /// The translation engine identifier. + /// The project does not exist. 
public async Task AddProjectAsync( - string curUserId, string sfProjectId, bool preTranslate, CancellationToken cancellationToken @@ -82,274 +89,17 @@ CancellationToken cancellationToken return string.Empty; } - public async Task BuildProjectAsync( - string curUserId, - BuildConfig buildConfig, - bool preTranslate, - CancellationToken cancellationToken - ) - { - // Load the target project secrets, so we can get the translation engine ID - if (!(await projectSecrets.TryGetAsync(buildConfig.ProjectId)).TryResult(out SFProjectSecret projectSecret)) - { - throw new DataNotFoundException("The project secret cannot be found."); - } - - // Load the project from the realtime service - await using IConnection conn = await realtimeService.ConnectAsync(curUserId); - IDocument projectDoc = await conn.FetchAsync(buildConfig.ProjectId); - if (!projectDoc.IsLoaded) - { - throw new DataNotFoundException("The project does not exist."); - } - - // Ensure we have a translation engine id or a pre-translation engine id, and that it exists - string translationEngineId = preTranslate - ? projectSecret.ServalData?.PreTranslationEngineId - : projectSecret.ServalData?.TranslationEngineId; - if ( - !await TranslationEngineExistsAsync( - buildConfig.ProjectId, - translationEngineId, - preTranslate, - cancellationToken - ) - ) - { - // We do not have one, likely because the translation is a back translation - // We can only get the language tags for back translations from the ScrText, - // which is not present until after the first sync (not from the Registry). 
- - // If the source or target writing system tag is missing, get them from the ScrText - // We do not need to do this for the alternate source as this would have been populated correctly - if ( - string.IsNullOrWhiteSpace(projectDoc.Data.WritingSystem.Tag) - || string.IsNullOrWhiteSpace(projectDoc.Data.TranslateConfig.Source?.WritingSystem.Tag) - ) - { - // Get the user secret - Attempt userSecretAttempt = await userSecrets.TryGetAsync(curUserId); - if (!userSecretAttempt.TryResult(out UserSecret userSecret)) - throw new DataNotFoundException("The user does not exist."); - - // This error can occur if the project is deleted while the build is running - if (projectDoc.Data is null) - { - throw new DataNotFoundException("The project does not exist."); - } - - // Update the target writing system tag - if (string.IsNullOrWhiteSpace(projectDoc.Data.WritingSystem.Tag)) - { - WritingSystem writingSystem = paratextService.GetWritingSystem( - userSecret, - projectDoc.Data.ParatextId - ); - if (!string.IsNullOrEmpty(writingSystem.Tag)) - { - await projectDoc.SubmitJson0OpAsync(op => - { - op.Set(p => p.WritingSystem.Region, writingSystem.Region); - op.Set(p => p.WritingSystem.Script, writingSystem.Script); - op.Set(p => p.WritingSystem.Tag, writingSystem.Tag); - }); - } - } - - // This error can occur if the project is deleted while the build is running - if (projectDoc.Data is null) - { - throw new DataNotFoundException("The project does not exist."); - } - - // This error can occur if the project source is cleared while the build is running - if (projectDoc.Data.TranslateConfig.Source is null) - { - throw new DataNotFoundException("The project source is not specified."); - } - - // Update the source writing system tag - if (string.IsNullOrWhiteSpace(projectDoc.Data.TranslateConfig.Source.WritingSystem.Tag)) - { - WritingSystem writingSystem = paratextService.GetWritingSystem( - userSecret, - projectDoc.Data.TranslateConfig.Source.ParatextId - ); - if 
(!string.IsNullOrEmpty(writingSystem.Tag)) - { - await projectDoc.SubmitJson0OpAsync(op => - op.Set(p => p.TranslateConfig.Source.WritingSystem.Tag, writingSystem.Tag) - ); - } - } - } - - // Clear the existing translation engine id and corpora, based on whether this is pre-translation or not - string[] corporaIds = - projectSecret - .ServalData?.Corpora.Where(c => preTranslate ? c.Value.PreTranslate : !c.Value.PreTranslate) - .Select(c => c.Key) - .ToArray() ?? []; - await projectSecrets.UpdateAsync( - projectDoc.Id, - u => - { - if (preTranslate) - { - u.Unset(p => p.ServalData.PreTranslationEngineId); - } - else - { - u.Unset(p => p.ServalData.TranslationEngineId); - } - - foreach (string corporaId in corporaIds) - { - u.Unset(p => p.ServalData.Corpora[corporaId]); - } - } - ); - - // Create the Serval project, and get the translation engine id - translationEngineId = await CreateServalProjectAsync(projectDoc.Data, preTranslate, cancellationToken); - } - - // Ensure a translation engine id is present - if (string.IsNullOrWhiteSpace(translationEngineId)) - { - throw new DataNotFoundException("The translation engine is not specified."); - } - - // Get the translation engine from Serval - try - { - TranslationEngine translationEngine = await translationEnginesClient.GetAsync( - translationEngineId, - cancellationToken - ); - bool recreateTranslationEngine = false; - - // See if the target language has changed - string projectTargetLanguage = await GetTargetLanguageAsync(projectDoc.Data); - if (translationEngine.TargetLanguage != projectTargetLanguage) - { - string message = - $"Target language has changed from {translationEngine.TargetLanguage} to {projectTargetLanguage}."; - logger.LogInformation(message); - recreateTranslationEngine = true; - } - - // See if the source language has changed - string projectSourceLanguage = GetSourceLanguage(projectDoc.Data, useAlternateTrainingSource: false); - if (translationEngine.SourceLanguage != projectSourceLanguage) - { - 
string message = - $"Source language has changed from {translationEngine.SourceLanguage} to {projectSourceLanguage}."; - logger.LogInformation(message); - recreateTranslationEngine = true; - } - - // Delete then recreate the translation engine if they have changed - if (recreateTranslationEngine) - { - // Removal can be a slow process - await RemoveProjectAsync(curUserId, buildConfig.ProjectId, preTranslate, cancellationToken); - await AddProjectAsync(curUserId, buildConfig.ProjectId, preTranslate, cancellationToken); - } - } - catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound) - { - // A 404 means that the translation engine does not exist - logger.LogInformation($"Translation Engine {translationEngineId} does not exist."); - string? corporaId = projectSecret - .ServalData?.Corpora.FirstOrDefault(c => preTranslate ? c.Value.PreTranslate : !c.Value.PreTranslate) - .Key; - // Clear the existing translation engine id and corpora - await projectSecrets.UpdateAsync( - projectDoc.Id, - u => - { - if (preTranslate) - { - u.Unset(p => p.ServalData.PreTranslationEngineId); - } - else - { - u.Unset(p => p.ServalData.TranslationEngineId); - } - - if (!string.IsNullOrWhiteSpace(corporaId)) - { - u.Unset(p => p.ServalData.Corpora[corporaId]); - } - } - ); - - // Create the new translation engine id - translationEngineId = await CreateServalProjectAsync(projectDoc.Data, preTranslate, cancellationToken); - logger.LogInformation($"Created Translation Engine {translationEngineId}."); - } - - // Sync the corpus - if ((await SyncProjectCorporaAsync(curUserId, buildConfig, preTranslate, cancellationToken)) || preTranslate) - { - // If the corpus was updated (or this is a pre-translation engine), start the build - // We do not need the build ID for tracking as we use GetCurrentBuildAsync for that - - // Get the updated project secrets - projectSecret = await projectSecrets.GetAsync(buildConfig.ProjectId); - - // Get the appropriate translation engine - 
TranslationBuildConfig translationBuildConfig; - if (preTranslate) - { - translationEngineId = projectSecret.ServalData!.PreTranslationEngineId!; - - // Execute a complete pre-translation - translationBuildConfig = await GetTranslationBuildConfigAsync( - projectSecret.ServalData, - projectDoc.Data.TranslateConfig.DraftConfig, - buildConfig - ); - } - else - { - translationEngineId = projectSecret.ServalData!.TranslationEngineId!; - translationBuildConfig = new TranslationBuildConfig(); - } - - // Start the build - TranslationBuild translationBuild = await translationEnginesClient.StartBuildAsync( - translationEngineId, - translationBuildConfig, - cancellationToken - ); - - // Clear the queued status and job id - await projectSecrets.UpdateAsync( - buildConfig.ProjectId, - u => - { - if (preTranslate) - { - u.Unset(p => p.ServalData.PreTranslationJobId); - u.Unset(p => p.ServalData.PreTranslationQueuedAt); - } - else - { - u.Unset(p => p.ServalData.TranslationJobId); - u.Unset(p => p.ServalData.TranslationQueuedAt); - } - } - ); - - return translationBuild; - } - - // No build started - return null; - } - + /// + /// Executes , and traps any errors during execution. + /// + /// The current user identifier. + /// The build configuration. + /// If true use NMT; otherwise if false use SMT. + /// The cancellation token. + /// An asynchronous task. + /// + /// This cannot be run multiple times in different threads. 
+ /// [Mutex] public async Task BuildProjectForBackgroundJobAsync( string curUserId, @@ -407,13 +157,16 @@ await projectSecrets.UpdateAsync( { // This will occur if the project is deleted while the job is running string message = - $"Build DataNotFoundException occurred for project {buildConfig.ProjectId} running in background job."; + $"Build DataNotFoundException occurred for project {buildConfig.ProjectId.Sanitize()}" + + " running in background job."; logger.LogWarning(e, message); } catch (Exception e) { // Log the error and report to bugsnag - string message = $"Build exception occurred for project {buildConfig.ProjectId} running in background job."; + string message = + $"Build exception occurred for project {buildConfig.ProjectId.Sanitize()}" + + " running in background job."; logger.LogError(e, message); exceptionHandler.ReportException(e); @@ -446,7 +199,9 @@ await projectSecrets.UpdateAsync( /// The output stream. /// The cancellation token. /// The name of the zip file, e.g. ABC.zip. - /// The project does not exist, is a resource, or could not be found on disk. + /// + /// The project does not exist, is a resource, or could not be found on disk. 
+ /// public async Task GetProjectZipAsync( string sfProjectId, Stream outputStream, @@ -466,24 +221,8 @@ CancellationToken cancellationToken throw new DataNotFoundException("You cannot download a resource."); } - // Get the path to the Paratext directory - string path = Path.Combine(siteOptions.Value.SiteDir, "sync", project.ParatextId, "target"); - - // Ensure that the path exists - if (!fileSystemService.DirectoryExists(path)) - { - throw new DataNotFoundException($"The directory could not be found for {project.ParatextId}"); - } - // Create the zip file from the directory in memory - using var archive = new ZipArchive(outputStream, ZipArchiveMode.Create, true); - foreach (string filePath in fileSystemService.EnumerateFiles(path)) - { - await using Stream fileStream = fileSystemService.OpenFile(filePath, FileMode.Open); - ZipArchiveEntry entry = archive.CreateEntry(Path.GetFileName(filePath)); - await using Stream entryStream = entry.Open(); - await fileStream.CopyToAsync(entryStream, cancellationToken); - } + await CreateZipFileFromParatextDirectoryAsync(project.ParatextId, outputStream, cancellationToken); // Strip invalid characters from the file name string fileName = Path.GetInvalidFileNameChars() @@ -494,7 +233,7 @@ CancellationToken cancellationToken /// /// Gets the translation engine type string for Serval. /// - /// If true, then the translation engine is for pre-translation. + /// If true use NMT; otherwise if false use SMT. /// The translation engine type string for Serval. public async Task GetTranslationEngineTypeAsync(bool preTranslate) { @@ -507,8 +246,16 @@ public async Task GetTranslationEngineTypeAsync(bool preTranslate) }; } - public async Task RemoveProjectAsync( - string curUserId, + /// + /// Removes a project from Serval. + /// + /// The Scripture Forge project identifier. + /// If true use NMT; otherwise if false use SMT. + /// The cancellation token. + /// An asynchronous task. + /// The project secret cannot be found. 
+ /// This can be mocked in unit tests. + public virtual async Task RemoveProjectAsync( string sfProjectId, bool preTranslate, CancellationToken cancellationToken @@ -521,485 +268,117 @@ CancellationToken cancellationToken } // Ensure we have a translation engine id - string translationEngineId = preTranslate - ? projectSecret.ServalData?.PreTranslationEngineId - : projectSecret.ServalData?.TranslationEngineId; + string translationEngineId = GetTranslationEngineId(projectSecret, preTranslate); if (string.IsNullOrWhiteSpace(translationEngineId)) { - logger.LogInformation($"No Translation Engine Id specified for project {sfProjectId}"); + logger.LogInformation($"No Translation Engine Id specified for project {sfProjectId.Sanitize()}"); return; } - // Remove the corpus files - foreach ( - (string corpusId, _) in projectSecret.ServalData.Corpora.Where(c => c.Value.PreTranslate == preTranslate) - ) + // Remove the legacy serval data + await RemoveLegacyServalDataAsync(sfProjectId, preTranslate, cancellationToken); + + // Build the list of files, corpora, and parallel corpora to remove + List fileIdsToRemove = []; + List corpusIdsToRemove = []; + if (preTranslate) { - foreach ( - string fileId in projectSecret - .ServalData.Corpora[corpusId] - .SourceFiles.Concat(projectSecret.ServalData.Corpora[corpusId].TargetFiles) - .Select(f => f.FileId) - ) + // Remove the additional training data + if (projectSecret.ServalData?.AdditionalTrainingData is not null) { - try - { - await dataFilesClient.DeleteAsync(fileId, cancellationToken); - } - catch (ServalApiException e) - { - // A 404 means that the file does not exist - string message; - if (e.StatusCode == StatusCodes.Status404NotFound) - { - message = - $"Corpora file {fileId} in corpus {corpusId} for project {sfProjectId}" - + " was missing or already deleted."; - logger.LogInformation(message); - } - else - { - message = - $"Ignored exception while deleting file {fileId} in corpus {corpusId}" - + $" for project 
{sfProjectId}."; - logger.LogError(e, message); - } - } + corpusIdsToRemove.Add(projectSecret.ServalData.AdditionalTrainingData.SourceCorpusId); + corpusIdsToRemove.Add(projectSecret.ServalData.AdditionalTrainingData.TargetCorpusId); + fileIdsToRemove.AddRange( + projectSecret.ServalData.AdditionalTrainingData.CorpusFiles.Select(f => f.FileId) + ); } - // Delete the corpus - try - { - await translationEnginesClient.DeleteCorpusAsync(translationEngineId, corpusId, cancellationToken); - } - catch (ServalApiException e) + // If there is no SMT training engine, remove all files and corpora + if ( + projectSecret.ServalData is not null + && string.IsNullOrWhiteSpace(projectSecret.ServalData.TranslationEngineId) + ) { - // A 404 means that the translation engine does not exist - string message; - if (e.StatusCode == StatusCodes.Status404NotFound) - { - message = - $"Translation Engine {translationEngineId} for project {sfProjectId}" - + " was missing or already deleted."; - logger.LogInformation(message); - } - else - { - message = - $"Ignored exception while deleting translation engine {translationEngineId}" - + $" for project {sfProjectId}."; - logger.LogError(e, message); - } + corpusIdsToRemove.AddRange(projectSecret.ServalData.CorpusFiles.Select(f => f.CorpusId)); + fileIdsToRemove.AddRange(projectSecret.ServalData.CorpusFiles.Select(f => f.FileId)); } - - // Remove our record of the corpus - await projectSecrets.UpdateAsync(sfProjectId, u => u.Unset(p => p.ServalData.Corpora[corpusId])); - } - - // Remove the project from Serval - await translationEnginesClient.DeleteAsync(translationEngineId, cancellationToken); - - // Remove the Serval Data - if (preTranslate) - { - await projectSecrets.UpdateAsync(sfProjectId, u => u.Unset(p => p.ServalData.PreTranslationEngineId)); - } - else - { - await projectSecrets.UpdateAsync(sfProjectId, u => u.Unset(p => p.ServalData.TranslationEngineId)); - } - } - - /// - /// Syncs the project corpora from the file system to Serval. 
- /// - /// The current user identifier. - /// The build configuration. - /// The project is for pre-translation. - /// The cancellation token. - /// true if the project corpora and its files were updated; otherwise, false. - /// The project does not exist. - /// - /// Notes: - /// - If the corpus was updated, then you should start the Build with . - /// - If a corpus is not configured on Serval, one is created and recorded in the project secret. - /// - Any corpus files without project ids will be deleted and recreated with project ids. - /// - public async Task SyncProjectCorporaAsync( - string curUserId, - BuildConfig buildConfig, - bool preTranslate, - CancellationToken cancellationToken - ) - { - // Used to return whether the corpus was updated - bool corpusUpdated = false; - - // Load the project from the realtime service - Attempt attempt = await realtimeService.TryGetSnapshotAsync(buildConfig.ProjectId); - if (!attempt.TryResult(out SFProject project)) - { - throw new DataNotFoundException("The project does not exist."); - } - - // Ensure we have a source - if (project.TranslateConfig.Source is null) - { - throw new DataNotFoundException("The project source is not specified."); - } - - // Load the project secrets, so we can get the corpus files - if (!(await projectSecrets.TryGetAsync(project.Id)).TryResult(out SFProjectSecret projectSecret)) - { - throw new DataNotFoundException("The project secret cannot be found."); - } - - // Ensure we have serval data - if (projectSecret.ServalData is null) - { - throw new DataNotFoundException("The Serval data cannot be found."); - } - - // Ensure we have a translation engine ID - string translationEngineId = preTranslate - ? projectSecret.ServalData?.PreTranslationEngineId - : projectSecret.ServalData?.TranslationEngineId; - if (string.IsNullOrWhiteSpace(translationEngineId)) - { - throw new DataNotFoundException("The translation engine ID cannot be found."); } - - // See if there is a translation corpus - string? 
corpusId = projectSecret - .ServalData.Corpora.FirstOrDefault(c => - c.Value.PreTranslate == preTranslate && !c.Value.AlternateTrainingSource - ) - .Key; - - // See if there is an alternate source to use for drafting - bool useAlternateSource = - project.TranslateConfig.DraftConfig.AlternateSourceEnabled - && project.TranslateConfig.DraftConfig.AlternateSource is not null - && preTranslate; - - // See if there is an alternate training source corpus - bool useAlternateTrainingSource = - project.TranslateConfig.DraftConfig.AlternateTrainingSourceEnabled - && project.TranslateConfig.DraftConfig.AlternateTrainingSource is not null - && preTranslate; - - // See if there is an additional training source - bool useAdditionalTrainingSource = - project.TranslateConfig.DraftConfig.AdditionalTrainingSourceEnabled - && project.TranslateConfig.DraftConfig.AdditionalTrainingSource is not null - && preTranslate; - - // Get the alternate training source corpus id, if present - string? alternateTrainingSourceCorpusId = projectSecret - .ServalData.Corpora.FirstOrDefault(c => c.Value.PreTranslate && c.Value.AlternateTrainingSource) - .Key; - - // If we are to use the alternate source, only use it for drafting - bool useSourceAsAlternateTrainingSource = false; - string sourceProjectId = project.TranslateConfig.Source.ProjectRef; - string sourceParatextId = project.TranslateConfig.Source.ParatextId; - if (useAlternateSource) - { - sourceProjectId = project.TranslateConfig.DraftConfig.AlternateSource.ProjectRef; - sourceParatextId = project.TranslateConfig.DraftConfig.AlternateSource.ParatextId; - - // If we do not have an alternate training source, use the reference source for training - useSourceAsAlternateTrainingSource = !useAlternateTrainingSource; - } - - // Get the files we have already synced - List oldSourceCorpusFiles = []; - List oldTargetCorpusFiles = []; - List newTargetCorpusFiles = []; - List newSourceCorpusFiles = []; - if (!string.IsNullOrWhiteSpace(corpusId)) - { - 
oldSourceCorpusFiles = projectSecret.ServalData.Corpora[corpusId].SourceFiles; - oldTargetCorpusFiles = projectSecret.ServalData.Corpora[corpusId].TargetFiles; - } - - // Upload the translation source - corpusUpdated |= await UploadNewCorpusFilesAsync( - targetProjectId: project.Id, - sourceProjectId, - paratextId: sourceParatextId, - uploadParatextZipFile: true, - texts: [], - oldSourceCorpusFiles, - newSourceCorpusFiles, - cancellationToken - ); - - // Upload the translation target - corpusUpdated |= await UploadNewCorpusFilesAsync( - targetProjectId: project.Id, - sourceProjectId: project.Id, - project.ParatextId, - uploadParatextZipFile: true, - texts: [], - oldTargetCorpusFiles, - newTargetCorpusFiles, - cancellationToken - ); - - // Update the translation corpus - corpusUpdated |= await UpdateCorpusConfigAsync( - project, - translationEngineId, - corpusId, - preTranslate, - additionalTrainingData: false, - useAlternateTrainingSource: false, - uploadParatextZipFile: true, - corpusUpdated, - newSourceCorpusFiles, - newTargetCorpusFiles, - cancellationToken - ); - - // Get the files we have already synced for the alternate training source - List oldAlternateTrainingSourceCorpusFiles = []; - List newAlternateTrainingSourceCorpusFiles = []; - if (!string.IsNullOrWhiteSpace(alternateTrainingSourceCorpusId)) + else if ( + projectSecret.ServalData is not null + && string.IsNullOrWhiteSpace(projectSecret.ServalData.PreTranslationEngineId) + ) { - oldAlternateTrainingSourceCorpusFiles = projectSecret - .ServalData - .Corpora[alternateTrainingSourceCorpusId] - .SourceFiles; + // If there is no NMT training engine, remove all files and corpora + corpusIdsToRemove.AddRange(projectSecret.ServalData.CorpusFiles.Select(f => f.CorpusId)); + fileIdsToRemove.AddRange(projectSecret.ServalData.CorpusFiles.Select(f => f.FileId)); } - // Upload the training corpus, or remove it if no longer used - if (useAlternateTrainingSource || useSourceAsAlternateTrainingSource || 
useAdditionalTrainingSource) + // Remove the specified corpora + foreach (string corpusId in corpusIdsToRemove.Where(s => !string.IsNullOrWhiteSpace(s))) { - // Determine which project to use for training - string paratextId = useAlternateTrainingSource - ? project.TranslateConfig.DraftConfig.AlternateTrainingSource.ParatextId - : project.TranslateConfig.Source.ParatextId; - string projectId = useAlternateTrainingSource - ? project.TranslateConfig.DraftConfig.AlternateTrainingSource.ProjectRef - : project.TranslateConfig.Source.ProjectRef; - - // Upload the training corpus - corpusUpdated |= await UploadNewCorpusFilesAsync( - targetProjectId: project.Id, - sourceProjectId: projectId, - paratextId, - uploadParatextZipFile: true, - texts: [], - oldAlternateTrainingSourceCorpusFiles, - newAlternateTrainingSourceCorpusFiles, - cancellationToken - ); - - // Upload the additional training source - if (useAdditionalTrainingSource) + try { - corpusUpdated |= await UploadNewCorpusFilesAsync( - targetProjectId: project.Id, - sourceProjectId: project.TranslateConfig.DraftConfig.AdditionalTrainingSource.ProjectRef, - paratextId: project.TranslateConfig.DraftConfig.AdditionalTrainingSource.ParatextId, - uploadParatextZipFile: true, - texts: [], - oldAlternateTrainingSourceCorpusFiles, - newAlternateTrainingSourceCorpusFiles, - cancellationToken - ); + await corporaClient.DeleteAsync(corpusId, cancellationToken); + } + catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound) + { + // If the file was already deleted, just log a message + string message = + $"Corpus {corpusId.Sanitize()} in project {sfProjectId.Sanitize()}" + + " was missing or already deleted."; + logger.LogInformation(e, message); } - - // Update the training corpus - corpusUpdated |= await UpdateCorpusConfigAsync( - project, - translationEngineId, - corpusId: alternateTrainingSourceCorpusId, - preTranslate: true, - additionalTrainingData: false, - useAlternateTrainingSource: true, - 
uploadParatextZipFile: true, - corpusUpdated, - sourceCorpusFiles: newAlternateTrainingSourceCorpusFiles, - targetCorpusFiles: newAlternateTrainingSourceCorpusFiles.Count > 0 ? newTargetCorpusFiles : [], - cancellationToken - ); } - else if (preTranslate && !string.IsNullOrWhiteSpace(alternateTrainingSourceCorpusId)) - { - // If there is an existing alternate training source, remove it - // Remove the corpus from Serval + // Remove the specified files + foreach (string fileId in fileIdsToRemove.Where(s => !string.IsNullOrWhiteSpace(s))) + { try { - await translationEnginesClient.DeleteCorpusAsync( - translationEngineId, - alternateTrainingSourceCorpusId, - cancellationToken - ); + await dataFilesClient.DeleteAsync(fileId, cancellationToken); } catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound) { // If the file was already deleted, just log a message string message = - $"Corpus {alternateTrainingSourceCorpusId} in project {buildConfig.ProjectId}" + $"File {fileId.Sanitize()} in project {sfProjectId.Sanitize()}" + " was missing or already deleted."; logger.LogInformation(e, message); } - - // Remove the files from Serval - foreach (ServalCorpusFile corpusFile in oldAlternateTrainingSourceCorpusFiles) - { - try - { - await dataFilesClient.DeleteAsync(corpusFile.FileId, cancellationToken); - } - catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound) - { - // If the file was already deleted, just log a message - string message = - $"Corpora file {corpusFile.FileId} for text {corpusFile.TextId} in project {buildConfig.ProjectId}" - + " was missing or already deleted."; - logger.LogInformation(e, message); - } - } - - // Remove the reference to the corpus from the project secret - await projectSecrets.UpdateAsync( - project.Id, - u => u.Unset(p => p.ServalData.Corpora[alternateTrainingSourceCorpusId]) - ); } - // See if we have an additional training data - if (preTranslate) + // Remove the project from Serval + 
try { - // Get the training data corpus id - string trainingDataCorpusId = projectSecret - .ServalData.Corpora.FirstOrDefault(c => c.Value.PreTranslate && c.Value.AdditionalTrainingData) - .Key; - - // If there are training data files, or they were removed (i.e. we have a corpus record for it) - if (buildConfig.TrainingDataFiles.Count > 0 || !string.IsNullOrWhiteSpace(trainingDataCorpusId)) - { - // Set up the collections required to upload the corpus data files - List newTrainingDataSourceTexts = []; - List newTrainingDataTargetTexts = []; - List newTrainingDataSourceCorpusFiles = []; - List newTrainingDataTargetCorpusFiles = []; - List oldTrainingDataSourceCorpusFiles = []; - List oldTrainingDataTargetCorpusFiles = []; - - // Get the training data texts - await trainingDataService.GetTextsAsync( - curUserId, - buildConfig.ProjectId, - buildConfig.TrainingDataFiles, - newTrainingDataSourceTexts, - newTrainingDataTargetTexts - ); - - // Get the training data files we have already synced - if (!string.IsNullOrWhiteSpace(trainingDataCorpusId)) - { - oldTrainingDataSourceCorpusFiles = projectSecret - .ServalData - .Corpora[trainingDataCorpusId] - .SourceFiles; - oldTrainingDataTargetCorpusFiles = projectSecret - .ServalData - .Corpora[trainingDataCorpusId] - .TargetFiles; - } - - // Upload the source files for the training data - corpusUpdated |= await UploadNewCorpusFilesAsync( - targetProjectId: project.Id, - sourceProjectId: project.Id, - project.ParatextId, - uploadParatextZipFile: false, - newTrainingDataSourceTexts, - oldTrainingDataSourceCorpusFiles, - newTrainingDataSourceCorpusFiles, - cancellationToken - ); - - // Upload the target files for the training data - corpusUpdated |= await UploadNewCorpusFilesAsync( - targetProjectId: project.Id, - sourceProjectId: project.Id, - project.ParatextId, - uploadParatextZipFile: false, - newTrainingDataTargetTexts, - oldTrainingDataTargetCorpusFiles, - newTrainingDataTargetCorpusFiles, - cancellationToken - ); - - // 
Update the training data corpus - corpusUpdated |= await UpdateCorpusConfigAsync( - project, - translationEngineId, - corpusId: trainingDataCorpusId, - preTranslate: true, - additionalTrainingData: true, - useAlternateTrainingSource: false, - uploadParatextZipFile: false, - corpusUpdated, - sourceCorpusFiles: newTrainingDataSourceCorpusFiles, - targetCorpusFiles: newTrainingDataTargetCorpusFiles, - cancellationToken - ); - } + await translationEnginesClient.DeleteAsync(translationEngineId, cancellationToken); } - - return corpusUpdated; - } - - /// - /// Determines whether a translation engine exists for the specified project. - /// - /// The Scripture Forge project identifier. - /// The Serval translation engine identifier. - /// The Serval translation engine identifier. - /// The cancellation token. - /// - public async Task TranslationEngineExistsAsync( - string projectId, - string? translationEngineId, - bool preTranslate, - CancellationToken cancellationToken - ) - { - if (string.IsNullOrWhiteSpace(translationEngineId)) + catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound) { - return false; + // If the file was already deleted, just log a message + string message = + $"Translation Engine {translationEngineId.Sanitize()} in project {sfProjectId.Sanitize()}" + + " was missing or already deleted."; + logger.LogInformation(e, message); } - try + // Remove the translation engine identifier + if (preTranslate) { - TranslationEngine translationEngine = await translationEnginesClient.GetAsync( - translationEngineId, - cancellationToken - ); - string type = await GetTranslationEngineTypeAsync(preTranslate); - - // We check for the type, taking account of Pascal Case (Serval 1.1) and Kebab Case (Serval 1.2) - return translationEngine.Name == projectId - && string.Equals( - translationEngine.Type.Replace("-", string.Empty), - type.Replace("-", string.Empty), - StringComparison.InvariantCultureIgnoreCase - ); + await 
projectSecrets.UpdateAsync(sfProjectId, u => u.Unset(p => p.ServalData.PreTranslationEngineId)); } - catch (ServalApiException e) - when (e.StatusCode is StatusCodes.Status403Forbidden or StatusCodes.Status404NotFound) + else { - return false; + await projectSecrets.UpdateAsync(sfProjectId, u => u.Unset(p => p.ServalData.TranslationEngineId)); } } + /// + /// Updates the language configuration for the additional and alternate sources. + /// + /// The current user identifier. + /// The Scripture Forge project identifier. + /// + /// The project or user secret does not exist. [Mutex] public async Task UpdateTranslationSourcesAsync(string curUserId, string sfProjectId) { @@ -1017,7 +396,7 @@ public async Task UpdateTranslationSourcesAsync(string curUserId, string sfProje throw new DataNotFoundException("The project does not exist."); } - // If there is an alternate source, ensure that writing system and RTL is correct + // If there is an alternate source, ensure that name, writing system and RTL is correct if (projectDoc.Data.TranslateConfig.DraftConfig.AlternateSource is not null) { ParatextSettings? alternateSourceSettings = paratextService.GetParatextSettings( @@ -1033,15 +412,24 @@ await projectDoc.SubmitJson0OpAsync(op => alternateSourceSettings.IsRightToLeft ); if (alternateSourceSettings.LanguageTag is not null) + { op.Set( pd => pd.TranslateConfig.DraftConfig.AlternateSource.WritingSystem.Tag, alternateSourceSettings.LanguageTag ); + } + if (alternateSourceSettings.FullName is not null) + { + op.Set( + pd => pd.TranslateConfig.DraftConfig.AlternateSource.Name, + alternateSourceSettings.FullName + ); + } }); } } - // If there is an alternate training source, ensure that writing system and RTL is correct + // If there is an alternate training source, ensure that name, writing system and RTL is correct if (projectDoc.Data.TranslateConfig.DraftConfig.AlternateTrainingSource is not null) { ParatextSettings? 
alternateSourceSettings = paratextService.GetParatextSettings( @@ -1057,15 +445,24 @@ await projectDoc.SubmitJson0OpAsync(op => alternateSourceSettings.IsRightToLeft ); if (alternateSourceSettings.LanguageTag is not null) + { op.Set( pd => pd.TranslateConfig.DraftConfig.AlternateTrainingSource.WritingSystem.Tag, alternateSourceSettings.LanguageTag ); + } + if (alternateSourceSettings.FullName is not null) + { + op.Set( + pd => pd.TranslateConfig.DraftConfig.AlternateTrainingSource.Name, + alternateSourceSettings.FullName + ); + } }); } } - // If there is an additional training source, ensure that writing system and RTL is correct + // If there is an additional training source, ensure that name, writing system and RTL is correct if (projectDoc.Data.TranslateConfig.DraftConfig.AdditionalTrainingSource is not null) { ParatextSettings? additionalTrainingSourceSettings = paratextService.GetParatextSettings( @@ -1081,121 +478,103 @@ await projectDoc.SubmitJson0OpAsync(op => additionalTrainingSourceSettings.IsRightToLeft ); if (additionalTrainingSourceSettings.LanguageTag is not null) + { op.Set( pd => pd.TranslateConfig.DraftConfig.AdditionalTrainingSource.WritingSystem.Tag, additionalTrainingSourceSettings.LanguageTag ); + } + if (additionalTrainingSourceSettings.FullName is not null) + { + op.Set( + pd => pd.TranslateConfig.DraftConfig.AdditionalTrainingSource.Name, + additionalTrainingSourceSettings.FullName + ); + } }); } } } /// - /// Gets the source language for the project. + /// Builds a project on Serval, including syncing and any required setup. /// - /// The project. - /// If true, use the alternate training source. - /// The source language. - /// - private static string GetSourceLanguage(SFProject? project, bool useAlternateTrainingSource) + /// The current user identifier. + /// The build configuration. + /// If true use NMT; otherwise if false use SMT. + /// The cancellation token. + /// An asynchronous task. 
+ /// The project or project secret could not be found. + /// This can be mocked in unit tests. + protected internal virtual async Task BuildProjectAsync( + string curUserId, + BuildConfig buildConfig, + bool preTranslate, + CancellationToken cancellationToken + ) { - // This error can occur if the project is deleted while the build is running - if (project is null) + // Load the target project secrets, so we can get the translation engine ID + if (!(await projectSecrets.TryGetAsync(buildConfig.ProjectId)).TryResult(out SFProjectSecret projectSecret)) { - throw new DataNotFoundException("The project does not exist."); + throw new DataNotFoundException("The project secret cannot be found."); } - // This error can occur if the project source is cleared while the build is running - if (project.TranslateConfig.Source is null) - { - throw new DataNotFoundException("The project source is not specified."); - } - - if (useAlternateTrainingSource) + // Load the project from the realtime service + await using IConnection conn = await realtimeService.ConnectAsync(curUserId); + IDocument projectDoc = await conn.FetchAsync(buildConfig.ProjectId); + if (!projectDoc.IsLoaded) { - return project.TranslateConfig.DraftConfig.AlternateTrainingSource?.WritingSystem.Tag - ?? project.TranslateConfig.Source?.WritingSystem.Tag - ?? project.TranslateConfig.DraftConfig.AlternateSource?.WritingSystem.Tag - ?? throw new ArgumentNullException(nameof(project)); + throw new DataNotFoundException("The project does not exist."); } - string alternateSourceLanguage = project.TranslateConfig.DraftConfig.AlternateSource?.WritingSystem.Tag; - bool useAlternateSourceLanguage = - project.TranslateConfig.DraftConfig.AlternateSourceEnabled - && !string.IsNullOrWhiteSpace(alternateSourceLanguage); - return useAlternateSourceLanguage - ? alternateSourceLanguage - : project.TranslateConfig.Source?.WritingSystem.Tag ?? 
throw new ArgumentNullException(nameof(project)); - } - - /// - /// Gets the segments from the text with Unix/Linux line endings. - /// - /// The . - /// The text file data to be uploaded to Serval. - private static string GetTextFileData(ISFText text) - { - var sb = new StringBuilder(); + // Remove the legacy serval data, if present + await RemoveLegacyServalDataAsync(buildConfig.ProjectId, preTranslate, cancellationToken); - // For pre-translation, we must upload empty lines with segment refs for the correct references to be returned - foreach (SFTextSegment segment in text.Segments.Where(s => !s.IsEmpty)) - { - sb.Append(segment.SegmentRef); - sb.Append('\t'); - sb.Append(segment.SegmentText); - sb.Append('\t'); - if (segment.IsSentenceStart) - { - sb.Append("ss,"); - } + // Ensure we have a translation engine id or a pre-translation engine id, and that it exists + string translationEngineId = await EnsureTranslationEngineExistsAsync( + curUserId, + projectDoc, + projectSecret, + preTranslate, + cancellationToken + ); - if (segment.IsInRange) - { - sb.Append("ir,"); - } + // Recreate the translation engine if it is missing, or the language has changed + await RecreateTranslationEngineIfRequiredAsync( + translationEngineId, + projectDoc.Data, + preTranslate, + cancellationToken + ); - if (segment.IsRangeStart) - { - sb.Append("rs,"); - } + // Perform the file and corpora sync with Serval + IList corporaSyncInfo = await SyncProjectCorporaAsync( + curUserId, + buildConfig, + preTranslate, + cancellationToken + ); - // Strip the last comma, or the tab if there are no flags - sb.Length--; + // Get the updated project secret + projectSecret = await projectSecrets.GetAsync(buildConfig.ProjectId); - // Append the Unix EOL to ensure consistency as this text data is uploaded to Serval - sb.Append('\n'); + // Ensure we have the ServalData + if (projectSecret.ServalData is null) + { + throw new DataNotFoundException("The project secret does not contain Serval data."); } - 
return sb.ToString(); - } - - /// - /// Gets the TranslationBuildConfig for the specified ServalData object. - /// - /// The Serval data from . - /// - /// The Draft configuration from .. - /// - /// The build configuration from the user, specified on the front end. - /// The TranslationBuildConfig for a Pre-Translate build. - /// Do not use with SMT builds. - private async Task GetTranslationBuildConfigAsync( - ServalData servalData, - DraftConfig draftConfig, - BuildConfig buildConfig - ) - { - JObject? servalConfig = null; - if (draftConfig.ServalConfig is not null) + // Set up the Serval Configuration + string? servalConfig = null; + if (projectDoc.Data.TranslateConfig.DraftConfig.ServalConfig is not null) { // Load the Serval Config from the Draft Config - servalConfig = JObject.Parse(draftConfig.ServalConfig); + servalConfig = projectDoc.Data.TranslateConfig.DraftConfig.ServalConfig; } else if (await featureManager.IsEnabledAsync(FeatureFlags.UpdatedLearningRateForServal)) { // Specify the updated learning rate - servalConfig = JObject.Parse( - """ + servalConfig = """ { "train_params": { @@ -1205,149 +584,123 @@ BuildConfig buildConfig "max_steps": 5000 } } - """ - ); + """; } - // If Fast Training is enabled, override the max_steps - if (buildConfig.FastTraining) + // Get the appropriate translation engine + TranslationBuildConfig translationBuildConfig; + if (preTranslate) { - // Ensure that there is a servalConfig JSON object - servalConfig ??= new JObject(); + translationEngineId = projectSecret.ServalData.PreTranslationEngineId!; - // 20 is the number of steps used on Serval QA by default - servalConfig["max_steps"] = 20; + // Execute a complete pre-translation + translationBuildConfig = GetTranslationBuildConfig( + projectSecret.ServalData, + servalConfig, + buildConfig, + corporaSyncInfo + ); } - - // See if there is an alternate training source or alternate drafting source corpus - bool useAlternateTrainingCorpus = - 
(draftConfig.AlternateTrainingSourceEnabled && draftConfig.AlternateTrainingSource is not null) - || draftConfig.AlternateSourceEnabled && draftConfig.AlternateSource is not null; - - // Set up the pre-translation and training corpora - List preTranslate = []; - List? trainOn = null; - - // Add the pre-translation books - foreach ( - KeyValuePair corpus in servalData.Corpora.Where(s => - s.Value.PreTranslate && !s.Value.AlternateTrainingSource && !s.Value.AdditionalTrainingData - ) - ) + else { - var preTranslateCorpusConfig = new PretranslateCorpusConfig { CorpusId = corpus.Key }; + translationEngineId = projectSecret.ServalData.TranslationEngineId!; + translationBuildConfig = new TranslationBuildConfig(); + } - // If this is a Paratext zip file corpus - if (corpus.Value.UploadParatextZipFile) - { - // Since all books are uploaded via the zip file, we need to specify the target books to translate - preTranslateCorpusConfig.ScriptureRange = !string.IsNullOrWhiteSpace( - buildConfig.TranslationScriptureRange - ) - ? buildConfig.TranslationScriptureRange - : string.Join(';', buildConfig.TranslationBooks.Select(Canon.BookNumberToId)); + // Start the build + await translationEnginesClient.StartBuildAsync(translationEngineId, translationBuildConfig, cancellationToken); - // Ensure that the pre-translate scripture range is null if it is blank - if (string.IsNullOrWhiteSpace(preTranslateCorpusConfig.ScriptureRange)) + // Clear the queued status and job id + await projectSecrets.UpdateAsync( + buildConfig.ProjectId, + u => + { + if (preTranslate) { - preTranslateCorpusConfig.ScriptureRange = null; + u.Unset(p => p.ServalData.PreTranslationJobId); + u.Unset(p => p.ServalData.PreTranslationQueuedAt); } - - if (!useAlternateTrainingCorpus) + else { - string? scriptureRange = !string.IsNullOrWhiteSpace(buildConfig.TrainingScriptureRange) - ? buildConfig.TrainingScriptureRange - : string.Join(';', buildConfig.TrainingBooks.Select(Canon.BookNumberToId)); - string[]? 
textIds = null; - - // Ensure that the trainOn scripture range is null if it is blank, - // and that the textIds array is empty so no books are trained on. - if (string.IsNullOrWhiteSpace(scriptureRange)) - { - scriptureRange = null; - textIds = []; - } - - // As we do not have an alternate train on source specified, use the source texts to train on - trainOn ??= []; - trainOn.Add( - new TrainingCorpusConfig - { - CorpusId = corpus.Key, - ScriptureRange = scriptureRange, - TextIds = textIds - } - ); + u.Unset(p => p.ServalData.TranslationJobId); + u.Unset(p => p.ServalData.TranslationQueuedAt); } } + ); + } - preTranslate.Add(preTranslateCorpusConfig); - } - - // Add the alternate training corpus, if enabled - // This will be the reference source if we are using an alternate drafting source - if (useAlternateTrainingCorpus) + /// + /// Creates or Updates a Parallel Corpus on Serval. + /// + /// The translation engine identifier. + /// + /// The parallel corpus to be updated. If null or empty, a new parallel corpus will be created. + /// + /// + /// The name of the parallel corpus. This will only be used if the parallel corpus is being created. + /// + /// The source corpus identifiers. + /// The target corpus identifiers. + /// The cancellation token. + /// + /// The new or updated parallel corpus identifier. If is not null, + /// this will be the same value as . If + /// is null, this will be the identifier of the new parallel corpus. + /// + /// This can be mocked in unit tests. + protected internal virtual async Task CreateOrUpdateParallelCorpusAsync( + string translationEngineId, + string? parallelCorpusId, + string? 
name, + IList sourceCorpusIds, + IList targetCorpusIds, + CancellationToken cancellationToken + ) + { + if (string.IsNullOrWhiteSpace(parallelCorpusId)) { - trainOn = []; - foreach ( - KeyValuePair corpus in servalData.Corpora.Where(s => - s.Value.PreTranslate && s.Value.AlternateTrainingSource - ) - ) - { - var trainingCorpusConfig = new TrainingCorpusConfig { CorpusId = corpus.Key }; - if (corpus.Value.UploadParatextZipFile) + // Create a new parallel corpus + TranslationParallelCorpus parallelCorpus = await translationEnginesClient.AddParallelCorpusAsync( + translationEngineId, + new TranslationParallelCorpusConfig { - // As all books are uploaded via the zip file, specify the source books to train on - trainingCorpusConfig.ScriptureRange = !string.IsNullOrWhiteSpace(buildConfig.TrainingScriptureRange) - ? buildConfig.TrainingScriptureRange - : string.Join(';', buildConfig.TrainingBooks.Select(Canon.BookNumberToId)); - - // Ensure that the alternate training corpus scripture range is null if it is blank, - // and that the textIds array is empty so no books are trained on. 
- if (string.IsNullOrWhiteSpace(trainingCorpusConfig.ScriptureRange)) - { - trainingCorpusConfig.ScriptureRange = null; - trainingCorpusConfig.TextIds = []; - } - } - - trainOn.Add(trainingCorpusConfig); - } + Name = name, + SourceCorpusIds = sourceCorpusIds, + TargetCorpusIds = targetCorpusIds, + }, + cancellationToken + ); + parallelCorpusId = parallelCorpus.Id; } - - var translationBuildConfig = new TranslationBuildConfig - { - Options = servalConfig, - Pretranslate = preTranslate, - TrainOn = trainOn, - }; - - // If we have an alternate training source, we need to add the additional files - // If not, Serval will use the additional files corpus automatically, so we do not need to do anything - if (buildConfig.TrainingDataFiles.Count > 0 && useAlternateTrainingCorpus) + else { - // Include the additional training data with the alternate training corpora - translationBuildConfig.TrainOn.AddRange( - servalData - .Corpora.Where(s => s.Value.PreTranslate && s.Value.AdditionalTrainingData) - .Select(c => new TrainingCorpusConfig { CorpusId = c.Key }) - .ToList() + // Update the specified parallel corpus + await translationEnginesClient.UpdateParallelCorpusAsync( + translationEngineId, + parallelCorpusId, + new TranslationParallelCorpusUpdateConfig + { + SourceCorpusIds = sourceCorpusIds, + TargetCorpusIds = targetCorpusIds, + }, + cancellationToken ); } - return translationBuildConfig; + return parallelCorpusId; } /// - /// Creates a project in Serval. + /// Creates the translation engine for a project in Serval, + /// and updates the project secret with the translation engine identifier. /// /// The Scripture Forge project - /// The project is for pre-translation. + /// If true use NMT; otherwise if false use SMT. /// The cancellation token. /// The translation engine id. /// The translation engine could not be created. - private async Task CreateServalProjectAsync( + /// This can be mocked in unit tests. 
+ protected internal virtual async Task CreateServalProjectAsync( SFProject sfProject, bool preTranslate, CancellationToken cancellationToken @@ -1355,15 +708,13 @@ CancellationToken cancellationToken { // Get the existing project secret, so we can see how to create the engine and update the Serval data SFProjectSecret projectSecret = await projectSecrets.GetAsync(sfProject.Id); - string translationEngineId = preTranslate - ? projectSecret.ServalData?.PreTranslationEngineId - : projectSecret.ServalData?.TranslationEngineId; + string translationEngineId = GetTranslationEngineId(projectSecret, preTranslate); if (string.IsNullOrWhiteSpace(translationEngineId)) { TranslationEngineConfig engineConfig = new TranslationEngineConfig { Name = sfProject.Id, - SourceLanguage = GetSourceLanguage(sfProject, useAlternateTrainingSource: false), + SourceLanguage = GetSourceLanguage(sfProject), TargetLanguage = await GetTargetLanguageAsync(sfProject), Type = await GetTranslationEngineTypeAsync(preTranslate), }; @@ -1386,7 +737,7 @@ CancellationToken cancellationToken // Store the Pre-Translation Engine ID await projectSecrets.UpdateAsync( sfProject.Id, - u => u.Set(p => p.ServalData.PreTranslationEngineId, translationEngine.Id) + u => u.Set(p => p.ServalData.PreTranslationEngineId, translationEngineId) ); } else if (projectSecret.ServalData is not null) @@ -1394,7 +745,7 @@ await projectSecrets.UpdateAsync( // Store the Translation Engine ID await projectSecrets.UpdateAsync( sfProject.Id, - u => u.Set(p => p.ServalData.TranslationEngineId, translationEngine.Id) + u => u.Set(p => p.ServalData.TranslationEngineId, translationEngineId) ); } else if (preTranslate) @@ -1402,7 +753,11 @@ await projectSecrets.UpdateAsync( // Store the Pre-Translation Engine ID await projectSecrets.UpdateAsync( sfProject.Id, - u => u.Set(p => p.ServalData, new ServalData { PreTranslationEngineId = translationEngine.Id }) + u => + u.Set( + p => p.ServalData, + new ServalData { PreTranslationEngineId = 
translationEngineId, CorpusFiles = [] } + ) ); } else @@ -1410,7 +765,11 @@ await projectSecrets.UpdateAsync( // Store the Translation Engine ID await projectSecrets.UpdateAsync( sfProject.Id, - u => u.Set(p => p.ServalData, new ServalData { TranslationEngineId = translationEngine.Id }) + u => + u.Set( + p => p.ServalData, + new ServalData { TranslationEngineId = translationEngineId, CorpusFiles = [] } + ) ); } } @@ -1418,393 +777,1280 @@ await projectSecrets.UpdateAsync( return translationEngineId; } - private async Task UploadFileAsync( - string textId, - string projectId, - string textFileData, - FileFormat fileFormat, - ICollection? oldCorpusFiles, - ICollection newCorpusFiles, - CancellationToken cancellationToken - ) - { - byte[] buffer = Encoding.UTF8.GetBytes(textFileData); - await using Stream stream = new MemoryStream(buffer, false); - return await UploadFileAsync( - textId, - projectId, - stream, - fileFormat, - oldCorpusFiles, - newCorpusFiles, - cancellationToken - ); - } - - private async Task UploadFileAsync( - string textId, - string projectId, - Stream stream, - FileFormat fileFormat, - ICollection? oldCorpusFiles, - ICollection newCorpusFiles, + /// + /// Creates a zip file from the contents of a directory. + /// + /// The Paratext identifier for the project. + /// The output stream. + /// The cancellation token. + /// An asynchronous task. + /// This can be mocked in unit tests. 
+ protected internal virtual async Task CreateZipFileFromParatextDirectoryAsync( + string paratextId, + Stream outputStream, CancellationToken cancellationToken ) { - // See if the corpus exists and update it if it is missing, or if the checksum has changed - bool uploadText = false; - - // Reset the stream to the start - stream.Seek(0, SeekOrigin.Begin); - - // Calculate the checksum from the stream - using MD5 md5 = MD5.Create(); - StringBuilder sb = new StringBuilder(); - foreach (var hashByte in await md5.ComputeHashAsync(stream, cancellationToken)) - { - sb.Append(hashByte.ToString("X2").ToLower()); - } + // Get the path to the Paratext directory + string path = Path.Combine(siteOptions.Value.SiteDir, "sync", paratextId, "target"); - // Upload the file if it is not there or has changed - string checksum = sb.ToString(); - ServalCorpusFile? previousCorpusFile = oldCorpusFiles?.FirstOrDefault(c => - c.TextId == textId && c.ProjectId == projectId - ); - if (previousCorpusFile is null || previousCorpusFile.FileChecksum != checksum) + // Ensure that the path exists + if (!fileSystemService.DirectoryExists(path)) { - uploadText = true; + throw new DataNotFoundException($"The directory could not be found for {paratextId}"); } - // No update, so do not upload - if (!uploadText) + using var archive = new ZipArchive(outputStream, ZipArchiveMode.Create, leaveOpen: true); + foreach (string filePath in fileSystemService.EnumerateFiles(path)) { - newCorpusFiles.Add(previousCorpusFile); - return false; + await using Stream fileStream = fileSystemService.OpenFile(filePath, FileMode.Open); + ZipArchiveEntry entry = archive.CreateEntry(Path.GetFileName(filePath)); + await using Stream entryStream = entry.Open(); + await fileStream.CopyToAsync(entryStream, cancellationToken); } + } - // Reset the stream to the start - stream.Seek(0, SeekOrigin.Begin); - - // Upload the file - DataFile dataFile; - if (previousCorpusFile is null) - { - dataFile = await dataFilesClient.CreateAsync( 
- new FileParameter(stream), - fileFormat, - textId, - cancellationToken - ); - } - else + /// + /// Deletes all corpora and files for the specified collection. + /// + /// The Serval Corpus Files. + /// The project identifier + /// The cancellation token. + /// This can be mocked in unit tests. + protected internal virtual async Task DeleteAllCorporaAndFilesAsync( + IEnumerable servalCorpusFiles, + string projectId, + CancellationToken cancellationToken + ) + { + foreach (ServalCorpusFile servalCorpusFile in servalCorpusFiles) { - // See if the file exists, and it is the same format - bool dataFileExists; try { - dataFile = await dataFilesClient.GetAsync(previousCorpusFile.FileId, cancellationToken); - dataFileExists = dataFile.Format == fileFormat; + await corporaClient.DeleteAsync(servalCorpusFile.CorpusId, cancellationToken); } catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound) { - logger.LogInformation($"File {previousCorpusFile.FileId} does not exist - creating."); - dataFileExists = false; + // If the file was already deleted, just log a message + string message = + $"Corpus {servalCorpusFile.CorpusId.Sanitize()} in project {projectId.Sanitize()}" + + " was missing or already deleted."; + logger.LogInformation(e, message); } - // Update the file if it exists, otherwise create it - dataFile = dataFileExists - ? await dataFilesClient.UpdateAsync( - previousCorpusFile.FileId, - new FileParameter(stream), - cancellationToken - ) - : await dataFilesClient.CreateAsync(new FileParameter(stream), fileFormat, textId, cancellationToken); - } - - newCorpusFiles.Add( - new ServalCorpusFile + try { - FileChecksum = checksum, - FileId = dataFile.Id, - ProjectId = projectId, - TextId = textId, + await dataFilesClient.DeleteAsync(servalCorpusFile.FileId, cancellationToken); } - ); - - return true; - } - - /// - /// Gets the target language for the project - /// - /// The project. - /// The target language. 
- /// - private async Task GetTargetLanguageAsync(SFProject project) - { - // Echo requires the target and source language to be the same, as it outputs your source texts - bool useEcho = await featureManager.IsEnabledAsync(FeatureFlags.UseEchoForPreTranslation); - return useEcho ? GetSourceLanguage(project, useAlternateTrainingSource: false) : project.WritingSystem.Tag; + catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound) + { + // If the file was already deleted, just log a message + string message = + $"File {servalCorpusFile.FileId.Sanitize()} in project {projectId.Sanitize()}" + + " was missing or already deleted."; + logger.LogInformation(e, message); + } + } } /// - /// Updates the corpus configuration in the project secrets. + /// Ensures that the translation engine exists, and that the Scripture Forge project is in a compatible state. /// - /// The project. - /// The translation engine identifier. - /// The corpus identifier. If null, a new corpus is created. - /// The project is for pre-translation. - /// If true, this is the additional training data corpus. - /// If true, use the alternate training source. - /// A Paratext zip file was used for the upload. - /// The files in the corpus have been updated. - /// The source corpus files. - /// The target corpus files. + /// The current user identifier. + /// The project document. + /// The project secret. + /// If true use NMT; otherwise if false use SMT. /// The cancellation token. - /// true if the corpus was updated; otherwise, false. - private async Task UpdateCorpusConfigAsync( - SFProject project, - string translationEngineId, - string? corpusId, + /// The translation engine identifier. + /// The project, user, or translation engine does not exist. + /// This can be mocked in unit tests. 
+ protected internal virtual async Task EnsureTranslationEngineExistsAsync( + string curUserId, + IDocument projectDoc, + SFProjectSecret projectSecret, bool preTranslate, - bool additionalTrainingData, - bool useAlternateTrainingSource, - bool uploadParatextZipFile, - bool corpusUpdated, - List sourceCorpusFiles, - List targetCorpusFiles, CancellationToken cancellationToken ) { - // Create or update the corpus - TranslationCorpus corpus; - TranslationCorpusConfig corpusConfig = new TranslationCorpusConfig - { - Name = project.Id, - SourceFiles = sourceCorpusFiles - .Select(f => new TranslationCorpusFileConfig { FileId = f.FileId, TextId = f.TextId }) - .ToList(), - SourceLanguage = GetSourceLanguage(project, useAlternateTrainingSource), - TargetFiles = targetCorpusFiles - .Select(f => new TranslationCorpusFileConfig { FileId = f.FileId, TextId = f.TextId }) - .ToList(), - TargetLanguage = await GetTargetLanguageAsync(project), - }; - - // See if we need to create or update the corpus - if (string.IsNullOrEmpty(corpusId)) + string translationEngineId = GetTranslationEngineId(projectSecret, preTranslate); + if (!await TranslationEngineExistsAsync(projectDoc.Id, translationEngineId, preTranslate, cancellationToken)) { - corpus = await translationEnginesClient.AddCorpusAsync( - translationEngineId, - corpusConfig, - cancellationToken - ); - } - else - { - // Get the corpus to see if the language has changed - bool createCorpus; - bool deleteCorpus; - try - { - corpus = await translationEnginesClient.GetCorpusAsync( - translationEngineId, - corpusId, - cancellationToken - ); - createCorpus = - corpus.SourceLanguage != corpusConfig.SourceLanguage - || corpus.TargetLanguage != corpusConfig.TargetLanguage; - deleteCorpus = createCorpus; - } - catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound) - { - // A 404 means that the translation engine does not exist - logger.LogInformation($"Corpus {corpusId} in Translation Engine {translationEngineId} 
does not exist."); - createCorpus = true; - deleteCorpus = false; - } + // We do not have one, likely because the translation is a back translation + // We can only get the language tags for back translations from the ScrText, + // which is not present until after the first sync (not from the Registry). - // The language has changed, or the corpus is missing - if (createCorpus) + // If the source or target writing system tag is missing, get them from the ScrText + // We do not need to do this for the alternate source as this would have been populated correctly + if ( + string.IsNullOrWhiteSpace(projectDoc.Data?.WritingSystem.Tag) + || string.IsNullOrWhiteSpace(projectDoc.Data?.TranslateConfig.Source?.WritingSystem.Tag) + ) { - // Delete the old corpus - if (deleteCorpus) + // Get the user secret + Attempt userSecretAttempt = await userSecrets.TryGetAsync(curUserId); + if (!userSecretAttempt.TryResult(out UserSecret userSecret)) + { + throw new DataNotFoundException("The user does not exist."); + } + + // This error can occur if the project is deleted while the build is running + if (projectDoc.Data is null) + { + throw new DataNotFoundException("The project does not exist."); + } + + // Update the target writing system tag + if (string.IsNullOrWhiteSpace(projectDoc.Data.WritingSystem.Tag)) + { + WritingSystem writingSystem = paratextService.GetWritingSystem( + userSecret, + projectDoc.Data.ParatextId + ); + if (!string.IsNullOrEmpty(writingSystem.Tag)) + { + await projectDoc.SubmitJson0OpAsync(op => + { + op.Set(p => p.WritingSystem.Region, writingSystem.Region); + op.Set(p => p.WritingSystem.Script, writingSystem.Script); + op.Set(p => p.WritingSystem.Tag, writingSystem.Tag); + }); + } + } + + // This error can occur if the project is deleted while the build is running + if (projectDoc.Data is null) + { + throw new DataNotFoundException("The project does not exist."); + } + + // This error can occur if the project source is cleared while the build is running + if 
(projectDoc.Data.TranslateConfig.Source is null) + { + throw new DataNotFoundException("The project source is not specified."); + } + + // Update the source writing system tag + if (string.IsNullOrWhiteSpace(projectDoc.Data.TranslateConfig.Source.WritingSystem.Tag)) + { + WritingSystem writingSystem = paratextService.GetWritingSystem( + userSecret, + projectDoc.Data.TranslateConfig.Source.ParatextId + ); + if (!string.IsNullOrWhiteSpace(writingSystem.Tag)) + { + await projectDoc.SubmitJson0OpAsync(op => + op.Set(p => p.TranslateConfig.Source.WritingSystem.Tag, writingSystem.Tag) + ); + } + } + } + + // Clear the existing translation engine id, based on whether this is pre-translation or not + await projectSecrets.UpdateAsync( + projectDoc.Id, + u => + { + if (preTranslate) + { + u.Unset(p => p.ServalData.PreTranslationEngineId); + } + else + { + u.Unset(p => p.ServalData.TranslationEngineId); + } + } + ); + + // Create the Serval project, and get the translation engine id + translationEngineId = await CreateServalProjectAsync(projectDoc.Data, preTranslate, cancellationToken); + } + + // Ensure a translation engine id is present + if (string.IsNullOrWhiteSpace(translationEngineId)) + { + throw new DataNotFoundException("Failed to create a translation engine."); + } + + return translationEngineId; + } + + /// + /// Gets the drafting source language for the project. + /// + /// The project. + /// The source language. + /// + /// The writing system tag was not specified for the source project. + /// + /// + /// The source was not specified for the project, or the project does not exist. + /// + /// This can be mocked in unit tests. + protected internal virtual string GetSourceLanguage(SFProject? 
project) + { + // This error can occur if the project is deleted while the build is running + if (project is null) + { + throw new DataNotFoundException("The project does not exist."); + } + + // This error can occur if the project source is cleared while the build is running + if (project.TranslateConfig.Source is null) + { + throw new DataNotFoundException("The project source is not specified."); + } + + string alternateSourceLanguage = project.TranslateConfig.DraftConfig.AlternateSource?.WritingSystem.Tag; + bool useAlternateSourceLanguage = + project.TranslateConfig.DraftConfig.AlternateSourceEnabled + && !string.IsNullOrWhiteSpace(alternateSourceLanguage); + return useAlternateSourceLanguage + ? alternateSourceLanguage + : project.TranslateConfig.Source?.WritingSystem.Tag ?? throw new ArgumentNullException(nameof(project)); + } + + /// + /// Gets the target language for the project + /// + /// The project. + /// The target language. + /// + /// The writing system tag was not specified for the source project. + /// + /// + /// The source was not specified for the project, or the project does not exist. + /// + /// + /// If Echo is enabled, the source language will be returned. + /// This can be mocked in unit tests. + /// + protected internal virtual async Task GetTargetLanguageAsync(SFProject project) + { + // Echo requires the target and source language to be the same, as it outputs your source texts + bool useEcho = await featureManager.IsEnabledAsync(FeatureFlags.UseEchoForPreTranslation); + return useEcho ? GetSourceLanguage(project) : project.WritingSystem.Tag!; + } + + /// + /// Gets the segments from the text with Unix/Linux line endings. + /// + /// The . + /// The text file data to be uploaded to Serval. + /// This can be mocked in unit tests. 
+ protected internal virtual string GetTextFileData(ISFText text) + { + var sb = new StringBuilder(); + + // For pre-translation, we must upload empty lines with segment refs for the correct references to be returned + foreach (SFTextSegment segment in text.Segments.Where(s => !s.IsEmpty)) + { + sb.Append(segment.SegmentRef); + sb.Append('\t'); + sb.Append(segment.SegmentText); + sb.Append('\t'); + if (segment.IsSentenceStart) + { + sb.Append("ss,"); + } + + if (segment.IsInRange) + { + sb.Append("ir,"); + } + + if (segment.IsRangeStart) + { + sb.Append("rs,"); + } + + // Strip the last comma, or the tab if there are no flags + sb.Length--; + + // Append the Unix EOL to ensure consistency as this text data is uploaded to Serval + sb.Append('\n'); + } + + return sb.ToString(); + } + + /// + /// Gets the TranslationBuildConfig for the specified ServalData object. + /// + /// The Serval data from . + /// + /// The Serval JSON configuration from . + /// + /// The build configuration from the user, specified on the front end. + /// The synchronization information for the corpora. + /// The TranslationBuildConfig for a Pre-Translate build. + /// + /// Do not use with SMT builds. + /// This can be mocked in unit tests. + /// + protected internal virtual TranslationBuildConfig GetTranslationBuildConfig( + ServalData servalData, + string? servalConfig, + BuildConfig buildConfig, + IList corporaSyncInfo + ) + { + // Load the Serval Config from the Draft Config + JObject? options = null; + if (!string.IsNullOrWhiteSpace(servalConfig)) + { + options = JObject.Parse(servalConfig); + } + + // If Fast Training is enabled, override the max_steps + if (buildConfig.FastTraining) + { + // Ensure that there is a servalConfig JSON object + options ??= []; + + // 20 is the number of steps used on Serval QA by default + options["max_steps"] = 20; + } + + // Get the scripture ranges + // These scripture ranges will be used if no per project configuration was used + string? 
trainOnScriptureRange = !string.IsNullOrWhiteSpace(buildConfig.TrainingScriptureRange) + ? buildConfig.TrainingScriptureRange + : string.Join(';', buildConfig.TrainingBooks.Select(Canon.BookNumberToId)); + if (string.IsNullOrWhiteSpace(trainOnScriptureRange)) + { + trainOnScriptureRange = null; + } + + string? preTranslateScriptureRange = !string.IsNullOrWhiteSpace(buildConfig.TranslationScriptureRange) + ? buildConfig.TranslationScriptureRange + : string.Join(';', buildConfig.TranslationBooks.Select(Canon.BookNumberToId)); + if (string.IsNullOrWhiteSpace(preTranslateScriptureRange)) + { + preTranslateScriptureRange = null; + } + + // Create the build configuration + var translationBuildConfig = new TranslationBuildConfig + { + Options = options, + Pretranslate = + [ + new PretranslateCorpusConfig + { + ParallelCorpusId = servalData.ParallelCorpusIdForPreTranslate, + SourceFilters = + [ + .. corporaSyncInfo + .Where(s => s.ParallelCorpusId == servalData.ParallelCorpusIdForPreTranslate && s.IsSource) + .Select(s => new ParallelCorpusFilterConfig + { + CorpusId = s.CorpusId, + ScriptureRange = + buildConfig + .TranslationScriptureRanges.FirstOrDefault(t => t.ProjectId == s.ProjectId) + ?.ScriptureRange ?? preTranslateScriptureRange, + }), + ], + }, + ], + TrainOn = + [ + new TrainingCorpusConfig + { + ParallelCorpusId = servalData.ParallelCorpusIdForTrainOn, + SourceFilters = + [ + .. corporaSyncInfo + .Where(s => s.ParallelCorpusId == servalData.ParallelCorpusIdForTrainOn && s.IsSource) + .Select(s => new ParallelCorpusFilterConfig + { + CorpusId = s.CorpusId, + ScriptureRange = + buildConfig + .TrainingScriptureRanges.FirstOrDefault(t => t.ProjectId == s.ProjectId) + ?.ScriptureRange ?? trainOnScriptureRange, + }), + ], + TargetFilters = + [ + .. 
corporaSyncInfo + .Where(s => s.ParallelCorpusId == servalData.ParallelCorpusIdForTrainOn && !s.IsSource) + .Select(s => new ParallelCorpusFilterConfig + { + CorpusId = s.CorpusId, + ScriptureRange = + buildConfig + .TrainingScriptureRanges.FirstOrDefault(t => t.ProjectId == s.ProjectId) + ?.ScriptureRange ?? trainOnScriptureRange, + }), + ], + }, + ], + }; + + // Add the additional training data + if ( + !string.IsNullOrWhiteSpace(servalData.AdditionalTrainingData?.ParallelCorpusId) + && buildConfig.TrainingDataFiles.Count > 0 + ) + { + translationBuildConfig.TrainOn.Add( + new TrainingCorpusConfig { ParallelCorpusId = servalData.AdditionalTrainingData.ParallelCorpusId } + ); + } + + return translationBuildConfig; + } + + /// + /// Recreates the translation engine if the source or target language has changed. + /// + /// The translation engine identifier. + /// The project. + /// If true use NMT; otherwise if false use SMT. + /// The cancellation token. + /// An asynchronous task. + /// This can be mocked in unit tests. 
+ protected internal virtual async Task RecreateTranslationEngineIfRequiredAsync( + string translationEngineId, + SFProject project, + bool preTranslate, + CancellationToken cancellationToken + ) + { + // Get the translation engine from Serval + try + { + TranslationEngine translationEngine = await translationEnginesClient.GetAsync( + translationEngineId, + cancellationToken + ); + bool recreateTranslationEngine = false; + + // See if the target language has changed + string projectTargetLanguage = await GetTargetLanguageAsync(project); + if (translationEngine.TargetLanguage != projectTargetLanguage) + { + string message = + $"Target language has changed from {translationEngine.TargetLanguage} to {projectTargetLanguage}."; + logger.LogInformation(message); + recreateTranslationEngine = true; + } + + // See if the source language has changed + string projectSourceLanguage = GetSourceLanguage(project); + if (translationEngine.SourceLanguage != projectSourceLanguage) + { + string message = + $"Source language has changed from {translationEngine.SourceLanguage} to {projectSourceLanguage}."; + logger.LogInformation(message); + recreateTranslationEngine = true; + } + + // Delete then recreate the translation engine if they have changed + if (recreateTranslationEngine) + { + // Removal can be a slow process + await RemoveProjectAsync(project.Id, preTranslate, cancellationToken); + await CreateServalProjectAsync(project, preTranslate, cancellationToken); + } + } + catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound) + { + // A 404 means that the translation engine does not exist + logger.LogInformation(e, $"Translation Engine {translationEngineId} does not exist."); + + // Clear the existing translation engine id and corpora + await projectSecrets.UpdateAsync( + project.Id, + u => + { + if (preTranslate) + { + u.Unset(p => p.ServalData.PreTranslationEngineId); + } + else + { + u.Unset(p => p.ServalData.TranslationEngineId); + } + } + ); + + // 
Create the new translation engine id + translationEngineId = await CreateServalProjectAsync(project, preTranslate, cancellationToken); + logger.LogInformation($"Created Translation Engine {translationEngineId}."); + } + } + + /// + /// Removes the legacy files and corpora from Serval. + /// + /// The Scripture Forge project identifier. + /// If true use NMT; otherwise if false use SMT. + /// The Cancellation token + /// An asynchronous task. + /// The project secret cannot be found. + /// This can be mocked in unit tests. + protected internal virtual async Task RemoveLegacyServalDataAsync( + string sfProjectId, + bool preTranslate, + CancellationToken cancellationToken + ) + { + // Load the target project secrets, so we can get the translation engine ID + if (!(await projectSecrets.TryGetAsync(sfProjectId)).TryResult(out SFProjectSecret projectSecret)) + { + throw new DataNotFoundException("The project secret cannot be found."); + } + + // Ensure we have a translation engine id + string translationEngineId = GetTranslationEngineId(projectSecret, preTranslate); + if (string.IsNullOrWhiteSpace(translationEngineId)) + { + logger.LogInformation($"No Translation Engine Id specified for project {sfProjectId.Sanitize()}"); + return; + } + + // Remove the corpora and files + string[] corpusIds = + projectSecret + .ServalData?.Corpora?.Where(c => c.Value.PreTranslate == preTranslate) + .Select(c => c.Key) + .ToArray() ?? 
[]; + foreach (string corpusId in corpusIds) + { + // Delete the corpus + try + { + await translationEnginesClient.DeleteCorpusAsync( + translationEngineId, + corpusId, + deleteFiles: true, + cancellationToken + ); + } + catch (ServalApiException e) + { + // A 404 means that the translation engine does not exist + string message; + if (e.StatusCode == StatusCodes.Status404NotFound) + { + message = + $"Translation Engine {translationEngineId.Sanitize()} for project {sfProjectId.Sanitize()}" + + " was missing or already deleted."; + logger.LogInformation(message); + } + else { - await translationEnginesClient.DeleteCorpusAsync(translationEngineId, corpusId, cancellationToken); + message = + $"Ignored exception while deleting translation engine {translationEngineId.Sanitize()}" + + " for project {sfProjectId.Sanitize()}."; + logger.LogError(e, message); } + } + + // Remove our record of the corpus + await projectSecrets.UpdateAsync(sfProjectId, u => u.Unset(p => p.ServalData.Corpora[corpusId])); + } + + // Remove the corpora property if it is empty + if (projectSecret.ServalData?.Corpora?.Any(c => c.Value.PreTranslate != preTranslate) == false) + { + await projectSecrets.UpdateAsync(sfProjectId, u => u.Unset(p => p.ServalData.Corpora)); + } + } + + /// + /// Synchronizes the additional training data for a pre-translation project. + /// + /// The current user identifier + /// The project. + /// The translation engine identifier. + /// The build configuration from the user. + /// The additional training data. + /// The cancellation token. + /// The updated . + /// + /// If there are no TrainingDataFiles specified in , then the additional training + /// data corpora will be removed from Serval. Otherwise, the corpora will be created or updated as required. + /// This can be mocked in unit tests. 
+ /// + protected internal virtual async Task SyncAdditionalTrainingData( + string curUserId, + SFProject project, + string translationEngineId, + BuildConfig buildConfig, + ServalAdditionalTrainingData? additionalTrainingData, + CancellationToken cancellationToken + ) + { + // If there are training data files + if (buildConfig.TrainingDataFiles.Count > 0) + { + // Get the training data texts + List sourceTexts = []; + List targetTexts = []; + await trainingDataService.GetTextsAsync( + curUserId, + project.Id, + buildConfig.TrainingDataFiles, + sourceTexts, + targetTexts + ); + + // Create the additional training data object if it is missing + additionalTrainingData ??= new ServalAdditionalTrainingData(); + + // Upload the target texts + List targetCorpusFiles = [.. additionalTrainingData.CorpusFiles]; + additionalTrainingData.TargetCorpusId = await UploadAdditionalTrainingDataAsync( + project.Id, + additionalTrainingData.TargetCorpusId, + languageCode: await GetTargetLanguageAsync(project), + targetCorpusFiles, + targetTexts, + cancellationToken + ); + + // Upload the source texts + List sourceCorpusFiles = [.. additionalTrainingData.CorpusFiles]; + additionalTrainingData.SourceCorpusId = await UploadAdditionalTrainingDataAsync( + project.Id, + additionalTrainingData.SourceCorpusId, + GetSourceLanguage(project), + sourceCorpusFiles, + sourceTexts, + cancellationToken + ); + + // Update the project corpora with the new files + additionalTrainingData.CorpusFiles = [.. targetCorpusFiles.Union(sourceCorpusFiles)]; + foreach (var corpus in additionalTrainingData.CorpusFiles.GroupBy(c => c.CorpusId)) + { + await corporaClient.UpdateAsync( + corpus.Key, + files: [.. 
corpus.Select(f => new CorpusFileConfig { FileId = f.FileId, TextId = f.TextId })], + cancellationToken + ); + } + + // Set up the parallel corpus for additional training data + List sourceCorpusIds = [additionalTrainingData.SourceCorpusId]; + List targetCorpusIds = [additionalTrainingData.TargetCorpusId]; + + // Create or update the additional training data parallel corpora + additionalTrainingData.ParallelCorpusId = await CreateOrUpdateParallelCorpusAsync( + translationEngineId, + additionalTrainingData.ParallelCorpusId, + name: "AdditionalTrainingData", + sourceCorpusIds, + targetCorpusIds, + cancellationToken + ); + } + else if (additionalTrainingData is not null) + { + // Remove the parallel corpora + if (!string.IsNullOrWhiteSpace(additionalTrainingData.ParallelCorpusId)) + { + await translationEnginesClient.DeleteParallelCorpusAsync( + translationEngineId, + additionalTrainingData.ParallelCorpusId, + cancellationToken + ); + } + + // Remove the corpora and files + await DeleteAllCorporaAndFilesAsync(additionalTrainingData.CorpusFiles, project.Id, cancellationToken); + + // Remove reference to the additional training data from the project secrets + additionalTrainingData = null; + } + + return additionalTrainingData; + } + + /// + /// Synchronizes the corpora and files with Serval. + /// + /// The current user identifier. + /// The build configuration from the user. + /// If true use NMT; otherwise if false use SMT. + /// The cancellation token. + /// + /// The for all source and target corpora that were synchronised, + /// excluding the additional data corpora. + /// + /// + /// The project, project source, or project secret could not be found. + /// + /// This can be mocked in unit tests. 
+ protected internal virtual async Task> SyncProjectCorporaAsync( + string curUserId, + BuildConfig buildConfig, + bool preTranslate, + CancellationToken cancellationToken + ) + { + // Load the project from the realtime service + Attempt attempt = await realtimeService.TryGetSnapshotAsync(buildConfig.ProjectId); + if (!attempt.TryResult(out SFProject project)) + { + throw new DataNotFoundException("The project does not exist."); + } + + // Ensure we have a source + if (project.TranslateConfig.Source is null) + { + throw new DataNotFoundException("The project source is not specified."); + } + + // Load the project secrets, so we can get the corpus files + if (!(await projectSecrets.TryGetAsync(project.Id)).TryResult(out SFProjectSecret projectSecret)) + { + throw new DataNotFoundException("The project secret cannot be found."); + } - // Recreate the corpus - corpus = await translationEnginesClient.AddCorpusAsync( - translationEngineId, - corpusConfig, - cancellationToken - ); + // Ensure we have serval data + if (projectSecret.ServalData is null) + { + throw new DataNotFoundException("The Serval data cannot be found."); + } + + // Return sync information so the translation build configuration can be generated + List corporaSyncInfo = []; + + // Ensure we have a translation engine ID + string translationEngineId = GetTranslationEngineId(projectSecret, preTranslate); + if (string.IsNullOrWhiteSpace(translationEngineId)) + { + throw new DataNotFoundException("The translation engine ID cannot be found."); + } + + // See if there is an alternate source to use for drafting + bool hasAlternateSource = + project.TranslateConfig.DraftConfig.AlternateSourceEnabled + && project.TranslateConfig.DraftConfig.AlternateSource is not null + && project.TranslateConfig.PreTranslate; + + // See if there is an alternate training source corpus + bool hasAlternateTrainingSource = + project.TranslateConfig.DraftConfig.AlternateTrainingSourceEnabled + && 
project.TranslateConfig.DraftConfig.AlternateTrainingSource is not null + && project.TranslateConfig.PreTranslate; + + // See if there is an additional training source + bool hasAdditionalTrainingSource = + project.TranslateConfig.DraftConfig.AdditionalTrainingSourceEnabled + && project.TranslateConfig.DraftConfig.AdditionalTrainingSource is not null + && project.TranslateConfig.PreTranslate; + + // Build the list of corpora and files to upload + List<(string projectId, string paratextId, string writingSystemTag)> projects = + [ + // Target Project + (project.Id, project.ParatextId, project.WritingSystem.Tag), + // Source Project + ( + project.TranslateConfig.Source.ProjectRef, + project.TranslateConfig.Source.ParatextId, + project.TranslateConfig.Source.WritingSystem.Tag + ), + ]; + if (hasAlternateSource) + { + projects.Add( + ( + project.TranslateConfig.DraftConfig.AlternateSource.ProjectRef, + project.TranslateConfig.DraftConfig.AlternateSource.ParatextId, + project.TranslateConfig.DraftConfig.AlternateSource.WritingSystem.Tag + ) + ); + } + + if (hasAlternateTrainingSource) + { + projects.Add( + ( + project.TranslateConfig.DraftConfig.AlternateTrainingSource.ProjectRef, + project.TranslateConfig.DraftConfig.AlternateTrainingSource.ParatextId, + project.TranslateConfig.DraftConfig.AlternateTrainingSource.WritingSystem.Tag + ) + ); + } + + if (hasAdditionalTrainingSource) + { + projects.Add( + ( + project.TranslateConfig.DraftConfig.AdditionalTrainingSource.ProjectRef, + project.TranslateConfig.DraftConfig.AdditionalTrainingSource.ParatextId, + project.TranslateConfig.DraftConfig.AdditionalTrainingSource.WritingSystem.Tag + ) + ); + } + + // Create and upload the Serval Corpus Files + List servalCorpusFiles = []; + foreach ((string projectId, string paratextId, string languageCode) in projects) + { + if (servalCorpusFiles.Any(f => f.ProjectId == projectId)) + { + // Do not allow duplicate corpora for the same project + continue; } - else if (corpusUpdated) + + 
ServalCorpusFile servalCorpusFile = projectSecret.ServalData.CorpusFiles.SingleOrDefault(f => + f.ProjectId == projectId + ); + if (servalCorpusFile is null || servalCorpusFile.LanguageCode != languageCode) { - // Update the corpus - TranslationCorpusUpdateConfig corpusUpdateConfig = new TranslationCorpusUpdateConfig - { - SourceFiles = corpusConfig.SourceFiles, - TargetFiles = corpusConfig.TargetFiles, - }; - corpus = await translationEnginesClient.UpdateCorpusAsync( - translationEngineId, - corpusId, - corpusUpdateConfig, + // Create the corpus if it does not exist or the language code has changed + Corpus corpus = await corporaClient.CreateAsync( + new CorpusConfig { Name = $"{project.Id}_{projectId}", Language = languageCode }, cancellationToken ); + servalCorpusFile = new ServalCorpusFile + { + CorpusId = corpus.Id, + LanguageCode = languageCode, + ProjectId = projectId, + TextId = project.Id, + }; } - else + + // Upload the file + await UploadParatextFileAsync(servalCorpusFile, paratextId, cancellationToken); + servalCorpusFiles.Add(servalCorpusFile); + } + + // Update the project corpora with the files + foreach (ServalCorpusFile servalCorpusFile in servalCorpusFiles) + { + await corporaClient.UpdateAsync( + servalCorpusFile.CorpusId, + files: [new CorpusFileConfig { FileId = servalCorpusFile.FileId, TextId = servalCorpusFile.TextId }], + cancellationToken + ); + } + + // Get the source project for the NMT/SMT translation corpus + string sourceProjectId = + hasAlternateSource && preTranslate + ? project.TranslateConfig.DraftConfig.AlternateSource.ProjectRef + : project.TranslateConfig.Source.ProjectRef; + + // Set up the parallel corpus for NMT/SMT translation + List sourceCorpora = [servalCorpusFiles.Single(f => f.ProjectId == sourceProjectId)]; + List targetCorpora = [servalCorpusFiles.Single(f => f.ProjectId == project.Id)]; + List sourceCorpusIds = [.. sourceCorpora.Select(f => f.CorpusId)]; + List targetCorpusIds = [.. 
targetCorpora.Select(f => f.CorpusId)]; + + // Get the NMT/SMT translation parallel corpus id (might be null) + string translationParallelCorpusId = preTranslate + ? projectSecret.ServalData.ParallelCorpusIdForPreTranslate + : projectSecret.ServalData.ParallelCorpusIdForSmt; + + // Create or update the NMT/SMT translation parallel corpora + translationParallelCorpusId = await CreateOrUpdateParallelCorpusAsync( + translationEngineId, + translationParallelCorpusId, + name: preTranslate ? "PreTranslation" : "SmtTranslation", + sourceCorpusIds, + targetCorpusIds, + cancellationToken + ); + + // Record the corpus sync info for the pre-translate corpora + corporaSyncInfo = RecordServalCorpusSyncInfo( + corporaSyncInfo, + sourceCorpora, + targetCorpora, + translationParallelCorpusId + ); + + // If we are NMT pre-translating, add the training parallel corpus + string trainOnParallelCorpusId = null; + ServalAdditionalTrainingData? additionalTrainingData = projectSecret.ServalData.AdditionalTrainingData; + if (preTranslate) + { + // Build the source corpus ids for training + sourceProjectId = hasAlternateTrainingSource + ? project.TranslateConfig.DraftConfig.AlternateTrainingSource.ProjectRef + : project.TranslateConfig.Source.ProjectRef; + + sourceCorpora = [servalCorpusFiles.Single(f => f.ProjectId == sourceProjectId)]; + + // Add the additional training source, if present and we are pre-translating + if (hasAdditionalTrainingSource) { - // The corpus was not updated - return false; + string additionalTrainingSourceProjectId = project + .TranslateConfig + .DraftConfig + .AdditionalTrainingSource + .ProjectRef; + sourceCorpora.Add(servalCorpusFiles.Single(f => f.ProjectId == additionalTrainingSourceProjectId)); } + + sourceCorpusIds = [.. sourceCorpora.Select(f => f.CorpusId)]; + + // Build the target corpus ids for training + targetCorpora = [servalCorpusFiles.Single(f => f.ProjectId == project.Id)]; + targetCorpusIds = [.. 
targetCorpora.Select(f => f.CorpusId)]; + + // Get the train on parallel corpus id (might be null) + trainOnParallelCorpusId = projectSecret.ServalData.ParallelCorpusIdForTrainOn; + + // Create or update the train on parallel corpora + trainOnParallelCorpusId = await CreateOrUpdateParallelCorpusAsync( + translationEngineId, + trainOnParallelCorpusId, + name: "TrainOn", + sourceCorpusIds, + targetCorpusIds, + cancellationToken + ); + + // Record the corpus sync info for the train on corpora + corporaSyncInfo = RecordServalCorpusSyncInfo( + corporaSyncInfo, + sourceCorpora, + targetCorpora, + trainOnParallelCorpusId + ); + + // Sync the additional training data + // NOTE: We do not record the corpus sync info for the additional training data + // You can get that information from ServalData.AdditionalTrainingData + additionalTrainingData = await SyncAdditionalTrainingData( + curUserId, + project, + translationEngineId, + buildConfig, + additionalTrainingData, + cancellationToken + ); } - // Update the project secret with the new corpus information - await projectSecrets.UpdateAsync( + // Delete any project corpora and files that are no longer used + await DeleteAllCorporaAndFilesAsync( + projectSecret.ServalData.CorpusFiles.Except(servalCorpusFiles), project.Id, + cancellationToken + ); + + // Update the project secret + await projectSecrets.UpdateAsync( + projectSecret, u => - u.Set( - p => p.ServalData.Corpora[corpus.Id], - new ServalCorpus - { - SourceFiles = sourceCorpusFiles, - TargetFiles = targetCorpusFiles, - PreTranslate = preTranslate, - AdditionalTrainingData = additionalTrainingData, - AlternateTrainingSource = useAlternateTrainingSource, - UploadParatextZipFile = uploadParatextZipFile, - } - ) + { + u.Set(p => p.ServalData.CorpusFiles, servalCorpusFiles); + if (preTranslate) + { + u.Set(p => p.ServalData.ParallelCorpusIdForPreTranslate, translationParallelCorpusId); + u.Set(p => p.ServalData.ParallelCorpusIdForTrainOn, trainOnParallelCorpusId); + u.Set(p 
=> p.ServalData.AdditionalTrainingData, additionalTrainingData); + } + else + { + u.Set(p => p.ServalData.ParallelCorpusIdForSmt, translationParallelCorpusId); + } + } ); - return true; + return corporaSyncInfo; } /// - /// Syncs a collection of to Serval, creating files on Serval as necessary. + /// Determines whether a translation engine exists for the specified project. /// - /// The target project identifier. - /// The source project identifier (this may be a training source). - /// The Paratext identifier. - /// - /// true if we are uploading a Paratext zip file; otherwise false. - /// - /// The texts created by . - /// The existing corpus files (optional). - /// The updated list of corpus files. - /// - /// true if the corpus was created or updated; otherwise, false. - /// - /// The project secret is updated with the corpus file details added to or removed from Serval. - /// - private async Task UploadNewCorpusFilesAsync( - string targetProjectId, - string sourceProjectId, - string paratextId, - bool uploadParatextZipFile, - IEnumerable texts, - ICollection? oldCorpusFiles, - ICollection newCorpusFiles, + /// The Scripture Forge project identifier. + /// The Serval translation engine identifier. + /// If true use NMT; otherwise if false use SMT. + /// The cancellation token. + /// true if the translation engine exists; otherwise false. + /// This can be mocked in unit tests. + protected internal virtual async Task TranslationEngineExistsAsync( + string projectId, + string? 
translationEngineId, + bool preTranslate, CancellationToken cancellationToken ) { - // Used to return whether the corpus files were created or updated - bool corpusUpdated = false; + if (string.IsNullOrWhiteSpace(translationEngineId)) + { + return false; + } + + try + { + TranslationEngine translationEngine = await translationEnginesClient.GetAsync( + translationEngineId, + cancellationToken + ); + string type = await GetTranslationEngineTypeAsync(preTranslate); - // Upload the Paratext zip file, if we are supposed to - if (uploadParatextZipFile) + // We check for the type, taking account of Pascal Case (Serval 1.1) and Kebab Case (Serval 1.2) + return translationEngine.Name == projectId + && string.Equals( + translationEngine.Type.Replace("-", string.Empty, StringComparison.OrdinalIgnoreCase), + type.Replace("-", string.Empty, StringComparison.OrdinalIgnoreCase), + StringComparison.InvariantCultureIgnoreCase + ); + } + catch (ServalApiException e) + when (e.StatusCode is StatusCodes.Status403Forbidden or StatusCodes.Status404NotFound) { - // Get the path to the Paratext directory - string path = Path.Combine(siteOptions.Value.SiteDir, "sync", paratextId, "target"); + return false; + } + } - // Ensure that the path exists - if (!fileSystemService.DirectoryExists(path)) - { - throw new DirectoryNotFoundException($"The directory could not be found for {paratextId}"); - } + /// + /// Uploads the additional training data for a project. + /// + /// The project identifier. + /// The corpus identifier. + /// The language for the corpus. + /// The existing corpus files. These will be replaced with the new corpus files. + /// The texts to upload. + /// The cancellation token. + /// The updated corpus identifier. + /// This can be mocked in unit tests. + protected internal virtual async Task UploadAdditionalTrainingDataAsync( + string projectId, + string? 
corpusId, + string languageCode, + List corpusFiles, + List texts, + CancellationToken cancellationToken + ) + { + // Make a local copy of the previous corpus files + List previousCorpusFiles = [.. corpusFiles]; + corpusFiles.Clear(); - // Create the zip file from the directory in memory - await using var memoryStream = new MemoryStream(); - using (var archive = new ZipArchive(memoryStream, ZipArchiveMode.Create, true)) - { - // Do not convert the ZipArchive using statement above into a using declaration, - // otherwise the ZipArchive disposal will crash after the MemoryStream disposal. - foreach (string filePath in fileSystemService.EnumerateFiles(path)) - { - await using Stream fileStream = fileSystemService.OpenFile(filePath, FileMode.Open); - ZipArchiveEntry entry = archive.CreateEntry(Path.GetFileName(filePath)); - await using Stream entryStream = entry.Open(); - await fileStream.CopyToAsync(entryStream, cancellationToken); - } - } + // Delete the old corpus if the language has changed + string corpusLanguageCode = previousCorpusFiles.FirstOrDefault(f => f.CorpusId == corpusId)?.LanguageCode; + if ( + !string.IsNullOrWhiteSpace(corpusLanguageCode) + && languageCode != corpusLanguageCode + && !string.IsNullOrWhiteSpace(corpusId) + ) + { + await corporaClient.DeleteAsync(corpusId, cancellationToken); + corpusId = null; + } - // Upload the zip file - corpusUpdated = await UploadFileAsync( - textId: targetProjectId, - projectId: sourceProjectId, - memoryStream, - FileFormat.Paratext, - oldCorpusFiles, - newCorpusFiles, + // If there is no corpus, create it + if (string.IsNullOrWhiteSpace(corpusId)) + { + Corpus corpus = await corporaClient.CreateAsync( + new CorpusConfig + { + Name = $"{projectId}_additionalTrainingData_{languageCode}", + Language = languageCode, + }, cancellationToken ); + corpusId = corpus.Id; } - else + + foreach (ISFText text in texts) { - // Sync each text - foreach (ISFText text in texts) - { - string textFileData = GetTextFileData(text); - 
if (!string.IsNullOrWhiteSpace(textFileData)) + // The text ids are in the format projectId_dataId + string textId = text.Id.Split('_').Last(); + + // Get the existing Serval Corpus File, or create a new one + ServalCorpusFile servalCorpusFile = + previousCorpusFiles.SingleOrDefault(f => f.TextId == textId && f.CorpusId == corpusId) + ?? new ServalCorpusFile { - // Remove the target project id from the start of the text id (if present) - string textId = text.Id.StartsWith($"{targetProjectId}_") - ? text.Id[(targetProjectId.Length + 1)..] - : text.Id; - - // Remove the source project id from the start of the text id (if present) - textId = textId.StartsWith($"{sourceProjectId}_") ? textId[(sourceProjectId.Length + 1)..] : textId; - - // Upload the text file - corpusUpdated |= await UploadFileAsync( - textId, - sourceProjectId, - textFileData, - FileFormat.Text, - oldCorpusFiles, - newCorpusFiles, - cancellationToken - ); - } + CorpusId = corpusId, + LanguageCode = languageCode, + ProjectId = projectId, + TextId = textId, + }; + + // Upload the text + if (await UploadTextFileAsync(servalCorpusFile, text, cancellationToken)) + { + corpusFiles.Add(servalCorpusFile); } } - // Delete corpus files for removed texts - if (oldCorpusFiles is not null) + return corpusId; + } + + /// + /// Uploads a file to Serval. + /// + /// The Serval corpus file + /// The stream of file data. + /// The Serval file format. + /// The cancellation token. + /// An asynchronous task. + /// This can be mocked in unit tests. 
+ protected internal virtual async Task UploadFileAsync( + ServalCorpusFile servalCorpusFile, + Stream stream, + FileFormat fileFormat, + CancellationToken cancellationToken + ) + { + // Reset the stream to the start + stream.Seek(0, SeekOrigin.Begin); + + // Calculate the checksum from the stream + using MD5 md5 = MD5.Create(); + StringBuilder sb = new StringBuilder(); + foreach (var hashByte in await md5.ComputeHashAsync(stream, cancellationToken)) + { + sb.Append(hashByte.ToString("X2").ToLower()); + } + + // See if the file has changed + string checksum = sb.ToString(); + if (servalCorpusFile.FileChecksum == checksum) + { + // No update, so do not upload + return; + } + + // Reset the stream to the start + stream.Seek(0, SeekOrigin.Begin); + + // See if the file exists, and it is the same format + bool dataFileExists = false; + if (!string.IsNullOrWhiteSpace(servalCorpusFile.FileId)) { - foreach (var corpusFile in oldCorpusFiles.Where(c => newCorpusFiles.All(n => n.FileId != c.FileId))) + try { - try - { - await dataFilesClient.DeleteAsync(corpusFile.FileId, cancellationToken); - } - catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound) + DataFile existingDataFile = await dataFilesClient.GetAsync(servalCorpusFile.FileId, cancellationToken); + dataFileExists = existingDataFile.Format == fileFormat; + + // Delete the file if we are changing the format + if (!dataFileExists) { - // If the file was already deleted, just log a message - string message = - $"Corpora file {corpusFile.FileId} for text {corpusFile.TextId} in project {targetProjectId}" - + " was missing or already deleted."; - logger.LogInformation(e, message); + logger.LogInformation($"File {servalCorpusFile.FileId} has the wrong format - deleting."); + await dataFilesClient.DeleteAsync(servalCorpusFile.FileId, cancellationToken); } - - corpusUpdated = true; } + catch (ServalApiException e) when (e.StatusCode == StatusCodes.Status404NotFound) + { + logger.LogInformation(e, 
$"File {servalCorpusFile.FileId} does not exist - creating."); + } + } + + // Update the file if it exists, otherwise create it + DataFile dataFile = dataFileExists + ? await dataFilesClient.UpdateAsync(servalCorpusFile.FileId, new FileParameter(stream), cancellationToken) + : await dataFilesClient.CreateAsync( + new FileParameter(stream), + fileFormat, + servalCorpusFile.TextId, + cancellationToken + ); + + // Update the Serval Corpus File + servalCorpusFile.FileChecksum = checksum; + servalCorpusFile.FileId = dataFile.Id; + } + + /// + /// Uploads a Paratext zip file to Serval. + /// + /// The Serval corpus file + /// The Paratext identifier. + /// The cancellation token. + /// An asynchronous task. + /// This can be mocked in unit tests. + protected internal virtual async Task UploadParatextFileAsync( + ServalCorpusFile servalCorpusFile, + string paratextId, + CancellationToken cancellationToken + ) + { + // Create the zip file from the directory in memory + await using var stream = new MemoryStream(); + await CreateZipFileFromParatextDirectoryAsync(paratextId, stream, cancellationToken); + + // Upload the zip file + await UploadFileAsync(servalCorpusFile, stream, FileFormat.Paratext, cancellationToken); + } + + /// + /// Uploads a text file to Serval. + /// + /// The Serval corpus file + /// The text. + /// The cancellation token. + /// true if the file was uploaded; otherwise, false. + /// This can be mocked in unit tests. 
+ protected internal virtual async Task UploadTextFileAsync( + ServalCorpusFile servalCorpusFile, + ISFText text, + CancellationToken cancellationToken + ) + { + string textFileData = GetTextFileData(text); + + // Ensure that there is file data + if (string.IsNullOrWhiteSpace(textFileData)) + { + return false; } - return corpusUpdated; + // Upload the text file + byte[] buffer = Encoding.UTF8.GetBytes(textFileData); + await using Stream stream = new MemoryStream(buffer, false); + await UploadFileAsync(servalCorpusFile, stream, FileFormat.Text, cancellationToken); + return true; + } + + /// + /// Gets the translation engine identifier from the project secret, + /// depending on whether we are pre-translating or not. + /// + /// The project secret. + /// If true, we are pre-translating. + /// The translation engine identifier. + private static string? GetTranslationEngineId(SFProjectSecret projectSecret, bool preTranslate) => + preTranslate ? projectSecret.ServalData?.PreTranslationEngineId : projectSecret.ServalData?.TranslationEngineId; + + /// + /// Records the Corpus Synchronization information. + /// + /// The List of corpus synchronization information. + /// The list of source corpora + /// The list of target corpora. + /// The parallel corpus identifier. + /// + /// Used by . 
+ private static List RecordServalCorpusSyncInfo( + List corpusSyncInfo, + IList sourceCorpora, + IList targetCorpora, + string parallelCorpusId + ) + { + corpusSyncInfo.AddRange( + sourceCorpora.Select(f => new ServalCorpusSyncInfo + { + CorpusId = f.CorpusId, + ParallelCorpusId = parallelCorpusId, + IsSource = true, + ProjectId = f.ProjectId, + }) + ); + corpusSyncInfo.AddRange( + targetCorpora.Select(f => new ServalCorpusSyncInfo + { + CorpusId = f.CorpusId, + ParallelCorpusId = parallelCorpusId, + IsSource = false, + ProjectId = f.ProjectId, + }) + ); + return corpusSyncInfo; } } diff --git a/src/SIL.XForge.Scripture/Services/MachineServiceCollectionExtensions.cs b/src/SIL.XForge.Scripture/Services/MachineServiceCollectionExtensions.cs index 2f20a41c06..08242daada 100644 --- a/src/SIL.XForge.Scripture/Services/MachineServiceCollectionExtensions.cs +++ b/src/SIL.XForge.Scripture/Services/MachineServiceCollectionExtensions.cs @@ -1,4 +1,5 @@ using System; +using System.Diagnostics.CodeAnalysis; using System.Net; using System.Net.Http; using IdentityModel.Client; @@ -13,6 +14,7 @@ namespace Microsoft.Extensions.DependencyInjection; +[ExcludeFromCodeCoverage(Justification = "This logic will only work in a valid ASP.NET Core Context")] public static class MachineServiceCollectionExtensions { public static IServiceCollection AddSFMachine( @@ -21,24 +23,25 @@ public static IServiceCollection AddSFMachine( IWebHostEnvironment env ) { - // Setup the Machine API + // Set up the Machine API var servalOptions = configuration.GetOptions(); - services.AddAccessTokenManagement(options => - { - options.Client.Clients.Add( - MachineApi.HttpClientName, - new ClientCredentialsTokenRequest + services.AddDistributedMemoryCache(); + services + .AddClientCredentialsTokenManagement() + .AddClient( + MachineApi.TokenClientName, + client => { - Address = servalOptions.TokenUrl, - ClientId = servalOptions.ClientId, - ClientSecret = servalOptions.ClientSecret, - Parameters = new 
Parameters { { "audience", servalOptions.Audience } }, + client.TokenEndpoint = servalOptions.TokenUrl; + client.ClientId = servalOptions.ClientId; + client.ClientSecret = servalOptions.ClientSecret; + client.Parameters = new Parameters { { "audience", servalOptions.Audience } }; } ); - }); services - .AddClientAccessTokenHttpClient( + .AddClientCredentialsHttpClient( MachineApi.HttpClientName, + MachineApi.TokenClientName, configureClient: client => client.BaseAddress = new Uri(servalOptions.ApiServer) ) .ConfigurePrimaryHttpMessageHandler(() => @@ -78,6 +81,13 @@ IWebHostEnvironment env var httpClient = factory.CreateClient(MachineApi.HttpClientName); return new DataFilesClient(httpClient); }); + services.AddSingleton(sp => + { + // Instantiate the corpora client with our named HTTP client + var factory = sp.GetService(); + var httpClient = factory.CreateClient(MachineApi.HttpClientName); + return new CorporaClient(httpClient); + }); services.AddSingleton(); services.AddSingleton(); services.AddSingleton(); diff --git a/src/SIL.XForge.Scripture/Services/PreTranslationService.cs b/src/SIL.XForge.Scripture/Services/PreTranslationService.cs index a3f7a53165..eb9c67adec 100644 --- a/src/SIL.XForge.Scripture/Services/PreTranslationService.cs +++ b/src/SIL.XForge.Scripture/Services/PreTranslationService.cs @@ -34,24 +34,11 @@ CancellationToken cancellationToken { List preTranslations = []; - // Load the target project secrets, so we can get the translation engine ID and corpus ID - if (!(await projectSecrets.TryGetAsync(sfProjectId)).TryResult(out SFProjectSecret projectSecret)) - { - throw new DataNotFoundException("The project secret cannot be found."); - } - // Ensure we have the parameters to retrieve the pre-translation - string translationEngineId = projectSecret.ServalData?.PreTranslationEngineId; - string corpusId = projectSecret - .ServalData?.Corpora.FirstOrDefault(c => c.Value.PreTranslate && !c.Value.AlternateTrainingSource) - .Key; - if 
(string.IsNullOrWhiteSpace(translationEngineId) || string.IsNullOrWhiteSpace(corpusId)) - { - throw new DataNotFoundException("The pre-translation engine is not configured."); - } + (string? translationEngineId, string corpusId, bool useParatextVerseRef) = + await GetPreTranslationParametersAsync(sfProjectId); // Get the pre-translation data from Serval - bool useParatextVerseRef = projectSecret.ServalData.Corpora[corpusId].UploadParatextZipFile; string textId = useParatextVerseRef ? GetTextId(bookNum) : GetTextId(bookNum, chapterNum); foreach ( Pretranslation preTranslation in await translationEnginesClient.GetAllPretranslationsAsync( @@ -195,21 +182,8 @@ public async Task GetPreTranslationUsfmAsync( CancellationToken cancellationToken ) { - // Load the project secrets, so we can get the translation engine ID and corpus ID - if (!(await projectSecrets.TryGetAsync(sfProjectId)).TryResult(out SFProjectSecret projectSecret)) - { - throw new DataNotFoundException("The project secret cannot be found."); - } - // Ensure we have the parameters to retrieve the pre-translation - string translationEngineId = projectSecret.ServalData?.PreTranslationEngineId; - string? corpusId = projectSecret - .ServalData?.Corpora.FirstOrDefault(c => c.Value.PreTranslate && !c.Value.AlternateTrainingSource) - .Key; - if (string.IsNullOrWhiteSpace(translationEngineId) || string.IsNullOrWhiteSpace(corpusId)) - { - throw new DataNotFoundException("The pre-translation engine is not configured."); - } + (string? 
translationEngineId, string corpusId, bool _) = await GetPreTranslationParametersAsync(sfProjectId); // Get the USFM string usfm = await translationEnginesClient.GetPretranslatedUsfmAsync( @@ -243,12 +217,6 @@ CancellationToken cancellationToken public async Task UpdatePreTranslationStatusAsync(string sfProjectId, CancellationToken cancellationToken) { - // Load the target project secrets, so we can get the translation engine ID and corpus ID - if (!(await projectSecrets.TryGetAsync(sfProjectId)).TryResult(out SFProjectSecret projectSecret)) - { - throw new DataNotFoundException("The project secret cannot be found."); - } - // Load the project from the realtime service await using IConnection conn = await realtimeService.ConnectAsync(); IDocument projectDoc = await conn.FetchAsync(sfProjectId); @@ -258,18 +226,11 @@ public async Task UpdatePreTranslationStatusAsync(string sfProjectId, Cancellati } // Ensure we have the parameters to retrieve the pre-translation - string translationEngineId = projectSecret.ServalData?.PreTranslationEngineId; - string corpusId = projectSecret - .ServalData?.Corpora.FirstOrDefault(c => c.Value.PreTranslate && !c.Value.AlternateTrainingSource) - .Key; - if (string.IsNullOrWhiteSpace(translationEngineId) || string.IsNullOrWhiteSpace(corpusId)) - { - throw new DataNotFoundException("The pre-translation engine is not configured."); - } + (string? translationEngineId, string corpusId, bool useParatextVerseRef) = + await GetPreTranslationParametersAsync(sfProjectId); // Get all the pre-translations and update the chapters Dictionary> bookChapters = []; - bool useParatextVerseRef = projectSecret.ServalData.Corpora[corpusId].UploadParatextZipFile; foreach ( Pretranslation preTranslation in await translationEnginesClient.GetAllPretranslationsAsync( translationEngineId, @@ -351,4 +312,53 @@ await projectDoc.SubmitJson0OpAsync(op => } }); } + + /// + /// Gets the required parameters from the project secret to retrieve the pre-translations. 
+ /// + /// The Scripture Forge project identifier. + /// + /// The translation engine identifier, the corpus identifier, and whether to use Paratext verse references. + /// + /// This can be mocked in unit tests. + /// The pre-translation engine is not configured, or the project secret cannot be found. + protected internal virtual async Task<( + string translationEngineId, + string corpusId, + bool useParatextVerseRef + )> GetPreTranslationParametersAsync(string sfProjectId) + { + // Load the target project secrets, so we can get the translation engine ID and corpus ID + if (!(await projectSecrets.TryGetAsync(sfProjectId)).TryResult(out SFProjectSecret projectSecret)) + { + throw new DataNotFoundException("The project secret cannot be found."); + } + + string translationEngineId = projectSecret.ServalData?.PreTranslationEngineId; + string corpusId; + bool useParatextVerseRef = false; + if (!string.IsNullOrWhiteSpace(projectSecret.ServalData?.ParallelCorpusIdForPreTranslate)) + { + corpusId = projectSecret.ServalData.ParallelCorpusIdForPreTranslate; + useParatextVerseRef = true; + } + else + { + // Legacy Serval Project + corpusId = projectSecret + .ServalData?.Corpora?.FirstOrDefault(c => c.Value.PreTranslate && !c.Value.AlternateTrainingSource) + .Key; + if (!string.IsNullOrWhiteSpace(corpusId)) + { + useParatextVerseRef = projectSecret.ServalData.Corpora[corpusId].UploadParatextZipFile; + } + } + + if (string.IsNullOrWhiteSpace(translationEngineId) || string.IsNullOrWhiteSpace(corpusId)) + { + throw new DataNotFoundException("The pre-translation engine is not configured."); + } + + return (translationEngineId, corpusId, useParatextVerseRef); + } } diff --git a/src/SIL.XForge.Scripture/Services/SFBiblicalTermsText.cs b/src/SIL.XForge.Scripture/Services/SFBiblicalTermsText.cs deleted file mode 100644 index be5daec431..0000000000 --- a/src/SIL.XForge.Scripture/Services/SFBiblicalTermsText.cs +++ /dev/null @@ -1,108 +0,0 @@ -using System; -using 
System.Collections.Generic; -using System.Linq; -using System.Text.RegularExpressions; -using System.Xml.Linq; -using SIL.XForge.Scripture.Models; - -namespace SIL.XForge.Scripture.Services; - -public class SFBiblicalTermsText : ISFText -{ - private static readonly Regex BracketedTextRegex = new Regex(@"\([^)]*\)", RegexOptions.Compiled); - private static readonly Regex WhitespaceRegex = new Regex(@"\s+", RegexOptions.Compiled); - - public SFBiblicalTermsText(string projectId, IList biblicalTerms) - { - Id = $"{projectId}_biblical_terms"; - Segments = GetSegments(biblicalTerms).OrderBy(s => s.SegmentRef).ToArray(); - } - - public SFBiblicalTermsText(string projectId, XDocument termRenderingsDoc) - { - Id = $"{projectId}_biblical_terms"; - Segments = GetSegments(termRenderingsDoc).OrderBy(s => s.SegmentRef).ToArray(); - } - - public string Id { get; } - - public IEnumerable Segments { get; } - - /// - /// Removes Paratext specific codes from the Biblical Term Rendering. - /// - /// The BT rendering. - /// The cleaned rendering. - /// - /// This method removes text in brackets, asterisks, forward slashes, and normalizes the whitespace. - /// See the Guide in the Edit Biblical Term Rendering dialog in Paratext for details on these codes. 
- /// - private static string RemoveParatextSyntaxFromRendering(string rendering) - { - rendering = rendering.Replace("*", string.Empty); - rendering = BracketedTextRegex.Replace(rendering, string.Empty); - rendering = rendering.Replace("/", " "); - rendering = WhitespaceRegex.Replace(rendering, " "); - return rendering.Trim(); - } - - private static IEnumerable GetSegments(IList biblicalTerms) - { - if (!biblicalTerms.Any()) - { - yield break; - } - - foreach (BiblicalTerm biblicalTerm in biblicalTerms.OrderBy(t => t.TermId)) - { - foreach (string rendering in biblicalTerm.Renderings.Select(RemoveParatextSyntaxFromRendering)) - { - // Do not add blank renderings - if (string.IsNullOrWhiteSpace(rendering)) - { - continue; - } - - // Sentence placement is not essential for biblical terms. Set all to false - yield return new SFTextSegment([biblicalTerm.TermId], rendering, false, false, false); - } - } - } - - private static IEnumerable GetSegments(XDocument termRenderingsDoc) - { - if (termRenderingsDoc.Root is null) - { - yield break; - } - - foreach ( - XElement termRenderingElem in termRenderingsDoc - .Root.Elements("TermRendering") - .Where(t => !(bool)t.Attribute("Guess")) - .OrderBy(t => t.Attribute("Id")?.Value) - ) - { - string id = termRenderingElem.Attribute("Id")?.Value; - if (string.IsNullOrWhiteSpace(id)) - { - continue; - } - - var renderingsStr = (string?)termRenderingElem.Element("Renderings"); - string[] renderings = renderingsStr?.Trim().Split("||", StringSplitOptions.RemoveEmptyEntries) ?? []; - - foreach (string rendering in renderings.Select(RemoveParatextSyntaxFromRendering)) - { - // Do not add blank renderings - if (string.IsNullOrWhiteSpace(rendering)) - { - continue; - } - - // Sentence placement is not essential for biblical terms. 
Set all to false - yield return new SFTextSegment([id], rendering, false, false, false); - } - } - } -} diff --git a/src/SIL.XForge.Scripture/Services/SFProjectService.cs b/src/SIL.XForge.Scripture/Services/SFProjectService.cs index fb485e6b19..0ef95e8dbd 100644 --- a/src/SIL.XForge.Scripture/Services/SFProjectService.cs +++ b/src/SIL.XForge.Scripture/Services/SFProjectService.cs @@ -299,18 +299,8 @@ async Task removeSourceReference(string projectId) await RealtimeService.DeleteProjectAsync(projectId); // The machine service requires the project secrets, so call it before removing them - await _machineProjectService.RemoveProjectAsync( - curUserId, - projectId, - preTranslate: false, - CancellationToken.None - ); - await _machineProjectService.RemoveProjectAsync( - curUserId, - projectId, - preTranslate: true, - CancellationToken.None - ); + await _machineProjectService.RemoveProjectAsync(projectId, preTranslate: false, CancellationToken.None); + await _machineProjectService.RemoveProjectAsync(projectId, preTranslate: true, CancellationToken.None); await ProjectSecrets.DeleteAsync(projectId); } @@ -507,7 +497,6 @@ await projectDoc.SubmitJson0OpAsync(op => if (hasExistingMachineProject) { await _machineProjectService.RemoveProjectAsync( - curUserId, projectId, preTranslate: false, CancellationToken.None @@ -516,7 +505,6 @@ await _machineProjectService.RemoveProjectAsync( await EnsureWritingSystemTagIsSetAsync(curUserId, projectDoc, ptProjects); await _machineProjectService.AddProjectAsync( - curUserId, projectId, preTranslate: false, CancellationToken.None @@ -527,7 +515,6 @@ await _machineProjectService.AddProjectAsync( { // translation suggestions was disabled or source project set to null await _machineProjectService.RemoveProjectAsync( - curUserId, projectId, preTranslate: false, CancellationToken.None diff --git a/src/SIL.XForge/Utils/StringUtils.cs b/src/SIL.XForge/Utils/StringUtils.cs index ad1a2438f5..6e2d30cf6c 100644 --- 
a/src/SIL.XForge/Utils/StringUtils.cs +++ b/src/SIL.XForge/Utils/StringUtils.cs @@ -23,6 +23,14 @@ public static string ComputeMd5Hash(string message) return sb.ToString().ToLower(); } + /// + /// Sanitizes a string for logging. + /// + /// The string value. + /// The string sanitized for logging. + /// This extension method resolves CodeQL cs/log-forging. + public static string Sanitize(this string value) => value.ReplaceLineEndings(string.Empty); + public static string ToCamelCase(this string str) => CamelCaseNamingStrategy.GetPropertyName(str, false); public static bool ValidateId(string id) => ObjectId.TryParse(id, out _); diff --git a/test/SIL.XForge.Scripture.Tests/Services/BuildConfigJsonConverterTests.cs b/test/SIL.XForge.Scripture.Tests/Services/BuildConfigJsonConverterTests.cs index ba1747679f..0f1656469c 100644 --- a/test/SIL.XForge.Scripture.Tests/Services/BuildConfigJsonConverterTests.cs +++ b/test/SIL.XForge.Scripture.Tests/Services/BuildConfigJsonConverterTests.cs @@ -94,7 +94,7 @@ public void WriteJson_Serializes_BuildConfig_WithoutTrainingBooks() var converter = new BuildConfigJsonConverter(); var writer = Substitute.For(); var serializer = Substitute.For(); - var buildConfig = new BuildConfig { ProjectId = Project01, TranslationBooks = [1, 2, 3], }; + var buildConfig = new BuildConfig { ProjectId = Project01, TranslationBooks = [1, 2, 3] }; // SUT converter.WriteJson(writer, buildConfig, serializer); @@ -114,7 +114,7 @@ public void WriteJson_Serializes_BuildConfig_WithoutTranslationBooks() var converter = new BuildConfigJsonConverter(); var writer = Substitute.For(); var serializer = Substitute.For(); - var buildConfig = new BuildConfig { ProjectId = Project01, TrainingBooks = [1, 2, 3], }; + var buildConfig = new BuildConfig { ProjectId = Project01, TrainingBooks = [1, 2, 3] }; // SUT converter.WriteJson(writer, buildConfig, serializer); @@ -134,7 +134,7 @@ public void WriteJson_Serializes_BuildConfig_TrainingDataFiles() var converter = new 
BuildConfigJsonConverter(); var writer = Substitute.For(); var serializer = Substitute.For(); - var buildConfig = new BuildConfig { TrainingDataFiles = [Data01, Data02], }; + var buildConfig = new BuildConfig { TrainingDataFiles = [Data01, Data02] }; // SUT converter.WriteJson(writer, buildConfig, serializer); @@ -151,7 +151,7 @@ public void WriteJson_Serializes_BuildConfig_TrainingScriptureRange() var converter = new BuildConfigJsonConverter(); var writer = Substitute.For(); var serializer = Substitute.For(); - var buildConfig = new BuildConfig { TrainingScriptureRange = "MAT;MRK1-2,4", }; + var buildConfig = new BuildConfig { TrainingScriptureRange = "MAT;MRK1-2,4" }; // SUT converter.WriteJson(writer, buildConfig, serializer); @@ -168,14 +168,57 @@ public void WriteJson_Serializes_BuildConfig_TranslationScriptureRange() var converter = new BuildConfigJsonConverter(); var writer = Substitute.For(); var serializer = Substitute.For(); - var buildConfig = new BuildConfig { TrainingScriptureRange = "JHN", }; + var buildConfig = new BuildConfig { TranslationScriptureRange = "JHN" }; // SUT converter.WriteJson(writer, buildConfig, serializer); writer.Received().WriteStartObject(); - writer.Received().WritePropertyName(nameof(buildConfig.TrainingScriptureRange)); - serializer.Received().Serialize(writer, buildConfig.TrainingScriptureRange); + writer.Received().WritePropertyName(nameof(buildConfig.TranslationScriptureRange)); + serializer.Received().Serialize(writer, buildConfig.TranslationScriptureRange); + writer.Received().WriteEndObject(); + } + + [Test] + public void WriteJson_Serializes_BuildConfig_TrainingScriptureRanges() + { + var converter = new BuildConfigJsonConverter(); + var writer = Substitute.For(); + var serializer = Substitute.For(); + var buildConfig = new BuildConfig + { + TrainingScriptureRanges = [new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = "MAT;MRK" }], + }; + + // SUT + converter.WriteJson(writer, buildConfig, serializer); 
+ + writer.Received().WriteStartObject(); + writer.Received().WritePropertyName(nameof(buildConfig.TrainingScriptureRanges)); + serializer.Received().Serialize(writer, buildConfig.TrainingScriptureRanges); + writer.Received().WriteEndObject(); + } + + [Test] + public void WriteJson_Serializes_BuildConfig_TranslationScriptureRanges() + { + var converter = new BuildConfigJsonConverter(); + var writer = Substitute.For(); + var serializer = Substitute.For(); + var buildConfig = new BuildConfig + { + TranslationScriptureRanges = + [ + new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = "MAT;MRK" }, + ], + }; + + // SUT + converter.WriteJson(writer, buildConfig, serializer); + + writer.Received().WriteStartObject(); + writer.Received().WritePropertyName(nameof(buildConfig.TranslationScriptureRanges)); + serializer.Received().Serialize(writer, buildConfig.TranslationScriptureRanges); writer.Received().WriteEndObject(); } @@ -213,8 +256,14 @@ public void ReadJson_Deserializes_JSON_String() public void ReadJson_Deserializes_JSON_Object() { var converter = new BuildConfigJsonConverter(); - const string jsonString = - $"{{\"{nameof(BuildConfig.ProjectId)}\":\"{Project01}\",\"{nameof(BuildConfig.TrainingBooks)}\":[1,2,3],\"{nameof(BuildConfig.TranslationBooks)}\":[4,5,6],\"{nameof(BuildConfig.FastTraining)}\":true}}"; + const string jsonString = $$""" + { + "{{nameof(BuildConfig.ProjectId)}}":"{{Project01}}", + "{{nameof(BuildConfig.TrainingBooks)}}":[1,2,3], + "{{nameof(BuildConfig.TranslationBooks)}}":[4,5,6], + "{{nameof(BuildConfig.FastTraining)}}":true + } + """; using var stringReader = new StringReader(jsonString); using var reader = new JsonTextReader(stringReader); var serializer = new JsonSerializer(); @@ -234,8 +283,13 @@ public void ReadJson_Deserializes_JSON_Object() public void ReadJson_Deserializes_JSON_Object_WithoutFastConfig() { var converter = new BuildConfigJsonConverter(); - const string jsonString = - 
$"{{\"{nameof(BuildConfig.ProjectId)}\":\"{Project01}\",\"{nameof(BuildConfig.TrainingBooks)}\":[1,2,3],\"{nameof(BuildConfig.TranslationBooks)}\":[4,5,6]}}"; + const string jsonString = $$""" + { + "{{nameof(BuildConfig.ProjectId)}}":"{{Project01}}", + "{{nameof(BuildConfig.TrainingBooks)}}":[1,2,3], + "{{nameof(BuildConfig.TranslationBooks)}}":[4,5,6] + } + """; using var stringReader = new StringReader(jsonString); using var reader = new JsonTextReader(stringReader); var serializer = new JsonSerializer(); @@ -255,8 +309,12 @@ public void ReadJson_Deserializes_JSON_Object_WithoutFastConfig() public void ReadJson_Deserializes_JSON_Object_TrainingDataFiles() { var converter = new BuildConfigJsonConverter(); - const string jsonString = - $"{{\"{nameof(BuildConfig.ProjectId)}\":\"{Project01}\",\"{nameof(BuildConfig.TrainingDataFiles)}\":[\"{Data01}\",\"{Data02}\"]}}"; + const string jsonString = $$""" + { + "{{nameof(BuildConfig.ProjectId)}}":"{{Project01}}", + "{{nameof(BuildConfig.TrainingDataFiles)}}":["{{Data01}}","{{Data02}}"] + } + """; using var stringReader = new StringReader(jsonString); using var reader = new JsonTextReader(stringReader); var serializer = new JsonSerializer(); @@ -276,8 +334,12 @@ public void ReadJson_Deserializes_JSON_Object_TrainingScriptureRange() { var converter = new BuildConfigJsonConverter(); const string scriptureRange = "MAT;MRK1-2,4"; - const string jsonString = - $"{{\"{nameof(BuildConfig.ProjectId)}\":\"{Project01}\",\"{nameof(BuildConfig.TrainingScriptureRange)}\":\"{scriptureRange}\"}}"; + const string jsonString = $$""" + { + "{{nameof(BuildConfig.ProjectId)}}":"{{Project01}}", + "{{nameof(BuildConfig.TrainingScriptureRange)}}":"{{scriptureRange}}" + } + """; using var stringReader = new StringReader(jsonString); using var reader = new JsonTextReader(stringReader); var serializer = new JsonSerializer(); @@ -288,7 +350,7 @@ public void ReadJson_Deserializes_JSON_Object_TrainingScriptureRange() Assert.IsNotNull(buildConfig); 
Assert.IsInstanceOf(buildConfig); Assert.IsFalse(buildConfig!.FastTraining); - CollectionAssert.AreEqual(scriptureRange, buildConfig.TrainingScriptureRange); + Assert.AreEqual(scriptureRange, buildConfig.TrainingScriptureRange); Assert.AreEqual(Project01, buildConfig.ProjectId); } @@ -297,8 +359,80 @@ public void ReadJson_Deserializes_JSON_Object_TranslationScriptureRange() { var converter = new BuildConfigJsonConverter(); const string scriptureRange = "JHN"; - const string jsonString = - $"{{\"{nameof(BuildConfig.ProjectId)}\":\"{Project01}\",\"{nameof(BuildConfig.TranslationScriptureRange)}\":\"{scriptureRange}\"}}"; + const string jsonString = $$""" + { + "{{nameof(BuildConfig.ProjectId)}}":"{{Project01}}", + "{{nameof(BuildConfig.TranslationScriptureRange)}}":"{{scriptureRange}}" + } + """; + using var stringReader = new StringReader(jsonString); + using var reader = new JsonTextReader(stringReader); + var serializer = new JsonSerializer(); + + // SUT + var buildConfig = converter.ReadJson(reader, typeof(BuildConfig), null, false, serializer); + + Assert.IsNotNull(buildConfig); + Assert.IsInstanceOf(buildConfig); + Assert.IsFalse(buildConfig!.FastTraining); + Assert.AreEqual(scriptureRange, buildConfig.TranslationScriptureRange); + Assert.AreEqual(Project01, buildConfig.ProjectId); + } + + [Test] + public void ReadJson_Deserializes_JSON_Object_TrainingScriptureRanges() + { + var converter = new BuildConfigJsonConverter(); + const string scriptureRange = "JHN"; + const string jsonString = $$""" + { + "{{nameof(BuildConfig.ProjectId)}}":"{{Project01}}", + "{{nameof(BuildConfig.TrainingScriptureRanges)}}": + [ + { + "{{nameof(ProjectScriptureRange.ProjectId)}}":"{{Project01}}", + "{{nameof(ProjectScriptureRange.ScriptureRange)}}":"{{scriptureRange}}" + } + ] + } + """; + using var stringReader = new StringReader(jsonString); + using var reader = new JsonTextReader(stringReader); + var serializer = new JsonSerializer(); + + // SUT + var buildConfig = 
converter.ReadJson(reader, typeof(BuildConfig), null, false, serializer); + + Assert.IsNotNull(buildConfig); + Assert.IsInstanceOf(buildConfig); + Assert.IsFalse(buildConfig!.FastTraining); + CollectionAssert.AreEqual( + new List + { + new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = scriptureRange }, + }, + buildConfig.TrainingScriptureRanges + ); + Assert.AreEqual(Project01, buildConfig.ProjectId); + } + + [Test] + public void ReadJson_Deserializes_JSON_Object_TranslationScriptureRanges() + { + var converter = new BuildConfigJsonConverter(); + const string scriptureRange = "JHN"; + const string jsonString = $$""" + { + "{{nameof(BuildConfig.ProjectId)}}":"{{Project01}}", + "{{nameof(BuildConfig.TranslationScriptureRanges)}}": + [ + { + "{{nameof(ProjectScriptureRange.ProjectId)}}":"{{Project01}}", + "{{nameof(ProjectScriptureRange.ScriptureRange)}}":"{{scriptureRange}}" + } + ] + } + """; using var stringReader = new StringReader(jsonString); using var reader = new JsonTextReader(stringReader); var serializer = new JsonSerializer(); @@ -309,7 +443,13 @@ public void ReadJson_Deserializes_JSON_Object_TranslationScriptureRange() Assert.IsNotNull(buildConfig); Assert.IsInstanceOf(buildConfig); Assert.IsFalse(buildConfig!.FastTraining); - CollectionAssert.AreEqual(scriptureRange, buildConfig.TranslationScriptureRange); + CollectionAssert.AreEqual( + new List + { + new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = scriptureRange }, + }, + buildConfig.TranslationScriptureRanges + ); Assert.AreEqual(Project01, buildConfig.ProjectId); } } diff --git a/test/SIL.XForge.Scripture.Tests/Services/MachineApiServiceTests.cs b/test/SIL.XForge.Scripture.Tests/Services/MachineApiServiceTests.cs index 7a606a431b..c69a96f3fb 100644 --- a/test/SIL.XForge.Scripture.Tests/Services/MachineApiServiceTests.cs +++ b/test/SIL.XForge.Scripture.Tests/Services/MachineApiServiceTests.cs @@ -409,6 +409,10 @@ public async Task 
GetBuildAsync_IncludesAdditionalInfo() const string engineId = "engineId1"; const string corpusId1 = "corpusId1"; const string corpusId2 = "corpusId2"; + const string corpusId3 = "corpusId3"; + const string corpusId4 = "corpusId4"; + const string parallelCorpusId1 = "parallelCorpusId1"; + const string parallelCorpusId2 = "parallelCorpusId2"; const int step = 123; env.TranslationEnginesClient.GetBuildAsync( TranslationEngine01, @@ -434,12 +438,62 @@ public async Task GetBuildAsync_IncludesAdditionalInfo() [ new PretranslateCorpus { - Corpus = new ResourceLink { Id = corpusId1, Url = "https://example.com" }, + ParallelCorpus = new ResourceLink + { + Id = parallelCorpusId1, + Url = "https://example.com", + }, }, new PretranslateCorpus { - Corpus = new ResourceLink { Id = corpusId2, Url = "https://example.com" }, + ParallelCorpus = new ResourceLink + { + Id = parallelCorpusId2, + Url = "https://example.com", + }, }, + new PretranslateCorpus + { + SourceFilters = + [ + new ParallelCorpusFilter + { + Corpus = new ResourceLink { Id = corpusId1, Url = "https://example.com" }, + }, + new ParallelCorpusFilter + { + Corpus = new ResourceLink { Id = corpusId2, Url = "https://example.com" }, + }, + ], + }, + // Invalid corpus format + new PretranslateCorpus(), + ], + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpus = new ResourceLink { Id = corpusId3, Url = "https://example.com" }, + }, + new TrainingCorpus + { + SourceFilters = + [ + new ParallelCorpusFilter + { + Corpus = new ResourceLink { Id = corpusId3, Url = "https://example.com" }, + }, + ], + TargetFilters = + [ + new ParallelCorpusFilter + { + Corpus = new ResourceLink { Id = corpusId4, Url = "https://example.com" }, + }, + ], + }, + // Invalid corpus format + new TrainingCorpus(), ], } ) @@ -472,9 +526,14 @@ public async Task GetBuildAsync_IncludesAdditionalInfo() Assert.AreEqual(step, actual.AdditionalInfo.Step); Assert.AreEqual(engineId, actual.AdditionalInfo.TranslationEngineId); 
Assert.IsNotNull(actual.AdditionalInfo.CorporaIds); - Assert.AreEqual(2, actual.AdditionalInfo.CorporaIds.Count()); - Assert.AreEqual(corpusId1, actual.AdditionalInfo.CorporaIds.First()); - Assert.AreEqual(corpusId2, actual.AdditionalInfo.CorporaIds.Last()); + Assert.AreEqual(4, actual.AdditionalInfo.CorporaIds.Count()); + Assert.AreEqual(corpusId1, actual.AdditionalInfo.CorporaIds.ElementAt(0)); + Assert.AreEqual(corpusId2, actual.AdditionalInfo.CorporaIds.ElementAt(1)); + Assert.AreEqual(corpusId3, actual.AdditionalInfo.CorporaIds.ElementAt(2)); + Assert.AreEqual(corpusId4, actual.AdditionalInfo.CorporaIds.ElementAt(3)); + Assert.IsNotNull(actual.AdditionalInfo.ParallelCorporaIds); + Assert.AreEqual(parallelCorpusId1, actual.AdditionalInfo.ParallelCorporaIds.ElementAt(0)); + Assert.AreEqual(parallelCorpusId2, actual.AdditionalInfo.ParallelCorporaIds.ElementAt(1)); } [Test] @@ -1080,7 +1139,7 @@ public async Task GetPreTranslationAsync_Success() Task.FromResult( new PreTranslation[] { - new PreTranslation { Reference = reference, Translation = translation, }, + new PreTranslation { Reference = reference, Translation = translation }, } ) ); @@ -1592,7 +1651,7 @@ public async Task IsLanguageSupportedAsync_LanguageSupported() { EngineType = MachineProjectService.Nmt, InternalCode = internalCode, - IsNative = true + IsNative = true, } ) ); @@ -1760,6 +1819,56 @@ public void StartPreTranslationBuildAsync_DoNotAllowTrainingScriptureRangeWithTr ); } + [Test] + public void StartPreTranslationBuildAsync_DoNotAllowTrainingScriptureRangesWithTrainingScriptureRange() + { + // Set up test environment + var env = new TestEnvironment(); + + // SUT + Assert.ThrowsAsync( + () => + env.Service.StartPreTranslationBuildAsync( + User01, + new BuildConfig + { + ProjectId = Project01, + TrainingScriptureRange = "GEN", + TrainingScriptureRanges = + [ + new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = "GEN" }, + ], + }, + CancellationToken.None + ) + ); + } + + [Test] + 
public void StartPreTranslationBuildAsync_DoNotAllowTrainingScriptureRangesWithTrainingBooks() + { + // Set up test environment + var env = new TestEnvironment(); + + // SUT + Assert.ThrowsAsync( + () => + env.Service.StartPreTranslationBuildAsync( + User01, + new BuildConfig + { + ProjectId = Project01, + TrainingBooks = [1], + TrainingScriptureRanges = + [ + new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = "GEN" }, + ], + }, + CancellationToken.None + ) + ); + } + [Test] public void StartPreTranslationBuildAsync_DoNotAllowTranslationScriptureRangeWithTranslationBooks() { @@ -1782,6 +1891,56 @@ public void StartPreTranslationBuildAsync_DoNotAllowTranslationScriptureRangeWit ); } + [Test] + public void StartPreTranslationBuildAsync_DoNotAllowTranslationScriptureRangesWithTranslationScriptureRange() + { + // Set up test environment + var env = new TestEnvironment(); + + // SUT + Assert.ThrowsAsync( + () => + env.Service.StartPreTranslationBuildAsync( + User01, + new BuildConfig + { + ProjectId = Project01, + TranslationScriptureRange = "GEN", + TranslationScriptureRanges = + [ + new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = "GEN" }, + ], + }, + CancellationToken.None + ) + ); + } + + [Test] + public void StartPreTranslationBuildAsync_DoNotAllowTranslationScriptureRangesWithTranslationBooks() + { + // Set up test environment + var env = new TestEnvironment(); + + // SUT + Assert.ThrowsAsync( + () => + env.Service.StartPreTranslationBuildAsync( + User01, + new BuildConfig + { + ProjectId = Project01, + TranslationBooks = [1], + TranslationScriptureRanges = + [ + new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = "GEN" }, + ], + }, + CancellationToken.None + ) + ); + } + [Test] public async Task StartPreTranslationBuildAsync_SuccessNoTrainingOrTranslationBooks() { @@ -1871,6 +2030,71 @@ await env.Service.StartPreTranslationBuildAsync( ); } + [Test] + public async Task 
StartPreTranslationBuildAsync_SuccessWithTrainingAndTranslationScriptureRanges() + { + // Set up test environment + var env = new TestEnvironment(); + const string scriptureRange1 = "GEN"; + const string scriptureRange2 = "EXO"; + + // SUT + await env.Service.StartPreTranslationBuildAsync( + User01, + new BuildConfig + { + ProjectId = Project01, + TrainingScriptureRanges = + [ + new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = scriptureRange1 }, + ], + TranslationScriptureRanges = + [ + new ProjectScriptureRange { ProjectId = Project02, ScriptureRange = scriptureRange2 }, + ], + }, + CancellationToken.None + ); + + await env.ProjectService.Received(1).SyncAsync(User01, Project01); + env.BackgroundJobClient.Received(1).Create(Arg.Any(), Arg.Any()); + Assert.AreEqual(JobId, env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationJobId); + Assert.IsNotNull(env.ProjectSecrets.Get(Project01).ServalData?.PreTranslationQueuedAt); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData?.PreTranslationErrorMessage); + Assert.AreEqual( + 1, + env.Projects.Get(Project01).TranslateConfig.DraftConfig.LastSelectedTrainingScriptureRanges.Count + ); + Assert.AreEqual( + Project01, + env.Projects.Get(Project01) + .TranslateConfig.DraftConfig.LastSelectedTrainingScriptureRanges.First() + .ProjectId + ); + Assert.AreEqual( + scriptureRange1, + env.Projects.Get(Project01) + .TranslateConfig.DraftConfig.LastSelectedTrainingScriptureRanges.First() + .ScriptureRange + ); + Assert.AreEqual( + 1, + env.Projects.Get(Project01).TranslateConfig.DraftConfig.LastSelectedTranslationScriptureRanges.Count + ); + Assert.AreEqual( + Project02, + env.Projects.Get(Project01) + .TranslateConfig.DraftConfig.LastSelectedTranslationScriptureRanges.First() + .ProjectId + ); + Assert.AreEqual( + scriptureRange2, + env.Projects.Get(Project01) + .TranslateConfig.DraftConfig.LastSelectedTranslationScriptureRanges.First() + .ScriptureRange + ); + } + [Test] public async Task 
StartPreTranslationBuildAsync_AlternateTrainingSource() { @@ -2207,14 +2431,6 @@ public TestEnvironment() MachineProjectService .GetTranslationEngineTypeAsync(preTranslate: true) .Returns(Task.FromResult(Services.MachineProjectService.Nmt)); - MachineProjectService - .TranslationEngineExistsAsync( - Project01, - TranslationEngine01, - preTranslate: false, - CancellationToken.None - ) - .Returns(Task.FromResult(true)); MockLogger = new MockLogger(); ParatextService = Substitute.For(); PreTranslationService = Substitute.For(); diff --git a/test/SIL.XForge.Scripture.Tests/Services/MachineProjectServiceTests.cs b/test/SIL.XForge.Scripture.Tests/Services/MachineProjectServiceTests.cs index 07cf3bd838..f57b763196 100644 --- a/test/SIL.XForge.Scripture.Tests/Services/MachineProjectServiceTests.cs +++ b/test/SIL.XForge.Scripture.Tests/Services/MachineProjectServiceTests.cs @@ -12,16 +12,18 @@ using Newtonsoft.Json.Linq; using NSubstitute; using NSubstitute.ExceptionExtensions; +using NSubstitute.Extensions; using NUnit.Framework; -using Polly.CircuitBreaker; using Serval.Client; using SIL.XForge.Configuration; using SIL.XForge.DataAccess; using SIL.XForge.Models; using SIL.XForge.Realtime; +using SIL.XForge.Realtime.Json0; using SIL.XForge.Scripture.Models; using SIL.XForge.Scripture.Realtime; using SIL.XForge.Services; +using SIL.XForge.Utils; namespace SIL.XForge.Scripture.Services; @@ -31,20 +33,44 @@ public class MachineProjectServiceTests private const string Paratext01 = "paratext01"; private const string Paratext02 = "paratext02"; private const string Paratext03 = "paratext03"; + private const string Paratext04 = "paratext04"; + private const string Paratext05 = "paratext05"; private const string Project01 = "project01"; private const string Project02 = "project02"; private const string Project03 = "project03"; + private const string Project04 = "project04"; + private const string Project05 = "project05"; private const string User01 = "user01"; private const string 
Corpus01 = "corpus01"; private const string Corpus02 = "corpus02"; private const string Corpus03 = "corpus03"; + private const string Corpus04 = "corpus04"; private const string Data01 = "data01"; private const string File01 = "file01"; private const string File02 = "file02"; + private const string File03 = "file03"; + private const string File04 = "file04"; + private const string File05 = "file05"; + private const string File06 = "file06"; + private const string Job01 = "job01"; + private const string ParallelCorpus01 = "parallelCorpus01"; + private const string ParallelCorpus02 = "parallelCorpus02"; + private const string ParallelCorpus03 = "parallelCorpus03"; private const string TranslationEngine01 = "translationEngine01"; private const string TranslationEngine02 = "translationEngine02"; private const string LanguageTag = "he"; + [Test] + public async Task AddProjectAsync_DoesNotCreateIfLanguageMissing() + { + // Set up test environment + var env = new TestEnvironment(); + + // SUT + string actual = await env.Service.AddProjectAsync(Project03, preTranslate: false, CancellationToken.None); + Assert.IsEmpty(actual); + } + [Test] public void AddProjectAsync_ThrowsExceptionWhenProjectSecretMissing() { @@ -53,292 +79,464 @@ public void AddProjectAsync_ThrowsExceptionWhenProjectSecretMissing() // SUT Assert.ThrowsAsync( - () => env.Service.AddProjectAsync(User01, "invalid_project_id", preTranslate: false, CancellationToken.None) + () => env.Service.AddProjectAsync("invalid_project_id", preTranslate: false, CancellationToken.None) ); } [Test] - public async Task AddProjectAsync_DoesNotCreateIfLanguageMissing() + public async Task AddProjectAsync_Success() { // Set up test environment var env = new TestEnvironment(); + env.Service.Configure() + .CreateServalProjectAsync(Arg.Any(), preTranslate: true, CancellationToken.None) + .Returns(Task.FromResult(TranslationEngine01)); // SUT - string actual = await env.Service.AddProjectAsync( - User01, - Project03, - 
preTranslate: false, - CancellationToken.None - ); - Assert.IsEmpty(actual); + string actual = await env.Service.AddProjectAsync(Project01, preTranslate: true, CancellationToken.None); + Assert.AreEqual(TranslationEngine01, actual); } [Test] - public async Task BuildProjectAsync_UsesTheUpdatedLearningRateForServal() + public async Task BuildProjectForBackgroundJobAsync_DoesNotRecordBuildInProgressErrors() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false }); - env.FeatureManager.IsEnabledAsync(FeatureFlags.UpdatedLearningRateForServal).Returns(Task.FromResult(true)); + var env = new TestEnvironment(); + ServalApiException ex = ServalApiExceptions.BuildInProgress; + var buildConfig = new BuildConfig { ProjectId = Project01 }; + env.Service.Configure() + .BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None) + .ThrowsAsync(ex); + + // A pre-translation job has been queued + await env.SetupProjectSecretAsync( + Project01, + new ServalData { PreTranslationJobId = Job01, PreTranslationQueuedAt = DateTime.UtcNow } + ); // SUT - await env.Service.BuildProjectAsync( + await env.Service.BuildProjectForBackgroundJobAsync( User01, - new BuildConfig { ProjectId = Project01, FastTraining = true }, + buildConfig, preTranslate: true, CancellationToken.None ); - await env - .TranslationEnginesClient.Received() - .StartBuildAsync( - TranslationEngine01, - Arg.Is(b => ((int)((JObject)b.Options)["train_params"]["max_steps"]) == 5000), - CancellationToken.None - ); + env.MockLogger.AssertNoEvent(logEvent => logEvent.Exception == ex); + env.ExceptionHandler.DidNotReceiveWithAnyArgs().ReportException(ex); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationErrorMessage); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationJobId); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationQueuedAt); } [Test] - public void 
BuildProjectAsync_ThrowsExceptionWhenProjectSecretMissing() + public async Task BuildProjectForBackgroundJobAsync_DoesNotRecordBuildInProgressErrorsForSmt() { // Set up test environment var env = new TestEnvironment(); + ServalApiException ex = ServalApiExceptions.BuildInProgress; + var buildConfig = new BuildConfig { ProjectId = Project01 }; + env.Service.Configure() + .BuildProjectAsync(User01, buildConfig, preTranslate: false, CancellationToken.None) + .ThrowsAsync(ex); + + // An SMT translation job has been queued + await env.SetupProjectSecretAsync( + Project01, + new ServalData { TranslationJobId = Job01, TranslationQueuedAt = DateTime.UtcNow } + ); // SUT - Assert.ThrowsAsync( - () => - env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = "invalid_project_id" }, - preTranslate: false, - CancellationToken.None - ) + await env.Service.BuildProjectForBackgroundJobAsync( + User01, + buildConfig, + preTranslate: false, + CancellationToken.None ); + + env.MockLogger.AssertNoEvent(logEvent => logEvent.Exception == ex); + env.ExceptionHandler.DidNotReceiveWithAnyArgs().ReportException(ex); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationErrorMessage); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationJobId); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationQueuedAt); } [Test] - public async Task BuildProjectAsync_ThrowsExceptionWhenProjectMissing() + public async Task BuildProjectForBackgroundJobAsync_DoesNotRecordTaskCancellation() { // Set up test environment var env = new TestEnvironment(); - await env.Projects.DeleteAllAsync(_ => true); + var ex = new TaskCanceledException(); + var buildConfig = new BuildConfig { ProjectId = Project01 }; + env.Service.Configure() + .BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None) + .ThrowsAsync(ex); + + // A pre-translation job has been queued + await env.SetupProjectSecretAsync(Project01, new ServalData 
{ PreTranslationQueuedAt = DateTime.UtcNow }); // SUT - Assert.ThrowsAsync( - () => - env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project01 }, - preTranslate: false, - CancellationToken.None - ) + await env.Service.BuildProjectForBackgroundJobAsync( + User01, + buildConfig, + preTranslate: true, + CancellationToken.None ); + + env.ExceptionHandler.DidNotReceive().ReportException(Arg.Any()); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationQueuedAt); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationErrorMessage); } [Test] - public async Task BuildProjectAsync_ThrowsExceptionWhenSourceMissing() + public async Task BuildProjectForBackgroundJobAsync_DoesNotRecordTaskCancellationForSmt() { // Set up test environment var env = new TestEnvironment(); - await env.Projects.UpdateAsync(Project01, op => op.Unset(p => p.TranslateConfig.Source)); + var ex = new TaskCanceledException(); + var buildConfig = new BuildConfig { ProjectId = Project01 }; + env.Service.Configure() + .BuildProjectAsync(User01, buildConfig, preTranslate: false, CancellationToken.None) + .ThrowsAsync(ex); + + // An SMT translation job has been queued + await env.SetupProjectSecretAsync(Project01, new ServalData { TranslationQueuedAt = DateTime.UtcNow }); // SUT - Assert.ThrowsAsync( - () => - env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project01 }, - preTranslate: false, - CancellationToken.None - ) + await env.Service.BuildProjectForBackgroundJobAsync( + User01, + buildConfig, + preTranslate: false, + CancellationToken.None ); + + env.ExceptionHandler.DidNotReceive().ReportException(Arg.Any()); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationQueuedAt); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationErrorMessage); } [Test] - public async Task BuildProjectAsync_ThrowsExceptionWhenSourceRemoved() + public async Task 
BuildProjectForBackgroundJobAsync_RecordsDataNotFoundExceptionAsWarning() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { HasTranslationEngineForSmt = true }); - await env.Projects.UpdateAsync(Project02, op => op.Unset(p => p.TranslateConfig.Source)); + var env = new TestEnvironment(); + var ex = new DataNotFoundException("Not Found"); + var buildConfig = new BuildConfig { ProjectId = Project01 }; + env.Service.Configure() + .BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None) + .ThrowsAsync(ex); // SUT - Assert.ThrowsAsync( - () => - env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: false, - CancellationToken.None - ) + await env.Service.BuildProjectForBackgroundJobAsync( + User01, + buildConfig, + preTranslate: true, + CancellationToken.None ); + + env.MockLogger.AssertHasEvent(logEvent => logEvent.Exception == ex && logEvent.LogLevel == LogLevel.Warning); + env.ExceptionHandler.DidNotReceive().ReportException(Arg.Any()); } [Test] - public async Task BuildProjectAsync_CallsServalIfTranslationEngineIdPresent() + public async Task BuildProjectForBackgroundJobAsync_RecordsErrors() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { HasTranslationEngineForSmt = true }); + var env = new TestEnvironment(); + ServalApiException ex = ServalApiExceptions.Forbidden; + var buildConfig = new BuildConfig { ProjectId = Project01 }; + env.Service.Configure() + .BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None) + .ThrowsAsync(ex); + + // A pre-translation job has been queued + await env.SetupProjectSecretAsync( + Project01, + new ServalData { PreTranslationJobId = Job01, PreTranslationQueuedAt = DateTime.UtcNow } + ); // SUT - await env.Service.BuildProjectAsync( + await env.Service.BuildProjectForBackgroundJobAsync( User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: 
false, + buildConfig, + preTranslate: true, CancellationToken.None ); - await env - .TranslationEnginesClient.Received() - .StartBuildAsync(TranslationEngine02, Arg.Any(), CancellationToken.None); + env.MockLogger.AssertHasEvent(logEvent => logEvent.Exception == ex && logEvent.LogLevel == LogLevel.Error); + env.ExceptionHandler.Received(1).ReportException(ex); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationJobId); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationQueuedAt); + Assert.AreEqual(ex.Message, env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationErrorMessage); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationErrorMessage); } [Test] - public async Task BuildProjectAsync_BuildsPreTranslationProjects() + public async Task BuildProjectForBackgroundJobAsync_RecordsErrorsForSmt() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false }); + var env = new TestEnvironment(); + ServalApiException ex = ServalApiExceptions.Forbidden; + var buildConfig = new BuildConfig { ProjectId = Project01 }; + env.Service.Configure() + .BuildProjectAsync(User01, buildConfig, preTranslate: false, CancellationToken.None) + .ThrowsAsync(ex); + + // An SMT translation job has been queued + await env.SetupProjectSecretAsync( + Project01, + new ServalData { TranslationJobId = Job01, TranslationQueuedAt = DateTime.UtcNow } + ); // SUT - await env.Service.BuildProjectAsync( + await env.Service.BuildProjectForBackgroundJobAsync( User01, - new BuildConfig { ProjectId = Project01 }, - preTranslate: true, + buildConfig, + preTranslate: false, CancellationToken.None ); - await env - .TranslationEnginesClient.Received() - .StartBuildAsync(TranslationEngine01, Arg.Any(), CancellationToken.None); + env.MockLogger.AssertHasEvent(logEvent => logEvent.Exception == ex && logEvent.LogLevel == LogLevel.Error); + 
env.ExceptionHandler.Received(1).ReportException(ex); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationErrorMessage); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationJobId); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationQueuedAt); + Assert.AreEqual(ex.Message, env.ProjectSecrets.Get(Project01).ServalData!.TranslationErrorMessage); } [Test] - public async Task BuildProjectAsync_SendsAdditionalTrainingData() + public async Task BuildProjectForBackgroundJobAsync_RunsBuildProjectAsync() { // Set up test environment var env = new TestEnvironment(); - await env.SetupTrainingDataAsync(Project01); + var buildConfig = new BuildConfig { ProjectId = Project01 }; + env.Service.Configure() + .BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None) + .Returns(Task.CompletedTask); // SUT - await env.Service.BuildProjectAsync( + await env.Service.BuildProjectForBackgroundJobAsync( User01, - new BuildConfig { ProjectId = Project01, TrainingDataFiles = { Data01 } }, + buildConfig, preTranslate: true, CancellationToken.None ); - // Ensure that the additional texts were retrieved await env - .TrainingDataService.Received() - .GetTextsAsync( + .Service.Received(1) + .BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None); + } + + [Test] + public async Task BuildProjectAsync_PreTranslationBuild() + { + // Set up test environment + var env = new TestEnvironment(); + var servalData = new ServalData + { + PreTranslationEngineId = TranslationEngine01, + PreTranslationJobId = Job01, + PreTranslationQueuedAt = DateTime.UtcNow, + }; + await env.SetupProjectSecretAsync(Project01, servalData); + var buildConfig = new BuildConfig { ProjectId = Project01 }; + env.Service.Configure() + .RemoveLegacyServalDataAsync(Project01, preTranslate: true, CancellationToken.None) + .Returns(Task.CompletedTask); + env.Service.Configure() + .EnsureTranslationEngineExistsAsync( 
User01, - Project01, - Arg.Is>(d => d.Contains(Data01)), - Arg.Any>(), - Arg.Any>() - ); + Arg.Any>(), + Arg.Any(), + preTranslate: true, + CancellationToken.None + ) + .Returns(Task.FromResult(TranslationEngine01)); + env.Service.Configure() + .RecreateTranslationEngineIfRequiredAsync( + TranslationEngine01, + Arg.Any(), + preTranslate: true, + CancellationToken.None + ) + .Returns(Task.CompletedTask); + env.Service.Configure() + .SyncProjectCorporaAsync(User01, buildConfig, preTranslate: true, CancellationToken.None) + .Returns(Task.FromResult>([])); + var translationBuildConfig = new TranslationBuildConfig(); + env.Service.Configure() + .GetTranslationBuildConfig( + Arg.Any(), + servalConfig: null, + buildConfig, + Arg.Any>() + ) + .Returns(translationBuildConfig); - // Ensure that the additional files corpus was synced, and the build started - await env - .TranslationEnginesClient.Received() - .AddCorpusAsync(Arg.Any(), Arg.Any(), CancellationToken.None); - Assert.IsNotEmpty( - env.ProjectSecrets.Get(Project01) - .ServalData!.Corpora.First(c => c.Value.PreTranslate && c.Value.AdditionalTrainingData) - .Key - ); + // SUT + await env.Service.BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationJobId); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationQueuedAt); await env - .TranslationEnginesClient.Received() - .StartBuildAsync( - Arg.Any(), - Arg.Is(b => b.TrainOn == null), + .TranslationEnginesClient.Received(1) + .StartBuildAsync(TranslationEngine01, translationBuildConfig, CancellationToken.None); + } + + [Test] + public async Task BuildProjectAsync_SmtTranslationBuild() + { + // Set up test environment + var env = new TestEnvironment(); + var servalData = new ServalData + { + TranslationEngineId = TranslationEngine01, + TranslationJobId = Job01, + TranslationQueuedAt = DateTime.UtcNow, + }; + await 
env.SetupProjectSecretAsync(Project01, servalData); + var buildConfig = new BuildConfig { ProjectId = Project01 }; + env.Service.Configure() + .RemoveLegacyServalDataAsync(Project01, preTranslate: false, CancellationToken.None) + .Returns(Task.CompletedTask); + env.Service.Configure() + .EnsureTranslationEngineExistsAsync( + User01, + Arg.Any>(), + Arg.Any(), + preTranslate: false, CancellationToken.None - ); + ) + .Returns(Task.FromResult(TranslationEngine01)); + env.Service.Configure() + .RecreateTranslationEngineIfRequiredAsync( + TranslationEngine01, + Arg.Any(), + preTranslate: false, + CancellationToken.None + ) + .Returns(Task.CompletedTask); + env.Service.Configure() + .SyncProjectCorporaAsync(User01, buildConfig, preTranslate: false, CancellationToken.None) + .Returns(Task.FromResult>([])); + + // SUT + await env.Service.BuildProjectAsync(User01, buildConfig, preTranslate: false, CancellationToken.None); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationJobId); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationQueuedAt); + await env + .TranslationEnginesClient.Received(1) + .StartBuildAsync(TranslationEngine01, Arg.Any(), CancellationToken.None); } [Test] - public async Task BuildProjectAsync_SendsAdditionalTrainingDataWhenFilesPreviouslyUploaded() + public async Task BuildProjectAsync_ThrowsExceptionWhenProjectMissing() { // Set up test environment var env = new TestEnvironment(); - await env.SetupTrainingDataAsync(Project02, existingData: true); + await env.Projects.DeleteAllAsync(_ => true); // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project02, TrainingDataFiles = { Data01 } }, - preTranslate: true, - CancellationToken.None + Assert.ThrowsAsync( + () => + env.Service.BuildProjectAsync( + User01, + new BuildConfig { ProjectId = Project01 }, + preTranslate: false, + CancellationToken.None + ) ); + } - // Ensure that the additional texts were retrieved - await env - 
.TrainingDataService.Received() - .GetTextsAsync( + [Test] + public async Task BuildProjectAsync_ThrowsExceptionWhenProjectSecretMissing() + { + // Set up test environment + var env = new TestEnvironment(); + await env.ProjectSecrets.DeleteAllAsync(_ => true); + + // SUT + Assert.ThrowsAsync( + () => + env.Service.BuildProjectAsync( + User01, + new BuildConfig { ProjectId = Project01 }, + preTranslate: false, + CancellationToken.None + ) + ); + } + + [Test] + public async Task BuildProjectAsync_ThrowsExceptionWhenServalDataMissing() + { + // Set up test environment + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync(Project01, null); + var buildConfig = new BuildConfig { ProjectId = Project01 }; + env.Service.Configure() + .RemoveLegacyServalDataAsync(Project01, preTranslate: true, CancellationToken.None) + .Returns(Task.CompletedTask); + env.Service.Configure() + .EnsureTranslationEngineExistsAsync( User01, - Project02, - Arg.Is>(d => d.Contains(Data01)), - Arg.Any>(), - Arg.Any>() - ); + Arg.Any>(), + Arg.Any(), + preTranslate: true, + CancellationToken.None + ) + .Returns(Task.FromResult(TranslationEngine01)); + env.Service.Configure() + .RecreateTranslationEngineIfRequiredAsync( + TranslationEngine01, + Arg.Any(), + preTranslate: true, + CancellationToken.None + ) + .Returns(Task.CompletedTask); + env.Service.Configure() + .SyncProjectCorporaAsync(User01, buildConfig, preTranslate: true, CancellationToken.None) + .Returns(Task.FromResult>([])); - // Ensure that the previous files with different IDs were deleted, and new ones added - await env.DataFilesClient.Received(2).DeleteAsync(File02); - await env - .DataFilesClient.Received() - .CreateAsync(Arg.Any(), Arg.Any(), Data01, CancellationToken.None); + // SUT + Assert.ThrowsAsync( + () => env.Service.BuildProjectAsync(User01, buildConfig, preTranslate: true, CancellationToken.None) + ); } [Test] - public async Task BuildProjectAsync_SendsAdditionalTrainingDataWithAlternateSource() + public 
async Task BuildProjectAsync_UsesTheServalConfigurationSpecifiedByTheServalAdmin() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions - { - AlternateTrainingSourceEnabled = true, - AlternateTrainingSourceConfigured = true, - } + var env = new TestEnvironment(); + const string servalConfig = """{"max_steps":35}"""; + await env.Projects.UpdateAsync( + Project01, + op => op.Set(p => p.TranslateConfig.DraftConfig.ServalConfig, servalConfig) ); - await env.SetupTrainingDataAsync(Project02); // SUT await env.Service.BuildProjectAsync( User01, - new BuildConfig { ProjectId = Project02, TrainingDataFiles = { Data01 } }, + new BuildConfig { ProjectId = Project01 }, preTranslate: true, CancellationToken.None ); - - // Ensure that the additional texts were retrieved - await env - .TrainingDataService.Received() - .GetTextsAsync( - User01, - Project02, - Arg.Is>(d => d.Contains(Data01)), - Arg.Any>(), - Arg.Any>() - ); - - // Ensure that the build passed the additional files corpus in the train_on parameter - string corpusId = env - .ProjectSecrets.Get(Project02) - .ServalData!.Corpora.First(c => c.Value.PreTranslate && c.Value.AdditionalTrainingData) - .Key; await env .TranslationEnginesClient.Received() .StartBuildAsync( - Arg.Any(), - Arg.Is(b => b.TrainOn.Any(c => c.CorpusId == corpusId)), + TranslationEngine01, + Arg.Is(b => ((int)((JObject)b.Options)["max_steps"]) == 35), CancellationToken.None ); + await env.FeatureManager.DidNotReceive().IsEnabledAsync(FeatureFlags.UpdatedLearningRateForServal); } [Test] - public async Task BuildProjectAsync_PassesFastTrainingConfiguration() + public async Task BuildProjectAsync_UsesTheUpdatedLearningRateForServal() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false }); - + var env = new TestEnvironment(); + env.FeatureManager.IsEnabledAsync(FeatureFlags.UpdatedLearningRateForServal).Returns(Task.FromResult(true)); // SUT await 
env.Service.BuildProjectAsync( User01, @@ -346,831 +544,1172 @@ await env.Service.BuildProjectAsync( preTranslate: true, CancellationToken.None ); - await env .TranslationEnginesClient.Received() .StartBuildAsync( TranslationEngine01, - Arg.Is(b => ((int)((JObject)b.Options)["max_steps"]) == 20), + Arg.Is(b => ((int)((JObject)b.Options)["train_params"]["max_steps"]) == 5000), CancellationToken.None ); } [Test] - public async Task BuildProjectAsync_MergesFastTrainingConfiguration() + public async Task CreateOrUpdateParallelCorpusAsync_CreatesParallelCorpus() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions { BuildIsPending = false, ServalConfig = @"{""max_steps"":35}" } - ); + var env = new TestEnvironment(); // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project01, FastTraining = true }, - preTranslate: true, + string actual = await env.Service.CreateOrUpdateParallelCorpusAsync( + TranslationEngine01, + null, + string.Empty, + [], + [], CancellationToken.None ); - + Assert.AreEqual(ParallelCorpus01, actual); await env - .TranslationEnginesClient.Received() - .StartBuildAsync( - TranslationEngine01, - Arg.Is(b => ((int)((JObject)b.Options)["max_steps"]) == 20), - CancellationToken.None - ); + .TranslationEnginesClient.Received(1) + .AddParallelCorpusAsync(TranslationEngine01, Arg.Any()); } [Test] - public async Task BuildProjectAsync_PassesServalConfig() + public async Task CreateOrUpdateParallelCorpusAsync_UpdatesParallelCorpus() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions { BuildIsPending = false, ServalConfig = @"{""max_steps"":35}" } - ); + var env = new TestEnvironment(); // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project01 }, - preTranslate: true, + string actual = await env.Service.CreateOrUpdateParallelCorpusAsync( + TranslationEngine01, + ParallelCorpus01, + string.Empty, + [], + [], 
CancellationToken.None ); - + Assert.AreEqual(ParallelCorpus01, actual); await env - .TranslationEnginesClient.Received() - .StartBuildAsync( + .TranslationEnginesClient.Received(1) + .UpdateParallelCorpusAsync( TranslationEngine01, - Arg.Is(b => ((int)((JObject)b.Options)["max_steps"]) == 35), - CancellationToken.None + ParallelCorpus01, + Arg.Any() ); } [Test] - public async Task BuildProjectAsync_CreatesServalProjectIfMissing() + public async Task CreateServalProjectAsync_ExistingPreTranslationProject() { // Set up test environment var env = new TestEnvironment(); - string sourceLanguage = env.Projects.Get(Project01).TranslateConfig.Source!.WritingSystem.Tag; - string targetLanguage = env.Projects.Get(Project01).WritingSystem.Tag; - Assert.AreNotEqual(sourceLanguage, targetLanguage); + var project = new SFProject { Id = Project01 }; + await env.SetupProjectSecretAsync(Project01, new ServalData { PreTranslationEngineId = TranslationEngine01 }); // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project01 }, - preTranslate: true, - CancellationToken.None - ); + string actual = await env.Service.CreateServalProjectAsync(project, preTranslate: true, CancellationToken.None); + Assert.AreEqual(TranslationEngine01, actual); + await env.TranslationEnginesClient.DidNotReceiveWithAnyArgs().CreateAsync(Arg.Any()); + } - await env - .TranslationEnginesClient.Received() - .CreateAsync( - Arg.Is(t => - t.SourceLanguage == sourceLanguage && t.TargetLanguage == targetLanguage - ), - CancellationToken.None - ); + [Test] + public async Task CreateServalProjectAsync_ExistingServalDataInProjectSecretsForPreTranslation() + { + // Set up test environment + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync(Project01, new ServalData()); + var project = new SFProject { Id = Project01 }; + env.Service.Configure().GetSourceLanguage(project).Returns("en"); + 
env.Service.Configure().GetTargetLanguageAsync(project).Returns(Task.FromResult("de")); + env.TranslationEnginesClient.CreateAsync(Arg.Any()) + .Returns(Task.FromResult(new TranslationEngine { Id = TranslationEngine01 })); + + // SUT + string actual = await env.Service.CreateServalProjectAsync(project, preTranslate: true, CancellationToken.None); + Assert.AreEqual(TranslationEngine01, actual); + Assert.AreEqual(TranslationEngine01, env.ProjectSecrets.Get(Project01).ServalData?.PreTranslationEngineId); } [Test] - public async Task BuildProjectAsync_CreatesServalProjectIfRemoved() + public async Task CreateServalProjectAsync_ExistingServalDataInProjectSecretsForSmtTranslation() { // Set up test environment var env = new TestEnvironment(); - env.TranslationEnginesClient.GetAsync(TranslationEngine02, CancellationToken.None) - .Throws(ServalApiExceptions.NotFound); - string sourceLanguage = env.Projects.Get(Project02).TranslateConfig.Source!.WritingSystem.Tag; - string targetLanguage = env.Projects.Get(Project02).WritingSystem.Tag; - Assert.AreNotEqual(sourceLanguage, targetLanguage); + await env.SetupProjectSecretAsync(Project01, new ServalData()); + var project = new SFProject { Id = Project01 }; + env.Service.Configure().GetSourceLanguage(project).Returns("en"); + env.Service.Configure().GetTargetLanguageAsync(project).Returns(Task.FromResult("de")); + env.TranslationEnginesClient.CreateAsync(Arg.Any()) + .Returns(Task.FromResult(new TranslationEngine { Id = TranslationEngine01 })); // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project02 }, + string actual = await env.Service.CreateServalProjectAsync( + project, preTranslate: false, CancellationToken.None ); - - await env - .TranslationEnginesClient.Received() - .CreateAsync( - Arg.Is(t => - t.SourceLanguage == sourceLanguage && t.TargetLanguage == targetLanguage - ), - CancellationToken.None - ); + Assert.AreEqual(TranslationEngine01, actual); + 
Assert.AreEqual(TranslationEngine01, env.ProjectSecrets.Get(Project01).ServalData?.TranslationEngineId); } [Test] - public void BuildProjectAsync_DirectoryNotFound() + public async Task CreateServalProjectAsync_ExistingSmtTranslationProject() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false }); - env.FileSystemService.DirectoryExists(Arg.Any()).Returns(false); + var env = new TestEnvironment(); + var project = new SFProject { Id = Project01 }; + await env.SetupProjectSecretAsync(Project01, new ServalData { TranslationEngineId = TranslationEngine01 }); // SUT - Assert.ThrowsAsync( - () => - env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project01 }, - preTranslate: true, - CancellationToken.None - ) + string actual = await env.Service.CreateServalProjectAsync( + project, + preTranslate: false, + CancellationToken.None ); + Assert.AreEqual(TranslationEngine01, actual); + await env.TranslationEnginesClient.DidNotReceiveWithAnyArgs().CreateAsync(Arg.Any()); } [Test] - public async Task BuildProjectAsync_SpecifiesTheSameSourceAndTargetLanguageForEcho() + public async Task CreateServalProjectAsync_NoServalDataInProjectSecretsForPreTranslation() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { UseEchoForPreTranslation = true }); - string sourceLanguage = env.Projects.Get(Project01).TranslateConfig.Source!.WritingSystem.Tag; - string targetLanguage = env.Projects.Get(Project01).WritingSystem.Tag; - Assert.AreNotEqual(sourceLanguage, targetLanguage); + var env = new TestEnvironment(); + var project = new SFProject { Id = Project01 }; + env.Service.Configure().GetSourceLanguage(project).Returns("en"); + env.Service.Configure().GetTargetLanguageAsync(project).Returns(Task.FromResult("de")); + env.TranslationEnginesClient.CreateAsync(Arg.Any()) + .Returns(Task.FromResult(new TranslationEngine { Id = TranslationEngine01 })); // SUT - await 
env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project01 }, - preTranslate: true, - CancellationToken.None - ); - - await env - .TranslationEnginesClient.Received() - .CreateAsync( - Arg.Is(t => - t.SourceLanguage == sourceLanguage && t.TargetLanguage == sourceLanguage - ), - CancellationToken.None - ); + string actual = await env.Service.CreateServalProjectAsync(project, preTranslate: true, CancellationToken.None); + Assert.AreEqual(TranslationEngine01, actual); + Assert.AreEqual(TranslationEngine01, env.ProjectSecrets.Get(Project01).ServalData?.PreTranslationEngineId); } [Test] - public async Task BuildProjectAsync_CreatesTranslationEngineIfNoTranslationEngineId() + public async Task CreateServalProjectAsync_NoServalDataInProjectSecretsForSmtTranslation() { // Set up test environment var env = new TestEnvironment(); + var project = new SFProject { Id = Project01 }; + env.Service.Configure().GetSourceLanguage(project).Returns("en"); + env.Service.Configure().GetTargetLanguageAsync(project).Returns(Task.FromResult("de")); + env.TranslationEnginesClient.CreateAsync(Arg.Any()) + .Returns(Task.FromResult(new TranslationEngine { Id = TranslationEngine01 })); // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project01 }, + string actual = await env.Service.CreateServalProjectAsync( + project, preTranslate: false, CancellationToken.None ); - - await env - .TranslationEnginesClient.Received() - .StartBuildAsync(TranslationEngine01, Arg.Any(), CancellationToken.None); - await env - .TranslationEnginesClient.Received() - .CreateAsync(Arg.Any(), CancellationToken.None); + Assert.AreEqual(TranslationEngine01, actual); + Assert.AreEqual(TranslationEngine01, env.ProjectSecrets.Get(Project01).ServalData?.TranslationEngineId); } [Test] - public async Task BuildProjectAsync_CreatesTranslationEngineOnServalIfMissing() + public void CreateServalProjectAsync_NoTranslationEngineIdFromServal() { // Set up test environment 
- var env = new TestEnvironment( - new TestEnvironmentOptions { LocalSourceTextHasData = true, LocalTargetTextHasData = true } - ); + var env = new TestEnvironment(); + var project = new SFProject { Id = Project01 }; + env.Service.Configure().GetSourceLanguage(project).Returns("en"); + env.Service.Configure().GetTargetLanguageAsync(project).Returns(Task.FromResult("de")); + env.TranslationEnginesClient.CreateAsync(Arg.Any()) + .Returns(Task.FromResult(new TranslationEngine())); - // Make the Serval API return the error code for a missing translation engine - env.TranslationEnginesClient.GetAsync(TranslationEngine02, CancellationToken.None) - .Throws(ServalApiExceptions.NotFound); + // SUT + Assert.ThrowsAsync( + () => env.Service.CreateServalProjectAsync(project, preTranslate: true, CancellationToken.None) + ); + } - // Return the correctly created corpus - env.TranslationEnginesClient.GetCorpusAsync(TranslationEngine01, Arg.Any(), CancellationToken.None) - .Returns(args => - Task.FromResult( - new TranslationCorpus - { - Id = args.ArgAt(1), - SourceLanguage = "en", - TargetLanguage = "en_US", - } - ) - ); + [Test] + public async Task CreateZipFileFromParatextDirectoryAsync_Success() + { + // Set up test environment + var env = new TestEnvironment(); + MemoryStream outputStream = new MemoryStream(); // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: false, - CancellationToken.None - ); + await env.Service.CreateZipFileFromParatextDirectoryAsync(Project01, outputStream, CancellationToken.None); - await env - .TranslationEnginesClient.Received() - .StartBuildAsync(TranslationEngine01, Arg.Any(), CancellationToken.None); - await env - .TranslationEnginesClient.Received() - .CreateAsync(Arg.Any(), CancellationToken.None); + // Validate the zip file + outputStream.Seek(0, SeekOrigin.Begin); + using var archive = new ZipArchive(outputStream, ZipArchiveMode.Read); + Assert.AreEqual(1, 
archive.Entries.Count); + Assert.AreEqual("file", archive.Entries[0].FullName); } [Test] - public async Task BuildProjectAsync_CreatesDataFilesOnServalIfMissing_Paratext() + public void CreateZipFileFromParatextDirectoryAsync_ThrowsExceptionWhenProjectDirectoryMissing() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions { LocalSourceTextHasData = true, LocalTargetTextHasData = true } + var env = new TestEnvironment(); + env.FileSystemService.DirectoryExists(Arg.Any()).Returns(false); + MemoryStream outputStream = new MemoryStream(); + + // SUT + Assert.ThrowsAsync( + () => env.Service.CreateZipFileFromParatextDirectoryAsync(Project01, outputStream, CancellationToken.None) ); - await env.SetDataInSync(Project02, preTranslate: true, uploadParatextZipFile: true); + } - // Make the Serval API return the error code for a missing data file - env.DataFilesClient.UpdateAsync(Arg.Any(), Arg.Any(), CancellationToken.None) - .Throws(ServalApiExceptions.NotFound); + [Test] + public async Task DeleteAllCorporaAndFilesAsync_DoesNotCrashWhenCorporaNotFound() + { + // Set up test environment + var env = new TestEnvironment(); + ServalApiException ex = ServalApiExceptions.NotFound; + env.CorporaClient.DeleteAsync(Corpus01).ThrowsAsync(ex); // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: true, + await env.Service.DeleteAllCorporaAndFilesAsync( + [new ServalCorpusFile { CorpusId = Corpus01, FileId = File01 }], + Project01, CancellationToken.None ); - await env - .TranslationEnginesClient.Received() - .StartBuildAsync(TranslationEngine02, Arg.Any(), CancellationToken.None); - await env - .DataFilesClient.Received() - .CreateAsync(Arg.Any(), FileFormat.Paratext, Arg.Any(), CancellationToken.None); + env.MockLogger.AssertHasEvent(logEvent => + logEvent.Exception == ex && logEvent.LogLevel == LogLevel.Information + ); } [Test] - public async Task 
BuildProjectAsync_CreatesDataFilesOnServalIfMissing_Text() + public async Task DeleteAllCorporaAndFilesAsync_DoesNotCrashWhenFileNotFound() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions { LocalSourceTextHasData = true, LocalTargetTextHasData = true } - ); - await env.SetDataInSync(Project02, preTranslate: true); - - // Make the Serval API return the error code for a missing data file - env.DataFilesClient.GetAsync(Arg.Any(), CancellationToken.None).Throws(ServalApiExceptions.NotFound); + var env = new TestEnvironment(); + ServalApiException ex = ServalApiExceptions.NotFound; + env.DataFilesClient.DeleteAsync(File01).ThrowsAsync(ex); // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: true, + await env.Service.DeleteAllCorporaAndFilesAsync( + [new ServalCorpusFile { CorpusId = Corpus01, FileId = File01 }], + Project01, CancellationToken.None ); - await env - .TranslationEnginesClient.Received() - .StartBuildAsync(TranslationEngine02, Arg.Any(), CancellationToken.None); - await env - .DataFilesClient.Received() - .CreateAsync(Arg.Any(), FileFormat.Paratext, Arg.Any(), CancellationToken.None); + env.MockLogger.AssertHasEvent(logEvent => + logEvent.Exception == ex && logEvent.LogLevel == LogLevel.Information + ); } [Test] - public async Task BuildProjectAsync_GetsTheSourceAndTargetLanguageIfMissing() + public async Task DeleteAllCorporaAndFilesAsync_Success() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions { LocalSourceTextHasData = true, LocalTargetTextHasData = true } - ); - SFProject project = env.Projects.Get(Project03); - Assert.IsNull(project.WritingSystem.Tag); - Assert.IsNull(project.TranslateConfig.Source?.WritingSystem.Tag); + var env = new TestEnvironment(); // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project03 }, - preTranslate: false, + await 
env.Service.DeleteAllCorporaAndFilesAsync( + [new ServalCorpusFile { CorpusId = Corpus01, FileId = File01 }], + Project01, CancellationToken.None ); - await env - .TranslationEnginesClient.Received() - .StartBuildAsync(TranslationEngine01, Arg.Any(), CancellationToken.None); - project = env.Projects.Get(Project03); - Assert.IsNotNull(project.WritingSystem.Tag); - Assert.IsNotNull(project.TranslateConfig.Source?.WritingSystem.Tag); + await env.CorporaClient.Received(1).DeleteAsync(Corpus01); + await env.DataFilesClient.Received(1).DeleteAsync(File01); } [Test] - public async Task BuildProjectAsync_RecreatesTheProjectOnServalIfTheSourceAndTargetLanguageChange() + public async Task EnsureTranslationEngineExistsAsync_PreTranslationEngineAlreadyExists() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions - { - HasTranslationEngineForSmt = true, - LocalSourceTextHasData = true, - LocalTargetTextHasData = true, - } - ); + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync(Project01, new ServalData { PreTranslationEngineId = TranslationEngine01 }); + env.Service.Configure() + .TranslationEngineExistsAsync(Project01, TranslationEngine01, preTranslate: true, CancellationToken.None) + .Returns(Task.FromResult(true)); - // Make the Serval API return the translation engine - env.TranslationEnginesClient.GetAsync(TranslationEngine02, CancellationToken.None) - .Returns( - Task.FromResult( - new TranslationEngine - { - Id = TranslationEngine02, - Name = Project02, - SourceLanguage = "old_source_language", - TargetLanguage = "old_target_language", - Type = MachineProjectService.SmtTransfer, - } - ) - ); + // Retrieve required objects + await using IConnection connection = await env.RealtimeService.ConnectAsync(); + IDocument projectDoc = connection.Get(Project01); + await projectDoc.FetchAsync(); + SFProjectSecret projectSecret = env.ProjectSecrets.Get(Project01); // SUT - await env.Service.BuildProjectAsync( + string actual 
= await env.Service.EnsureTranslationEngineExistsAsync( User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: false, + projectDoc, + projectSecret, + preTranslate: true, CancellationToken.None ); - - await env - .TranslationEnginesClient.Received() - .StartBuildAsync(TranslationEngine01, Arg.Any(), CancellationToken.None); - await env.TranslationEnginesClient.Received().DeleteAsync(TranslationEngine02, CancellationToken.None); - await env - .TranslationEnginesClient.Received() - .CreateAsync(Arg.Any(), CancellationToken.None); + Assert.AreEqual(TranslationEngine01, actual); } [Test] - public async Task BuildProjectAsync_RecreatesTheProjectIfAlternateSourceLanguageDoesNotMatch() + public async Task EnsureTranslationEngineExistsAsync_ProjectDeletedBeforeExecution() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions { AlternateSourceEnabled = true, AlternateSourceConfigured = true } - ); - await env.SetDataInSync(Project02, preTranslate: true); + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync(Project01, new ServalData { PreTranslationEngineId = TranslationEngine01 }); + env.Service.Configure() + .TranslationEngineExistsAsync(Project01, TranslationEngine01, preTranslate: true, CancellationToken.None) + .Returns(Task.FromResult(false)); + + // Retrieve required objects + await using IConnection connection = await env.RealtimeService.ConnectAsync(); + IDocument projectDoc = connection.Get(Project01); + await projectDoc.FetchAsync(); + SFProjectSecret projectSecret = env.ProjectSecrets.Get(Project01); + await projectDoc.DeleteAsync(); + // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: true, - CancellationToken.None + Assert.ThrowsAsync( + () => + env.Service.EnsureTranslationEngineExistsAsync( + User01, + projectDoc, + projectSecret, + preTranslate: true, + CancellationToken.None + ) ); - - string newEngineId = 
TranslationEngine01; - string oldEngineId = TranslationEngine02; - await env - .TranslationEnginesClient.Received() - .StartBuildAsync(newEngineId, Arg.Any(), CancellationToken.None); - await env.TranslationEnginesClient.Received().DeleteAsync(oldEngineId, CancellationToken.None); - await env - .TranslationEnginesClient.Received() - .CreateAsync(Arg.Any(), CancellationToken.None); + env.ParatextService.DidNotReceiveWithAnyArgs().GetWritingSystem(Arg.Any(), Arg.Any()); } [Test] - public async Task BuildProjectAsync_DoesNotRecreateTheProjectIfSourceLanguageMatchesAndAlternateSourceDisabled() + public async Task EnsureTranslationEngineExistsAsync_ProjectDeletedDuringExecution() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions { AlternateSourceEnabled = false, AlternateSourceConfigured = true } + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync(Project01, new ServalData { PreTranslationEngineId = TranslationEngine01 }); + env.Service.Configure() + .TranslationEngineExistsAsync(Project01, TranslationEngine01, preTranslate: true, CancellationToken.None) + .Returns(Task.FromResult(false)); + + // Retrieve required objects + await using IConnection connection = await env.RealtimeService.ConnectAsync(); + IDocument projectDoc = connection.Get(Project03); + await projectDoc.FetchAsync(); + SFProjectSecret projectSecret = env.ProjectSecrets.Get(Project03); + + // Delete the project mid-sync (normally this would be done out of process) + env.ParatextService.GetWritingSystem(Arg.Any(), Arg.Any()) + .Returns(new WritingSystem()) + .AndDoes(_ => projectDoc.DeleteAsync()); + + // SUT + Assert.ThrowsAsync( + () => + env.Service.EnsureTranslationEngineExistsAsync( + User01, + projectDoc, + projectSecret, + preTranslate: true, + CancellationToken.None + ) ); - await env.SetDataInSync(Project02, preTranslate: true); + env.ParatextService.Received(1).GetWritingSystem(Arg.Any(), Arg.Any()); + } + + [Test] + public async Task 
EnsureTranslationEngineExistsAsync_ProjectSourceRemoved() + { + // Set up test environment + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync(Project01, new ServalData { PreTranslationEngineId = TranslationEngine01 }); + env.Service.Configure() + .TranslationEngineExistsAsync(Project01, TranslationEngine01, preTranslate: true, CancellationToken.None) + .Returns(Task.FromResult(false)); + + // Retrieve required objects + await using IConnection connection = await env.RealtimeService.ConnectAsync(); + IDocument projectDoc = connection.Get(Project03); + await projectDoc.FetchAsync(); + await projectDoc.SubmitJson0OpAsync(op => op.Unset(p => p.TranslateConfig.Source)); + SFProjectSecret projectSecret = env.ProjectSecrets.Get(Project03); + // SUT - await env.Service.BuildProjectAsync( + Assert.ThrowsAsync( + () => + env.Service.EnsureTranslationEngineExistsAsync( + User01, + projectDoc, + projectSecret, + preTranslate: true, + CancellationToken.None + ) + ); + } + + [Test] + public async Task EnsureTranslationEngineExistsAsync_SetsUpTheProjectAndTranslationEngineForPreTranslation() + { + // Set up test environment + var env = new TestEnvironment(); + const string sourceLanguage = "en"; + const string targetLanguage = "fr"; + await env.SetupProjectSecretAsync(Project03, new ServalData { PreTranslationEngineId = TranslationEngine01 }); + env.Service.Configure() + .TranslationEngineExistsAsync(Project03, TranslationEngine01, preTranslate: true, CancellationToken.None) + .Returns(Task.FromResult(false)); + env.Service.Configure() + .CreateServalProjectAsync(Arg.Any(), preTranslate: true, CancellationToken.None) + .Returns(Task.FromResult(TranslationEngine02)); + env.ParatextService.GetWritingSystem(Arg.Any(), Paratext01) + .Returns(new WritingSystem { Tag = sourceLanguage }); + env.ParatextService.GetWritingSystem(Arg.Any(), Paratext03) + .Returns(new WritingSystem { Tag = targetLanguage }); + + // Retrieve required objects + await using IConnection 
connection = await env.RealtimeService.ConnectAsync(); + IDocument projectDoc = connection.Get(Project03); + await projectDoc.FetchAsync(); + SFProjectSecret projectSecret = env.ProjectSecrets.Get(Project03); + + // SUT + string actual = await env.Service.EnsureTranslationEngineExistsAsync( User01, - new BuildConfig { ProjectId = Project02 }, + projectDoc, + projectSecret, preTranslate: true, CancellationToken.None ); - - await env - .TranslationEnginesClient.Received() - .StartBuildAsync(TranslationEngine02, Arg.Any(), CancellationToken.None); - await env.TranslationEnginesClient.DidNotReceive().DeleteAsync(Arg.Any(), CancellationToken.None); - await env - .TranslationEnginesClient.DidNotReceive() - .CreateAsync(Arg.Any(), CancellationToken.None); + Assert.AreEqual(TranslationEngine02, actual); + Assert.IsNull(env.ProjectSecrets.Get(Project03).ServalData?.PreTranslationEngineId); + Assert.AreEqual(sourceLanguage, env.Projects.Get(Project03).TranslateConfig.Source?.WritingSystem.Tag); + Assert.AreEqual(targetLanguage, env.Projects.Get(Project03).WritingSystem.Tag); } [Test] - public async Task BuildProjectAsync_ClearsAssociatedCorporaReferencesIfTheTranslationEngineTypeIsIncorrect() + public async Task EnsureTranslationEngineExistsAsync_SetsUpTheProjectAndTranslationEngineForSmtTranslation() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions { LocalSourceTextHasData = true, LocalTargetTextHasData = true } - ); - await env.SetDataInSync( - Project02, - preTranslate: true, - uploadParatextZipFile: false, - alternateTrainingSource: true + var env = new TestEnvironment(); + const string sourceLanguage = "en"; + const string targetLanguage = "fr"; + await env.SetupProjectSecretAsync(Project03, new ServalData { TranslationEngineId = TranslationEngine01 }); + env.Service.Configure() + .TranslationEngineExistsAsync(Project03, TranslationEngine01, preTranslate: false, CancellationToken.None) + .Returns(Task.FromResult(false)); + 
env.Service.Configure() + .CreateServalProjectAsync(Arg.Any(), preTranslate: false, CancellationToken.None) + .Returns(Task.FromResult(TranslationEngine02)); + env.ParatextService.GetWritingSystem(Arg.Any(), Paratext01) + .Returns(new WritingSystem { Tag = sourceLanguage }); + env.ParatextService.GetWritingSystem(Arg.Any(), Paratext03) + .Returns(new WritingSystem { Tag = targetLanguage }); + + // Retrieve required objects + await using IConnection connection = await env.RealtimeService.ConnectAsync(); + IDocument projectDoc = connection.Get(Project03); + await projectDoc.FetchAsync(); + SFProjectSecret projectSecret = env.ProjectSecrets.Get(Project03); + + // SUT + string actual = await env.Service.EnsureTranslationEngineExistsAsync( + User01, + projectDoc, + projectSecret, + preTranslate: false, + CancellationToken.None ); + Assert.AreEqual(TranslationEngine02, actual); + Assert.IsNull(env.ProjectSecrets.Get(Project03).ServalData?.TranslationEngineId); + Assert.AreEqual(sourceLanguage, env.Projects.Get(Project03).TranslateConfig.Source?.WritingSystem.Tag); + Assert.AreEqual(targetLanguage, env.Projects.Get(Project03).WritingSystem.Tag); + Assert.IsFalse(env.Projects.Get(Project03).TranslateConfig.PreTranslate); + } - // Make the Serval API return the old translation engine with an incorrect type - env.TranslationEnginesClient.GetAsync(TranslationEngine02, CancellationToken.None) - .Returns( - Task.FromResult( - new TranslationEngine - { - Id = TranslationEngine02, - Name = Project02, - SourceLanguage = "en", - TargetLanguage = "en_US", - Type = MachineProjectService.SmtTransfer, - } - ) - ); - - // And the new translation engine correctly - env.TranslationEnginesClient.GetAsync(TranslationEngine01, CancellationToken.None) - .Returns( - Task.FromResult( - new TranslationEngine - { - Id = TranslationEngine01, - Name = Project02, - SourceLanguage = "en", - TargetLanguage = "en_US", - Type = MachineProjectService.Nmt, - } - ) - ); + [Test] + public async Task 
EnsureTranslationEngineExistsAsync_SmtTranslationEngineAlreadyExists() + { + // Set up test environment + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync(Project01, new ServalData { TranslationEngineId = TranslationEngine01 }); + env.Service.Configure() + .TranslationEngineExistsAsync(Project01, TranslationEngine01, preTranslate: false, CancellationToken.None) + .Returns(Task.FromResult(true)); - // Check that we have more than one pre-translate corpora - Assert.AreEqual(2, env.ProjectSecrets.Get(Project02).ServalData!.Corpora.Count(c => c.Value.PreTranslate)); + // Retrieve required objects + await using IConnection connection = await env.RealtimeService.ConnectAsync(); + IDocument projectDoc = connection.Get(Project01); + await projectDoc.FetchAsync(); + SFProjectSecret projectSecret = env.ProjectSecrets.Get(Project01); // SUT - await env.Service.BuildProjectAsync( + string actual = await env.Service.EnsureTranslationEngineExistsAsync( User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: true, + projectDoc, + projectSecret, + preTranslate: false, CancellationToken.None ); + Assert.AreEqual(TranslationEngine01, actual); + } - // The old engine should not be deleted, as it is an incorrect association - await env.TranslationEnginesClient.DidNotReceive().DeleteAsync(TranslationEngine02, CancellationToken.None); - await env - .TranslationEnginesClient.Received() - .CreateAsync(Arg.Any(), CancellationToken.None); - await env - .TranslationEnginesClient.Received() - .StartBuildAsync(TranslationEngine01, Arg.Any(), CancellationToken.None); + [Test] + public async Task EnsureTranslationEngineExistsAsync_TranslationEngineCouldNotBeCreated() + { + // Set up test environment + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync(Project01, new ServalData { TranslationEngineId = TranslationEngine01 }); + env.Service.Configure() + .TranslationEngineExistsAsync(Project01, TranslationEngine01, preTranslate: true, 
CancellationToken.None) + .Returns(Task.FromResult(false)); + env.Service.Configure() + .CreateServalProjectAsync(Arg.Any(), preTranslate: true, CancellationToken.None) + .Returns(Task.FromResult(null)); + + // Retrieve required objects + await using IConnection connection = await env.RealtimeService.ConnectAsync(); + IDocument projectDoc = connection.Get(Project01); + await projectDoc.FetchAsync(); + SFProjectSecret projectSecret = env.ProjectSecrets.Get(Project01); - // Ensure we have just one pre-translate corpora - Assert.AreEqual(1, env.ProjectSecrets.Get(Project02).ServalData!.Corpora.Count(c => c.Value.PreTranslate)); + // SUT + Assert.ThrowsAsync( + () => + env.Service.EnsureTranslationEngineExistsAsync( + User01, + projectDoc, + projectSecret, + preTranslate: true, + CancellationToken.None + ) + ); + env.ParatextService.DidNotReceiveWithAnyArgs().GetWritingSystem(Arg.Any(), Arg.Any()); } [Test] - public async Task BuildProjectAsync_ClearsAlternateSourceCorporaIfDisabled() + public async Task EnsureTranslationEngineExistsAsync_UserSecretDoesNotExist() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions - { - LocalSourceTextHasData = true, - LocalTargetTextHasData = true, - AlternateTrainingSourceEnabled = false, - } - ); - await env.SetDataInSync( - Project02, - preTranslate: true, - uploadParatextZipFile: false, - alternateTrainingSource: true - ); + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync(Project01, new ServalData { PreTranslationEngineId = TranslationEngine01 }); + env.Service.Configure() + .TranslationEngineExistsAsync(Project01, TranslationEngine01, preTranslate: true, CancellationToken.None) + .Returns(Task.FromResult(false)); - // Check that we have more than one pre-translate corpora - Assert.AreEqual(2, env.ProjectSecrets.Get(Project02).ServalData!.Corpora.Count(c => c.Value.PreTranslate)); + // Retrieve required objects + await using IConnection connection = await 
env.RealtimeService.ConnectAsync(); + IDocument projectDoc = connection.Get(Project03); + await projectDoc.FetchAsync(); + SFProjectSecret projectSecret = env.ProjectSecrets.Get(Project03); // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: true, - CancellationToken.None + Assert.ThrowsAsync( + () => + env.Service.EnsureTranslationEngineExistsAsync( + "invalid_user_id", + projectDoc, + projectSecret, + preTranslate: true, + CancellationToken.None + ) ); + } - // The old corpus and its files should be deleted - await env - .TranslationEnginesClient.Received() - .DeleteCorpusAsync(TranslationEngine02, Corpus02, CancellationToken.None); - await env.DataFilesClient.Received().DeleteAsync(File01, CancellationToken.None); + [Test] + public async Task GetProjectZipAsync_Success() + { + // Set up test environment + var env = new TestEnvironment(); + MemoryStream outputStream = new MemoryStream(); + env.Service.Configure() + .CreateZipFileFromParatextDirectoryAsync(Paratext01, outputStream, CancellationToken.None) + .Returns(Task.CompletedTask); - // Ensure we have just one pre-translate corpora - Assert.AreEqual(1, env.ProjectSecrets.Get(Project02).ServalData!.Corpora.Count(c => c.Value.PreTranslate)); + // SUT + string actual = await env.Service.GetProjectZipAsync(Project01, outputStream, CancellationToken.None); + Assert.AreEqual("P01.zip", actual); } [Test] - public async Task BuildProjectAsync_UploadParatextZipSpecifiesBookIds() + public void GetProjectZipAsync_ThrowsExceptionWhenProjectDocumentMissing() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false }); - await env.SetDataInSync(Project01, preTranslate: true, uploadParatextZipFile: true); + var env = new TestEnvironment(); + MemoryStream outputStream = new MemoryStream(); // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig - { - ProjectId = Project01, - TrainingBooks 
= { 1, 2 }, - TranslationBooks = { 3, 4 }, - }, - preTranslate: true, - CancellationToken.None + Assert.ThrowsAsync( + () => env.Service.GetProjectZipAsync("invalid_project_id", outputStream, CancellationToken.None) ); + } - await env - .TranslationEnginesClient.Received() - .StartBuildAsync( - TranslationEngine01, - Arg.Is(b => - b.TrainOn.Count == 1 - && b.TrainOn.First().CorpusId == Corpus01 - && b.TrainOn.First().ScriptureRange == "GEN;EXO" - && b.TrainOn.First().TextIds == null - && b.Pretranslate.Count == 1 - && b.Pretranslate.First().CorpusId == Corpus01 - && b.Pretranslate.First().ScriptureRange == "LEV;NUM" - && b.Pretranslate.First().TextIds == null - ), - CancellationToken.None - ); + [Test] + public void GetProjectZipAsync_ThrowsExceptionWhenProjectIsAResource() + { + // Set up test environment + var env = new TestEnvironment(); + env.ParatextService.IsResource(Arg.Any()).Returns(true); + MemoryStream outputStream = new MemoryStream(); + + // SUT + Assert.ThrowsAsync( + () => env.Service.GetProjectZipAsync(Project01, outputStream, CancellationToken.None) + ); } [Test] - public async Task BuildProjectAsync_UploadParatextZipSpecifiesAlternateTrainingSourceBookIds() + public void GetSourceLanguage_DoesNotUseTheAlternateSourceIfItIsDisabled() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions + var env = new TestEnvironment(); + const string alternateSourceWritingSystemTag = "alternate_source_writing_system_tag"; + const string sourceWritingSystemTag = "source_writing_system_tag"; + var project = new SFProject + { + TranslateConfig = { - BuildIsPending = false, - AlternateTrainingSourceConfigured = true, - AlternateTrainingSourceEnabled = true, - } - ); - await env.SetDataInSync( - Project02, - preTranslate: true, - uploadParatextZipFile: true, - alternateTrainingSource: true - ); + DraftConfig = new DraftConfig + { + AlternateSourceEnabled = false, + AlternateSource = new TranslateSource + { + WritingSystem = new 
WritingSystem { Tag = alternateSourceWritingSystemTag }, + }, + }, + Source = new TranslateSource { WritingSystem = new WritingSystem { Tag = sourceWritingSystemTag } }, + }, + }; // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig + string actual = env.Service.GetSourceLanguage(project); + Assert.AreEqual(sourceWritingSystemTag, actual); + } + + [Test] + public void GetSourceLanguage_DoesNotUseTheAlternateSourceIfItsWritingTagIsEmpty() + { + // Set up test environment + var env = new TestEnvironment(); + const string sourceWritingSystemTag = "source_writing_system_tag"; + var project = new SFProject + { + TranslateConfig = { - ProjectId = Project02, - TrainingBooks = { 1, 2 }, - TranslationBooks = { 3, 4 }, + DraftConfig = new DraftConfig { AlternateSourceEnabled = true, AlternateSource = null }, + Source = new TranslateSource { WritingSystem = new WritingSystem { Tag = sourceWritingSystemTag } }, }, - preTranslate: true, - CancellationToken.None - ); + }; - await env - .TranslationEnginesClient.Received() - .StartBuildAsync( - TranslationEngine02, - Arg.Is(b => - b.TrainOn.Count == 1 - && b.TrainOn.First().CorpusId == Corpus02 - && b.TrainOn.First().ScriptureRange == "GEN;EXO" - && b.TrainOn.First().TextIds == null - && b.Pretranslate.Count == 1 - && b.Pretranslate.First().CorpusId == Corpus01 - && b.Pretranslate.First().ScriptureRange == "LEV;NUM" - && b.Pretranslate.First().TextIds == null - ), - CancellationToken.None - ); + // SUT + string actual = env.Service.GetSourceLanguage(project); + Assert.AreEqual(sourceWritingSystemTag, actual); } - [TestCase(null)] - [TestCase("")] - [TestCase(" ")] - public async Task BuildProjectAsync_SpecifiesNullScriptureRange(string? 
scriptureRange) + [Test] + public void GetSourceLanguage_ThrowsExceptionWhenProjectDoesNotHaveASource() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false }); + var env = new TestEnvironment(); + var project = new SFProject { TranslateConfig = { Source = null } }; // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig + Assert.Throws(() => env.Service.GetSourceLanguage(project)); + } + + [Test] + public void GetSourceLanguage_ThrowsExceptionWhenProjectNull() + { + // Set up test environment + var env = new TestEnvironment(); + + // SUT + Assert.Throws(() => env.Service.GetSourceLanguage(null)); + } + + [Test] + public void GetSourceLanguage_UsesTheAlternateSourceIfItIsEnabledAndConfigured() + { + // Set up test environment + var env = new TestEnvironment(); + const string alternateSourceWritingSystemTag = "alternate_source_writing_system_tag"; + const string sourceWritingSystemTag = "source_writing_system_tag"; + var project = new SFProject + { + TranslateConfig = { - ProjectId = Project01, - TrainingScriptureRange = scriptureRange, - TranslationScriptureRange = scriptureRange, + DraftConfig = new DraftConfig + { + AlternateSourceEnabled = true, + AlternateSource = new TranslateSource + { + WritingSystem = new WritingSystem { Tag = alternateSourceWritingSystemTag }, + }, + }, + Source = new TranslateSource { WritingSystem = new WritingSystem { Tag = sourceWritingSystemTag } }, }, - preTranslate: true, - CancellationToken.None - ); + }; - await env - .TranslationEnginesClient.Received() - .StartBuildAsync( - TranslationEngine01, - Arg.Is(b => - b.Pretranslate.Count == 1 - && b.Pretranslate.First().ScriptureRange == null - && b.Pretranslate.First().TextIds == null - && b.TrainOn.Count == 1 - && b.TrainOn.First().ScriptureRange == null - && b.TrainOn.First().TextIds!.Count == 0 - ), - CancellationToken.None - ); + // SUT + string actual = env.Service.GetSourceLanguage(project); + 
Assert.AreEqual(alternateSourceWritingSystemTag, actual); } - [TestCase(null)] - [TestCase("")] - [TestCase(" ")] - public async Task BuildProjectAsync_SpecifiesNullScriptureRangeForAlternateTrainingSource(string? scriptureRange) + [Test] + public void GetSourceLanguage_ThrowsExceptionWhenTheSourceDoesNotHaveAWritingTag() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions - { - AlternateTrainingSourceEnabled = true, - AlternateTrainingSourceConfigured = true, - } - ); - await env.SetDataInSync( - Project02, - preTranslate: true, - uploadParatextZipFile: true, - alternateTrainingSource: true - ); + var env = new TestEnvironment(); + var project = new SFProject + { + TranslateConfig = { Source = new TranslateSource { WritingSystem = new WritingSystem { Tag = null } } }, + }; // SUT - await env.Service.BuildProjectAsync( - User01, - new BuildConfig - { - ProjectId = Project02, - TrainingScriptureRange = scriptureRange, - TranslationScriptureRange = scriptureRange, - }, - preTranslate: true, - CancellationToken.None - ); + Assert.Throws(() => env.Service.GetSourceLanguage(project)); + } - await env - .TranslationEnginesClient.Received() - .StartBuildAsync( - TranslationEngine02, - Arg.Is(b => - b.Pretranslate.Count == 1 - && b.Pretranslate.First().ScriptureRange == null - && b.Pretranslate.First().TextIds == null - && b.TrainOn.Count == 1 - && b.TrainOn.First().ScriptureRange == null - && b.TrainOn.First().TextIds!.Count == 0 - ), - CancellationToken.None - ); + [Test] + public void GetTextFileData_Success() + { + // Set up test environment + var env = new TestEnvironment(); + var text = TestEnvironment.GetMockTrainingData(); + const string expected = "001\ttarget\n003\tall flags\tss,ir,rs\n"; + + // SUT + string actual = env.Service.GetTextFileData(text); + Assert.AreEqual(expected, actual); } [Test] - public async Task BuildProjectForBackgroundJobAsync_BuildsPreTranslationProjects() + public async Task 
GetTargetLanguageAsync_ReturnSourceIfEcho() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false }); + var env = new TestEnvironment(new TestEnvironmentOptions { UseEchoForPreTranslation = true }); + const string sourceWritingSystemTag = "source_writing_system_tag"; + var project = new SFProject(); + env.Service.Configure().GetSourceLanguage(project).Returns(sourceWritingSystemTag); // SUT - await env.Service.BuildProjectForBackgroundJobAsync( - User01, - new BuildConfig { ProjectId = Project01 }, - preTranslate: true, - CancellationToken.None - ); + string actual = await env.Service.GetTargetLanguageAsync(project); + Assert.AreEqual(sourceWritingSystemTag, actual); + } - await env - .TranslationEnginesClient.Received() - .StartBuildAsync(TranslationEngine01, Arg.Any(), CancellationToken.None); + [Test] + public async Task GetTargetLanguageAsync_Success() + { + // Set up test environment + var env = new TestEnvironment(); + const string targetWritingSystemTag = "target_writing_system_tag"; + var project = new SFProject { WritingSystem = new WritingSystem { Tag = targetWritingSystemTag } }; + + // SUT + string actual = await env.Service.GetTargetLanguageAsync(project); + Assert.AreEqual(targetWritingSystemTag, actual); } [Test] - public async Task BuildProjectForBackgroundJobAsync_RecordsDataNotFoundExceptionAsWarning() + public void GetTranslationBuildConfig_DoesNotSpecifyAdditionalTrainingDataIfNoFilesSpecified() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false }); + var env = new TestEnvironment(); + var servalData = new ServalData + { + ParallelCorpusIdForPreTranslate = ParallelCorpus01, + ParallelCorpusIdForTrainOn = ParallelCorpus02, + AdditionalTrainingData = new ServalAdditionalTrainingData { ParallelCorpusId = ParallelCorpus03 }, + }; + var buildConfig = new BuildConfig(); // SUT - await env.Service.BuildProjectForBackgroundJobAsync( - 
User01, - new BuildConfig { ProjectId = "project_does_not_exist" }, - preTranslate: false, - CancellationToken.None + TranslationBuildConfig actual = env.Service.GetTranslationBuildConfig( + servalData, + servalConfig: null, + buildConfig, + corporaSyncInfo: [] ); + Assert.IsTrue(actual.Pretranslate!.Any(c => c.ParallelCorpusId == ParallelCorpus01)); + Assert.IsTrue(actual.TrainOn!.Any(c => c.ParallelCorpusId == ParallelCorpus02)); + Assert.IsFalse(actual.TrainOn!.Any(c => c.ParallelCorpusId == ParallelCorpus03)); + } - env.MockLogger.AssertHasEvent(logEvent => - logEvent.Message.Contains("DataNotFoundException", StringComparison.OrdinalIgnoreCase) - && logEvent.LogLevel == LogLevel.Warning + [Test] + public void GetTranslationBuildConfig_MergesFastTrainingConfiguration() + { + // Set up test environment + var env = new TestEnvironment(); + var servalData = new ServalData(); + const string servalConfig = """{"max_steps":35}"""; + var buildConfig = new BuildConfig { FastTraining = true }; + + // SUT + TranslationBuildConfig actual = env.Service.GetTranslationBuildConfig( + servalData, + servalConfig, + buildConfig, + corporaSyncInfo: [] ); - env.ExceptionHandler.DidNotReceive().ReportException(Arg.Any()); + Assert.AreEqual(20, (int)(actual.Options as JObject)?["max_steps"]); } [Test] - public async Task BuildProjectForBackgroundJobAsync_RecordsErrors() + public void GetTranslationBuildConfig_NoScriptureRange() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false }); - ServalApiException ex = ServalApiExceptions.Forbidden; - env.TranslationEnginesClient.CreateAsync(Arg.Any(), CancellationToken.None).Throws(ex); + var env = new TestEnvironment(); + var servalData = new ServalData + { + ParallelCorpusIdForPreTranslate = ParallelCorpus01, + ParallelCorpusIdForTrainOn = ParallelCorpus02, + }; + var buildConfig = new BuildConfig(); + List corporaSyncInfo = + [ + new ServalCorpusSyncInfo + { + CorpusId = Corpus01, 
+ IsSource = true, + ParallelCorpusId = ParallelCorpus01, + }, + new ServalCorpusSyncInfo + { + CorpusId = Corpus02, + IsSource = false, + ParallelCorpusId = ParallelCorpus01, + }, + new ServalCorpusSyncInfo + { + CorpusId = Corpus03, + IsSource = true, + ParallelCorpusId = ParallelCorpus02, + }, + new ServalCorpusSyncInfo + { + CorpusId = Corpus04, + IsSource = false, + ParallelCorpusId = ParallelCorpus02, + }, + ]; // SUT - await env.Service.BuildProjectForBackgroundJobAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: true, - CancellationToken.None + TranslationBuildConfig actual = env.Service.GetTranslationBuildConfig( + servalData, + servalConfig: null, + buildConfig, + corporaSyncInfo + ); + Assert.IsNull( + actual + .Pretranslate!.Single(c => c.ParallelCorpusId == ParallelCorpus01) + .SourceFilters!.Single(f => f.CorpusId == Corpus01) + .ScriptureRange + ); + Assert.IsNull( + actual + .TrainOn!.Single(c => c.ParallelCorpusId == ParallelCorpus02) + .SourceFilters!.Single(f => f.CorpusId == Corpus03) + .ScriptureRange + ); + Assert.IsNull( + actual + .TrainOn!.Single(c => c.ParallelCorpusId == ParallelCorpus02) + .TargetFilters!.Single(f => f.CorpusId == Corpus04) + .ScriptureRange ); - - env.MockLogger.AssertHasEvent(logEvent => logEvent.Exception == ex && logEvent.LogLevel == LogLevel.Error); - env.ExceptionHandler.Received().ReportException(ex); - Assert.IsNull(env.ProjectSecrets.Get(Project02).ServalData!.PreTranslationQueuedAt); - Assert.AreEqual(ex.Message, env.ProjectSecrets.Get(Project02).ServalData!.PreTranslationErrorMessage); } [Test] - public async Task BuildProjectForBackgroundJobAsync_DoesNotUpdatePreTranslationSecretsOnSmtErrors() + public void GetTranslationBuildConfig_PassesFastTrainingConfiguration() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false }); - ServalApiException ex = ServalApiExceptions.Forbidden; - 
env.TranslationEnginesClient.CreateAsync(Arg.Any(), CancellationToken.None).Throws(ex); + var env = new TestEnvironment(); + var servalData = new ServalData(); + var buildConfig = new BuildConfig { FastTraining = true }; // SUT - await env.Service.BuildProjectForBackgroundJobAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: false, - CancellationToken.None + TranslationBuildConfig actual = env.Service.GetTranslationBuildConfig( + servalData, + servalConfig: null, + buildConfig, + corporaSyncInfo: [] ); + Assert.AreEqual(20, (int)(actual.Options as JObject)?["max_steps"]); + } - env.MockLogger.AssertHasEvent(logEvent => logEvent.Exception == ex && logEvent.LogLevel == LogLevel.Error); - env.ExceptionHandler.Received().ReportException(ex); - Assert.IsNull(env.ProjectSecrets.Get(Project02).ServalData!.PreTranslationErrorMessage); + [Test] + public void GetTranslationBuildConfig_PassesServalConfig() + { + // Set up test environment + var env = new TestEnvironment(); + var servalData = new ServalData(); + const string servalConfig = """{"max_steps":35}"""; + var buildConfig = new BuildConfig(); + + // SUT + TranslationBuildConfig actual = env.Service.GetTranslationBuildConfig( + servalData, + servalConfig, + buildConfig, + corporaSyncInfo: [] + ); + Assert.AreEqual(35, (int)(actual.Options as JObject)?["max_steps"]); } [Test] - public async Task BuildProjectForBackgroundJobAsync_DoesNotRecordBuildInProgressErrors() + public void GetTranslationBuildConfig_ScriptureRangeAsString() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false }); - ServalApiException ex = ServalApiExceptions.BuildInProgress; - env.TranslationEnginesClient.StartBuildAsync( - Arg.Any(), - Arg.Any(), - CancellationToken.None - ) - .Throws(ex); + var env = new TestEnvironment(); + var servalData = new ServalData + { + ParallelCorpusIdForPreTranslate = ParallelCorpus01, + ParallelCorpusIdForTrainOn = ParallelCorpus02, 
+ }; + const string trainingScriptureRange = "MAT;MRK"; + const string translationScriptureRange = "LUK;JHN"; + var buildConfig = new BuildConfig + { + TrainingScriptureRange = trainingScriptureRange, + TranslationScriptureRange = translationScriptureRange, + }; + List corporaSyncInfo = + [ + new ServalCorpusSyncInfo + { + CorpusId = Corpus01, + IsSource = true, + ParallelCorpusId = ParallelCorpus01, + }, + new ServalCorpusSyncInfo + { + CorpusId = Corpus02, + IsSource = false, + ParallelCorpusId = ParallelCorpus01, + }, + new ServalCorpusSyncInfo + { + CorpusId = Corpus03, + IsSource = true, + ParallelCorpusId = ParallelCorpus02, + }, + new ServalCorpusSyncInfo + { + CorpusId = Corpus04, + IsSource = false, + ParallelCorpusId = ParallelCorpus02, + }, + ]; // SUT - await env.Service.BuildProjectForBackgroundJobAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: true, - CancellationToken.None + TranslationBuildConfig actual = env.Service.GetTranslationBuildConfig( + servalData, + servalConfig: null, + buildConfig, + corporaSyncInfo + ); + Assert.AreEqual( + translationScriptureRange, + actual + .Pretranslate!.Single(c => c.ParallelCorpusId == ParallelCorpus01) + .SourceFilters!.Single(f => f.CorpusId == Corpus01) + .ScriptureRange + ); + Assert.AreEqual( + trainingScriptureRange, + actual + .TrainOn!.Single(c => c.ParallelCorpusId == ParallelCorpus02) + .SourceFilters!.Single(f => f.CorpusId == Corpus03) + .ScriptureRange + ); + Assert.AreEqual( + trainingScriptureRange, + actual + .TrainOn!.Single(c => c.ParallelCorpusId == ParallelCorpus02) + .TargetFilters!.Single(f => f.CorpusId == Corpus04) + .ScriptureRange ); + } - await env - .TranslationEnginesClient.Received(1) - .StartBuildAsync(Arg.Any(), Arg.Any(), CancellationToken.None); - env.MockLogger.AssertNoEvent(logEvent => logEvent.Exception == ex); - env.ExceptionHandler.DidNotReceiveWithAnyArgs().ReportException(ex); - 
Assert.IsNull(env.ProjectSecrets.Get(Project02).ServalData!.PreTranslationQueuedAt); - Assert.IsNull(env.ProjectSecrets.Get(Project02).ServalData!.PreTranslationErrorMessage); + [Test] + public void GetTranslationBuildConfig_SpecifiesAdditionalTrainingData() + { + // Set up test environment + var env = new TestEnvironment(); + var servalData = new ServalData + { + ParallelCorpusIdForPreTranslate = ParallelCorpus01, + ParallelCorpusIdForTrainOn = ParallelCorpus02, + AdditionalTrainingData = new ServalAdditionalTrainingData { ParallelCorpusId = ParallelCorpus03 }, + }; + var buildConfig = new BuildConfig { TrainingDataFiles = [Data01] }; + + // SUT + TranslationBuildConfig actual = env.Service.GetTranslationBuildConfig( + servalData, + servalConfig: null, + buildConfig, + corporaSyncInfo: [] + ); + Assert.IsTrue(actual.Pretranslate!.Any(c => c.ParallelCorpusId == ParallelCorpus01)); + Assert.IsTrue(actual.TrainOn!.Any(c => c.ParallelCorpusId == ParallelCorpus02)); + Assert.IsTrue(actual.TrainOn!.Any(c => c.ParallelCorpusId == ParallelCorpus03)); } [Test] - public async Task BuildProjectForBackgroundJobAsync_DoesNotRecordTaskCancellation() + public void GetTranslationBuildConfig_TranslationBooksAndTrainingBooks() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { BuildIsPending = false }); - env.TranslationEnginesClient.CreateAsync(Arg.Any(), CancellationToken.None) - .Throws(new TaskCanceledException()); + var env = new TestEnvironment(); + var servalData = new ServalData + { + ParallelCorpusIdForPreTranslate = ParallelCorpus01, + ParallelCorpusIdForTrainOn = ParallelCorpus02, + }; + // The training and translation books will correspond to these two strings + const string trainingScriptureRange = "MAT;MRK"; + const string translationScriptureRange = "LUK;JHN"; + var buildConfig = new BuildConfig { TrainingBooks = [40, 41], TranslationBooks = [42, 43] }; + List corporaSyncInfo = + [ + new ServalCorpusSyncInfo + { + CorpusId = 
Corpus01, + IsSource = true, + ParallelCorpusId = ParallelCorpus01, + }, + new ServalCorpusSyncInfo + { + CorpusId = Corpus02, + IsSource = false, + ParallelCorpusId = ParallelCorpus01, + }, + new ServalCorpusSyncInfo + { + CorpusId = Corpus03, + IsSource = true, + ParallelCorpusId = ParallelCorpus02, + }, + new ServalCorpusSyncInfo + { + CorpusId = Corpus04, + IsSource = false, + ParallelCorpusId = ParallelCorpus02, + }, + ]; // SUT - await env.Service.BuildProjectForBackgroundJobAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: true, - CancellationToken.None + TranslationBuildConfig actual = env.Service.GetTranslationBuildConfig( + servalData, + servalConfig: null, + buildConfig, + corporaSyncInfo + ); + Assert.AreEqual( + translationScriptureRange, + actual + .Pretranslate!.Single(c => c.ParallelCorpusId == ParallelCorpus01) + .SourceFilters!.Single(f => f.CorpusId == Corpus01) + .ScriptureRange + ); + Assert.AreEqual( + trainingScriptureRange, + actual + .TrainOn!.Single(c => c.ParallelCorpusId == ParallelCorpus02) + .SourceFilters!.Single(f => f.CorpusId == Corpus03) + .ScriptureRange ); + Assert.AreEqual( + trainingScriptureRange, + actual + .TrainOn!.Single(c => c.ParallelCorpusId == ParallelCorpus02) + .TargetFilters!.Single(f => f.CorpusId == Corpus04) + .ScriptureRange + ); + } - env.ExceptionHandler.DidNotReceive().ReportException(Arg.Any()); - Assert.IsNull(env.ProjectSecrets.Get(Project02).ServalData!.PreTranslationQueuedAt); - Assert.IsNull(env.ProjectSecrets.Get(Project02).ServalData!.PreTranslationErrorMessage); + [Test] + public void GetTranslationBuildConfig_TranslationScriptureRangesAndTrainingScriptureRanges() + { + // Set up test environment + var env = new TestEnvironment(); + var servalData = new ServalData + { + ParallelCorpusIdForPreTranslate = ParallelCorpus01, + ParallelCorpusIdForTrainOn = ParallelCorpus02, + }; + // The training and translation books will correspond to these two strings + const string 
project01ScriptureRange = "MAT;MRK"; + // No scripture range is supported for target pre-translate translation (project02) + const string project03ScriptureRange = "LUK;JHN"; + const string project04ScriptureRange = "ACT;ROM"; + var buildConfig = new BuildConfig + { + TranslationScriptureRanges = + [ + new ProjectScriptureRange { ProjectId = Project01, ScriptureRange = project01ScriptureRange }, + ], + TrainingScriptureRanges = + [ + new ProjectScriptureRange { ProjectId = Project03, ScriptureRange = project03ScriptureRange }, + new ProjectScriptureRange { ProjectId = Project04, ScriptureRange = project04ScriptureRange }, + ], + }; + List corporaSyncInfo = + [ + new ServalCorpusSyncInfo + { + CorpusId = Corpus01, + IsSource = true, + ParallelCorpusId = ParallelCorpus01, + ProjectId = Project01, + }, + new ServalCorpusSyncInfo + { + CorpusId = Corpus02, + IsSource = false, + ParallelCorpusId = ParallelCorpus01, + ProjectId = Project02, + }, + new ServalCorpusSyncInfo + { + CorpusId = Corpus03, + IsSource = true, + ParallelCorpusId = ParallelCorpus02, + ProjectId = Project03, + }, + new ServalCorpusSyncInfo + { + CorpusId = Corpus04, + IsSource = false, + ParallelCorpusId = ParallelCorpus02, + ProjectId = Project04, + }, + ]; + + // SUT + TranslationBuildConfig actual = env.Service.GetTranslationBuildConfig( + servalData, + servalConfig: null, + buildConfig, + corporaSyncInfo + ); + Assert.AreEqual( + project01ScriptureRange, + actual + .Pretranslate!.Single(c => c.ParallelCorpusId == ParallelCorpus01) + .SourceFilters!.Single(f => f.CorpusId == Corpus01) + .ScriptureRange + ); + Assert.AreEqual( + project03ScriptureRange, + actual + .TrainOn!.Single(c => c.ParallelCorpusId == ParallelCorpus02) + .SourceFilters!.Single(f => f.CorpusId == Corpus03) + .ScriptureRange + ); + Assert.AreEqual( + project04ScriptureRange, + actual + .TrainOn!.Single(c => c.ParallelCorpusId == ParallelCorpus02) + .TargetFilters!.Single(f => f.CorpusId == Corpus04) + .ScriptureRange + ); } 
[Test] public async Task GetTranslationEngineTypeAsync_Echo() { + // Set up test environment var env = new TestEnvironment(new TestEnvironmentOptions { UseEchoForPreTranslation = true }); // SUT @@ -1181,6 +1720,7 @@ public async Task GetTranslationEngineTypeAsync_Echo() [Test] public async Task GetTranslationEngineTypeAsync_Nmt() { + // Set up test environment var env = new TestEnvironment(); // SUT @@ -1191,6 +1731,7 @@ public async Task GetTranslationEngineTypeAsync_Nmt() [Test] public async Task GetTranslationEngineTypeAsync_Smt() { + // Set up test environment var env = new TestEnvironment(); // SUT @@ -1199,131 +1740,296 @@ public async Task GetTranslationEngineTypeAsync_Smt() } [Test] - public async Task GetProjectZipAsync_Success() + public async Task RecreateTranslationEngineIfRequiredAsync_DoNotRecreateIfNoLanguageChanges() { + // Set up test environment var env = new TestEnvironment(); - MemoryStream outputStream = new MemoryStream(); + var project = new SFProject { Id = Project01 }; + const string targetLanguage = "en"; + const string sourceLanguage = "de"; + env.Service.Configure().GetSourceLanguage(project).Returns(sourceLanguage); + env.Service.Configure().GetTargetLanguageAsync(project).Returns(Task.FromResult(targetLanguage)); + env.Service.Configure() + .CreateServalProjectAsync(project, preTranslate: true, CancellationToken.None) + .Returns(Task.FromResult(string.Empty)); + env.Service.Configure() + .RemoveProjectAsync(Project01, preTranslate: true, CancellationToken.None) + .Returns(Task.CompletedTask); + env.TranslationEnginesClient.GetAsync(TranslationEngine01) + .Returns( + Task.FromResult( + new TranslationEngine + { + Id = TranslationEngine01, + SourceLanguage = sourceLanguage, + TargetLanguage = targetLanguage, + } + ) + ); // SUT - string actual = await env.Service.GetProjectZipAsync(Project01, outputStream, CancellationToken.None); - Assert.AreEqual("P01.zip", actual); + await env.Service.RecreateTranslationEngineIfRequiredAsync( + 
TranslationEngine01, + project, + preTranslate: true, + CancellationToken.None + ); + await env + .Service.DidNotReceiveWithAnyArgs() + .RemoveProjectAsync(Project01, preTranslate: true, CancellationToken.None); + await env + .Service.DidNotReceiveWithAnyArgs() + .CreateServalProjectAsync(project, preTranslate: true, CancellationToken.None); + } - // Validate the zip file - outputStream.Seek(0, SeekOrigin.Begin); - using var archive = new ZipArchive(outputStream, ZipArchiveMode.Read); - Assert.AreEqual(1, archive.Entries.Count); - Assert.AreEqual("file", archive.Entries[0].FullName); + [Test] + public async Task RecreateTranslationEngineIfRequiredAsync_RecreateIfTheSourceLanguageChanges() + { + // Set up test environment + var env = new TestEnvironment(); + var project = new SFProject { Id = Project01 }; + const string targetLanguage = "en"; + const string oldSourceLanguage = "de"; + const string newSourceLanguage = "fr"; + env.Service.Configure().GetSourceLanguage(project).Returns(oldSourceLanguage); + env.Service.Configure().GetTargetLanguageAsync(project).Returns(Task.FromResult(targetLanguage)); + env.Service.Configure() + .CreateServalProjectAsync(project, preTranslate: true, CancellationToken.None) + .Returns(Task.FromResult(string.Empty)); + env.Service.Configure() + .RemoveProjectAsync(Project01, preTranslate: true, CancellationToken.None) + .Returns(Task.CompletedTask); + env.TranslationEnginesClient.GetAsync(TranslationEngine01) + .Returns( + Task.FromResult( + new TranslationEngine + { + Id = TranslationEngine01, + SourceLanguage = newSourceLanguage, + TargetLanguage = targetLanguage, + } + ) + ); + + // SUT + await env.Service.RecreateTranslationEngineIfRequiredAsync( + TranslationEngine01, + project, + preTranslate: true, + CancellationToken.None + ); + await env.Service.Received(1).RemoveProjectAsync(Project01, preTranslate: true, CancellationToken.None); + await env.Service.Received(1).CreateServalProjectAsync(project, preTranslate: true, 
CancellationToken.None); } [Test] - public void GetProjectZipAsync_ThrowsExceptionWhenProjectDirectoryMissing() + public async Task RecreateTranslationEngineIfRequiredAsync_RecreateIfTheTargetLanguageChanges() { // Set up test environment var env = new TestEnvironment(); - env.FileSystemService.DirectoryExists(Arg.Any()).Returns(false); - MemoryStream outputStream = new MemoryStream(); + var project = new SFProject { Id = Project01 }; + const string oldTargetLanguage = "en"; + const string newTargetLanguage = "fr"; + const string sourceLanguage = "de"; + env.Service.Configure().GetSourceLanguage(project).Returns(sourceLanguage); + env.Service.Configure().GetTargetLanguageAsync(project).Returns(Task.FromResult(newTargetLanguage)); + env.Service.Configure() + .CreateServalProjectAsync(project, preTranslate: true, CancellationToken.None) + .Returns(Task.FromResult(string.Empty)); + env.Service.Configure() + .RemoveProjectAsync(Project01, preTranslate: true, CancellationToken.None) + .Returns(Task.CompletedTask); + env.TranslationEnginesClient.GetAsync(TranslationEngine01) + .Returns( + Task.FromResult( + new TranslationEngine + { + Id = TranslationEngine01, + SourceLanguage = sourceLanguage, + TargetLanguage = oldTargetLanguage, + } + ) + ); // SUT - Assert.ThrowsAsync( - () => env.Service.GetProjectZipAsync(Project01, outputStream, CancellationToken.None) + await env.Service.RecreateTranslationEngineIfRequiredAsync( + TranslationEngine01, + project, + preTranslate: true, + CancellationToken.None ); + await env.Service.Received(1).RemoveProjectAsync(Project01, preTranslate: true, CancellationToken.None); + await env.Service.Received(1).CreateServalProjectAsync(project, preTranslate: true, CancellationToken.None); } [Test] - public void GetProjectZipAsync_ThrowsExceptionWhenProjectDocumentMissing() + public async Task RecreateTranslationEngineIfRequiredAsync_RecreatePreTranslationEngineIfNotFound() { // Set up test environment var env = new TestEnvironment(); - 
MemoryStream outputStream = new MemoryStream(); + await env.SetupProjectSecretAsync(Project01, new ServalData { PreTranslationEngineId = TranslationEngine01 }); + var project = new SFProject { Id = Project01 }; + ServalApiException ex = ServalApiExceptions.NotFound; + env.Service.Configure() + .CreateServalProjectAsync(project, preTranslate: true, CancellationToken.None) + .Returns(Task.FromResult(string.Empty)); + env.TranslationEnginesClient.GetAsync(TranslationEngine01).ThrowsAsync(ex); // SUT - Assert.ThrowsAsync( - () => env.Service.GetProjectZipAsync("invalid_project_id", outputStream, CancellationToken.None) + await env.Service.RecreateTranslationEngineIfRequiredAsync( + TranslationEngine01, + project, + preTranslate: true, + CancellationToken.None ); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.PreTranslationEngineId); + await env.Service.Received(1).CreateServalProjectAsync(project, preTranslate: true, CancellationToken.None); + env.MockLogger.AssertHasEvent(l => l.Exception == ex && l.LogLevel == LogLevel.Information); } [Test] - public void GetProjectZipAsync_ThrowsExceptionWhenProjectIsAResource() + public async Task RecreateTranslationEngineIfRequiredAsync_RecreateSmtTranslationEngineIfNotFound() { // Set up test environment var env = new TestEnvironment(); - env.ParatextService.IsResource(Arg.Any()).Returns(true); - MemoryStream outputStream = new MemoryStream(); + await env.SetupProjectSecretAsync(Project01, new ServalData { TranslationEngineId = TranslationEngine01 }); + var project = new SFProject { Id = Project01 }; + ServalApiException ex = ServalApiExceptions.NotFound; + env.Service.Configure() + .CreateServalProjectAsync(project, preTranslate: false, CancellationToken.None) + .Returns(Task.FromResult(string.Empty)); + env.TranslationEnginesClient.GetAsync(TranslationEngine01).ThrowsAsync(ex); + + // SUT + await env.Service.RecreateTranslationEngineIfRequiredAsync( + TranslationEngine01, + project, + preTranslate: false, + 
CancellationToken.None + ); + Assert.IsNull(env.ProjectSecrets.Get(Project01).ServalData!.TranslationEngineId); + await env.Service.Received(1).CreateServalProjectAsync(project, preTranslate: false, CancellationToken.None); + env.MockLogger.AssertHasEvent(l => l.Exception == ex && l.LogLevel == LogLevel.Information); + } + + [Test] + public async Task RemoveLegacyServalDataAsync_DoesNotCallServalIfNoTranslationEngineId() + { + // Set up test environment + var env = new TestEnvironment(); + + // SUT + await env.Service.RemoveLegacyServalDataAsync(Project01, preTranslate: false, CancellationToken.None); + + // Ensure that the corpus and its files were not deleted + await env + .TranslationEnginesClient.DidNotReceiveWithAnyArgs() + .DeleteCorpusAsync(TranslationEngine01, Corpus01, deleteFiles: true, CancellationToken.None); + } + + [Test] + public async Task RemoveLegacyServalDataAsync_LogsAnErrorWhenAServalErrorOccurs() + { + // Set up test environment + var env = new TestEnvironment( + new TestEnvironmentOptions { HasTranslationEngineForSmt = true, LegacyCorpora = true } + ); + env.TranslationEnginesClient.DeleteCorpusAsync( + TranslationEngine02, + Corpus01, + deleteFiles: true, + CancellationToken.None + ) + .ThrowsAsync(ServalApiExceptions.InternalServerError); // SUT - Assert.ThrowsAsync( - () => env.Service.GetProjectZipAsync(Project01, outputStream, CancellationToken.None) - ); + await env.Service.RemoveLegacyServalDataAsync(Project02, preTranslate: false, CancellationToken.None); + + // Ensure that the corpus and its files were deleted + env.MockLogger.AssertHasEvent(l => l.LogLevel == LogLevel.Error && l.Message!.Contains(TranslationEngine02)); } [Test] - public void RemoveProjectAsync_ThrowsExceptionWhenProjectSecretMissing() + public async Task RemoveLegacyServalDataAsync_LogsAnEventWhenTheFileIsNotFound() { // Set up test environment - var env = new TestEnvironment(); + var env = new TestEnvironment( + new TestEnvironmentOptions { 
HasTranslationEngineForSmt = true, LegacyCorpora = true } + ); + env.TranslationEnginesClient.DeleteCorpusAsync( + TranslationEngine02, + Corpus01, + deleteFiles: true, + CancellationToken.None + ) + .ThrowsAsync(ServalApiExceptions.NotFound); // SUT - Assert.ThrowsAsync( - () => - env.Service.RemoveProjectAsync( - User01, - "invalid_project_id", - preTranslate: false, - CancellationToken.None - ) + await env.Service.RemoveLegacyServalDataAsync(Project02, preTranslate: false, CancellationToken.None); + + // Ensure that the corpus and its files were deleted + env.MockLogger.AssertHasEvent(l => + l.LogLevel == LogLevel.Information && l.Message!.Contains(TranslationEngine02) ); } [Test] - public async Task RemoveProjectAsync_CallsServalIfTranslationEngineIdPresent() + public async Task RemoveLegacyServalDataAsync_OnlyRemovesRelevantCorpora() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { HasTranslationEngineForSmt = true }); + var env = new TestEnvironment( + new TestEnvironmentOptions { HasTranslationEngineForSmt = true, LegacyCorpora = true } + ); + + // Verify there are two corpora + Assert.AreEqual(2, env.ProjectSecrets.Get(Project02).ServalData!.Corpora!.Count); // SUT - await env.Service.RemoveProjectAsync(User01, Project02, preTranslate: false, CancellationToken.None); + await env.Service.RemoveLegacyServalDataAsync(Project02, preTranslate: false, CancellationToken.None); - // Ensure that the translation engine, corpus and any files are deleted - await env.TranslationEnginesClient.Received(1).DeleteAsync(TranslationEngine02, CancellationToken.None); + // Ensure that the corpus and its files were deleted await env .TranslationEnginesClient.Received(1) - .DeleteCorpusAsync(TranslationEngine02, Corpus01, CancellationToken.None); - await env.DataFilesClient.Received(1).DeleteAsync(File01, CancellationToken.None); - await env.DataFilesClient.Received(1).DeleteAsync(File02, CancellationToken.None); + 
.DeleteCorpusAsync(TranslationEngine02, Corpus01, deleteFiles: true, CancellationToken.None); + Assert.AreEqual(1, env.ProjectSecrets.Get(Project02).ServalData!.Corpora!.Count); } [Test] - public async Task RemoveProjectAsync_DoesNotCallServalIfNoTranslationEngineId() + public async Task RemoveLegacyServalDataAsync_RemovesCorporaPropertyIfNoMoreCorpora() { // Set up test environment - var env = new TestEnvironment(); + var env = new TestEnvironment(new TestEnvironmentOptions { LegacyCorpora = true }); + await env.SetDataInSync( + Project02, + preTranslate: true, + uploadParatextZipFile: true, + alternateTrainingSource: true + ); + + // Verify there are two corpora + Assert.AreEqual(2, env.ProjectSecrets.Get(Project02).ServalData!.Corpora!.Count); // SUT - await env.Service.RemoveProjectAsync(User01, Project01, preTranslate: false, CancellationToken.None); + await env.Service.RemoveLegacyServalDataAsync(Project02, preTranslate: true, CancellationToken.None); - // Ensure that the translation engine, corpus and any files were not deleted - await env - .TranslationEnginesClient.DidNotReceiveWithAnyArgs() - .DeleteAsync(TranslationEngine01, CancellationToken.None); + // Ensure that the corpus and its files were deleted await env - .TranslationEnginesClient.DidNotReceiveWithAnyArgs() - .DeleteCorpusAsync(TranslationEngine01, Corpus01, CancellationToken.None); - await env.DataFilesClient.DidNotReceiveWithAnyArgs().DeleteAsync(File01, CancellationToken.None); + .TranslationEnginesClient.Received(1) + .DeleteCorpusAsync(TranslationEngine02, Corpus01, deleteFiles: true, CancellationToken.None); + Assert.IsNull(env.ProjectSecrets.Get(Project02).ServalData?.Corpora); } [Test] - public async Task SyncProjectCorporaAsync_ThrowsExceptionWhenProjectMissing() + public void RemoveLegacyServalDataAsync_ThrowsExceptionWhenProjectSecretMissing() { // Set up test environment var env = new TestEnvironment(); - await env.Projects.DeleteAllAsync(_ => true); // SUT Assert.ThrowsAsync( () => 
- env.Service.SyncProjectCorporaAsync( - User01, - new BuildConfig { ProjectId = Project01 }, + env.Service.RemoveLegacyServalDataAsync( + "invalid_project_id", preTranslate: false, CancellationToken.None ) @@ -1331,666 +2037,746 @@ public async Task SyncProjectCorporaAsync_ThrowsExceptionWhenProjectMissing() } [Test] - public async Task SyncProjectCorporaAsync_ThrowsExceptionWhenProjectSecretMissing() + public async Task RemoveProjectAsync_DeletesPreTranslationEngineAndAllCorporaAndFilesIfNoSmt() { // Set up test environment var env = new TestEnvironment(); - await env.ProjectSecrets.DeleteAllAsync(_ => true); + await env.SetupProjectSecretAsync( + Project01, + new ServalData + { + PreTranslationEngineId = TranslationEngine01, + CorpusFiles = + [ + new ServalCorpusFile { CorpusId = Corpus01, FileId = File01 }, + new ServalCorpusFile { CorpusId = Corpus02, FileId = File02 }, + ], + AdditionalTrainingData = new ServalAdditionalTrainingData + { + SourceCorpusId = Corpus03, + TargetCorpusId = Corpus04, + CorpusFiles = + [ + new ServalCorpusFile { CorpusId = Corpus03, FileId = File03 }, + new ServalCorpusFile { CorpusId = Corpus04, FileId = File04 }, + ], + }, + } + ); // SUT - Assert.ThrowsAsync( - () => - env.Service.SyncProjectCorporaAsync( - User01, - new BuildConfig { ProjectId = Project01 }, - preTranslate: false, - CancellationToken.None - ) - ); + await env.Service.RemoveProjectAsync(Project01, preTranslate: true, CancellationToken.None); + + // Ensure that the pre-translation engine, additional training corpora and files are deleted + await env.TranslationEnginesClient.Received(1).DeleteAsync(TranslationEngine01); + await env.CorporaClient.Received(1).DeleteAsync(Corpus01); + await env.CorporaClient.Received(1).DeleteAsync(Corpus02); + await env.CorporaClient.Received(1).DeleteAsync(Corpus03); + await env.CorporaClient.Received(1).DeleteAsync(Corpus04); + await env.DataFilesClient.Received(1).DeleteAsync(File01); + await 
env.DataFilesClient.Received(1).DeleteAsync(File02); + await env.DataFilesClient.Received(1).DeleteAsync(File03); + await env.DataFilesClient.Received(1).DeleteAsync(File04); } [Test] - public async Task SyncProjectCorporaAsync_ThrowsExceptionWhenServalConfigMissing() + public async Task RemoveProjectAsync_DeletesPreTranslationEngineOnly() { // Set up test environment var env = new TestEnvironment(); - await env.ProjectSecrets.UpdateAsync(Project01, op => op.Unset(p => p.ServalData)); + await env.SetupProjectSecretAsync( + Project01, + new ServalData + { + PreTranslationEngineId = TranslationEngine01, + TranslationEngineId = TranslationEngine02, + CorpusFiles = + [ + new ServalCorpusFile { CorpusId = Corpus01, FileId = File01 }, + new ServalCorpusFile { CorpusId = Corpus02, FileId = File02 }, + ], + AdditionalTrainingData = new ServalAdditionalTrainingData + { + SourceCorpusId = Corpus03, + TargetCorpusId = Corpus04, + CorpusFiles = + [ + new ServalCorpusFile { CorpusId = Corpus03, FileId = File03 }, + new ServalCorpusFile { CorpusId = Corpus03, FileId = File04 }, + new ServalCorpusFile { CorpusId = Corpus04, FileId = File05 }, + new ServalCorpusFile { CorpusId = Corpus04, FileId = File06 }, + ], + }, + } + ); // SUT - Assert.ThrowsAsync( - () => - env.Service.SyncProjectCorporaAsync( - User01, - new BuildConfig { ProjectId = Project01 }, - preTranslate: false, - CancellationToken.None - ) - ); + await env.Service.RemoveProjectAsync(Project01, preTranslate: true, CancellationToken.None); + + // Ensure that the pre-translation engine, additional training corpora and files are deleted + await env.TranslationEnginesClient.Received(1).DeleteAsync(TranslationEngine01); + await env.CorporaClient.Received(1).DeleteAsync(Corpus03); + await env.CorporaClient.Received(1).DeleteAsync(Corpus04); + await env.DataFilesClient.Received(1).DeleteAsync(File03); + await env.DataFilesClient.Received(1).DeleteAsync(File04); + await env.DataFilesClient.Received(1).DeleteAsync(File05); + 
await env.DataFilesClient.Received(1).DeleteAsync(File06); + + // Ensure that the SMT translation engine, shared corpora, and shared files are not deleted + await env.TranslationEnginesClient.DidNotReceive().DeleteAsync(TranslationEngine02); + await env.CorporaClient.DidNotReceive().DeleteAsync(Corpus01); + await env.CorporaClient.DidNotReceive().DeleteAsync(Corpus02); + await env.DataFilesClient.DidNotReceive().DeleteAsync(File01); + await env.DataFilesClient.DidNotReceive().DeleteAsync(File02); } [Test] - public async Task SyncProjectCorporaAsync_ThrowsExceptionWhenPreTranslationEngineIdMissing() + public async Task RemoveProjectAsync_DeletesTranslationEngineAndAllCorporaAndFilesIfNoNmt() { // Set up test environment var env = new TestEnvironment(); - await env.ProjectSecrets.UpdateAsync(Project01, op => op.Set(p => p.ServalData, new ServalData())); + await env.SetupProjectSecretAsync( + Project01, + new ServalData + { + TranslationEngineId = TranslationEngine02, + CorpusFiles = + [ + new ServalCorpusFile { CorpusId = Corpus01, FileId = File01 }, + new ServalCorpusFile { CorpusId = Corpus02, FileId = File02 }, + ], + } + ); // SUT - Assert.ThrowsAsync( - () => - env.Service.SyncProjectCorporaAsync( - User01, - new BuildConfig { ProjectId = Project01 }, - preTranslate: true, - CancellationToken.None - ) - ); + await env.Service.RemoveProjectAsync(Project01, preTranslate: false, CancellationToken.None); + + // Ensure that the SMT translation engine, shared corpora, and shared files are not deleted + await env.TranslationEnginesClient.Received(1).DeleteAsync(TranslationEngine02); + await env.CorporaClient.Received(1).DeleteAsync(Corpus01); + await env.CorporaClient.Received(1).DeleteAsync(Corpus02); + await env.DataFilesClient.Received(1).DeleteAsync(File01); + await env.DataFilesClient.Received(1).DeleteAsync(File02); } [Test] - public async Task SyncProjectCorporaAsync_ThrowsExceptionWhenTranslationEngineIdMissing() + public async Task 
RemoveProjectAsync_DeletesTranslationEngineOnly() { // Set up test environment var env = new TestEnvironment(); - await env.ProjectSecrets.UpdateAsync(Project01, op => op.Set(p => p.ServalData, new ServalData())); + await env.SetupProjectSecretAsync( + Project01, + new ServalData + { + PreTranslationEngineId = TranslationEngine01, + TranslationEngineId = TranslationEngine02, + CorpusFiles = + [ + new ServalCorpusFile { CorpusId = Corpus01, FileId = File01 }, + new ServalCorpusFile { CorpusId = Corpus02, FileId = File02 }, + ], + AdditionalTrainingData = new ServalAdditionalTrainingData + { + SourceCorpusId = Corpus03, + TargetCorpusId = Corpus04, + CorpusFiles = + [ + new ServalCorpusFile { CorpusId = Corpus03, FileId = File03 }, + new ServalCorpusFile { CorpusId = Corpus03, FileId = File04 }, + new ServalCorpusFile { CorpusId = Corpus04, FileId = File05 }, + new ServalCorpusFile { CorpusId = Corpus04, FileId = File06 }, + ], + }, + } + ); // SUT - Assert.ThrowsAsync( - () => - env.Service.SyncProjectCorporaAsync( - User01, - new BuildConfig { ProjectId = Project01 }, - preTranslate: false, - CancellationToken.None - ) - ); + await env.Service.RemoveProjectAsync(Project01, preTranslate: false, CancellationToken.None); + + // Ensure that the SMT translation engine, shared corpora, and shared files are not deleted + await env.TranslationEnginesClient.Received(1).DeleteAsync(TranslationEngine02); + + // Ensure that the pre-translation engine, and any additional or shared corpora and files are not deleted + await env.TranslationEnginesClient.DidNotReceive().DeleteAsync(TranslationEngine01); + await env.CorporaClient.DidNotReceive().DeleteAsync(Corpus01); + await env.CorporaClient.DidNotReceive().DeleteAsync(Corpus02); + await env.CorporaClient.DidNotReceive().DeleteAsync(Corpus03); + await env.CorporaClient.DidNotReceive().DeleteAsync(Corpus04); + await env.DataFilesClient.DidNotReceive().DeleteAsync(File01); + await 
env.DataFilesClient.DidNotReceive().DeleteAsync(File02); + await env.DataFilesClient.DidNotReceive().DeleteAsync(File03); + await env.DataFilesClient.DidNotReceive().DeleteAsync(File04); + await env.DataFilesClient.DidNotReceive().DeleteAsync(File05); + await env.DataFilesClient.DidNotReceive().DeleteAsync(File06); } [Test] - public async Task SyncProjectCorporaAsync_ThrowsExceptionWhenSourceMissing() + public async Task RemoveProjectAsync_DoesNotCallServalIfNoTranslationEngineId() { // Set up test environment var env = new TestEnvironment(); - await env.Projects.UpdateAsync(Project01, op => op.Unset(p => p.TranslateConfig.Source)); // SUT - Assert.ThrowsAsync( - () => - env.Service.SyncProjectCorporaAsync( - User01, - new BuildConfig { ProjectId = Project01 }, - preTranslate: false, - CancellationToken.None - ) + await env.Service.RemoveProjectAsync(Project01, preTranslate: false, CancellationToken.None); + + // Ensure that the translation engine, corpus and any files were not deleted + await env.TranslationEnginesClient.DidNotReceiveWithAnyArgs().DeleteAsync(TranslationEngine01); + await env.CorporaClient.DidNotReceiveWithAnyArgs().DeleteAsync(Corpus01); + await env.DataFilesClient.DidNotReceiveWithAnyArgs().DeleteAsync(File01); + + // A message was logged about the missing translation engine + env.MockLogger.AssertHasEvent(logEvent => + logEvent.LogLevel == LogLevel.Information && logEvent.Message!.Contains("No Translation Engine Id") ); } [Test] - public async Task SyncProjectCorporaAsync_CreatesRemoteCorpusIfMissing() + public async Task RemoveProjectAsync_DoesNotThrowExceptionWhenTheCorpusIsNotFound() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { LocalSourceTextHasData = true }); - await env.BeforeFirstSync(Project01); - string sourceLanguage = env.Projects.Get(Project01).TranslateConfig.Source!.WritingSystem.Tag; - string targetLanguage = env.Projects.Get(Project01).WritingSystem.Tag; - 
Assert.AreNotEqual(sourceLanguage, targetLanguage); + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync( + Project01, + new ServalData + { + PreTranslationEngineId = TranslationEngine01, + CorpusFiles = [new ServalCorpusFile { CorpusId = Corpus01, FileId = File01 }], + } + ); + + // Make the Serval API return the error code for an already deleted translation engine + env.CorporaClient.DeleteAsync(Corpus01).Throws(ServalApiExceptions.NotFound); // SUT - bool actual = await env.Service.SyncProjectCorporaAsync( - User01, - new BuildConfig { ProjectId = Project01 }, - preTranslate: false, - CancellationToken.None + await env.Service.RemoveProjectAsync(Project01, preTranslate: true, CancellationToken.None); + + // Ensure that the translation engine, shared corpora, and shared files are not deleted + await env.TranslationEnginesClient.Received(1).DeleteAsync(TranslationEngine01); + await env.CorporaClient.Received(1).DeleteAsync(Corpus01); + await env.DataFilesClient.Received(1).DeleteAsync(File01); + + // The 404 exception was logged + env.MockLogger.AssertHasEvent(logEvent => + logEvent.LogLevel == LogLevel.Information && logEvent.Exception is ServalApiException ); - Assert.IsTrue(actual); - await env - .TranslationEnginesClient.Received(1) - .AddCorpusAsync( - Arg.Any(), - Arg.Is(t => - t.SourceLanguage == sourceLanguage && t.TargetLanguage == targetLanguage - ), - CancellationToken.None - ); - await env.DataFilesClient.DidNotReceiveWithAnyArgs().DeleteAsync(string.Empty, CancellationToken.None); - await env - .DataFilesClient.Received(2) - .CreateAsync(Arg.Any(), FileFormat.Paratext, Project01, CancellationToken.None); - Assert.AreEqual(1, env.ProjectSecrets.Get(Project01).ServalData?.Corpora[Corpus01].SourceFiles.Count); } [Test] - public async Task SyncProjectCorporaAsync_DoesNotUpdateAlternateTrainingSourceOnSmtBuilds() + public async Task RemoveProjectAsync_DoesNotThrowExceptionWhenTheFileIsNotFound() { // Set up test environment - var env = 
new TestEnvironment( - new TestEnvironmentOptions + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync( + Project01, + new ServalData { - HasTranslationEngineForSmt = true, - LocalSourceTextHasData = true, - LocalTargetTextHasData = true, - AlternateTrainingSourceConfigured = true, - AlternateTrainingSourceEnabled = true, + PreTranslationEngineId = TranslationEngine01, + CorpusFiles = [new ServalCorpusFile { CorpusId = Corpus01, FileId = File01 }], } ); - await env.SetDataInSync( - Project02, - preTranslate: true, - uploadParatextZipFile: false, - alternateTrainingSource: true - ); + + // Make the Serval API return the error code for an already deleted translation engine + env.DataFilesClient.DeleteAsync(File01).Throws(ServalApiExceptions.NotFound); // SUT - bool actual = await env.Service.SyncProjectCorporaAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: false, - CancellationToken.None + await env.Service.RemoveProjectAsync(Project01, preTranslate: true, CancellationToken.None); + + // Ensure that the translation engine, shared corpora, and shared files are not deleted + await env.TranslationEnginesClient.Received(1).DeleteAsync(TranslationEngine01); + await env.CorporaClient.Received(1).DeleteAsync(Corpus01); + await env.DataFilesClient.Received(1).DeleteAsync(File01); + + // The 404 exception was logged + env.MockLogger.AssertHasEvent(logEvent => + logEvent.LogLevel == LogLevel.Information && logEvent.Exception is ServalApiException ); - Assert.IsTrue(actual); - await env - .TranslationEnginesClient.DidNotReceiveWithAnyArgs() - .DeleteCorpusAsync(Arg.Any(), Arg.Any(), CancellationToken.None); - await env.DataFilesClient.DidNotReceiveWithAnyArgs().DeleteAsync(string.Empty, CancellationToken.None); } [Test] - public async Task SyncProjectCorporaAsync_FailsLocallyOnRemoteFailure() + public async Task RemoveProjectAsync_DoesNotThrowExceptionWhenTheTranslationEngineIsNotFound() { // Set up test environment - var env = 
new TestEnvironment(new TestEnvironmentOptions { LocalSourceTextHasData = true }); - await env.BeforeFirstSync(Project01); + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync( + Project01, + new ServalData + { + PreTranslationEngineId = TranslationEngine01, + CorpusFiles = [new ServalCorpusFile { CorpusId = Corpus01, FileId = File01 }], + } + ); - // Make adding the corpus to fail due to an API issue - env.TranslationEnginesClient.AddCorpusAsync( - TranslationEngine01, - Arg.Any(), - CancellationToken.None - ) - .Throws(new BrokenCircuitException()); + // Make the Serval API return the error code for an already deleted translation engine + env.TranslationEnginesClient.DeleteAsync(TranslationEngine01).Throws(ServalApiExceptions.NotFound); // SUT - Assert.ThrowsAsync( - () => - env.Service.SyncProjectCorporaAsync( - User01, - new BuildConfig { ProjectId = Project01 }, - preTranslate: false, - CancellationToken.None - ) + await env.Service.RemoveProjectAsync(Project01, preTranslate: true, CancellationToken.None); + + // Ensure that the translation engine, shared corpora, and shared files are not deleted + await env.TranslationEnginesClient.Received(1).DeleteAsync(TranslationEngine01); + await env.CorporaClient.Received(1).DeleteAsync(Corpus01); + await env.DataFilesClient.Received(1).DeleteAsync(File01); + + // The 404 exception was logged + env.MockLogger.AssertHasEvent(logEvent => + logEvent.LogLevel == LogLevel.Information && logEvent.Exception is ServalApiException ); } [Test] - public async Task SyncProjectCorporaAsync_UpdatesRemoteCorpusIfLocalTextChanges() + public void RemoveProjectAsync_ThrowsExceptionWhenProjectSecretMissing() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions { HasTranslationEngineForSmt = true, LocalSourceTextHasData = true } - ); + var env = new TestEnvironment(); - // Set sync state so that there is one file and the local copy has changed since last sync - await 
env.ProjectSecrets.UpdateAsync( - Project02, - u => - u.Set( - p => p.ServalData.Corpora[Corpus01], - new ServalCorpus - { - SourceFiles = - [ - new ServalCorpusFile - { - FileChecksum = "old_checksum", - FileId = File01, - ProjectId = Project03, - TextId = Project02, - }, - ], - TargetFiles = - [ - new ServalCorpusFile - { - FileChecksum = "old_checksum", - FileId = File02, - ProjectId = Project02, - TextId = Project02, - }, - ], - UploadParatextZipFile = true, - } - ) + // SUT + Assert.ThrowsAsync( + () => env.Service.RemoveProjectAsync("invalid_project_id", preTranslate: false, CancellationToken.None) ); + } - // Make the Serval API return a data file - env.DataFilesClient.GetAsync(Arg.Any(), CancellationToken.None) - .Returns(Task.FromResult(new DataFile { Format = FileFormat.Paratext })); + [Test] + public async Task SyncAdditionalTrainingData_RemoveAdditionalTrainingDataWithoutParallelCorpus() + { + // Set up test environment + var env = new TestEnvironment(); + var project = new SFProject { Id = Project01 }; + var buildConfig = new BuildConfig(); + var additionalTrainingData = new ServalAdditionalTrainingData(); + env.Service.Configure() + .DeleteAllCorporaAndFilesAsync(Arg.Any>(), Project01, CancellationToken.None) + .Returns(Task.CompletedTask); // SUT - bool actual = await env.Service.SyncProjectCorporaAsync( + ServalAdditionalTrainingData actual = await env.Service.SyncAdditionalTrainingData( User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: false, + project, + TranslationEngine01, + buildConfig, + additionalTrainingData, CancellationToken.None ); - Assert.IsTrue(actual); + Assert.IsNull(actual); await env - .DataFilesClient.DidNotReceiveWithAnyArgs() - .CreateAsync(Arg.Any(), FileFormat.Paratext, Arg.Any(), CancellationToken.None); - await env.DataFilesClient.DidNotReceiveWithAnyArgs().DeleteAsync(string.Empty, CancellationToken.None); - await env.DataFilesClient.Received(1).UpdateAsync(File01, Arg.Any(), CancellationToken.None); - 
await env.DataFilesClient.Received(1).UpdateAsync(File02, Arg.Any(), CancellationToken.None); + .TranslationEnginesClient.DidNotReceiveWithAnyArgs() + .DeleteParallelCorpusAsync(Arg.Any(), Arg.Any(), CancellationToken.None); + await env + .Service.Received(1) + .DeleteAllCorporaAndFilesAsync(Arg.Any>(), Project01, CancellationToken.None); } [Test] - public async Task SyncProjectCorporaAsync_AddsAndDeletesLocalSourceAndTargetFilesToRemote() + public async Task SyncAdditionalTrainingData_RemoveAdditionalTrainingDataWithParallelCorpus() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions - { - HasTranslationEngineForSmt = true, - LocalSourceTextHasData = true, - LocalTargetTextHasData = true, - } - ); + var env = new TestEnvironment(); + var project = new SFProject { Id = Project01 }; + var buildConfig = new BuildConfig(); + var additionalTrainingData = new ServalAdditionalTrainingData { ParallelCorpusId = ParallelCorpus01 }; + env.Service.Configure() + .DeleteAllCorporaAndFilesAsync(Arg.Any>(), Project01, CancellationToken.None) + .Returns(Task.CompletedTask); - // Set the sync state so that there are two files on remote that no longer exist locally - await env.ProjectSecrets.UpdateAsync( - Project02, - u => - u.Add( - p => p.ServalData.Corpora[Corpus01].SourceFiles, - new ServalCorpusFile - { - FileChecksum = "a_previous_checksum", - FileId = "File03", - ProjectId = Project03, - TextId = "textId1", - } - ) - .Add( - p => p.ServalData.Corpora[Corpus01].TargetFiles, - new ServalCorpusFile - { - FileChecksum = "another_previous_checksum", - FileId = "File04", - ProjectId = Project01, - TextId = "textId2", - } - ) + // SUT + ServalAdditionalTrainingData actual = await env.Service.SyncAdditionalTrainingData( + User01, + project, + TranslationEngine01, + buildConfig, + additionalTrainingData, + CancellationToken.None ); + Assert.IsNull(actual); + await env + .TranslationEnginesClient.Received(1) + 
.DeleteParallelCorpusAsync(TranslationEngine01, ParallelCorpus01, CancellationToken.None); + await env + .Service.Received(1) + .DeleteAllCorporaAndFilesAsync(Arg.Any>(), Project01, CancellationToken.None); + } + + [Test] + public async Task SyncAdditionalTrainingData_UploadAdditionalTrainingDataWithExistingParallelCorpus() + { + // Set up test environment + var env = new TestEnvironment(); + await env.SetupTrainingDataAsync(Project01); + var project = new SFProject { Id = Project01 }; + var buildConfig = new BuildConfig { TrainingDataFiles = [Data01] }; + var additionalTrainingData = new ServalAdditionalTrainingData + { + CorpusFiles = + [ + new ServalCorpusFile + { + CorpusId = Corpus01, + FileId = File01, + TextId = Data01, + }, + new ServalCorpusFile + { + CorpusId = Corpus01, + FileId = File02, + TextId = Data01, + }, + new ServalCorpusFile + { + CorpusId = Corpus02, + FileId = File03, + TextId = Data01, + }, + new ServalCorpusFile + { + CorpusId = Corpus02, + FileId = File04, + TextId = Data01, + }, + ], + ParallelCorpusId = ParallelCorpus01, + SourceCorpusId = Corpus01, + TargetCorpusId = Corpus02, + }; + env.Service.Configure().GetSourceLanguage(project).Returns("en"); + env.Service.Configure().GetTargetLanguageAsync(project).Returns(Task.FromResult("de")); + env.Service.Configure() + .CreateOrUpdateParallelCorpusAsync( + TranslationEngine01, + Arg.Any(), + Arg.Any(), + Arg.Any>(), + Arg.Any>(), + CancellationToken.None + ) + .Returns(ParallelCorpus01); + env.Service.Configure() + .UploadAdditionalTrainingDataAsync( + Project01, + Arg.Any(), + Arg.Any(), + Arg.Any>(), + Arg.Any>(), + CancellationToken.None + ) + .Returns(args => Task.FromResult(args[1] as string)); // SUT - bool actual = await env.Service.SyncProjectCorporaAsync( + ServalAdditionalTrainingData actual = await env.Service.SyncAdditionalTrainingData( User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: false, + project, + TranslationEngine01, + buildConfig, + 
additionalTrainingData, CancellationToken.None ); - Assert.IsTrue(actual); - await env.DataFilesClient.Received(1).DeleteAsync("File03", CancellationToken.None); - await env.DataFilesClient.Received(1).DeleteAsync("File04", CancellationToken.None); + Assert.AreEqual(ParallelCorpus01, actual?.ParallelCorpusId); + await env.CorporaClient.Received(1).UpdateAsync(Corpus01, Arg.Any>()); + await env.CorporaClient.Received(1).UpdateAsync(Corpus02, Arg.Any>()); await env - .DataFilesClient.Received(2) - .CreateAsync(Arg.Any(), FileFormat.Paratext, Project02, CancellationToken.None); + .TrainingDataService.Received(1) + .GetTextsAsync( + Arg.Any(), + Arg.Any(), + Arg.Any>(), + Arg.Any>(), + Arg.Any>() + ); } [Test] - public async Task SyncProjectCorporaAsync_DoesNotCrashWhenDeletingAlreadyDeletedRemoteFiles() + public async Task SyncAdditionalTrainingData_UploadAdditionalTrainingDataWithoutParallelCorpus() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { HasTranslationEngineForSmt = true }); - - // Make the Serval API return the error code for an already deleted file - env.DataFilesClient.DeleteAsync("File03", CancellationToken.None).Throws(ServalApiExceptions.NotFound); - - // Add one file to the sync state that we think exists remotely, but doesn't, and no longer exists locally - await env.ProjectSecrets.UpdateAsync( - Project02, - u => - u.Add( - p => p.ServalData.Corpora[Corpus01].SourceFiles, + var env = new TestEnvironment(); + await env.SetupTrainingDataAsync(Project01); + const string sourceLanguage = "en"; + var project = new SFProject { Id = Project01 }; + var buildConfig = new BuildConfig { TrainingDataFiles = [Data01] }; + var additionalTrainingData = new ServalAdditionalTrainingData(); + env.Service.Configure().GetSourceLanguage(project).Returns(sourceLanguage); + env.Service.Configure().GetTargetLanguageAsync(project).Returns(Task.FromResult("de")); + env.Service.Configure() + .CreateOrUpdateParallelCorpusAsync( + 
TranslationEngine01, + Arg.Any(), + Arg.Any(), + Arg.Any>(), + Arg.Any>(), + CancellationToken.None + ) + .Returns(ParallelCorpus01); + env.Service.Configure() + .UploadAdditionalTrainingDataAsync( + Project01, + Arg.Any(), + Arg.Any(), + Arg.Any>(), + Arg.Any>(), + CancellationToken.None + ) + .Returns(args => + { + string corpusId = (string)args[2] == sourceLanguage ? Corpus01 : Corpus02; + ((List)args[3]).Add( new ServalCorpusFile { - FileChecksum = "a_previous_checksum", - FileId = "File03", - ProjectId = Project03, - TextId = "textId1", + CorpusId = corpusId, + FileId = File01, + TextId = Data01, } - ) - ); + ); + return Task.FromResult(corpusId); + }); // SUT - bool actual = await env.Service.SyncProjectCorporaAsync( + ServalAdditionalTrainingData actual = await env.Service.SyncAdditionalTrainingData( User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: false, + project, + TranslationEngine01, + buildConfig, + additionalTrainingData, CancellationToken.None ); - Assert.IsTrue(actual); - await env.DataFilesClient.Received(1).DeleteAsync("File03", CancellationToken.None); - - // The 404 exception was logged - env.MockLogger.AssertHasEvent(logEvent => - logEvent.LogLevel == LogLevel.Information && logEvent.Exception is ServalApiException - ); + Assert.AreEqual(ParallelCorpus01, actual?.ParallelCorpusId); + // UploadAdditionalTrainingDataAsync will perform the initial corpus CreateAsync() + await env.CorporaClient.Received(1).UpdateAsync(Corpus01, Arg.Any>()); + await env.CorporaClient.Received(1).UpdateAsync(Corpus02, Arg.Any>()); + await env + .TrainingDataService.Received(1) + .GetTextsAsync( + Arg.Any(), + Arg.Any(), + Arg.Any>(), + Arg.Any>(), + Arg.Any>() + ); } [Test] - public async Task BuildProjectAsync_DoesNotCrashWhenDeletingMissingAlternateTrainingSourceCorpora() + public async Task SyncAdditionalTrainingData_NoAdditionalTrainingData() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions - { - 
LocalSourceTextHasData = true, - LocalTargetTextHasData = true, - AlternateTrainingSourceEnabled = false, - } - ); - await env.SetDataInSync( - Project02, - preTranslate: true, - uploadParatextZipFile: false, - alternateTrainingSource: true - ); - ServalApiException ex = ServalApiExceptions.NotFound; - env.TranslationEnginesClient.DeleteCorpusAsync(TranslationEngine02, Corpus02, CancellationToken.None) - .Throws(ex); - - // Check that we have more than one pre-translate corpora - Assert.AreEqual(2, env.ProjectSecrets.Get(Project02).ServalData!.Corpora.Count(c => c.Value.PreTranslate)); + var env = new TestEnvironment(); + var project = new SFProject { Id = Project01 }; + var buildConfig = new BuildConfig(); // SUT - bool actual = await env.Service.SyncProjectCorporaAsync( + ServalAdditionalTrainingData actual = await env.Service.SyncAdditionalTrainingData( User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: true, + project, + TranslationEngine01, + buildConfig, + additionalTrainingData: null, CancellationToken.None ); - Assert.IsTrue(actual); - - // The old corpus and its files should be deleted - await env - .TranslationEnginesClient.Received() - .DeleteCorpusAsync(TranslationEngine02, Corpus02, CancellationToken.None); - await env.DataFilesClient.Received().DeleteAsync(File01, CancellationToken.None); - - // Ensure we have just one pre-translate corpora - Assert.AreEqual(1, env.ProjectSecrets.Get(Project02).ServalData!.Corpora.Count(c => c.Value.PreTranslate)); - - // The 404 exception was logged - env.MockLogger.AssertHasEvent(logEvent => - logEvent.LogLevel == LogLevel.Information && logEvent.Exception is ServalApiException - ); + Assert.IsNull(actual); } [Test] - public async Task BuildProjectAsync_DoesNotCrashWhenDeletingMissingAlternateTrainingSourceFiles() + [TestCaseSource(nameof(SyncProjectCorporaAsyncOptions))] + public async Task SyncProjectCorporaAsync_Success(TestEnvironmentOptions options) { // Set up test environment var env = new 
TestEnvironment( - new TestEnvironmentOptions + options with { - LocalSourceTextHasData = true, - LocalTargetTextHasData = true, - AlternateTrainingSourceEnabled = false, + HasTranslationEngineForNmt = options.PreTranslate, + HasTranslationEngineForSmt = !options.PreTranslate, } ); - await env.SetDataInSync( - Project02, - preTranslate: true, - uploadParatextZipFile: false, - alternateTrainingSource: true - ); - ServalApiException ex = ServalApiExceptions.NotFound; - env.DataFilesClient.DeleteAsync(File01, CancellationToken.None).Throws(ex); - - // Check that we have more than one pre-translate corpora - Assert.AreEqual(2, env.ProjectSecrets.Get(Project02).ServalData!.Corpora.Count(c => c.Value.PreTranslate)); + env.Service.Configure() + .UploadParatextFileAsync(Arg.Any(), Arg.Any(), CancellationToken.None) + .Returns(Task.CompletedTask); + env.Service.Configure() + .DeleteAllCorporaAndFilesAsync(Arg.Any>(), Project02, CancellationToken.None) + .Returns(Task.CompletedTask); + env.Service.Configure() + .CreateOrUpdateParallelCorpusAsync( + options.PreTranslate ? TranslationEngine01 : TranslationEngine02, + Arg.Any(), + Arg.Any(), + Arg.Any>(), + Arg.Any>(), + CancellationToken.None + ) + .Returns(ParallelCorpus01); + env.Service.Configure() + .SyncAdditionalTrainingData( + User01, + Arg.Any(), + options.PreTranslate ? 
TranslationEngine01 : TranslationEngine02, + Arg.Any(), + Arg.Any(), + CancellationToken.None + ) + .Returns(args => args[4] as ServalAdditionalTrainingData); - // SUT - bool actual = await env.Service.SyncProjectCorporaAsync( + // SUT 1 + IList actual = await env.Service.SyncProjectCorporaAsync( User01, new BuildConfig { ProjectId = Project02 }, - preTranslate: true, + preTranslate: options.PreTranslate, CancellationToken.None ); - Assert.IsTrue(actual); - - // The old corpus and its files should be deleted - await env - .TranslationEnginesClient.Received() - .DeleteCorpusAsync(TranslationEngine02, Corpus02, CancellationToken.None); - await env.DataFilesClient.Received().DeleteAsync(File01, CancellationToken.None); + await env.AssertSyncProjectCorporaAsync(options, actual, createsServalCorpora: true); - // Ensure we have just one pre-translate corpora - Assert.AreEqual(1, env.ProjectSecrets.Get(Project02).ServalData!.Corpora.Count(c => c.Value.PreTranslate)); + // Re-run using existing ServalCorpusFiles + Assert.IsNotEmpty(env.ProjectSecrets.Get(Project02).ServalData!.CorpusFiles); - // The 404 exception was logged - env.MockLogger.AssertHasEvent(logEvent => - logEvent.LogLevel == LogLevel.Information && logEvent.Exception is ServalApiException + // SUT 2 + actual = await env.Service.SyncProjectCorporaAsync( + User01, + new BuildConfig { ProjectId = Project02 }, + preTranslate: options.PreTranslate, + CancellationToken.None ); - } + await env.AssertSyncProjectCorporaAsync(options, actual, createsServalCorpora: false); - [Test] - public async Task SyncProjectCorporaAsync_SynchronizesTheAlternateSource() - { - // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions + // Re-run after changing the languages + await env.Projects.UpdateAsync( + Project02, + op => { - AlternateSourceEnabled = true, - AlternateSourceConfigured = true, - LocalSourceTextHasData = true, - LocalTargetTextHasData = true, + op.Set(p => p.WritingSystem.Tag, 
"fr"); + op.Set(p => p.TranslateConfig.Source.WritingSystem.Tag, "fr_be"); + if (options.AlternateSource) + { + op.Set(p => p.TranslateConfig.DraftConfig.AlternateSource.WritingSystem.Tag, "fr_ca"); + } + + if (options.AlternateTrainingSource) + { + op.Set(p => p.TranslateConfig.DraftConfig.AlternateTrainingSource.WritingSystem.Tag, "fr_ch"); + } + + if (options.AdditionalTrainingSource) + { + op.Set(p => p.TranslateConfig.DraftConfig.AdditionalTrainingSource.WritingSystem.Tag, "fr_lu"); + } } ); - await env.SetDataInSync(Project02, preTranslate: true, uploadParatextZipFile: true); - // SUT - bool actual = await env.Service.SyncProjectCorporaAsync( + // SUT 3 + actual = await env.Service.SyncProjectCorporaAsync( User01, new BuildConfig { ProjectId = Project02 }, - preTranslate: true, + preTranslate: options.PreTranslate, CancellationToken.None ); - Assert.IsTrue(actual); - - // Verify that it was just the alternate source, source, and target directories that were read for data - var project = env.Projects.Get(Project02); - Assert.That(project.TranslateConfig.DraftConfig.AlternateSource?.ParatextId, Is.EqualTo(Paratext01)); - Assert.That(project.TranslateConfig.Source?.ParatextId, Is.EqualTo(Paratext03)); - env.FileSystemService.Received(1).EnumerateFiles(Arg.Is(path => path.Contains(Paratext01))); - env.FileSystemService.Received(1).EnumerateFiles(Arg.Is(path => path.Contains(Paratext02))); - env.FileSystemService.Received(1).EnumerateFiles(Arg.Is(path => path.Contains(Paratext03))); - env.FileSystemService.Received(3).EnumerateFiles(Arg.Any()); + await env.AssertSyncProjectCorporaAsync(options, actual, createsServalCorpora: true); } [Test] - public async Task SyncProjectCorporaAsync_UsesTheSourceWhenAlternateSourceIsEnabledButNotConfigured() + public async Task SyncProjectCorporaAsync_ThrowsExceptionWhenPreTranslationEngineIdMissing() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions - { - LocalSourceTextHasData = true, - 
LocalTargetTextHasData = true, - AlternateSourceConfigured = false, - AlternateSourceEnabled = true, - } - ); - await env.SetDataInSync(Project02, preTranslate: true, uploadParatextZipFile: true); + var env = new TestEnvironment(); + await env.ProjectSecrets.UpdateAsync(Project01, op => op.Set(p => p.ServalData, new ServalData())); // SUT - bool actual = await env.Service.SyncProjectCorporaAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: true, - CancellationToken.None + Assert.ThrowsAsync( + () => + env.Service.SyncProjectCorporaAsync( + User01, + new BuildConfig { ProjectId = Project01 }, + preTranslate: true, + CancellationToken.None + ) ); - Assert.IsTrue(actual); - - // Verify that it was just the source and target directories that were read for data - var project = env.Projects.Get(Project02); - Assert.That(project.TranslateConfig.Source?.ParatextId, Is.EqualTo(Paratext03)); - env.FileSystemService.Received(1).EnumerateFiles(Arg.Is(path => path.Contains(Paratext02))); - env.FileSystemService.Received(1).EnumerateFiles(Arg.Is(path => path.Contains(Paratext03))); - env.FileSystemService.Received(2).EnumerateFiles(Arg.Any()); } [Test] - public async Task SyncProjectCorporaAsync_SynchronizesTheAdditionalTrainingSourceCorpora() + public async Task SyncProjectCorporaAsync_ThrowsExceptionWhenProjectMissing() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions - { - LocalSourceTextHasData = true, - LocalTargetTextHasData = true, - AdditionalTrainingSourceConfigured = true, - } - ); - await env.SetDataInSync(Project02, preTranslate: true, uploadParatextZipFile: true); + var env = new TestEnvironment(); + await env.Projects.DeleteAllAsync(_ => true); // SUT - bool actual = await env.Service.SyncProjectCorporaAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: true, - CancellationToken.None + Assert.ThrowsAsync( + () => + env.Service.SyncProjectCorporaAsync( + User01, + new 
BuildConfig { ProjectId = Project01 }, + preTranslate: false, + CancellationToken.None + ) ); - Assert.IsTrue(actual); - - // Check for the upload of the source, target, source duplicated as training source, and mixed source - await env - .DataFilesClient.Received(4) - .CreateAsync(Arg.Any(), FileFormat.Paratext, Arg.Any(), CancellationToken.None); } [Test] - public async Task SyncProjectCorporaAsync_SynchronizesTheAdditionalTrainingSourceIntoTheAlternateTrainingSourceCorpora() + public async Task SyncProjectCorporaAsync_ThrowsExceptionWhenProjectSecretMissing() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions - { - AlternateTrainingSourceConfigured = true, - AlternateTrainingSourceEnabled = true, - LocalSourceTextHasData = true, - LocalTargetTextHasData = true, - AdditionalTrainingSourceConfigured = true, - } - ); - await env.SetDataInSync(Project02, preTranslate: true, uploadParatextZipFile: true); + var env = new TestEnvironment(); + await env.ProjectSecrets.DeleteAllAsync(_ => true); // SUT - bool actual = await env.Service.SyncProjectCorporaAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: true, - CancellationToken.None + Assert.ThrowsAsync( + () => + env.Service.SyncProjectCorporaAsync( + User01, + new BuildConfig { ProjectId = Project01 }, + preTranslate: false, + CancellationToken.None + ) ); - Assert.IsTrue(actual); - - // Check for the upload of the source, target, alternate training source, and mixed source - await env - .DataFilesClient.Received(4) - .CreateAsync(Arg.Any(), FileFormat.Paratext, Arg.Any(), CancellationToken.None); } [Test] - public async Task SyncProjectCorporaAsync_RecreatesDeletedCorpora() + public async Task SyncProjectCorporaAsync_ThrowsExceptionWhenServalConfigMissing() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions - { - HasTranslationEngineForSmt = true, - LocalSourceTextHasData = true, - LocalTargetTextHasData = true, 
- } - ); - await env.SetDataInSync(Project02); - - // Make the Serval API return the error code for a missing corpus - env.TranslationEnginesClient.GetCorpusAsync(TranslationEngine02, Arg.Any(), CancellationToken.None) - .Throws(ServalApiExceptions.NotFound); + var env = new TestEnvironment(); + await env.ProjectSecrets.UpdateAsync(Project01, op => op.Unset(p => p.ServalData)); // SUT - bool actual = await env.Service.SyncProjectCorporaAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: false, - CancellationToken.None + Assert.ThrowsAsync( + () => + env.Service.SyncProjectCorporaAsync( + User01, + new BuildConfig { ProjectId = Project01 }, + preTranslate: false, + CancellationToken.None + ) ); - Assert.IsTrue(actual); - await env - .TranslationEnginesClient.Received(1) - .AddCorpusAsync(Arg.Any(), Arg.Any(), CancellationToken.None); - await env - .TranslationEnginesClient.DidNotReceiveWithAnyArgs() - .DeleteCorpusAsync(Arg.Any(), Arg.Any(), CancellationToken.None); - await env - .TranslationEnginesClient.DidNotReceiveWithAnyArgs() - .UpdateCorpusAsync( - Arg.Any(), - Arg.Any(), - Arg.Any(), - CancellationToken.None - ); } [Test] - public async Task SyncProjectCorporaAsync_RecreatesCorporaWhenLanguageChanges() + public async Task SyncProjectCorporaAsync_ThrowsExceptionWhenSourceMissing() { // Set up test environment - var env = new TestEnvironment( - new TestEnvironmentOptions - { - HasTranslationEngineForSmt = true, - LocalSourceTextHasData = true, - LocalTargetTextHasData = true, - } - ); - await env.SetDataInSync(Project02); + var env = new TestEnvironment(); + await env.Projects.UpdateAsync(Project01, op => op.Unset(p => p.TranslateConfig.Source)); - // Make the Serval API return the corpus - env.TranslationEnginesClient.GetCorpusAsync(TranslationEngine02, Arg.Any(), CancellationToken.None) - .Returns(args => - Task.FromResult( - new TranslationCorpus - { - Id = args.ArgAt(1), - SourceLanguage = "fr", - TargetLanguage = "de" - } + // 
SUT + Assert.ThrowsAsync( + () => + env.Service.SyncProjectCorporaAsync( + User01, + new BuildConfig { ProjectId = Project01 }, + preTranslate: false, + CancellationToken.None ) - ); + ); + } + + [Test] + public async Task SyncProjectCorporaAsync_ThrowsExceptionWhenTranslationEngineIdMissing() + { + // Set up test environment + var env = new TestEnvironment(); + await env.ProjectSecrets.UpdateAsync(Project01, op => op.Set(p => p.ServalData, new ServalData())); // SUT - bool actual = await env.Service.SyncProjectCorporaAsync( - User01, - new BuildConfig { ProjectId = Project02 }, - preTranslate: false, - CancellationToken.None + Assert.ThrowsAsync( + () => + env.Service.SyncProjectCorporaAsync( + User01, + new BuildConfig { ProjectId = Project01 }, + preTranslate: false, + CancellationToken.None + ) ); - Assert.IsTrue(actual); - await env - .TranslationEnginesClient.Received(1) - .AddCorpusAsync(Arg.Any(), Arg.Any(), CancellationToken.None); - await env - .TranslationEnginesClient.Received(1) - .DeleteCorpusAsync(Arg.Any(), Arg.Any(), CancellationToken.None); - await env - .TranslationEnginesClient.DidNotReceiveWithAnyArgs() - .UpdateCorpusAsync( - Arg.Any(), - Arg.Any(), - Arg.Any(), - CancellationToken.None - ); } [Test] @@ -2248,7 +3034,7 @@ await env.Projects.UpdateAsync( new DraftConfig { AlternateSourceEnabled = true, - AlternateSource = new TranslateSource { ParatextId = Paratext01 } + AlternateSource = new TranslateSource { ParatextId = Paratext01 }, } ) ); @@ -2319,19 +3105,372 @@ await env.Projects.UpdateAsync( ); } - private class TestEnvironmentOptions + [Test] + public async Task UploadAdditionalTrainingDataAsync_CreatesTheCorpusIfMissing() + { + // Set up test environment + var env = new TestEnvironment(); + + // Set up the test data + const string languageCode = "en"; + List servalCorpusFiles = []; + ISFText text = TestEnvironment.GetMockTrainingData(); + List texts = [text]; + + // Set up other API calls + 
env.CorporaClient.CreateAsync(Arg.Any()).Returns(Task.FromResult(new Corpus { Id = Corpus01 })); + env.Service.Configure() + .UploadTextFileAsync(Arg.Any(), text, CancellationToken.None) + .Returns(Task.FromResult(true)); + + string actual = await env.Service.UploadAdditionalTrainingDataAsync( + Project01, + null, + languageCode, + servalCorpusFiles, + texts, + CancellationToken.None + ); + Assert.AreEqual(Corpus01, actual); + Assert.AreEqual(Corpus01, servalCorpusFiles.First().CorpusId); + Assert.AreEqual(Project01, servalCorpusFiles.First().ProjectId); + Assert.AreEqual(languageCode, servalCorpusFiles.First().LanguageCode); + await env.CorporaClient.DidNotReceiveWithAnyArgs().DeleteAsync(Corpus01); + await env.CorporaClient.Received(1).CreateAsync(Arg.Any()); + } + + [Test] + public async Task UploadAdditionalTrainingDataAsync_OnlyReturnsUploadedServalCorpusFiles() + { + // Set up test environment + var env = new TestEnvironment(); + + // Set up the test data + const string languageCode = "en"; + var servalCorpusFile = new ServalCorpusFile + { + CorpusId = Corpus01, + LanguageCode = languageCode, + TextId = Data01, + }; + List servalCorpusFiles = [servalCorpusFile]; + ISFText text = TestEnvironment.GetMockTrainingData(); + List texts = [text]; + + // Set up other API calls + env.Service.Configure() + .UploadTextFileAsync(servalCorpusFile, text, CancellationToken.None) + .Returns(Task.FromResult(false)); + + string actual = await env.Service.UploadAdditionalTrainingDataAsync( + Project01, + Corpus01, + languageCode, + servalCorpusFiles, + texts, + CancellationToken.None + ); + Assert.AreEqual(Corpus01, actual); + Assert.IsEmpty(servalCorpusFiles); + await env.CorporaClient.DidNotReceiveWithAnyArgs().DeleteAsync(Corpus01); + await env.CorporaClient.DidNotReceiveWithAnyArgs().CreateAsync(Arg.Any()); + } + + [Test] + public async Task UploadAdditionalTrainingDataAsync_RecreatesCorpusIfLanguageChanges() + { + // Set up test environment + var env = new 
TestEnvironment(); + + // Set up the test data + const string oldLanguageCode = "en"; + const string newLanguageCode = "de"; + var servalCorpusFile = new ServalCorpusFile + { + CorpusId = Corpus01, + LanguageCode = oldLanguageCode, + TextId = Data01, + }; + List servalCorpusFiles = [servalCorpusFile]; + ISFText text = TestEnvironment.GetMockTrainingData(); + List texts = [text]; + + // Set up other API calls + env.CorporaClient.CreateAsync(Arg.Any()).Returns(Task.FromResult(new Corpus { Id = Corpus02 })); + env.Service.Configure() + .UploadTextFileAsync(servalCorpusFile, text, CancellationToken.None) + .Returns(Task.FromResult(true)); + + string actual = await env.Service.UploadAdditionalTrainingDataAsync( + Project01, + Corpus01, + newLanguageCode, + servalCorpusFiles, + texts, + CancellationToken.None + ); + Assert.AreEqual(Corpus02, actual); + Assert.AreEqual(Corpus02, servalCorpusFiles.First().CorpusId); + Assert.AreEqual(Project01, servalCorpusFiles.First().ProjectId); + Assert.AreEqual(newLanguageCode, servalCorpusFiles.First().LanguageCode); + await env.CorporaClient.Received(1).DeleteAsync(Corpus01); + await env.CorporaClient.Received(1).CreateAsync(Arg.Any()); + } + + [Test] + public async Task UploadAdditionalTrainingDataAsync_UpdatesTheCorpus() + { + // Set up test environment + var env = new TestEnvironment(); + + // Set up the test data + const string languageCode = "en"; + var servalCorpusFile = new ServalCorpusFile + { + CorpusId = Corpus01, + LanguageCode = languageCode, + TextId = Data01, + }; + List servalCorpusFiles = [servalCorpusFile]; + ISFText text = TestEnvironment.GetMockTrainingData(); + List texts = [text]; + + // Set up other API calls + env.Service.Configure() + .UploadTextFileAsync(Arg.Any(), text, CancellationToken.None) + .Returns(Task.FromResult(true)); + + string actual = await env.Service.UploadAdditionalTrainingDataAsync( + Project01, + Corpus01, + languageCode, + servalCorpusFiles, + texts, + CancellationToken.None + ); + 
Assert.AreEqual(Corpus01, actual); + Assert.AreEqual(servalCorpusFile, servalCorpusFiles.First()); + await env.CorporaClient.DidNotReceiveWithAnyArgs().DeleteAsync(Corpus01); + await env.CorporaClient.DidNotReceiveWithAnyArgs().CreateAsync(Arg.Any()); + } + + [Test] + public async Task UploadFileAsync_ChecksumMatches() + { + // Set up test environment + var env = new TestEnvironment(); + + // Set up the text file + const string textFileData = "My text file data"; + var servalCorpusFile = new ServalCorpusFile { FileChecksum = StringUtils.ComputeMd5Hash(textFileData) }; + byte[] buffer = Encoding.UTF8.GetBytes(textFileData); + await using Stream stream = new MemoryStream(buffer, false); + + // SUT + await env.Service.UploadFileAsync(servalCorpusFile, stream, FileFormat.Text, CancellationToken.None); + await env.DataFilesClient.DidNotReceiveWithAnyArgs().CreateAsync(Arg.Any(), FileFormat.Text); + } + + [Test] + public async Task UploadFileAsync_CreatesIfFileFormatChanges() + { + // Set up test environment + var env = new TestEnvironment(); + env.DataFilesClient.GetAsync(File01).Returns(new DataFile { Id = File01, Format = FileFormat.Paratext }); + env.DataFilesClient.CreateAsync(Arg.Any(), FileFormat.Text, Project01) + .Returns(new DataFile { Id = File02 }); + + // Set up the text file + const string textFileData = "My text file data"; + string checksum = StringUtils.ComputeMd5Hash(textFileData); + var servalCorpusFile = new ServalCorpusFile { TextId = Project01, FileId = File01 }; + byte[] buffer = Encoding.UTF8.GetBytes(textFileData); + await using Stream stream = new MemoryStream(buffer, false); + + // SUT + await env.Service.UploadFileAsync(servalCorpusFile, stream, FileFormat.Text, CancellationToken.None); + Assert.AreEqual(checksum, servalCorpusFile.FileChecksum); + Assert.AreEqual(File02, servalCorpusFile.FileId); + await env.DataFilesClient.Received(1).DeleteAsync(File01); + env.MockLogger.AssertHasEvent(logEvent => logEvent.LogLevel == LogLevel.Information); 
+ } + + [Test] + public async Task UploadFileAsync_CreatesIfFileNotFound() + { + // Set up test environment + var env = new TestEnvironment(); + ServalApiException ex = ServalApiExceptions.NotFound; + env.DataFilesClient.GetAsync(File01).ThrowsAsync(ex); + env.DataFilesClient.CreateAsync(Arg.Any(), FileFormat.Text, Project01) + .Returns(new DataFile { Id = File02 }); + + // Set up the text file + const string textFileData = "My text file data"; + string checksum = StringUtils.ComputeMd5Hash(textFileData); + var servalCorpusFile = new ServalCorpusFile { TextId = Project01, FileId = File01 }; + byte[] buffer = Encoding.UTF8.GetBytes(textFileData); + await using Stream stream = new MemoryStream(buffer, false); + + // SUT + await env.Service.UploadFileAsync(servalCorpusFile, stream, FileFormat.Text, CancellationToken.None); + Assert.AreEqual(checksum, servalCorpusFile.FileChecksum); + Assert.AreEqual(File02, servalCorpusFile.FileId); + await env.DataFilesClient.DidNotReceive().DeleteAsync(File01); + env.MockLogger.AssertHasEvent(logEvent => + logEvent.Exception == ex && logEvent.LogLevel == LogLevel.Information + ); + } + + [Test] + public async Task UploadFileAsync_CreatesIfNoExistingFile() + { + // Set up test environment + var env = new TestEnvironment(); + env.DataFilesClient.CreateAsync(Arg.Any(), FileFormat.Text, Project01) + .Returns(new DataFile { Id = File01 }); + + // Set up the text file + const string textFileData = "My text file data"; + string checksum = StringUtils.ComputeMd5Hash(textFileData); + var servalCorpusFile = new ServalCorpusFile { TextId = Project01 }; + byte[] buffer = Encoding.UTF8.GetBytes(textFileData); + await using Stream stream = new MemoryStream(buffer, false); + + // SUT + await env.Service.UploadFileAsync(servalCorpusFile, stream, FileFormat.Text, CancellationToken.None); + Assert.AreEqual(checksum, servalCorpusFile.FileChecksum); + Assert.AreEqual(File01, servalCorpusFile.FileId); + } + + [Test] + public async Task 
UploadFileAsync_UpdatesIfFileExists() + { + // Set up test environment + var env = new TestEnvironment(); + env.DataFilesClient.GetAsync(File01).Returns(new DataFile { Id = File01, Format = FileFormat.Text }); + env.DataFilesClient.UpdateAsync(File01, Arg.Any()).Returns(new DataFile { Id = File01 }); + + // Set up the text file + const string textFileData = "My text file data"; + string checksum = StringUtils.ComputeMd5Hash(textFileData); + var servalCorpusFile = new ServalCorpusFile { TextId = Project01, FileId = File01 }; + byte[] buffer = Encoding.UTF8.GetBytes(textFileData); + await using Stream stream = new MemoryStream(buffer, false); + + // SUT + await env.Service.UploadFileAsync(servalCorpusFile, stream, FileFormat.Text, CancellationToken.None); + Assert.AreEqual(checksum, servalCorpusFile.FileChecksum); + Assert.AreEqual(File01, servalCorpusFile.FileId); + await env + .DataFilesClient.DidNotReceiveWithAnyArgs() + .CreateAsync(Arg.Any(), FileFormat.Text, Project01); + await env.DataFilesClient.DidNotReceiveWithAnyArgs().DeleteAsync(File01); + } + + [Test] + public async Task UploadParatextFileAsync_Success() + { + // Set up test environment + var env = new TestEnvironment(); + var servalCorpusFile = new ServalCorpusFile(); + env.Service.CreateZipFileFromParatextDirectoryAsync(Paratext01, Arg.Any(), CancellationToken.None) + .Returns(Task.CompletedTask); + env.Service.UploadFileAsync( + servalCorpusFile, + Arg.Any(), + FileFormat.Paratext, + CancellationToken.None + ) + .Returns(Task.FromResult(true)); + + // SUT + await env.Service.UploadParatextFileAsync(servalCorpusFile, Paratext01, CancellationToken.None); + await env + .Service.Received(1) + .UploadFileAsync(servalCorpusFile, Arg.Any(), FileFormat.Paratext, CancellationToken.None); + } + + [Test] + public async Task UploadTextFileAsync_EmptyTextFile() + { + // Set up test environment + var env = new TestEnvironment(); + var servalCorpusFile = new ServalCorpusFile(); + var text = 
TestEnvironment.GetMockTrainingData(); + env.Service.GetTextFileData(text).Returns(string.Empty); + + // SUT + bool actual = await env.Service.UploadTextFileAsync(servalCorpusFile, text, CancellationToken.None); + Assert.IsFalse(actual); + } + + [Test] + public async Task UploadTextFileAsync_Success() + { + // Set up test environment + var env = new TestEnvironment(); + var servalCorpusFile = new ServalCorpusFile(); + var text = TestEnvironment.GetMockTrainingData(); + env.Service.GetTextFileData(text).Returns("File Data"); + env.Service.UploadFileAsync(servalCorpusFile, Arg.Any(), FileFormat.Text, CancellationToken.None) + .Returns(Task.FromResult(true)); + + // SUT + bool actual = await env.Service.UploadTextFileAsync(servalCorpusFile, text, CancellationToken.None); + Assert.IsTrue(actual); + } + + /// + /// Gets the test environment options for SyncProjectCorporaAsync + /// to ensure an adequate feature test coverage matrix. + /// + public static IEnumerable SyncProjectCorporaAsyncOptions + { + get + { + bool[] boolValues = [false, true]; + foreach (bool preTranslate in boolValues) + { + foreach (bool alternateSource in boolValues) + { + foreach (bool alternateTrainingSource in boolValues) + { + foreach (bool additionalTrainingSource in boolValues) + { + yield return new TestEnvironmentOptions + { + AlternateSource = alternateSource, + AlternateTrainingSource = alternateTrainingSource, + AdditionalTrainingSource = additionalTrainingSource, + PreTranslate = preTranslate, + }; + } + } + } + + // Emit special test cases with pre-translate enabled or disabled + yield return new TestEnvironmentOptions + { + AlternateTrainingSource = true, + AlternateTrainingSourceAndSourceAreTheSame = true, + PreTranslate = preTranslate, + }; + } + } + } + + public record TestEnvironmentOptions { - public bool AlternateSourceEnabled { get; init; } - public bool AlternateSourceConfigured { get; init; } - public bool AlternateTrainingSourceConfigured { get; init; } - public bool 
AlternateTrainingSourceEnabled { get; init; } - public bool BuildIsPending { get; init; } + public bool AdditionalTrainingSource { get; init; } + public bool AlternateSource { get; init; } + public bool AlternateTrainingSource { get; init; } + public bool AlternateTrainingSourceAndSourceAreTheSame { get; init; } + public bool HasTranslationEngineForNmt { get; init; } public bool HasTranslationEngineForSmt { get; init; } + public bool LegacyCorpora { get; init; } + public bool PreTranslate { get; init; } public bool UseEchoForPreTranslation { get; init; } - public bool LocalSourceTextHasData { get; init; } - public bool LocalTargetTextHasData { get; init; } - public bool AdditionalTrainingSourceConfigured { get; init; } - public string? ServalConfig { get; init; } } private class TestEnvironment @@ -2341,6 +3480,7 @@ public TestEnvironment(TestEnvironmentOptions? options = null) options ??= new TestEnvironmentOptions(); ExceptionHandler = Substitute.For(); MockLogger = new MockLogger(); + CorporaClient = Substitute.For(); DataFilesClient = Substitute.For(); DataFilesClient .CreateAsync(Arg.Any(), Arg.Any(), Arg.Any(), CancellationToken.None) @@ -2350,8 +3490,12 @@ public TestEnvironment(TestEnvironmentOptions? options = null) .Returns(args => Task.FromResult(new DataFile { Id = args.ArgAt(0) })); TranslationEnginesClient = Substitute.For(); TranslationEnginesClient - .AddCorpusAsync(Arg.Any(), Arg.Any(), CancellationToken.None) - .Returns(Task.FromResult(new TranslationCorpus { Id = Corpus01 })); + .AddParallelCorpusAsync( + Arg.Any(), + Arg.Any(), + CancellationToken.None + ) + .Returns(Task.FromResult(new TranslationParallelCorpus { Id = ParallelCorpus01 })); TranslationEnginesClient .CreateAsync(Arg.Any(), CancellationToken.None) .Returns(Task.FromResult(new TranslationEngine { Id = TranslationEngine01 })); @@ -2383,57 +3527,9 @@ public TestEnvironment(TestEnvironmentOptions? 
options = null) } ) ); - TranslationEnginesClient - .GetCorpusAsync(TranslationEngine01, Arg.Any(), CancellationToken.None) - .Returns(args => - Task.FromResult( - new TranslationCorpus - { - Id = args.ArgAt(1), - SourceLanguage = "en_US", - TargetLanguage = "en_GB", - } - ) - ); - TranslationEnginesClient - .GetCorpusAsync(TranslationEngine02, Arg.Any(), CancellationToken.None) - .Returns(args => - Task.FromResult( - new TranslationCorpus - { - Id = args.ArgAt(1), - SourceLanguage = "en_GB", - TargetLanguage = "en_US", - } - ) - ); - TranslationEnginesClient - .UpdateCorpusAsync( - Arg.Any(), - Arg.Any(), - Arg.Any(), - CancellationToken.None - ) - .Returns(args => Task.FromResult(new TranslationCorpus { Id = args.ArgAt(1) })); - if (options.BuildIsPending) - { - TranslationEnginesClient - .GetCurrentBuildAsync(Arg.Any(), null, CancellationToken.None) - .Returns( - Task.FromResult( - new TranslationBuild - { - Pretranslate = new List { new PretranslateCorpus() }, - } - ) - ); - } - else - { - TranslationEnginesClient - .GetCurrentBuildAsync(Arg.Any(), null, CancellationToken.None) - .ThrowsAsync(ServalApiExceptions.NoContent); - } + CorporaClient + .CreateAsync(Arg.Any(), CancellationToken.None) + .Returns(Task.FromResult(new Corpus { Id = Corpus01 })); ParatextService = Substitute.For(); ParatextService @@ -2467,42 +3563,45 @@ public TestEnvironment(TestEnvironmentOptions? options = null) Id = Project02, ServalData = new ServalData { + PreTranslationEngineId = options.HasTranslationEngineForNmt ? TranslationEngine01 : null, TranslationEngineId = options.HasTranslationEngineForSmt ? TranslationEngine02 : null, - Corpora = new Dictionary - { + Corpora = options.LegacyCorpora + ? 
new Dictionary { - Corpus01, - new ServalCorpus { - PreTranslate = false, - AlternateTrainingSource = false, - SourceFiles = - [ - new ServalCorpusFile { FileId = File01, ProjectId = Project03 }, - ], - TargetFiles = - [ - new ServalCorpusFile { FileId = File02, ProjectId = Project01 }, - ], - } - }, - { - Corpus02, - new ServalCorpus + Corpus01, + new ServalCorpus + { + PreTranslate = false, + AlternateTrainingSource = false, + SourceFiles = + [ + new ServalCorpusFile { FileId = File01, ProjectId = Project03 }, + ], + TargetFiles = + [ + new ServalCorpusFile { FileId = File02, ProjectId = Project01 }, + ], + } + }, { - PreTranslate = true, - AlternateTrainingSource = false, - SourceFiles = - [ - new ServalCorpusFile { FileId = File01, ProjectId = Project03 }, - ], - TargetFiles = - [ - new ServalCorpusFile { FileId = File02, ProjectId = Project01 }, - ], - } - }, - }, + Corpus02, + new ServalCorpus + { + PreTranslate = true, + AlternateTrainingSource = false, + SourceFiles = + [ + new ServalCorpusFile { FileId = File01, ProjectId = Project03 }, + ], + TargetFiles = + [ + new ServalCorpusFile { FileId = File02, ProjectId = Project01 }, + ], + } + }, + } + : null, }, }, new SFProjectSecret { Id = Project03 }, @@ -2532,7 +3631,7 @@ public TestEnvironment(TestEnvironmentOptions? options = null) ParatextId = Paratext02, WritingSystem = new WritingSystem { Tag = "en_US" }, }, - DraftConfig = new DraftConfig { ServalConfig = options.ServalConfig }, + DraftConfig = new DraftConfig(), }, WritingSystem = new WritingSystem { Tag = "en_GB" }, }, @@ -2549,35 +3648,48 @@ public TestEnvironment(TestEnvironmentOptions? 
options = null) TranslationSuggestionsEnabled = true, Source = new TranslateSource { - ProjectRef = Project03, - ParatextId = Paratext03, + ProjectRef = Project01, + ParatextId = Paratext01, WritingSystem = new WritingSystem { Tag = "en" }, }, DraftConfig = new DraftConfig { - AlternateSourceEnabled = options.AlternateSourceEnabled, - AlternateSource = options.AlternateSourceConfigured + AlternateSourceEnabled = options.AlternateSource, + AlternateSource = options.AlternateSource ? new TranslateSource { - ProjectRef = Project01, - ParatextId = Paratext01, + ProjectRef = Project03, + ParatextId = Paratext03, WritingSystem = new WritingSystem { Tag = "en_GB" }, } : null, - AlternateTrainingSourceEnabled = options.AlternateTrainingSourceEnabled, - AlternateTrainingSource = options.AlternateTrainingSourceConfigured + AlternateTrainingSourceEnabled = options.AlternateTrainingSource, + AlternateTrainingSource = options.AlternateTrainingSource ? new TranslateSource { - ProjectRef = Project01, - ParatextId = Paratext01, + ProjectRef = options.AlternateTrainingSourceAndSourceAreTheSame + ? Project01 + : Project04, + ParatextId = options.AlternateTrainingSourceAndSourceAreTheSame + ? Paratext01 + : Paratext04, WritingSystem = new WritingSystem { Tag = "en_GB" }, } : null, - AdditionalTrainingSourceEnabled = options.AdditionalTrainingSourceConfigured, - AdditionalTrainingSource = options.AdditionalTrainingSourceConfigured - ? new TranslateSource { ProjectRef = Project01, ParatextId = Paratext01 } + AdditionalTrainingSourceEnabled = options.AdditionalTrainingSource, + AdditionalTrainingSource = options.AdditionalTrainingSource + ? 
new TranslateSource + { + ProjectRef = Project05, + ParatextId = Paratext05, + WritingSystem = new WritingSystem { Tag = "en_GB" }, + } : null, }, + PreTranslate = + options.AlternateSource + || options.AlternateTrainingSource + || options.AdditionalTrainingSource, }, WritingSystem = new WritingSystem { Tag = "en_US" }, }, @@ -2601,11 +3713,13 @@ public TestEnvironment(TestEnvironmentOptions? options = null) TrainingDataService = Substitute.For(); TrainingData = new MemoryRepository(); - var realtimeService = new SFMemoryRealtimeService(); - realtimeService.AddRepository("sf_projects", OTType.Json0, Projects); - realtimeService.AddRepository("training_data", OTType.Json0, TrainingData); + RealtimeService = new SFMemoryRealtimeService(); + RealtimeService.AddRepository("sf_projects", OTType.Json0, Projects); + RealtimeService.AddRepository("training_data", OTType.Json0, TrainingData); - Service = new MachineProjectService( + // We use this so we can mock any virtual methods in the class + Service = Substitute.ForPartsOf( + CorporaClient, DataFilesClient, ExceptionHandler, FeatureManager, @@ -2613,7 +3727,7 @@ public TestEnvironment(TestEnvironmentOptions? options = null) MockLogger, ParatextService, ProjectSecrets, - realtimeService, + RealtimeService, siteOptions, TrainingDataService, TranslationEnginesClient, @@ -2622,18 +3736,138 @@ public TestEnvironment(TestEnvironmentOptions? 
options = null) } public MachineProjectService Service { get; } + public ICorporaClient CorporaClient { get; } public IDataFilesClient DataFilesClient { get; } public IFeatureManager FeatureManager { get; } public IFileSystemService FileSystemService { get; } public IParatextService ParatextService { get; } + public SFMemoryRealtimeService RealtimeService { get; } public ITranslationEnginesClient TranslationEnginesClient { get; } + private MemoryRepository TrainingData { get; } public ITrainingDataService TrainingDataService { get; } - public MemoryRepository TrainingData { get; } public MemoryRepository Projects { get; } public MemoryRepository ProjectSecrets { get; } public MockLogger MockLogger { get; } public IExceptionHandler ExceptionHandler { get; } + /// + /// Asserts whether the correct API calls have been made for SyncProjectCorporaAsync. + /// + /// The test environment. + /// The actual results from the synchronization. + /// If true, expect corpora to be created on Serval. + /// An asynchronous task. + public async Task AssertSyncProjectCorporaAsync( + TestEnvironmentOptions options, + IList actual, + bool createsServalCorpora + ) + { + int numberOfServalCorpusFiles = 2; + + // Target + await CorporaClient + .Received(createsServalCorpora ? 1 : 0) + .CreateAsync(Arg.Is(c => c.Name == $"{Project02}_{Project02}")); + Assert.AreEqual(options.PreTranslate ? 2 : 1, actual.Count(s => s.ProjectId == Project02)); + + // Source + await CorporaClient + .Received(createsServalCorpora ?
1 : 0) + .CreateAsync(Arg.Is(c => c.Name == $"{Project02}_{Project01}")); + + // See how many times the source corpus was used in the parallel corpora + int expected = options switch + { + { PreTranslate: false } => 1, + { PreTranslate: true, AlternateTrainingSource: true, AlternateTrainingSourceAndSourceAreTheSame: true } + => 2, + { PreTranslate: true, AlternateTrainingSource: true, AlternateSource: true } => 0, + { PreTranslate: true, AlternateTrainingSource: true } => 1, + { PreTranslate: true, AlternateSource: true } => 1, + { PreTranslate: true } => 2, + }; + Assert.AreEqual(expected, actual.Count(s => s.ProjectId == Project01)); + + // Alternate Source + if (options.AlternateSource) + { + await CorporaClient + .Received(createsServalCorpora ? 1 : 0) + .CreateAsync(Arg.Is(c => c.Name == $"{Project02}_{Project03}")); + Assert.AreEqual(options.PreTranslate ? 1 : 0, actual.Count(s => s.ProjectId == Project03)); + numberOfServalCorpusFiles++; + } + + // Alternate Training Source + // This can be used to test that a duplicate corpus and file were not uploaded + if (options.AlternateTrainingSource && !options.AlternateTrainingSourceAndSourceAreTheSame) + { + await CorporaClient + .Received(createsServalCorpora ? 1 : 0) + .CreateAsync(Arg.Is(c => c.Name == $"{Project02}_{Project04}")); + Assert.AreEqual(options.PreTranslate ? 1 : 0, actual.Count(s => s.ProjectId == Project04)); + numberOfServalCorpusFiles++; + } + + // Additional Training Source + if (options.AdditionalTrainingSource) + { + await CorporaClient + .Received(createsServalCorpora ? 1 : 0) + .CreateAsync(Arg.Is(c => c.Name == $"{Project02}_{Project05}")); + Assert.AreEqual(options.PreTranslate ? 
1 : 0, actual.Count(s => s.ProjectId == Project05)); + numberOfServalCorpusFiles++; + } + + // Each corpus will be updated, even after creation + await CorporaClient + .Received(numberOfServalCorpusFiles) + .UpdateAsync(Arg.Any(), Arg.Any>()); + + // A file will be uploaded for each corpus + await Service + .Received(numberOfServalCorpusFiles) + .UploadParatextFileAsync(Arg.Any(), Arg.Any(), CancellationToken.None); + + // The parallel corpora will be created or updated + await Service + .Received(options.PreTranslate ? 2 : 1) + .CreateOrUpdateParallelCorpusAsync( + options.PreTranslate ? TranslationEngine01 : TranslationEngine02, + Arg.Any(), + Arg.Any(), + Arg.Any>(), + Arg.Any>(), + CancellationToken.None + ); + + // Unused corpora will be removed + await Service + .Received(1) + .DeleteAllCorporaAndFilesAsync( + Arg.Any>(), + Project02, + CancellationToken.None + ); + + // The training data will be synced for pre-translation builds only + await Service + .Received(options.PreTranslate ? 1 : 0) + .SyncAdditionalTrainingData( + User01, + Arg.Any(), + TranslationEngine01, + Arg.Any(), + Arg.Any(), + CancellationToken.None + ); + + // Reset the received calls so we can call SyncProjectCorporaAsync again + CorporaClient.ClearReceivedCalls(); + Service.ClearReceivedCalls(); + } + public async Task SetDataInSync( string projectId, bool preTranslate = false, @@ -2726,17 +3960,22 @@ await ProjectSecrets.UpdateAsync( } ); - public async Task BeforeFirstSync(string projectId) => - await ProjectSecrets.UpdateAsync( - projectId, - u => u.Set(p => p.ServalData, new ServalData { TranslationEngineId = TranslationEngine01 }) - ); + /// + /// Sets up the Project Secret. + /// + /// The project identifier. + /// The Serval configuration data. + /// The asynchronous task. + public async Task SetupProjectSecretAsync(string projectId, ServalData? 
servalData) => + await ProjectSecrets.UpdateAsync(projectId, u => u.Set(p => p.ServalData, servalData)); /// /// Sets up the additional training data /// /// The project identifier. - /// If the project is to have existing data, true. Default: false. + /// + /// If the project is to have existing data, true. Default: false. + /// public async Task SetupTrainingDataAsync(string projectId, bool existingData = false) { TrainingData.Add( @@ -2762,7 +4001,7 @@ public async Task SetupTrainingDataAsync(string projectId, bool existingData = f .Returns(args => { ((List)args[3]).Add(GetMockTrainingData(true)); - ((List)args[4]).Add(GetMockTrainingData(false)); + ((List)args[4]).Add(GetMockTrainingData()); return Task.CompletedTask; }); if (existingData) @@ -2806,14 +4045,23 @@ await ProjectSecrets.UpdateAsync( } } - private static MockText GetMockTrainingData(bool source) => - new MockText + /// + /// Gets the mock training data. + /// + /// + /// Optional. Default: false. + /// If true, the first segment's text will be "source"; otherwise if false it will be "target". + /// + /// The training text with segments. + public static SFTrainingText GetMockTrainingData(bool source = false) => + new SFTrainingText { - Id = Data01, + Id = $"{Project01}_{Data01}", Segments = new List { - new SFTextSegment(["1"], $"alternate {(source ? "source" : "target")}", false, false, false), + new SFTextSegment(["1"], $"{(source ? 
"source" : "target")}", false, false, false), new SFTextSegment(["2"], string.Empty, false, false, false), + new SFTextSegment(["3"], "all flags", true, true, true), }, }; } diff --git a/test/SIL.XForge.Scripture.Tests/Services/MockText.cs b/test/SIL.XForge.Scripture.Tests/Services/MockText.cs deleted file mode 100644 index bf10985be4..0000000000 --- a/test/SIL.XForge.Scripture.Tests/Services/MockText.cs +++ /dev/null @@ -1,10 +0,0 @@ -using System.Collections.Generic; -using SIL.XForge.Scripture.Models; - -namespace SIL.XForge.Scripture.Services; - -public class MockText : ISFText -{ - public string Id { get; init; } = string.Empty; - public IEnumerable Segments { get; init; } = new List(); -} diff --git a/test/SIL.XForge.Scripture.Tests/Services/PreTranslationServiceTests.cs b/test/SIL.XForge.Scripture.Tests/Services/PreTranslationServiceTests.cs index 02643c61af..fad839db4c 100644 --- a/test/SIL.XForge.Scripture.Tests/Services/PreTranslationServiceTests.cs +++ b/test/SIL.XForge.Scripture.Tests/Services/PreTranslationServiceTests.cs @@ -3,6 +3,7 @@ using System.Threading; using System.Threading.Tasks; using NSubstitute; +using NSubstitute.Extensions; using NUnit.Framework; using Serval.Client; using SIL.XForge.DataAccess; @@ -17,16 +18,113 @@ namespace SIL.XForge.Scripture.Services; public class PreTranslationServiceTests { private const string Project01 = "project01"; - private const string Project02 = "project02"; - private const string Project03 = "project03"; private const string Corpus01 = "corpus01"; + private const string ParallelCorpus01 = "parallelCorpus01"; private const string TranslationEngine01 = "translationEngine01"; + [TestCase(true)] + [TestCase(false)] + public async Task GetPreTranslationParametersAsync_CompatibleWithLegacyCorpora(bool uploadParatextZipFile) + { + // Set up test environment + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync( + new ServalData + { + PreTranslationEngineId = TranslationEngine01, + Corpora = new 
Dictionary + { + { + "another_corpus", + new ServalCorpus { PreTranslate = false } + }, + { + Corpus01, + new ServalCorpus { PreTranslate = true, UploadParatextZipFile = uploadParatextZipFile } + }, + }, + } + ); + + // SUT + (string translationEngineId, string corpusId, bool useParatextVerseRef) = + await env.Service.GetPreTranslationParametersAsync(Project01); + Assert.AreEqual(TranslationEngine01, translationEngineId); + Assert.AreEqual(Corpus01, corpusId); + Assert.AreEqual(uploadParatextZipFile, useParatextVerseRef); + } + [Test] - public async Task GetPreTranslationsAsync_CombinesSegmentedVerses() + public async Task GetPreTranslationParametersAsync_CompatibleWithParallelCorpora() + { + // Set up test environment + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync( + new ServalData + { + ParallelCorpusIdForPreTranslate = ParallelCorpus01, + PreTranslationEngineId = TranslationEngine01, + } + ); + + // SUT + (string translationEngineId, string corpusId, bool useParatextVerseRef) = + await env.Service.GetPreTranslationParametersAsync(Project01); + Assert.AreEqual(TranslationEngine01, translationEngineId); + Assert.AreEqual(ParallelCorpus01, corpusId); + Assert.IsTrue(useParatextVerseRef); + } + + [Test] + public async Task GetPreTranslationParametersAsync_ThrowsExceptionWhenNoCorpusConfiguredForProject() + { + // Set up test environment + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync(new ServalData { PreTranslationEngineId = TranslationEngine01 }); + + // SUT + Assert.ThrowsAsync(() => env.Service.GetPreTranslationParametersAsync(Project01)); + } + + [Test] + public async Task GetPreTranslationParametersAsync_ThrowsExceptionWhenNoPreTranslationConfigured() + { + // Set up test environment + var env = new TestEnvironment(); + await env.SetupProjectSecretAsync(new ServalData()); + + // SUT + Assert.ThrowsAsync(() => env.Service.GetPreTranslationParametersAsync(Project01)); + } + + [Test] + public async Task 
GetPreTranslationParametersAsync_ThrowsExceptionWhenNullServalData() { // Set up test environment var env = new TestEnvironment(); + await env.SetupProjectSecretAsync(servalData: null); + + // SUT + Assert.ThrowsAsync(() => env.Service.GetPreTranslationParametersAsync(Project01)); + } + + [Test] + public void GetPreTranslationParametersAsync_ThrowsExceptionWhenProjectSecretMissing() + { + // Set up test environment + var env = new TestEnvironment(); + + // SUT + Assert.ThrowsAsync( + () => env.Service.GetPreTranslationParametersAsync("invalid_project_id") + ); + } + + [Test] + public async Task GetPreTranslationsAsync_CombinesSegmentedVerses() + { + // Set up test environment + var env = new TestEnvironment(new TestEnvironmentOptions { MockPreTranslationParameters = true }); const int bookNum = 64; const int chapterNum = 1; string textId = PreTranslationService.GetTextId(bookNum, chapterNum); @@ -82,35 +180,13 @@ public async Task GetPreTranslationsAsync_CombinesSegmentedVerses() ); } - [Test] - public void GetPreTranslationsAsync_ThrowsExceptionWhenProjectSecretMissing() - { - // Set up test environment - var env = new TestEnvironment(); - - // SUT - Assert.ThrowsAsync( - () => env.Service.GetPreTranslationsAsync("invalid_project_id", 40, 1, CancellationToken.None) - ); - } - - [Test] - public void GetPreTranslationsAsync_ThrowsExceptionWhenNoPreTranslationConfigured() - { - // Set up test environment - var env = new TestEnvironment(); - - // SUT - Assert.ThrowsAsync( - () => env.Service.GetPreTranslationsAsync(Project02, 40, 1, CancellationToken.None) - ); - } - [Test] public async Task GetPreTranslationsAsync_ReturnsEmptyArrayIfNoPreTranslations_Paratext() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { UseParatextZipFile = true }); + var env = new TestEnvironment( + new TestEnvironmentOptions { MockPreTranslationParameters = true, UseParatextZipFile = true } + ); const int bookNum = 40; const int chapterNum = 1; string 
textId = PreTranslationService.GetTextId(bookNum); @@ -139,7 +215,7 @@ await env public async Task GetPreTranslationsAsync_ReturnsEmptyArrayIfNoPreTranslations_Text() { // Set up test environment - var env = new TestEnvironment(); + var env = new TestEnvironment(new TestEnvironmentOptions { MockPreTranslationParameters = true }); const int bookNum = 40; const int chapterNum = 1; string textId = PreTranslationService.GetTextId(bookNum, chapterNum); @@ -168,7 +244,9 @@ await env public async Task GetPreTranslationsAsync_ReturnsUsablePreTranslations_Paratext() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { UseParatextZipFile = true }); + var env = new TestEnvironment( + new TestEnvironmentOptions { MockPreTranslationParameters = true, UseParatextZipFile = true } + ); const int bookNum = 40; const int chapterNum = 1; string textId = PreTranslationService.GetTextId(bookNum); @@ -248,7 +326,7 @@ public async Task GetPreTranslationsAsync_ReturnsUsablePreTranslations_Paratext( public async Task GetPreTranslationsAsync_ReturnsUsablePreTranslations_Text() { // Set up test environment - var env = new TestEnvironment(); + var env = new TestEnvironment(new TestEnvironmentOptions { MockPreTranslationParameters = true }); const int bookNum = 40; const int chapterNum = 1; string textId = PreTranslationService.GetTextId(bookNum, chapterNum); @@ -324,35 +402,13 @@ public async Task GetPreTranslationsAsync_ReturnsUsablePreTranslations_Text() ); } - [Test] - public void GetPreTranslationUsfmAsync_ThrowsExceptionWhenProjectSecretMissing() - { - // Set up test environment - var env = new TestEnvironment(); - - // SUT - Assert.ThrowsAsync( - () => env.Service.GetPreTranslationUsfmAsync("invalid_project_id", 40, 1, CancellationToken.None) - ); - } - - [Test] - public void GetPreTranslationUsfmAsync_ThrowsExceptionWhenNoPreTranslationConfigured() - { - // Set up test environment - var env = new TestEnvironment(); - - // SUT - Assert.ThrowsAsync( - 
() => env.Service.GetPreTranslationUsfmAsync(Project02, 40, 1, CancellationToken.None) - ); - } - [Test] public async Task GetPreTranslationUsfmAsync_ReturnsEntireBook() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { UseParatextZipFile = true }); + var env = new TestEnvironment( + new TestEnvironmentOptions { MockPreTranslationParameters = true, UseParatextZipFile = true } + ); // SUT string usfm = await env.Service.GetPreTranslationUsfmAsync(Project01, 40, 0, CancellationToken.None); @@ -363,7 +419,9 @@ public async Task GetPreTranslationUsfmAsync_ReturnsEntireBook() public async Task GetPreTranslationUsfmAsync_ReturnsChapterOneWithIntroductoryMaterial() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { UseParatextZipFile = true }); + var env = new TestEnvironment( + new TestEnvironmentOptions { MockPreTranslationParameters = true, UseParatextZipFile = true } + ); // SUT string usfm = await env.Service.GetPreTranslationUsfmAsync(Project01, 40, 1, CancellationToken.None); @@ -374,7 +432,9 @@ public async Task GetPreTranslationUsfmAsync_ReturnsChapterOneWithIntroductoryMa public async Task GetPreTranslationUsfmAsync_ReturnsSpecificChapter() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { UseParatextZipFile = true }); + var env = new TestEnvironment( + new TestEnvironmentOptions { MockPreTranslationParameters = true, UseParatextZipFile = true } + ); // SUT string usfm = await env.Service.GetPreTranslationUsfmAsync(Project01, 40, 2, CancellationToken.None); @@ -385,43 +445,21 @@ public async Task GetPreTranslationUsfmAsync_ReturnsSpecificChapter() public async Task GetPreTranslationUsfmAsync_ReturnsEmptyStringForMissingChapter() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { UseParatextZipFile = true }); + var env = new TestEnvironment( + new TestEnvironmentOptions { MockPreTranslationParameters = true, 
UseParatextZipFile = true } + ); // SUT string usfm = await env.Service.GetPreTranslationUsfmAsync(Project01, 40, 3, CancellationToken.None); Assert.IsEmpty(usfm); } - [Test] - public void UpdatePreTranslationStatusAsync_ThrowsExceptionWhenNoPreTranslationConfigured() - { - // Set up test environment - var env = new TestEnvironment(); - - // SUT - Assert.ThrowsAsync( - () => env.Service.UpdatePreTranslationStatusAsync(Project02, CancellationToken.None) - ); - } - [Test] public void UpdatePreTranslationStatusAsync_ThrowsExceptionWhenProjectMissing() { // Set up test environment var env = new TestEnvironment(); - // SUT - Assert.ThrowsAsync( - () => env.Service.UpdatePreTranslationStatusAsync(Project03, CancellationToken.None) - ); - } - - [Test] - public void UpdatePreTranslationStatusAsync_ThrowsExceptionWhenProjectSecretMissing() - { - // Set up test environment - var env = new TestEnvironment(); - // SUT Assert.ThrowsAsync( () => env.Service.UpdatePreTranslationStatusAsync("invalid_project_id", CancellationToken.None) @@ -432,7 +470,7 @@ public void UpdatePreTranslationStatusAsync_ThrowsExceptionWhenProjectSecretMiss public async Task UpdatePreTranslationStatusAsync_NoDrafts() { // Set up test environment - var env = new TestEnvironment(); + var env = new TestEnvironment(new TestEnvironmentOptions { MockPreTranslationParameters = true }); env.TranslationEnginesClient.GetAllPretranslationsAsync( TranslationEngine01, @@ -469,7 +507,9 @@ public async Task UpdatePreTranslationStatusAsync_NoDrafts() public async Task UpdatePreTranslationStatusAsync_Paratext() { // Set up test environment - var env = new TestEnvironment(new TestEnvironmentOptions { UseParatextZipFile = true }); + var env = new TestEnvironment( + new TestEnvironmentOptions { MockPreTranslationParameters = true, UseParatextZipFile = true } + ); env.TranslationEnginesClient.GetAllPretranslationsAsync( TranslationEngine01, @@ -515,7 +555,7 @@ public async Task UpdatePreTranslationStatusAsync_Paratext() 
public async Task UpdatePreTranslationStatusAsync_Text() { // Set up test environment - var env = new TestEnvironment(); + var env = new TestEnvironment(new TestEnvironmentOptions { MockPreTranslationParameters = true }); env.TranslationEnginesClient.GetAllPretranslationsAsync( TranslationEngine01, @@ -559,6 +599,7 @@ public async Task UpdatePreTranslationStatusAsync_Text() private class TestEnvironmentOptions { + public bool MockPreTranslationParameters { get; init; } public bool UseParatextZipFile { get; init; } } @@ -572,35 +613,7 @@ private class TestEnvironment public TestEnvironment(TestEnvironmentOptions? options = null) { options ??= new TestEnvironmentOptions(); - var projectSecrets = new MemoryRepository( - [ - new SFProjectSecret - { - Id = Project01, - ServalData = new ServalData - { - PreTranslationEngineId = TranslationEngine01, - Corpora = new Dictionary - { - { - "another_corpus", - new ServalCorpus { PreTranslate = false } - }, - { - Corpus01, - new ServalCorpus - { - PreTranslate = true, - UploadParatextZipFile = options.UseParatextZipFile, - } - }, - }, - }, - }, - new SFProjectSecret { Id = Project02 }, - new SFProjectSecret { Id = Project03 }, - ] - ); + ProjectSecrets = new MemoryRepository([new SFProjectSecret { Id = Project01 }]); RealtimeService = new SFMemoryRealtimeService(); SFProject[] sfProjects = @@ -632,7 +645,6 @@ public TestEnvironment(TestEnvironmentOptions? options = null) }, ], }, - new SFProject { Id = Project02 }, ]; RealtimeService.AddRepository("sf_projects", OTType.Json0, new MemoryRepository(sfProjects)); TranslationEnginesClient = Substitute.For(); @@ -646,11 +658,35 @@ public TestEnvironment(TestEnvironmentOptions? 
options = null) CancellationToken.None ) .Returns(MatthewBookUsfm); - Service = new PreTranslationService(projectSecrets, RealtimeService, TranslationEnginesClient); + Service = Substitute.ForPartsOf( + ProjectSecrets, + RealtimeService, + TranslationEnginesClient + ); + if (options.MockPreTranslationParameters) + { + Service + .Configure() + .GetPreTranslationParametersAsync(Project01) + .Returns( + Task.FromResult<(string, string, bool)>( + (TranslationEngine01, Corpus01, options.UseParatextZipFile) + ) + ); + } } + private MemoryRepository ProjectSecrets { get; } public SFMemoryRealtimeService RealtimeService { get; } public PreTranslationService Service { get; } public ITranslationEnginesClient TranslationEnginesClient { get; } + + /// + /// Sets up the Project Secret. + /// + /// The Serval configuration data. + /// The asynchronous task. + public async Task SetupProjectSecretAsync(ServalData? servalData) => + await ProjectSecrets.UpdateAsync(Project01, u => u.Set(p => p.ServalData, servalData)); } } diff --git a/test/SIL.XForge.Scripture.Tests/Services/SFBiblicalTermsTextTests.cs b/test/SIL.XForge.Scripture.Tests/Services/SFBiblicalTermsTextTests.cs deleted file mode 100644 index 8e9ab9cfb6..0000000000 --- a/test/SIL.XForge.Scripture.Tests/Services/SFBiblicalTermsTextTests.cs +++ /dev/null @@ -1,201 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Xml.Linq; -using NUnit.Framework; -using SIL.XForge.Scripture.Models; - -namespace SIL.XForge.Scripture.Services; - -[TestFixture] -public class SFBiblicalTermsTextTests -{ - [Test] - public void Segments_EmptyDoc() - { - var doc = new XDocument(new XElement("TermRenderingsList")); - var text = new SFBiblicalTermsText("project01", doc); - Assert.That(text.Segments, Is.Empty); - } - - [Test] - public void Segments_EmptyRenderings() - { - var doc = new XDocument( - new XElement( - "TermRenderingsList", - TermRendering("term1", guess: false), - TermRendering("term2", guess: 
false) - ) - ); - var text = new SFBiblicalTermsText("project01", doc); - Assert.That(text.Segments, Is.Empty); - } - - [Test] - public void Segments_Guess() - { - var doc = new XDocument( - new XElement( - "TermRenderingsList", - TermRendering("term1", guess: true, "Term1"), - TermRendering("term2", guess: true, "Term2") - ) - ); - var text = new SFBiblicalTermsText("project01", doc); - Assert.That(text.Segments, Is.Empty); - } - - [Test] - public void Segments_InvalidDocument() - { - var doc = new XDocument(); - var text = new SFBiblicalTermsText("project01", doc); - Assert.That(text.Segments, Is.Empty); - } - - [Test] - public void Segments_InvalidId() - { - var doc = new XDocument(new XElement("TermRenderingsList", TermRendering(string.Empty, guess: false, "Term1"))); - var text = new SFBiblicalTermsText("project01", doc); - Assert.That(text.Segments, Is.Empty); - } - - [Test] - public void Segments_Renderings() - { - var doc = new XDocument( - new XElement( - "TermRenderingsList", - TermRendering("term2", guess: false, "Term2"), - TermRendering("term1", guess: false, "Term1") - ) - ); - var text = new SFBiblicalTermsText("project01", doc); - SFTextSegment[] segments = text.Segments.ToArray(); - Assert.That(segments.Length, Is.EqualTo(2)); - - Assert.That(segments[0].SegmentRef, Is.EqualTo("term1")); - Assert.That(segments[0].SegmentText, Is.EqualTo("Term1")); - - Assert.That(segments[1].SegmentRef, Is.EqualTo("term2")); - Assert.That(segments[1].SegmentText, Is.EqualTo("Term2")); - } - - [Test] - public void Segments_MultipleRenderings() - { - var doc = new XDocument( - new XElement( - "TermRenderingsList", - TermRendering("term2", guess: false, "Term2-1", "Term2-2"), - TermRendering("term1", guess: false, "Term1", "\n", " ") - ) - ); - var text = new SFBiblicalTermsText("project01", doc); - SFTextSegment[] segments = text.Segments.ToArray(); - Assert.That(segments.Length, Is.EqualTo(3)); - - Assert.That(segments[0].SegmentRef, Is.EqualTo("term1")); - 
Assert.That(segments[0].SegmentText, Is.EqualTo("Term1")); - - Assert.That(segments[1].SegmentRef, Is.EqualTo("term2")); - Assert.That(segments[1].SegmentText, Is.EqualTo("Term2-1")); - - Assert.That(segments[2].SegmentRef, Is.EqualTo("term2")); - Assert.That(segments[2].SegmentText, Is.EqualTo("Term2-2")); - } - - [Test] - public void Segments_ComplexRenderings() - { - // These examples are drawn from the Paratext in-app documentation - var renderings = new List<(string rendering, string expected)> - { - ("word1", "word1"), - ("word1 word2", "word1 word2"), - ("word1/word2", "word1 word2"), - ("word1 / word2", "word1 word2"), - ("word1 * word2", "word1 word2"), - ("word1 ** word2", "word1 word2"), - ("word1 * * word2", "word1 word2"), - ("word1*", "word1"), - ("*word1", "word1"), - ("*word1*", "word1"), - ("w*rd1", "wrd1"), - ("word1 (information)", "word1"), - }; - - var doc = new XDocument( - new XElement( - "TermRenderingsList", - TermRendering("Term01", guess: false, renderings[0].rendering), - TermRendering("Term02", guess: false, renderings[1].rendering), - TermRendering("Term03", guess: false, renderings[2].rendering), - TermRendering("Term04", guess: false, renderings[3].rendering), - TermRendering("Term05", guess: false, renderings[4].rendering), - TermRendering("Term06", guess: false, renderings[5].rendering), - TermRendering("Term07", guess: false, renderings[6].rendering), - TermRendering("Term08", guess: false, renderings[7].rendering), - TermRendering("Term09", guess: false, renderings[8].rendering), - TermRendering("Term10", guess: false, renderings[9].rendering), - TermRendering("Term11", guess: false, renderings[10].rendering), - TermRendering("Term12", guess: false, renderings[11].rendering) - ) - ); - var text = new SFBiblicalTermsText("project01", doc); - SFTextSegment[] segments = text.Segments.ToArray(); - Assert.That(segments.Length, Is.EqualTo(renderings.Count)); - for (int i = 0; i < renderings.Count; i++) - { - 
Assert.That(segments[i].SegmentRef, Is.EqualTo("Term" + (i + 1).ToString("D2"))); - Assert.That(segments[i].SegmentText, Is.EqualTo(renderings[i].expected)); - } - } - - [Test] - public void Segments_BiblicalTermsFromMongo() - { - var biblicalTerms = new List - { - new BiblicalTerm { TermId = "term2", Renderings = { "Term2-1", "Term2-2" } }, - new BiblicalTerm { TermId = "term1", Renderings = { "Term1", "\n" } }, - }; - var text = new SFBiblicalTermsText("project01", biblicalTerms); - SFTextSegment[] segments = text.Segments.ToArray(); - Assert.That(segments.Length, Is.EqualTo(3)); - - Assert.That(segments[0].SegmentRef, Is.EqualTo("term1")); - Assert.That(segments[0].SegmentText, Is.EqualTo("Term1")); - - Assert.That(segments[1].SegmentRef, Is.EqualTo("term2")); - Assert.That(segments[1].SegmentText, Is.EqualTo("Term2-1")); - - Assert.That(segments[2].SegmentRef, Is.EqualTo("term2")); - Assert.That(segments[2].SegmentText, Is.EqualTo("Term2-2")); - } - - [Test] - public void Segments_NoBiblicalTerms() - { - var biblicalTerms = Array.Empty(); - var text = new SFBiblicalTermsText("project01", biblicalTerms); - Assert.That(text.Segments, Is.Empty); - } - - private static XElement TermRendering(string id, bool guess, params string[] renderings) => - new XElement( - "TermRendering", - new XAttribute("Id", id), - new XAttribute("Guess", guess), - new XElement( - "Renderings", - string.Join("||", renderings), - new XElement("Glossary"), - new XElement("Changes"), - new XElement("Notes"), - new XElement("Denials") - ) - ); -} diff --git a/test/SIL.XForge.Scripture.Tests/Services/SFProjectServiceTests.cs b/test/SIL.XForge.Scripture.Tests/Services/SFProjectServiceTests.cs index f44976a6a0..e47a6ad61b 100644 --- a/test/SIL.XForge.Scripture.Tests/Services/SFProjectServiceTests.cs +++ b/test/SIL.XForge.Scripture.Tests/Services/SFProjectServiceTests.cs @@ -2274,10 +2274,10 @@ await env.Service.UpdateSettingsAsync( await env .MachineProjectService.DidNotReceive() - 
.RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env .MachineProjectService.DidNotReceive() - .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env.SyncService.DidNotReceive().SyncAsync(Arg.Any()); } @@ -2313,10 +2313,10 @@ await env.Service.UpdateSettingsAsync( await env .MachineProjectService.DidNotReceive() - .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env .MachineProjectService.DidNotReceive() - .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env.SyncService.Received().SyncAsync(Arg.Any()); env.BackgroundJobClient.Received(1).Create(Arg.Any(), Arg.Any()); @@ -2347,10 +2347,10 @@ await env.Service.UpdateSettingsAsync( await env .MachineProjectService.DidNotReceive() - .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env .MachineProjectService.DidNotReceive() - .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env.SyncService.DidNotReceive().SyncAsync(Arg.Any()); } @@ -2385,10 +2385,10 @@ await env.Service.UpdateSettingsAsync( await env .MachineProjectService.DidNotReceive() - .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env .MachineProjectService.DidNotReceive() - .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env.SyncService.Received().SyncAsync(Arg.Any()); env.BackgroundJobClient.Received(1).Create(Arg.Any(), Arg.Any()); @@ -2417,10 +2417,10 @@ await env.Service.UpdateSettingsAsync( await env .MachineProjectService.DidNotReceive() - 
.RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env .MachineProjectService.DidNotReceive() - .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env.SyncService.DidNotReceive().SyncAsync(Arg.Any()); } @@ -2459,10 +2459,10 @@ await env.Service.UpdateSettingsAsync( await env .MachineProjectService.DidNotReceive() - .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env .MachineProjectService.DidNotReceive() - .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env.SyncService.Received().SyncAsync(Arg.Any()); env.BackgroundJobClient.Received(1).Create(Arg.Any(), Arg.Any()); @@ -2489,10 +2489,10 @@ await env.Service.UpdateSettingsAsync( await env .MachineProjectService.DidNotReceive() - .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env .MachineProjectService.DidNotReceive() - .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env.SyncService.DidNotReceive().SyncAsync(Arg.Any()); } @@ -2512,10 +2512,10 @@ await env.Service.UpdateSettingsAsync( await env .MachineProjectService.DidNotReceive() - .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env .MachineProjectService.DidNotReceive() - .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env.SyncService.DidNotReceive().SyncAsync(Arg.Any()); } @@ -2535,10 +2535,10 @@ await env.Service.UpdateSettingsAsync( await env .MachineProjectService.DidNotReceive() - .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + 
.RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env .MachineProjectService.DidNotReceive() - .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env.SyncService.DidNotReceive().SyncAsync(Arg.Any()); } @@ -2559,10 +2559,10 @@ await env.Service.UpdateSettingsAsync( await env .MachineProjectService.Received() - .RemoveProjectAsync(User01, Project01, preTranslate: false, CancellationToken.None); + .RemoveProjectAsync(Project01, preTranslate: false, CancellationToken.None); await env .MachineProjectService.Received() - .AddProjectAsync(User01, Project01, preTranslate: false, CancellationToken.None); + .AddProjectAsync(Project01, preTranslate: false, CancellationToken.None); await env.SyncService.Received().SyncAsync(Arg.Any()); } @@ -2583,10 +2583,10 @@ await env.Service.UpdateSettingsAsync( await env .MachineProjectService.DidNotReceive() - .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env .MachineProjectService.DidNotReceive() - .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env.SyncService.Received().SyncAsync(Arg.Any()); } @@ -2606,10 +2606,10 @@ await env.Service.UpdateSettingsAsync( await env .MachineProjectService.DidNotReceive() - .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env .MachineProjectService.Received() - .AddProjectAsync(User01, Project03, preTranslate: false, CancellationToken.None); + .AddProjectAsync(Project03, preTranslate: false, CancellationToken.None); await env.SyncService.Received().SyncAsync(Arg.Any()); } @@ -2633,10 +2633,10 @@ await env.Service.UpdateSettingsAsync( await env .MachineProjectService.Received() - .RemoveProjectAsync(User01, Project01, preTranslate: false, CancellationToken.None); + .RemoveProjectAsync(Project01, 
preTranslate: false, CancellationToken.None); await env .MachineProjectService.DidNotReceive() - .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env.SyncService.Received().SyncAsync(Arg.Any()); } @@ -2652,10 +2652,10 @@ public async Task UpdateSettingsAsync_EnableChecking_Sync() await env .MachineProjectService.DidNotReceive() - .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .RemoveProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env .MachineProjectService.DidNotReceive() - .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()); + .AddProjectAsync(Arg.Any(), Arg.Any(), Arg.Any()); await env.SyncService.Received().SyncAsync(Arg.Any()); } @@ -2715,7 +2715,7 @@ public async Task DeleteProjectAsync_Success() Assert.That(user.Sites[SiteId].Projects, Does.Not.Contain(Project01)); await env .MachineProjectService.Received() - .RemoveProjectAsync(User01, Project01, preTranslate: false, CancellationToken.None); + .RemoveProjectAsync(Project01, preTranslate: false, CancellationToken.None); env.FileSystemService.Received().DeleteDirectory(ptProjectDir); Assert.That(env.ProjectSecrets.Contains(Project01), Is.False); @@ -2732,7 +2732,7 @@ await env ); await env .MachineProjectService.Received() - .RemoveProjectAsync(User01, SourceOnly, preTranslate: false, CancellationToken.None); + .RemoveProjectAsync(SourceOnly, preTranslate: false, CancellationToken.None); env.FileSystemService.Received().DeleteDirectory(ptProjectDir); Assert.That(env.ContainsProject(SourceOnly), Is.False); Assert.That(env.GetUser(User01).Sites[SiteId].Projects, Does.Not.Contain(SourceOnly)); diff --git a/tools/ServalBuildReport/Program.cs b/tools/ServalBuildReport/Program.cs index 00efee2e56..153eacf9fb 100644 --- a/tools/ServalBuildReport/Program.cs +++ b/tools/ServalBuildReport/Program.cs @@ -161,6 +161,7 @@ static ServiceProvider SetupServices(string environment) { const string httpClientName = 
"serval-api"; + const string tokenClientName = "serval-api-token"; ConfigurationBuilder configurationBuilder = new ConfigurationBuilder(); IConfiguration configuration = configurationBuilder @@ -170,21 +171,22 @@ static ServiceProvider SetupServices(string environment) ServalOptions servalOptions = configuration.GetSection(environment).Get()!; var services = new ServiceCollection(); - services.AddAccessTokenManagement(options => - { - options.Client.Clients.Add( - httpClientName, - new ClientCredentialsTokenRequest + services.AddDistributedMemoryCache(); + services + .AddClientCredentialsTokenManagement() + .AddClient( + tokenClientName, + client => { - Address = servalOptions.TokenUrl, - ClientId = servalOptions.ClientId, - ClientSecret = servalOptions.ClientSecret, - Parameters = new Parameters { { "audience", servalOptions.Audience } }, + client.TokenEndpoint = servalOptions.TokenUrl; + client.ClientId = servalOptions.ClientId; + client.ClientSecret = servalOptions.ClientSecret; + client.Parameters = new Parameters { { "audience", servalOptions.Audience } }; } ); - }); - services.AddClientAccessTokenHttpClient( + services.AddClientCredentialsHttpClient( httpClientName, + tokenClientName, configureClient: client => client.BaseAddress = new Uri(servalOptions.ApiServer) ); services.AddHttpClient(httpClientName).SetHandlerLifetime(TimeSpan.FromMinutes(5)); diff --git a/tools/ServalBuildReport/ServalBuildReport.csproj b/tools/ServalBuildReport/ServalBuildReport.csproj index 9dee327128..d932bdea33 100644 --- a/tools/ServalBuildReport/ServalBuildReport.csproj +++ b/tools/ServalBuildReport/ServalBuildReport.csproj @@ -15,11 +15,12 @@ - - - - - + + + + + + diff --git a/tools/ServalDownloader/Program.cs b/tools/ServalDownloader/Program.cs index ad8fdcdac1..6ac289fca3 100644 --- a/tools/ServalDownloader/Program.cs +++ b/tools/ServalDownloader/Program.cs @@ -15,6 +15,7 @@ // Setup services ServiceProvider services = SetupServices(); IDataFilesClient dataFilesClient = 
services.GetService()!; +ICorporaClient corporaClient = services.GetService()!; ITranslationEnginesClient translationEnginesClient = services.GetService()!; // Set up the translation engine directory and get the translation engine @@ -28,8 +29,10 @@ return; } -// Download every file for every corpus +// Download every file for every legacy corpus +#pragma warning disable CS0612 // Type or member is obsolete foreach (TranslationCorpus corpus in await translationEnginesClient.GetAllCorporaAsync(translationEngineId)) +#pragma warning restore CS0612 // Type or member is obsolete { string corpusPath = Path.Combine(translationEnginePath, corpus.Id); Directory.CreateDirectory(corpusPath); @@ -73,6 +76,64 @@ } } +// Download every file for every parallel corpus +foreach ( + TranslationParallelCorpus parallelCorpus in await translationEnginesClient.GetAllParallelCorporaAsync( + translationEngineId + ) +) +{ + string parallelCorpusPath = Path.Combine(translationEnginePath, parallelCorpus.Id); + Directory.CreateDirectory(parallelCorpusPath); + foreach (ResourceLink sourceCorpus in parallelCorpus.SourceCorpora) + { + // Create the source directory + string sourcePath = Path.Combine(parallelCorpusPath, "source"); + Directory.CreateDirectory(sourcePath); + + Corpus corpus = await corporaClient.GetAsync(sourceCorpus.Id); + foreach (CorpusFile corpusFile in corpus.Files) + { + // Get the file extension + DataFile dataFile = await dataFilesClient.GetAsync(corpusFile.File.Id); + string extension = dataFile.Format == FileFormat.Paratext ? 
".zip" : ".txt"; + + // Download the file + FileResponse file = await dataFilesClient.DownloadAsync(corpusFile.File.Id); + + // Write the file + string path = Path.Combine(sourcePath, $"{corpusFile.TextId}_({corpusFile.File.Id}){extension}"); + Console.WriteLine($"Writing {path}..."); + await using FileStream fileStream = new FileStream(path, FileMode.Create, FileAccess.Write); + file.Stream.CopyTo(fileStream); + } + } + + foreach (ResourceLink targetCorpus in parallelCorpus.TargetCorpora) + { + // Create the target directory + string targetPath = Path.Combine(parallelCorpusPath, "target"); + Directory.CreateDirectory(targetPath); + + Corpus corpus = await corporaClient.GetAsync(targetCorpus.Id); + foreach (CorpusFile corpusFile in corpus.Files) + { + // Get the file extension + DataFile dataFile = await dataFilesClient.GetAsync(corpusFile.File.Id); + string extension = dataFile.Format == FileFormat.Paratext ? ".zip" : ".txt"; + + // Download the file + FileResponse file = await dataFilesClient.DownloadAsync(corpusFile.File.Id); + + // Write the file + string path = Path.Combine(targetPath, $"{corpusFile.TextId}_({corpusFile.File.Id}){extension}"); + Console.WriteLine($"Writing {path}..."); + await using FileStream fileStream = new FileStream(path, FileMode.Create, FileAccess.Write); + file.Stream.CopyTo(fileStream); + } + } +} + // If we are on Windows, open the directory in Explorer if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) { @@ -85,6 +146,7 @@ static ServiceProvider SetupServices() { const string httpClientName = "serval-api"; + const string tokenClientName = "serval-api-token"; ConfigurationBuilder configurationBuilder = new ConfigurationBuilder(); IConfiguration configuration = configurationBuilder @@ -94,21 +156,23 @@ static ServiceProvider SetupServices() ServalOptions servalOptions = configuration.GetSection("Serval").Get()!; var services = new ServiceCollection(); - services.AddAccessTokenManagement(options => - { - 
options.Client.Clients.Add( - httpClientName, - new ClientCredentialsTokenRequest + + services.AddDistributedMemoryCache(); + services + .AddClientCredentialsTokenManagement() + .AddClient( + tokenClientName, + client => { - Address = servalOptions.TokenUrl, - ClientId = servalOptions.ClientId, - ClientSecret = servalOptions.ClientSecret, - Parameters = new Parameters { { "audience", servalOptions.Audience } }, + client.TokenEndpoint = servalOptions.TokenUrl; + client.ClientId = servalOptions.ClientId; + client.ClientSecret = servalOptions.ClientSecret; + client.Parameters = new Parameters { { "audience", servalOptions.Audience } }; } ); - }); - services.AddClientAccessTokenHttpClient( + services.AddClientCredentialsHttpClient( httpClientName, + tokenClientName, configureClient: client => client.BaseAddress = new Uri(servalOptions.ApiServer) ); services.AddHttpClient(httpClientName).SetHandlerLifetime(TimeSpan.FromMinutes(5)); @@ -119,6 +183,13 @@ static ServiceProvider SetupServices() var httpClient = factory!.CreateClient(httpClientName); return new TranslationEnginesClient(httpClient); }); + services.AddSingleton(sp => + { + // Instantiate the corpora client with our named HTTP client + var factory = sp.GetService(); + var httpClient = factory!.CreateClient(httpClientName); + return new CorporaClient(httpClient); + }); services.AddSingleton(sp => { // Instantiate the data files client with our named HTTP client @@ -129,11 +200,11 @@ static ServiceProvider SetupServices() return services.BuildServiceProvider(); } -public class ServalOptions +internal class ServalOptions { - public string ApiServer { get; set; } = string.Empty; - public string Audience { get; set; } = string.Empty; - public string ClientId { get; set; } = string.Empty; - public string ClientSecret { get; set; } = string.Empty; - public string TokenUrl { get; set; } = string.Empty; + public string ApiServer { get; init; } = string.Empty; + public string Audience { get; init; } = string.Empty; + 
public string ClientId { get; init; } = string.Empty; + public string ClientSecret { get; init; } = string.Empty; + public string TokenUrl { get; init; } = string.Empty; } diff --git a/tools/ServalDownloader/ServalDownloader.csproj b/tools/ServalDownloader/ServalDownloader.csproj index 4e7cf50663..e2bf16acbb 100644 --- a/tools/ServalDownloader/ServalDownloader.csproj +++ b/tools/ServalDownloader/ServalDownloader.csproj @@ -15,10 +15,11 @@ - - - - + + + + +