diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 80a5130b..6ffb75a7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,6 +21,14 @@ jobs: with: mongodb-version: "6.0" mongodb-replica-set: rs0 + + # Pull in a matching machine repo branch if it exists to use it rather than the released version of Machine. + - name: Check if there is a corresponding branch in Machine + id: find_matching_branch + continue-on-error: true + if: ${{ github.ref_name }} != "main" + run: cd .. && git clone https://github.com/sillsdev/machine.git --branch ${{ github.ref_name }} && dotnet build machine + - name: Restore dotnet tools run: dotnet tool restore - name: Restore dependencies diff --git a/.vscode/launch.json b/.vscode/launch.json index 24d96910..5038bdd1 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -25,7 +25,7 @@ "containerName": "echo_cntr", "request": "attach", "platform": "netCore", - "processName": "EchoTranslationEngine", + "processName": "EchoEngine", "sourceFileMap": { "/app": "${workspaceFolder}" }, @@ -77,9 +77,9 @@ "type": "coreclr", "request": "launch", "preLaunchTask": "build", - "program": "${workspaceFolder}/src/Echo/src/EchoTranslationEngine/bin/Debug/net8.0/EchoTranslationEngine.dll", + "program": "${workspaceFolder}/src/Echo/src/EchoEngine/bin/Debug/net8.0/EchoEngine.dll", "args": [], - "cwd": "${workspaceFolder}/src/Echo/src/EchoTranslationEngine", + "cwd": "${workspaceFolder}/src/Echo/src/EchoEngine", "stopAtEntry": false, "console": "internalConsole", "justMyCode": false, diff --git a/.vscode/settings.json b/.vscode/settings.json index cbe0a073..820245ac 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -13,6 +13,7 @@ "hmac", "HMACSHA", "inferencing", + "Initializable", "keyterms", "MATSRC", "MATTRG", @@ -28,6 +29,7 @@ "ptcc", "Rebinder", "stylesheet", + "thot", "timespan", "upserted", "USFM" diff --git a/Serval.sln b/Serval.sln index 9698e63e..5f828b79 100644 --- a/Serval.sln +++ 
b/Serval.sln @@ -36,8 +36,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Serval.Translation.Tests", EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Serval.DataFiles", "src\Serval\src\Serval.DataFiles\Serval.DataFiles.csproj", "{4375A7BF-E3CE-4785-91E3-2ED6FCEB074F}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "EchoTranslationEngine", "src\Echo\src\EchoTranslationEngine\EchoTranslationEngine.csproj", "{A9D08CA2-3CF7-4BB7-A47F-5A567FFDB2CC}" -EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Serval.ApiServer.IntegrationTests", "src\Serval\test\Serval.ApiServer.IntegrationTests\Serval.ApiServer.IntegrationTests.csproj", "{0C3DF75B-B022-4EFC-882C-F276F1EC8435}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Serval.E2ETests", "src\Serval\test\Serval.E2ETests\Serval.E2ETests.csproj", "{1F020042-D7B8-4541-9691-26ECFD1FFC73}" @@ -48,10 +46,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SIL.DataAccess.Tests", "src EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Serval.Shared.Tests", "src\Serval\test\Serval.Shared.Tests\Serval.Shared.Tests.csproj", "{0E220C65-AA88-450E-AFB2-844E49060B3F}" EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Echo", "Echo", "{53B18D34-B7C7-4B91-BCB0-1021170DFC65}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{BE90915F-E16D-479C-9568-22A08F0FD8F9}" -EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Serval", "Serval", "{6D20F76D-9A0E-44AC-8754-B4291C75D25B}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{25CDB05B-4E24-4A6E-933E-1E0BEC97D74D}" @@ -84,6 +78,24 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{C3A14577-A65 EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SIL.ServiceToolkit", "src\ServiceToolkit\src\SIL.ServiceToolkit\SIL.ServiceToolkit.csproj", "{0E40F959-C641-40A2-9750-B17A4F9F9E55}" EndProject 
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{9125C013-4F15-4761-BCD2-070524986737}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Serval", "Serval", "{A78D900F-AE52-436C-88CE-A22EAEDECD91}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{C1EF85B4-F2CD-407D-904D-BAC1BCE3A64C}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Serval.WordAlignment", "src\Serval\src\Serval.WordAlignment\Serval.WordAlignment.csproj", "{F07B5541-4BA4-4BF8-AE1A-B44BDDCEB354}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Echo", "Echo", "{D201886D-9299-4758-80E8-694DBCF8DF93}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{0904BA95-D5BF-4AC2-A919-20A785EF45F5}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "EchoEngine", "src\Echo\src\EchoEngine\EchoEngine.csproj", "{929FF600-8C7E-4498-A2A3-5534F3A3481E}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{41EE40B9-699C-4145-8AA7-0EE89C727A19}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Serval.WordAlignment.Tests", "src\Serval\test\Serval.WordAlignment.Tests\Serval.WordAlignment.Tests.csproj", "{5E3D2BC3-9A98-4106-A2BF-B1F3641DC6F5}" +EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{1DB5E6D1-17A8-4FF2-B90A-C5DFBEF63126}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SIL.ServiceToolkit.Tests", "src\ServiceToolkit\test\SIL.ServiceToolkit.Tests\SIL.ServiceToolkit.Tests.csproj", "{C50ED15A-876D-42BF-980A-388E8C49C78D}" @@ -134,10 +146,6 @@ Global {4375A7BF-E3CE-4785-91E3-2ED6FCEB074F}.Debug|Any CPU.Build.0 = Debug|Any CPU {4375A7BF-E3CE-4785-91E3-2ED6FCEB074F}.Release|Any CPU.ActiveCfg = Release|Any CPU {4375A7BF-E3CE-4785-91E3-2ED6FCEB074F}.Release|Any CPU.Build.0 = Release|Any CPU - {A9D08CA2-3CF7-4BB7-A47F-5A567FFDB2CC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - 
{A9D08CA2-3CF7-4BB7-A47F-5A567FFDB2CC}.Debug|Any CPU.Build.0 = Debug|Any CPU - {A9D08CA2-3CF7-4BB7-A47F-5A567FFDB2CC}.Release|Any CPU.ActiveCfg = Release|Any CPU - {A9D08CA2-3CF7-4BB7-A47F-5A567FFDB2CC}.Release|Any CPU.Build.0 = Release|Any CPU {0C3DF75B-B022-4EFC-882C-F276F1EC8435}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {0C3DF75B-B022-4EFC-882C-F276F1EC8435}.Debug|Any CPU.Build.0 = Debug|Any CPU {0C3DF75B-B022-4EFC-882C-F276F1EC8435}.Release|Any CPU.ActiveCfg = Release|Any CPU @@ -178,6 +186,18 @@ Global {0E40F959-C641-40A2-9750-B17A4F9F9E55}.Debug|Any CPU.Build.0 = Debug|Any CPU {0E40F959-C641-40A2-9750-B17A4F9F9E55}.Release|Any CPU.ActiveCfg = Release|Any CPU {0E40F959-C641-40A2-9750-B17A4F9F9E55}.Release|Any CPU.Build.0 = Release|Any CPU + {F07B5541-4BA4-4BF8-AE1A-B44BDDCEB354}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {F07B5541-4BA4-4BF8-AE1A-B44BDDCEB354}.Debug|Any CPU.Build.0 = Debug|Any CPU + {F07B5541-4BA4-4BF8-AE1A-B44BDDCEB354}.Release|Any CPU.ActiveCfg = Release|Any CPU + {F07B5541-4BA4-4BF8-AE1A-B44BDDCEB354}.Release|Any CPU.Build.0 = Release|Any CPU + {929FF600-8C7E-4498-A2A3-5534F3A3481E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {929FF600-8C7E-4498-A2A3-5534F3A3481E}.Debug|Any CPU.Build.0 = Debug|Any CPU + {929FF600-8C7E-4498-A2A3-5534F3A3481E}.Release|Any CPU.ActiveCfg = Release|Any CPU + {929FF600-8C7E-4498-A2A3-5534F3A3481E}.Release|Any CPU.Build.0 = Release|Any CPU + {5E3D2BC3-9A98-4106-A2BF-B1F3641DC6F5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {5E3D2BC3-9A98-4106-A2BF-B1F3641DC6F5}.Debug|Any CPU.Build.0 = Debug|Any CPU + {5E3D2BC3-9A98-4106-A2BF-B1F3641DC6F5}.Release|Any CPU.ActiveCfg = Release|Any CPU + {5E3D2BC3-9A98-4106-A2BF-B1F3641DC6F5}.Release|Any CPU.Build.0 = Release|Any CPU {C50ED15A-876D-42BF-980A-388E8C49C78D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {C50ED15A-876D-42BF-980A-388E8C49C78D}.Debug|Any CPU.Build.0 = Debug|Any CPU {C50ED15A-876D-42BF-980A-388E8C49C78D}.Release|Any CPU.ActiveCfg = Release|Any CPU @@ -197,13 +217,11 @@ 
Global {8FC30758-37FC-4819-805D-8EFF3DEDF05F} = {3E753B99-7C31-42AC-B02E-012B802F58DB} {539598C5-8634-4273-8714-A684622DDCFC} = {3E753B99-7C31-42AC-B02E-012B802F58DB} {4375A7BF-E3CE-4785-91E3-2ED6FCEB074F} = {25CDB05B-4E24-4A6E-933E-1E0BEC97D74D} - {A9D08CA2-3CF7-4BB7-A47F-5A567FFDB2CC} = {BE90915F-E16D-479C-9568-22A08F0FD8F9} {0C3DF75B-B022-4EFC-882C-F276F1EC8435} = {3E753B99-7C31-42AC-B02E-012B802F58DB} {1F020042-D7B8-4541-9691-26ECFD1FFC73} = {3E753B99-7C31-42AC-B02E-012B802F58DB} {63E4D71B-11BE-4D68-A876-5B1B5F0A4C88} = {3E753B99-7C31-42AC-B02E-012B802F58DB} {71151518-8774-44D0-8E69-D77FA447BEFA} = {BA044B98-3136-4FDE-B90F-B0975758C07F} {0E220C65-AA88-450E-AFB2-844E49060B3F} = {3E753B99-7C31-42AC-B02E-012B802F58DB} - {BE90915F-E16D-479C-9568-22A08F0FD8F9} = {53B18D34-B7C7-4B91-BCB0-1021170DFC65} {25CDB05B-4E24-4A6E-933E-1E0BEC97D74D} = {6D20F76D-9A0E-44AC-8754-B4291C75D25B} {3E753B99-7C31-42AC-B02E-012B802F58DB} = {6D20F76D-9A0E-44AC-8754-B4291C75D25B} {92805246-5285-4F0A-9BF8-6EE4A027A41B} = {33E6965E-5A58-4C6F-882E-F17C8E88A3FF} @@ -216,6 +234,14 @@ Global {B0D23A55-AB09-4C2C-B309-F4BEB3BC968D} = {40C225C2-1EEF-4D1D-9D14-1CBB86C8A1CB} {C3A14577-A654-4604-818C-4E683DD45A51} = {EA69B41C-49EF-4017-A687-44B9DF37FF98} {0E40F959-C641-40A2-9750-B17A4F9F9E55} = {C3A14577-A654-4604-818C-4E683DD45A51} + {A78D900F-AE52-436C-88CE-A22EAEDECD91} = {9125C013-4F15-4761-BCD2-070524986737} + {C1EF85B4-F2CD-407D-904D-BAC1BCE3A64C} = {A78D900F-AE52-436C-88CE-A22EAEDECD91} + {F07B5541-4BA4-4BF8-AE1A-B44BDDCEB354} = {C1EF85B4-F2CD-407D-904D-BAC1BCE3A64C} + {D201886D-9299-4758-80E8-694DBCF8DF93} = {9125C013-4F15-4761-BCD2-070524986737} + {0904BA95-D5BF-4AC2-A919-20A785EF45F5} = {D201886D-9299-4758-80E8-694DBCF8DF93} + {929FF600-8C7E-4498-A2A3-5534F3A3481E} = {0904BA95-D5BF-4AC2-A919-20A785EF45F5} + {41EE40B9-699C-4145-8AA7-0EE89C727A19} = {A78D900F-AE52-436C-88CE-A22EAEDECD91} + {5E3D2BC3-9A98-4106-A2BF-B1F3641DC6F5} = {41EE40B9-699C-4145-8AA7-0EE89C727A19} 
{1DB5E6D1-17A8-4FF2-B90A-C5DFBEF63126} = {EA69B41C-49EF-4017-A687-44B9DF37FF98} {C50ED15A-876D-42BF-980A-388E8C49C78D} = {1DB5E6D1-17A8-4FF2-B90A-C5DFBEF63126} EndGlobalSection diff --git a/deploy/serval/templates/echo-deployment.yaml b/deploy/serval/templates/echo-deployment.yaml index 72d961de..6f9f6b02 100644 --- a/deploy/serval/templates/echo-deployment.yaml +++ b/deploy/serval/templates/echo-deployment.yaml @@ -21,7 +21,7 @@ spec: containers: - command: - dotnet - - /app/echo_server/EchoTranslationEngine.dll + - /app/echo_server/EchoEngine.dll workingDir: /app/echo_server env: - name: ASPNETCORE_ENVIRONMENT diff --git a/docker-compose.withatlas.yml b/docker-compose.withatlas.yml index b23d0be0..0cd4869f 100644 --- a/docker-compose.withatlas.yml +++ b/docker-compose.withatlas.yml @@ -22,6 +22,11 @@ services: - ASPNETCORE_Translation__Engines__1__Address=http://machine-engine - ASPNETCORE_Translation__Engines__2__Type=Nmt - ASPNETCORE_Translation__Engines__2__Address=http://machine-engine + - ASPNETCORE_WordAlignment__Engines__0__Type=EchoWordAlignment + - ASPNETCORE_WordAlignment__Engines__0__Address=http://echo + - ASPNETCORE_WordAlignment__Engines__1__Type=Statistical + - ASPNETCORE_WordAlignment__Engines__1__Address=http://machine-engine + - "Bugsnag__ApiKey=${Bugsnag__ApiKey}" expose: - 80 - 81 @@ -51,6 +56,8 @@ services: - ASPNETCORE_Kestrel__Endpoints__Http__Url=http://*:80 - ASPNETCORE_Kestrel__EndpointDefaults__Protocols=Http2 - ASPNETCORE_ConnectionStrings__TranslationPlatformApi=http://serval-api:81 + - ASPNETCORE_ConnectionStrings__WordAlignmentPlatformApi=http://serval-api:81 + - "Bugsnag__ApiKey=${Bugsnag__ApiKey}" expose: - 80 ports: @@ -61,7 +68,7 @@ services: - .:/app:ro - ~/.nuget/packages:/root/.nuget/packages:ro - /var/lib/serval:/var/lib/serval - working_dir: '/app/src/Echo/src/EchoTranslationEngine' + working_dir: '/app/src/Echo/src/EchoEngine' entrypoint: - dotnet - run @@ -95,6 +102,7 @@ services: - 
SharedFile__Uri=s3://silnlp/docker-compose/ - "SharedFile__S3AccessKeyId=${AWS_ACCESS_KEY_ID:?access key needed}" - "SharedFile__S3SecretAccessKey=${AWS_SECRET_ACCESS_KEY:?secret key needed}" + - "Bugsnag__ApiKey=${Bugsnag__ApiKey}" expose: - 80 ports: @@ -139,6 +147,7 @@ services: - SharedFile__Uri=s3://silnlp/docker-compose/ - "SharedFile__S3AccessKeyId=${AWS_ACCESS_KEY_ID:?access key needed}" - "SharedFile__S3SecretAccessKey=${AWS_SECRET_ACCESS_KEY:?secret key needed}" + - "Bugsnag__ApiKey=${Bugsnag__ApiKey}" expose: - 80 ports: diff --git a/docker-compose.yml b/docker-compose.yml index be2d668f..b290078f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,6 +21,10 @@ services: - ASPNETCORE_Translation__Engines__1__Address=http://machine-engine - ASPNETCORE_Translation__Engines__2__Type=Nmt - ASPNETCORE_Translation__Engines__2__Address=http://machine-engine + - ASPNETCORE_WordAlignment__Engines__0__Type=EchoWordAlignment + - ASPNETCORE_WordAlignment__Engines__0__Address=http://echo + - ASPNETCORE_WordAlignment__Engines__1__Type=Statistical + - ASPNETCORE_WordAlignment__Engines__1__Address=http://machine-engine - "Bugsnag__ApiKey=${Bugsnag__ApiKey}" expose: - 80 @@ -53,6 +57,7 @@ services: - ASPNETCORE_Kestrel__Endpoints__Http__Url=http://*:80 - ASPNETCORE_Kestrel__EndpointDefaults__Protocols=Http2 - ASPNETCORE_ConnectionStrings__TranslationPlatformApi=http://serval-api:81 + - ASPNETCORE_ConnectionStrings__WordAlignmentPlatformApi=http://serval-api:81 - "Bugsnag__ApiKey=${Bugsnag__ApiKey}" expose: - 80 @@ -64,7 +69,7 @@ services: - .:/app:ro - ~/.nuget/packages:/root/.nuget/packages:ro - /var/lib/serval:/var/lib/serval - working_dir: '/app/src/Echo/src/EchoTranslationEngine' + working_dir: '/app/src/Echo/src/EchoEngine' entrypoint: - dotnet - run @@ -95,6 +100,8 @@ services: - BuildJob__ClearML__0__DockerImage=${MACHINE_PY_IMAGE:-ghcr.io/sillsdev/machine.py:latest} - BuildJob__ClearML__1__Queue=${CLEARML_CPU_QUEUE:-lambert_24gb.cpu_only} - 
BuildJob__ClearML__1__DockerImage=${MACHINE_PY_CPU_IMAGE:-ghcr.io/sillsdev/machine.py:latest.cpu_only} + - BuildJob__ClearML__2__Queue=lambert_24gb.cpu_only + - BuildJob__ClearML__2__DockerImage=${MACHINE_PY_CPU_IMAGE:-ghcr.io/sillsdev/machine.py:latest.cpu_only} - SharedFile__Uri=s3://silnlp/docker-compose/ - "SharedFile__S3AccessKeyId=${AWS_ACCESS_KEY_ID:?access key needed}" - "SharedFile__S3SecretAccessKey=${AWS_SECRET_ACCESS_KEY:?secret key needed}" @@ -141,6 +148,8 @@ services: - BuildJob__ClearML__0__DockerImage=${MACHINE_PY_IMAGE:-ghcr.io/sillsdev/machine.py:latest} - BuildJob__ClearML__1__Queue=${CLEARML_CPU_QUEUE:-lambert_24gb.cpu_only} - BuildJob__ClearML__1__DockerImage=${MACHINE_PY_CPU_IMAGE:-ghcr.io/sillsdev/machine.py:latest.cpu_only} + - BuildJob__ClearML__2__Queue=lambert_24gb.cpu_only + - BuildJob__ClearML__2__DockerImage=${MACHINE_PY_CPU_IMAGE:-ghcr.io/sillsdev/machine.py:latest.cpu_only} - SharedFile__Uri=s3://silnlp/docker-compose/ - "SharedFile__S3AccessKeyId=${AWS_ACCESS_KEY_ID:?access key needed}" - "SharedFile__S3SecretAccessKey=${AWS_SECRET_ACCESS_KEY:?secret key needed}" @@ -168,7 +177,7 @@ services: - /root/.nuget/packages mongo: hostname: mongo - image: mongo:6.0 + image: mongo:8.0 expose: - 27017 ports: diff --git a/dockerfile b/dockerfile index b58f3c21..4c8a78a3 100644 --- a/dockerfile +++ b/dockerfile @@ -9,7 +9,7 @@ COPY . 
./ RUN dotnet restore # Build and publish a release RUN dotnet publish ./src/Serval/src/Serval.ApiServer/Serval.ApiServer.csproj -c Release -o out_api_server -RUN dotnet publish ./src/Echo/src/EchoTranslationEngine/EchoTranslationEngine.csproj -c Release -o out_echo_server +RUN dotnet publish ./src/Echo/src/EchoEngine/EchoEngine.csproj -c Release -o out_echo_server RUN dotnet publish ./src/Machine/src/Serval.Machine.EngineServer/Serval.Machine.EngineServer.csproj -c Release -o out_machine_engine_server RUN dotnet publish ./src/Machine/src/Serval.Machine.JobServer/Serval.Machine.JobServer.csproj -c Release -o out_machine_job_server diff --git a/src/Echo/src/EchoTranslationEngine/BackgroundTaskQueue.cs b/src/Echo/src/EchoEngine/BackgroundTaskQueue.cs similarity index 97% rename from src/Echo/src/EchoTranslationEngine/BackgroundTaskQueue.cs rename to src/Echo/src/EchoEngine/BackgroundTaskQueue.cs index 6ee3fd4e..03ef178f 100644 --- a/src/Echo/src/EchoTranslationEngine/BackgroundTaskQueue.cs +++ b/src/Echo/src/EchoEngine/BackgroundTaskQueue.cs @@ -1,4 +1,4 @@ -namespace EchoTranslationEngine; +namespace EchoEngine; public class BackgroundTaskQueue { diff --git a/src/Echo/src/EchoTranslationEngine/BackgroundTaskService.cs b/src/Echo/src/EchoEngine/BackgroundTaskService.cs similarity index 97% rename from src/Echo/src/EchoTranslationEngine/BackgroundTaskService.cs rename to src/Echo/src/EchoEngine/BackgroundTaskService.cs index cf901427..001895f0 100644 --- a/src/Echo/src/EchoTranslationEngine/BackgroundTaskService.cs +++ b/src/Echo/src/EchoEngine/BackgroundTaskService.cs @@ -1,4 +1,4 @@ -namespace EchoTranslationEngine; +namespace EchoEngine; public class BackgroundTaskService( BackgroundTaskQueue taskQueue, diff --git a/src/Echo/src/EchoTranslationEngine/EchoTranslationEngine.csproj b/src/Echo/src/EchoEngine/EchoEngine.csproj similarity index 100% rename from src/Echo/src/EchoTranslationEngine/EchoTranslationEngine.csproj rename to 
src/Echo/src/EchoEngine/EchoEngine.csproj diff --git a/src/Echo/src/EchoTranslationEngine/HealthServiceV1.cs b/src/Echo/src/EchoEngine/HealthServiceV1.cs similarity index 94% rename from src/Echo/src/EchoTranslationEngine/HealthServiceV1.cs rename to src/Echo/src/EchoEngine/HealthServiceV1.cs index 05bc98c1..025b2fee 100644 --- a/src/Echo/src/EchoTranslationEngine/HealthServiceV1.cs +++ b/src/Echo/src/EchoEngine/HealthServiceV1.cs @@ -1,6 +1,6 @@ using Serval.Health.V1; -namespace EchoTranslationEngine; +namespace EchoEngine; public class HealthServiceV1(HealthCheckService healthCheckService) : HealthApi.HealthApiBase { diff --git a/src/Echo/src/EchoTranslationEngine/Program.cs b/src/Echo/src/EchoEngine/Program.cs similarity index 58% rename from src/Echo/src/EchoTranslationEngine/Program.cs rename to src/Echo/src/EchoEngine/Program.cs index 352c536a..6e6c9c07 100644 --- a/src/Echo/src/EchoTranslationEngine/Program.cs +++ b/src/Echo/src/EchoEngine/Program.cs @@ -1,10 +1,24 @@ +using Serval.Translation.V1; +using Serval.WordAlignment.V1; + WebApplicationBuilder builder = WebApplication.CreateBuilder(args); // Add services to the container. 
-builder.Services.AddGrpcClient(o => -{ - o.Address = new Uri(builder.Configuration.GetConnectionString("TranslationPlatformApi")!); -}); +builder.Services.AddGrpcClient( + "Translation", + o => + { + o.Address = new Uri(builder.Configuration.GetConnectionString("TranslationPlatformApi")!); + } +); +builder.Services.AddGrpcClient( + "WordAlignment", + o => + { + o.Address = new Uri(builder.Configuration.GetConnectionString("WordAlignmentPlatformApi")!); + } +); + builder.Services.AddGrpc(); builder.Services.AddHostedService(); @@ -20,6 +34,8 @@ WebApplication app = builder.Build(); app.MapGrpcService(); +app.MapGrpcService(); + app.MapGrpcService(); app.Run(); diff --git a/src/Echo/src/EchoTranslationEngine/Properties/launchSettings.json b/src/Echo/src/EchoEngine/Properties/launchSettings.json similarity index 90% rename from src/Echo/src/EchoTranslationEngine/Properties/launchSettings.json rename to src/Echo/src/EchoEngine/Properties/launchSettings.json index 961e424a..3e0605d4 100644 --- a/src/Echo/src/EchoTranslationEngine/Properties/launchSettings.json +++ b/src/Echo/src/EchoEngine/Properties/launchSettings.json @@ -1,7 +1,7 @@ { "$schema": "https://json.schemastore.org/launchsettings.json", "profiles": { - "EchoTranslationEngine": { + "EchoEngine": { "commandName": "Project", "dotnetRunMessages": true, "launchBrowser": false, @@ -11,4 +11,4 @@ } } } -} +} \ No newline at end of file diff --git a/src/Echo/src/EchoEngine/TranslationEngineServiceV1.cs b/src/Echo/src/EchoEngine/TranslationEngineServiceV1.cs new file mode 100644 index 00000000..1a31331e --- /dev/null +++ b/src/Echo/src/EchoEngine/TranslationEngineServiceV1.cs @@ -0,0 +1,331 @@ +using Serval.Translation.V1; + +namespace EchoEngine; + +public class TranslationEngineServiceV1(BackgroundTaskQueue taskQueue) : TranslationEngineApi.TranslationEngineApiBase +{ + private static readonly Empty Empty = new(); + private readonly BackgroundTaskQueue _taskQueue = taskQueue; + + public override Task 
Create(CreateRequest request, ServerCallContext context) + { + if (request.SourceLanguage != request.TargetLanguage) + { + Status status = new Status(StatusCode.InvalidArgument, "Source and target languages must be the same"); + throw new RpcException(status); + } + return Task.FromResult(new CreateResponse { IsModelPersisted = true }); + } + + public override Task Delete(DeleteRequest request, ServerCallContext context) + { + return Task.FromResult(Empty); + } + + public override Task Translate(TranslateRequest request, ServerCallContext context) + { + string[] tokens = request.Segment.Split(); + var response = new TranslateResponse + { + Results = + { + new TranslationResult + { + Translation = request.Segment, + SourceTokens = { tokens }, + TargetTokens = { tokens }, + Confidences = { Enumerable.Repeat(1.0, tokens.Length) }, + Sources = + { + Enumerable.Repeat( + new TranslationSources { Values = { TranslationSource.Primary } }, + tokens.Length + ) + }, + Alignment = + { + Enumerable + .Range(0, tokens.Length) + .Select(i => new AlignedWordPair { SourceIndex = i, TargetIndex = i }) + }, + Phrases = + { + new Phrase + { + SourceSegmentStart = 0, + SourceSegmentEnd = tokens.Length, + TargetSegmentCut = tokens.Length + } + } + } + } + }; + return Task.FromResult(response); + } + + public override async Task StartBuild(StartBuildRequest request, ServerCallContext context) + { + await _taskQueue.QueueBackgroundWorkItemAsync( + async (services, cancellationToken) => + { + TranslationPlatformApi.TranslationPlatformApiClient client = + services.GetRequiredService(); + await client.BuildStartedAsync( + new BuildStartedRequest { BuildId = request.BuildId }, + cancellationToken: cancellationToken + ); + + try + { + using ( + AsyncClientStreamingCall call = client.InsertInferences( + cancellationToken: cancellationToken + ) + ) + { + foreach (ParallelCorpus corpus in request.Corpora) + { + var sourceFiles = corpus + .SourceCorpora.SelectMany(sc => + sc.Files.Where(f => + ( 
+ sc.PretranslateAll + || sc.PretranslateTextIds is null + || sc.PretranslateTextIds.Contains(f.TextId) + ) + && f.Format == FileFormat.Text + ) + ) + .ToDictionary(f => f.TextId, f => f.Location); + var targetFiles = corpus + .TargetCorpora.SelectMany(tc => + tc.Files.Where(f => + ( + tc.PretranslateAll + || tc.PretranslateTextIds is null + || tc.PretranslateTextIds.Contains(f.TextId) + ) + && f.Format == FileFormat.Text + ) + ) + .ToDictionary(f => f.TextId, f => f.Location); + + foreach (KeyValuePair sourceFile in sourceFiles) + { + string[] sourceLines = await File.ReadAllLinesAsync( + sourceFile.Value, + cancellationToken + ); + + if (targetFiles.TryGetValue(sourceFile.Key, out string? targetPath)) + { + string[] targetLines = await File.ReadAllLinesAsync(targetPath, cancellationToken); + bool isTabSeparated = (sourceLines.Length > 0) && sourceLines[0].Contains('/'); + if (!isTabSeparated) + { + int lineNum = 1; + foreach ( + (string sourceLine, string targetLine) in sourceLines + .Select(l => l.Trim()) + .Zip(targetLines.Select(l => l.Trim())) + ) + { + if (sourceLine.Length > 0 && targetLine.Length == 0) + { + await call.RequestStream.WriteAsync( + new InsertInferencesRequest + { + EngineId = request.EngineId, + CorpusId = corpus.Id, + TextId = sourceFile.Key, + Refs = { $"{sourceFile.Key}:{lineNum}" }, + Translation = sourceLine + }, + cancellationToken + ); + } + lineNum++; + } + } + else + { + var sourceLinesDict = sourceLines.ToDictionary( + l => l.Split('\t')[0].Trim(), + l => l.Split('\t')[1].Trim() + ); + var targetLinesDict = targetLines.ToDictionary( + l => l.Split('\t')[0].Trim(), + l => l.Contains('\t') ? l.Split('\t')[1].Trim() : string.Empty + ); + foreach (KeyValuePair targetLineKVPair in targetLinesDict) + { + string? sourceLine = null; + sourceLinesDict.TryGetValue(targetLineKVPair.Key, out sourceLine); + sourceLine ??= string.Empty; + string? 
targetLine = targetLineKVPair.Value; + if (sourceLine.Length > 0 && targetLine.Length == 0) + { + await call.RequestStream.WriteAsync( + new InsertInferencesRequest + { + EngineId = request.EngineId, + CorpusId = corpus.Id, + TextId = sourceFile.Key, + Refs = { $"{sourceFile.Key}:{targetLineKVPair.Key}" }, + Translation = sourceLine + }, + cancellationToken + ); + } + } + } + } + else + { + bool isTabSeparated = (sourceLines.Length > 0) && sourceLines[0].Contains('/'); + if (!isTabSeparated) + { + int lineNum = 1; + foreach (string sourceLine in sourceLines.Select(l => l.Trim())) + { + if (sourceLine.Length > 0) + { + await call.RequestStream.WriteAsync( + new InsertInferencesRequest + { + EngineId = request.EngineId, + CorpusId = corpus.Id, + TextId = sourceFile.Key, + Refs = { $"{sourceFile.Key}:{lineNum}" }, + Translation = sourceLine + }, + cancellationToken + ); + } + lineNum++; + } + } + else + { + foreach (string sourceLine in sourceLines.Select(l => l.Trim())) + { + if (sourceLine.Length > 0) + { + await call.RequestStream.WriteAsync( + new InsertInferencesRequest + { + EngineId = request.EngineId, + CorpusId = corpus.Id, + TextId = sourceFile.Key, + Refs = { $"{sourceFile.Key}:{sourceLine.Split('\t')[0]}" }, + Translation = sourceLine.Contains('\t') + ? 
sourceLine.Split('\t')[1].Trim() + : string.Empty + }, + cancellationToken + ); + } + } + } + } + } + } + + await call.RequestStream.CompleteAsync(); + await call; + } + + await client.BuildCompletedAsync( + new BuildCompletedRequest { BuildId = request.BuildId, Confidence = 1.0 }, + cancellationToken: CancellationToken.None + ); + } + catch (OperationCanceledException) + { + await client.BuildCanceledAsync( + new BuildCanceledRequest { BuildId = request.BuildId }, + cancellationToken: CancellationToken.None + ); + } + catch (Exception e) + { + await client.BuildFaultedAsync( + new BuildFaultedRequest { BuildId = request.BuildId, Message = e.Message }, + cancellationToken: CancellationToken.None + ); + } + } + ); + + return Empty; + } + + public override Task TrainSegmentPair(TrainSegmentPairRequest request, ServerCallContext context) + { + return Task.FromResult(Empty); + } + + public override Task GetWordGraph(GetWordGraphRequest request, ServerCallContext context) + { + string[] tokens = request.Segment.Split(); + return Task.FromResult( + new GetWordGraphResponse + { + WordGraph = new WordGraph + { + InitialStateScore = 0.0, + SourceTokens = { tokens }, + Arcs = + { + Enumerable + .Range(0, tokens.Length - 1) + .Select(index => new WordGraphArc + { + PrevState = index, + NextState = index + 1, + Score = 1.0, + TargetTokens = { tokens[index] }, + Confidences = { 1.0 }, + SourceSegmentStart = index, + SourceSegmentEnd = index + 1, + Alignment = + { + new AlignedWordPair { SourceIndex = 0, TargetIndex = 0 } + } + }) + }, + FinalStates = { tokens.Length } + } + } + ); + } + + public override Task GetModelDownloadUrl( + GetModelDownloadUrlRequest request, + ServerCallContext context + ) + { + var response = new GetModelDownloadUrlResponse + { + Url = "https://example.com/model", + ModelRevision = 1, + ExpiresAt = DateTime.UtcNow.AddHours(1).ToTimestamp() + }; + return Task.FromResult(response); + } + + public override Task GetQueueSize(GetQueueSizeRequest request, 
ServerCallContext context) + { + return Task.FromResult(new GetQueueSizeResponse { Size = 0 }); + } + + public override Task GetLanguageInfo( + GetLanguageInfoRequest request, + ServerCallContext context + ) + { + return Task.FromResult( + new GetLanguageInfoResponse { InternalCode = request.Language + "_echo", IsNative = true, } + ); + } +} diff --git a/src/Echo/src/EchoTranslationEngine/Usings.cs b/src/Echo/src/EchoEngine/Usings.cs similarity index 60% rename from src/Echo/src/EchoTranslationEngine/Usings.cs rename to src/Echo/src/EchoEngine/Usings.cs index c02c0f22..c8280f0d 100644 --- a/src/Echo/src/EchoTranslationEngine/Usings.cs +++ b/src/Echo/src/EchoEngine/Usings.cs @@ -1,7 +1,5 @@ global using System.Threading.Channels; -global using EchoTranslationEngine; +global using EchoEngine; global using Google.Protobuf.WellKnownTypes; global using Grpc.Core; global using Microsoft.Extensions.Diagnostics.HealthChecks; -global using Serval.Translation.V1; -global using SIL.ServiceToolkit.Utils; diff --git a/src/Echo/src/EchoEngine/WordAlignmentEngineServiceV1.cs b/src/Echo/src/EchoEngine/WordAlignmentEngineServiceV1.cs new file mode 100644 index 00000000..3d3c5e0a --- /dev/null +++ b/src/Echo/src/EchoEngine/WordAlignmentEngineServiceV1.cs @@ -0,0 +1,221 @@ +using Serval.WordAlignment.V1; + +namespace EchoEngine; + +public class WordAlignmentEngineServiceV1(BackgroundTaskQueue taskQueue) + : WordAlignmentEngineApi.WordAlignmentEngineApiBase +{ + private static readonly Empty Empty = new(); + private readonly BackgroundTaskQueue _taskQueue = taskQueue; + + public override Task Create(CreateRequest request, ServerCallContext context) + { + if (request.SourceLanguage != request.TargetLanguage) + { + Status status = new Status(StatusCode.InvalidArgument, "Source and target languages must be the same"); + throw new RpcException(status); + } + return Task.FromResult(Empty); + } + + public override Task Delete(DeleteRequest request, ServerCallContext context) + { + return 
Task.FromResult(Empty); + } + + public static IEnumerable GenerateAlignedWordPairs(int number) + { + if (number < 0) + { + throw new ArgumentOutOfRangeException(nameof(number), "Number must be non-negative"); + } + return Enumerable.Range(0, number).Select(i => new AlignedWordPair { SourceIndex = i, TargetIndex = i }); + } + + public override Task GetWordAlignment( + GetWordAlignmentRequest request, + ServerCallContext context + ) + { + string[] sourceTokens = request.SourceSegment.Split(); + string[] targetTokens = request.TargetSegment.Split(); + int minLength = Math.Min(sourceTokens.Length, targetTokens.Length); + + var response = new GetWordAlignmentResponse + { + Result = new WordAlignmentResult + { + SourceTokens = { sourceTokens }, + TargetTokens = { targetTokens }, + Confidences = { Enumerable.Repeat(1.0, minLength) }, + Alignment = { GenerateAlignedWordPairs(minLength) } + } + }; + return Task.FromResult(response); + } + + public override async Task StartBuild(StartBuildRequest request, ServerCallContext context) + { + await _taskQueue.QueueBackgroundWorkItemAsync( + async (services, cancellationToken) => + { + WordAlignmentPlatformApi.WordAlignmentPlatformApiClient client = + services.GetRequiredService(); + await client.BuildStartedAsync( + new BuildStartedRequest { BuildId = request.BuildId }, + cancellationToken: cancellationToken + ); + + try + { + using ( + AsyncClientStreamingCall call = client.InsertInferences( + cancellationToken: cancellationToken + ) + ) + { + foreach (ParallelCorpus corpus in request.Corpora) + { + var sourceFiles = corpus + .SourceCorpora.SelectMany(sc => + sc.Files.Where(f => + ( + sc.WordAlignOnAll + || sc.WordAlignOnTextIds is null + || sc.WordAlignOnTextIds.Contains(f.TextId) + ) + && f.Format == FileFormat.Text + ) + ) + .ToDictionary(f => f.TextId, f => f.Location); + var targetFiles = corpus + .TargetCorpora.SelectMany(tc => + tc.Files.Where(f => + ( + tc.WordAlignOnAll + || tc.WordAlignOnTextIds is null + || 
tc.WordAlignOnTextIds.Contains(f.TextId) + ) + && f.Format == FileFormat.Text + ) + ) + .ToDictionary(f => f.TextId, f => f.Location); + + foreach (KeyValuePair sourceFile in sourceFiles) + { + string[] sourceLines = await File.ReadAllLinesAsync( + sourceFile.Value, + cancellationToken + ); + + if (targetFiles.TryGetValue(sourceFile.Key, out string? targetPath)) + { + string[] targetLines = await File.ReadAllLinesAsync(targetPath, cancellationToken); + bool isTabSeparated = (sourceLines.Length > 0) && sourceLines[0].Contains('/'); + if (!isTabSeparated) + { + int lineNum = 1; + foreach ( + (string sourceLine, string targetLine) in sourceLines + .Select(l => l.Trim()) + .Zip(targetLines.Select(l => l.Trim())) + ) + { + if (sourceLine.Length > 0 && targetLine.Length == 0) + { + int minLength = Math.Min( + sourceLine.Split().Length, + targetLine.Split().Length + ); + await call.RequestStream.WriteAsync( + new InsertInferencesRequest + { + EngineId = request.EngineId, + CorpusId = corpus.Id, + TextId = sourceFile.Key, + Refs = { $"{sourceFile.Key}:{lineNum}" }, + SourceTokens = { sourceLine.Split() }, + TargetTokens = { targetLine.Split() }, + Confidences = { Enumerable.Repeat(1.0, minLength) }, + Alignment = { GenerateAlignedWordPairs(minLength) } + }, + cancellationToken + ); + } + lineNum++; + } + } + else + { + var sourceLinesDict = sourceLines.ToDictionary( + l => l.Split('\t')[0].Trim(), + l => l.Split('\t')[1].Trim() + ); + var targetLinesDict = targetLines.ToDictionary( + l => l.Split('\t')[0].Trim(), + l => l.Contains('\t') ? l.Split('\t')[1].Trim() : string.Empty + ); + foreach (KeyValuePair targetLineKVPair in targetLinesDict) + { + string? sourceLine = null; + sourceLinesDict.TryGetValue(targetLineKVPair.Key, out sourceLine); + sourceLine ??= string.Empty; + string? 
targetLine = targetLineKVPair.Value; + if (sourceLine.Length > 0 && targetLine.Length == 0) + { + int minLength = Math.Min( + sourceLine.Split().Length, + targetLine.Split().Length + ); + await call.RequestStream.WriteAsync( + new InsertInferencesRequest + { + EngineId = request.EngineId, + CorpusId = corpus.Id, + TextId = sourceFile.Key, + Refs = { $"{sourceFile.Key}:{targetLineKVPair.Key}" }, + SourceTokens = { sourceLine.Split() }, + TargetTokens = { targetLine.Split() }, + Confidences = + { + Enumerable.Repeat(1.0, sourceLine.Split().Length) + }, + Alignment = { GenerateAlignedWordPairs(minLength) } + }, + cancellationToken + ); + } + } + } + } + } + } + await call.RequestStream.CompleteAsync(); + await call; + } + + await client.BuildCompletedAsync( + new BuildCompletedRequest { BuildId = request.BuildId, Confidence = 1.0 }, + cancellationToken: CancellationToken.None + ); + } + catch (OperationCanceledException) + { + await client.BuildCanceledAsync( + new BuildCanceledRequest { BuildId = request.BuildId }, + cancellationToken: CancellationToken.None + ); + } + catch (Exception e) + { + await client.BuildFaultedAsync( + new BuildFaultedRequest { BuildId = request.BuildId, Message = e.Message }, + cancellationToken: CancellationToken.None + ); + } + } + ); + + return Empty; + } +} diff --git a/src/Echo/src/EchoTranslationEngine/appsettings.Development.json b/src/Echo/src/EchoEngine/appsettings.Development.json similarity index 66% rename from src/Echo/src/EchoTranslationEngine/appsettings.Development.json rename to src/Echo/src/EchoEngine/appsettings.Development.json index 838bc6f4..25895bf0 100644 --- a/src/Echo/src/EchoTranslationEngine/appsettings.Development.json +++ b/src/Echo/src/EchoEngine/appsettings.Development.json @@ -5,7 +5,8 @@ } }, "ConnectionStrings": { - "TranslationPlatformApi": "http://localhost:8080" + "TranslationPlatformApi": "http://localhost:8080", + "WordAlignmentPlatformApi": "http://localhost:8081" }, "Logging": { "LogLevel": { @@ 
-13,4 +14,4 @@ "Microsoft.AspNetCore": "Warning" } } -} +} \ No newline at end of file diff --git a/src/Echo/src/EchoTranslationEngine/appsettings.Production.json b/src/Echo/src/EchoEngine/appsettings.Production.json similarity index 100% rename from src/Echo/src/EchoTranslationEngine/appsettings.Production.json rename to src/Echo/src/EchoEngine/appsettings.Production.json diff --git a/src/Echo/src/EchoTranslationEngine/appsettings.Staging.json b/src/Echo/src/EchoEngine/appsettings.Staging.json similarity index 100% rename from src/Echo/src/EchoTranslationEngine/appsettings.Staging.json rename to src/Echo/src/EchoEngine/appsettings.Staging.json diff --git a/src/Echo/src/EchoTranslationEngine/appsettings.json b/src/Echo/src/EchoEngine/appsettings.json similarity index 100% rename from src/Echo/src/EchoTranslationEngine/appsettings.json rename to src/Echo/src/EchoEngine/appsettings.json diff --git a/src/Echo/src/EchoTranslationEngine/TranslationEngineServiceV1.cs b/src/Echo/src/EchoTranslationEngine/TranslationEngineServiceV1.cs deleted file mode 100644 index 3258202a..00000000 --- a/src/Echo/src/EchoTranslationEngine/TranslationEngineServiceV1.cs +++ /dev/null @@ -1,284 +0,0 @@ -namespace EchoTranslationEngine; - -public class TranslationEngineServiceV1( - BackgroundTaskQueue taskQueue, - IParallelCorpusPreprocessingService parallelCorpusPreprocessingService -) : TranslationEngineApi.TranslationEngineApiBase -{ - private static readonly Empty Empty = new(); - private readonly BackgroundTaskQueue _taskQueue = taskQueue; - - private readonly IParallelCorpusPreprocessingService _parallelCorpusPreprocessingService = - parallelCorpusPreprocessingService; - - public override Task Create(CreateRequest request, ServerCallContext context) - { - if (request.SourceLanguage != request.TargetLanguage) - { - Status status = new Status(StatusCode.InvalidArgument, "Source and target languages must be the same"); - throw new RpcException(status); - } - return Task.FromResult(new 
CreateResponse { IsModelPersisted = true }); - } - - public override Task Delete(DeleteRequest request, ServerCallContext context) - { - return Task.FromResult(Empty); - } - - public override Task Translate(TranslateRequest request, ServerCallContext context) - { - string[] tokens = request.Segment.Split(); - var response = new TranslateResponse - { - Results = - { - new TranslationResult - { - Translation = request.Segment, - SourceTokens = { tokens }, - TargetTokens = { tokens }, - Confidences = { Enumerable.Repeat(1.0, tokens.Length) }, - Sources = - { - Enumerable.Repeat( - new TranslationSources { Values = { TranslationSource.Primary } }, - tokens.Length - ) - }, - Alignment = - { - Enumerable - .Range(0, tokens.Length) - .Select(i => new AlignedWordPair { SourceIndex = i, TargetIndex = i }) - }, - Phrases = - { - new Phrase - { - SourceSegmentStart = 0, - SourceSegmentEnd = tokens.Length, - TargetSegmentCut = tokens.Length - } - } - } - } - }; - return Task.FromResult(response); - } - - public override async Task StartBuild(StartBuildRequest request, ServerCallContext context) - { - await _taskQueue.QueueBackgroundWorkItemAsync( - async (services, cancellationToken) => - { - TranslationPlatformApi.TranslationPlatformApiClient client = - services.GetRequiredService(); - await client.BuildStartedAsync( - new BuildStartedRequest { BuildId = request.BuildId }, - cancellationToken: cancellationToken - ); - - try - { - List pretranslationsRequests = []; - await _parallelCorpusPreprocessingService.PreprocessAsync( - request.Corpora.Select(Map).ToList(), - row => Task.CompletedTask, - (row, corpus) => - { - pretranslationsRequests.Add( - new InsertPretranslationsRequest - { - EngineId = request.EngineId, - CorpusId = corpus.Id, - TextId = row.TextId, - Refs = { row.Refs.Select(r => r.ToString()) }, - Translation = row.SourceSegment - } - ); - return Task.CompletedTask; - }, - false - ); - using ( - AsyncClientStreamingCall call = - 
client.InsertPretranslations(cancellationToken: cancellationToken) - ) - { - foreach (InsertPretranslationsRequest request in pretranslationsRequests) - { - await call.RequestStream.WriteAsync(request, cancellationToken); - } - await call.RequestStream.CompleteAsync(); - await call; - } - - await client.BuildCompletedAsync( - new BuildCompletedRequest { BuildId = request.BuildId, Confidence = 1.0 }, - cancellationToken: CancellationToken.None - ); - } - catch (OperationCanceledException) - { - await client.BuildCanceledAsync( - new BuildCanceledRequest { BuildId = request.BuildId }, - cancellationToken: CancellationToken.None - ); - } - catch (Exception e) - { - await client.BuildFaultedAsync( - new BuildFaultedRequest { BuildId = request.BuildId, Message = e.Message }, - cancellationToken: CancellationToken.None - ); - } - } - ); - - return Empty; - } - - public override Task TrainSegmentPair(TrainSegmentPairRequest request, ServerCallContext context) - { - return Task.FromResult(Empty); - } - - public override Task GetWordGraph(GetWordGraphRequest request, ServerCallContext context) - { - string[] tokens = request.Segment.Split(); - return Task.FromResult( - new GetWordGraphResponse - { - WordGraph = new WordGraph - { - InitialStateScore = 0.0, - SourceTokens = { tokens }, - Arcs = - { - Enumerable - .Range(0, tokens.Length - 1) - .Select(index => new WordGraphArc - { - PrevState = index, - NextState = index + 1, - Score = 1.0, - TargetTokens = { tokens[index] }, - Confidences = { 1.0 }, - SourceSegmentStart = index, - SourceSegmentEnd = index + 1, - Alignment = - { - new AlignedWordPair { SourceIndex = 0, TargetIndex = 0 } - } - }) - }, - FinalStates = { tokens.Length } - } - } - ); - } - - public override Task GetModelDownloadUrl( - GetModelDownloadUrlRequest request, - ServerCallContext context - ) - { - var response = new GetModelDownloadUrlResponse - { - Url = "https://example.com/model", - ModelRevision = 1, - ExpiresAt = 
DateTime.UtcNow.AddHours(1).ToTimestamp() - }; - return Task.FromResult(response); - } - - public override Task GetQueueSize(GetQueueSizeRequest request, ServerCallContext context) - { - return Task.FromResult(new GetQueueSizeResponse { Size = 0 }); - } - - public override Task GetLanguageInfo( - GetLanguageInfoRequest request, - ServerCallContext context - ) - { - return Task.FromResult( - new GetLanguageInfoResponse { InternalCode = request.Language + "_echo", IsNative = true, } - ); - } - - private static SIL.ServiceToolkit.Models.ParallelCorpus Map(ParallelCorpus source) - { - return new SIL.ServiceToolkit.Models.ParallelCorpus - { - Id = source.Id, - SourceCorpora = source.SourceCorpora.Select(Map).ToList(), - TargetCorpora = source.TargetCorpora.Select(Map).ToList() - }; - } - - private static SIL.ServiceToolkit.Models.MonolingualCorpus Map(MonolingualCorpus source) - { - var trainOnChapters = source.TrainOnChapters.ToDictionary( - kvp => kvp.Key, - kvp => kvp.Value.Chapters.ToHashSet() - ); - var trainOnTextIds = source.TrainOnTextIds.ToHashSet(); - FilterChoice trainingFilter = GetFilterChoice(trainOnChapters, trainOnTextIds, source.TrainOnAll); - - var pretranslateChapters = source.PretranslateChapters.ToDictionary( - kvp => kvp.Key, - kvp => kvp.Value.Chapters.ToHashSet() - ); - var pretranslateTextIds = source.PretranslateTextIds.ToHashSet(); - FilterChoice pretranslateFilter = GetFilterChoice( - pretranslateChapters, - pretranslateTextIds, - source.PretranslateAll - ); - - return new SIL.ServiceToolkit.Models.MonolingualCorpus - { - Id = source.Id, - Language = source.Language, - Files = source.Files.Select(Map).ToList(), - TrainOnChapters = trainingFilter == FilterChoice.Chapters ? trainOnChapters : null, - TrainOnTextIds = trainingFilter == FilterChoice.TextIds ? trainOnTextIds : null, - PretranslateChapters = pretranslateFilter == FilterChoice.Chapters ? pretranslateChapters : null, - PretranslateTextIds = pretranslateFilter == FilterChoice.TextIds ? 
pretranslateTextIds : null - }; - } - - private static SIL.ServiceToolkit.Models.CorpusFile Map(CorpusFile source) - { - return new SIL.ServiceToolkit.Models.CorpusFile - { - Location = source.Location, - Format = (SIL.ServiceToolkit.Models.FileFormat)source.Format, - TextId = source.TextId - }; - } - - private enum FilterChoice - { - Chapters, - TextIds, - None - } - - private static FilterChoice GetFilterChoice( - IReadOnlyDictionary> chapters, - HashSet textIds, - bool noFilter - ) - { - // Only either textIds or Scripture Range will be used at a time - // TextIds may be an empty array, so prefer that if both are empty (which applies to both scripture and text) - if (noFilter || (chapters is null && textIds is null)) - return FilterChoice.None; - if (chapters is null || chapters.Count == 0) - return FilterChoice.TextIds; - return FilterChoice.Chapters; - } -} diff --git a/src/Machine/src/Serval.Machine.EngineServer/Program.cs b/src/Machine/src/Serval.Machine.EngineServer/Program.cs index 72a62b25..cba795f8 100644 --- a/src/Machine/src/Serval.Machine.EngineServer/Program.cs +++ b/src/Machine/src/Serval.Machine.EngineServer/Program.cs @@ -10,6 +10,9 @@ .AddMongoDataAccess() .AddMongoHangfireJobClient() .AddServalTranslationEngineService() + .AddServalWordAlignmentEngineService() + .AddServalTranslationPlatformService() + .AddServalWordAlignmentPlatformService() .AddModelCleanupService() .AddMessageOutboxDeliveryService() .AddClearMLService(); @@ -35,6 +38,7 @@ var app = builder.Build(); app.MapServalTranslationEngineService(); +app.MapServalWordAlignmentEngineService(); app.MapHangfireDashboard(); app.Run(); diff --git a/src/Machine/src/Serval.Machine.EngineServer/appsettings.json b/src/Machine/src/Serval.Machine.EngineServer/appsettings.json index f17d77f3..03344cef 100644 --- a/src/Machine/src/Serval.Machine.EngineServer/appsettings.json +++ b/src/Machine/src/Serval.Machine.EngineServer/appsettings.json @@ -10,18 +10,27 @@ "SmtTransfer", "Nmt" ], + 
"WordAlignmentEngines": [ + "Statistical" + ], "BuildJob": { "ClearML": [ { - "TranslationEngineType": "Nmt", + "EngineType": "Nmt", "ModelType": "huggingface", "Queue": "jobs_backlog", "DockerImage": "ghcr.io/sillsdev/machine.py:latest" }, { - "TranslationEngineType": "SmtTransfer", + "EngineType": "SmtTransfer", "ModelType": "thot", - "Queue": "cpu_only", + "Queue": "jobs_backlog.cpu_only", + "DockerImage": "ghcr.io/sillsdev/machine.py:latest" + }, + { + "EngineType": "Statistical", + "ModelType": "thot", + "Queue": "jobs_backlog.cpu_only", "DockerImage": "ghcr.io/sillsdev/machine.py:latest" } ] @@ -29,6 +38,9 @@ "SmtTransferEngine": { "EnginesDir": "/var/lib/machine/engines" }, + "WordAlignmentEngine": { + "EnginesDir": "/var/lib/machine/engines" + }, "ClearML": { "BuildPollingEnabled": true }, diff --git a/src/Machine/src/Serval.Machine.JobServer/Program.cs b/src/Machine/src/Serval.Machine.JobServer/Program.cs index f1ad384d..eea567ec 100644 --- a/src/Machine/src/Serval.Machine.JobServer/Program.cs +++ b/src/Machine/src/Serval.Machine.JobServer/Program.cs @@ -8,7 +8,8 @@ .AddMongoDataAccess() .AddMongoHangfireJobClient() .AddHangfireJobServer() - .AddServalPlatformService() + .AddServalTranslationPlatformService() + .AddServalWordAlignmentPlatformService() .AddClearMLService(); if (builder.Environment.IsDevelopment()) { diff --git a/src/Machine/src/Serval.Machine.JobServer/appsettings.json b/src/Machine/src/Serval.Machine.JobServer/appsettings.json index d5aada0d..95df90ca 100644 --- a/src/Machine/src/Serval.Machine.JobServer/appsettings.json +++ b/src/Machine/src/Serval.Machine.JobServer/appsettings.json @@ -10,16 +10,25 @@ "SmtTransfer", "Nmt" ], + "WordAlignmentEngines": [ + "Statistical" + ], "BuildJob": { "ClearML": [ { - "TranslationEngineType": "Nmt", + "EngineType": "Nmt", "ModelType": "huggingface", "Queue": "jobs_backlog", "DockerImage": "ghcr.io/sillsdev/machine.py:latest" }, { - "TranslationEngineType": "SmtTransfer", + "EngineType": "SmtTransfer", 
+ "ModelType": "thot", + "Queue": "jobs_backlog.cpu_only", + "DockerImage": "ghcr.io/sillsdev/machine.py:latest" + }, + { + "EngineType": "Statistical", "ModelType": "thot", "Queue": "jobs_backlog.cpu_only", "DockerImage": "ghcr.io/sillsdev/machine.py:latest" @@ -30,6 +39,9 @@ "SmtTransferEngine": { "EnginesDir": "/var/lib/machine/engines" }, + "WordAlignmentEngine": { + "EnginesDir": "/var/lib/machine/engines" + }, "ClearML": { "BuildPollingEnabled": false }, diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/ClearMLBuildQueue.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/ClearMLBuildQueue.cs index 53e25245..593424de 100644 --- a/src/Machine/src/Serval.Machine.Shared/Configuration/ClearMLBuildQueue.cs +++ b/src/Machine/src/Serval.Machine.Shared/Configuration/ClearMLBuildQueue.cs @@ -2,7 +2,7 @@ public class ClearMLBuildQueue { - public TranslationEngineType TranslationEngineType { get; set; } + public string EngineType { get; set; } = ""; public string ModelType { get; set; } = ""; public string Queue { get; set; } = "default"; public string DockerImage { get; set; } = ""; diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/IEndpointRouteBuilderExtensions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/IEndpointRouteBuilderExtensions.cs index 107de6c2..1392a791 100644 --- a/src/Machine/src/Serval.Machine.Shared/Configuration/IEndpointRouteBuilderExtensions.cs +++ b/src/Machine/src/Serval.Machine.Shared/Configuration/IEndpointRouteBuilderExtensions.cs @@ -9,4 +9,11 @@ public static IEndpointRouteBuilder MapServalTranslationEngineService(this IEndp return builder; } + + public static IEndpointRouteBuilder MapServalWordAlignmentEngineService(this IEndpointRouteBuilder builder) + { + builder.MapGrpcService(); + + return builder; + } } diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs index 
67b8ef3d..1b61bdc5 100644 --- a/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs +++ b/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs @@ -1,5 +1,6 @@ using Polly.Extensions.Http; using Serval.Translation.V1; +using Serval.WordAlignment.V1; namespace Microsoft.Extensions.DependencyInjection; @@ -17,6 +18,12 @@ public static IMachineBuilder AddSmtTransferEngineOptions(this IMachineBuilder b return builder; } + public static IMachineBuilder AddWordAlignmentEngineOptions(this IMachineBuilder builder, IConfiguration config) + { + builder.Services.Configure(config); + return builder; + } + public static IMachineBuilder AddClearMLOptions(this IMachineBuilder builder, IConfiguration config) { builder.Services.Configure(config); @@ -168,25 +175,36 @@ public static IMachineBuilder AddMongoHangfireJobClient( public static IMachineBuilder AddHangfireJobServer( this IMachineBuilder builder, - IEnumerable? engineTypes = null + IEnumerable? engineTypes = null ) { - engineTypes ??= - builder.Configuration.GetSection("TranslationEngines").Get() - ?? [TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt]; + engineTypes ??= ( + builder.Configuration.GetSection("TranslationEngines").Get() + ?? [EngineType.SmtTransfer, EngineType.Nmt] + ).Concat( + builder.Configuration.GetSection("WordAlignmentEngines").Get() ?? 
[EngineType.Statistical] + ); var queues = new List(); - foreach (TranslationEngineType engineType in engineTypes.Distinct()) + foreach (EngineType engineType in engineTypes.Distinct()) { switch (engineType) { - case TranslationEngineType.SmtTransfer: + case EngineType.SmtTransfer: builder.Services.AddSingleton(); - builder.AddThotSmtModel().AddTransferEngine().AddUnigramTruecaser(); + builder.Services.AddHostedService(); + builder.AddThot(); queues.Add("smt_transfer"); break; - case TranslationEngineType.Nmt: + case EngineType.Nmt: queues.Add("nmt"); break; + case EngineType.Statistical: + builder.Services.AddSingleton(); + builder.AddThot(); + queues.Add("statistical"); + break; + default: + throw new ArgumentOutOfRangeException(engineType.ToString()); } } @@ -202,6 +220,7 @@ public static IMachineBuilder AddMemoryDataAccess(this IMachineBuilder builder) builder.Services.AddMemoryDataAccess(o => { o.AddRepository(); + o.AddRepository(); o.AddRepository(); o.AddRepository(); o.AddRepository(); @@ -238,6 +257,23 @@ await c.Indexes.CreateOrUpdateAsync( ); } ); + o.AddRepository( + "word_alignment_engines", + mapSetup: m => m.SetIgnoreExtraElements(true), + init: async c => + { + await c.Indexes.CreateOrUpdateAsync( + new CreateIndexModel( + Builders.IndexKeys.Ascending(e => e.EngineId) + ) + ); + await c.Indexes.CreateOrUpdateAsync( + new CreateIndexModel( + Builders.IndexKeys.Ascending(e => e.CurrentBuild!.BuildJobRunner) + ) + ); + } + ); o.AddRepository("locks"); o.AddRepository( "train_segment_pairs", @@ -263,7 +299,7 @@ await c.Indexes.CreateOrUpdateAsync( return builder; } - public static IMachineBuilder AddServalPlatformService( + public static IMachineBuilder AddServalTranslationPlatformService( this IMachineBuilder builder, string? 
connectionString = null ) @@ -272,9 +308,9 @@ public static IMachineBuilder AddServalPlatformService( if (connectionString is null) throw new InvalidOperationException("Serval connection string is required"); - builder.Services.AddScoped(); + builder.Services.AddScoped(); - builder.Services.AddSingleton(); + builder.Services.AddSingleton(); builder.Services.AddScoped(); @@ -309,10 +345,67 @@ public static IMachineBuilder AddServalPlatformService( new MethodName { Service = "serval.translation.v1.TranslationPlatformApi", - Method = "UpdateBuildStatus" + Method = "UpdateTranslationBuildStatus" } } + } + } + }; + }); + + return builder; + } + + public static IMachineBuilder AddServalWordAlignmentPlatformService( + this IMachineBuilder builder, + string? connectionString = null + ) + { + connectionString ??= builder.Configuration.GetConnectionString("Serval"); + if (connectionString is null) + throw new InvalidOperationException("Serval connection string is required"); + + builder.Services.AddScoped(); + + builder.Services.AddSingleton(); + + builder.Services.AddScoped(); + + builder + .Services.AddGrpcClient(o => + { + o.Address = new Uri(connectionString); + }) + .ConfigureChannel(o => + { + o.MaxRetryAttempts = null; + o.ServiceConfig = new ServiceConfig + { + MethodConfigs = + { + new MethodConfig + { + Names = { MethodName.Default }, + RetryPolicy = new Grpc.Net.Client.Configuration.RetryPolicy + { + MaxAttempts = 10, + InitialBackoff = TimeSpan.FromSeconds(1), + MaxBackoff = TimeSpan.FromSeconds(5), + BackoffMultiplier = 1.5, + RetryableStatusCodes = { StatusCode.Unavailable } + } }, + new MethodConfig + { + Names = + { + new MethodName + { + Service = "serval.word_alignment.v1.WordAlignmentPlatformApi", + Method = "UpdateWordAlignmentBuildStatus" + } + } + } } }; }); @@ -323,7 +416,7 @@ public static IMachineBuilder AddServalPlatformService( public static IMachineBuilder AddServalTranslationEngineService( this IMachineBuilder builder, string? 
connectionString = null, - IEnumerable? engineTypes = null + IEnumerable? engineTypes = null ) { builder.Services.AddGrpc(options => @@ -332,37 +425,85 @@ public static IMachineBuilder AddServalTranslationEngineService( options.Interceptors.Add(); options.Interceptors.Add(); }); - builder.AddServalPlatformService(connectionString); engineTypes ??= - builder.Configuration.GetSection("TranslationEngines").Get() - ?? [TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt]; - foreach (TranslationEngineType engineType in engineTypes.Distinct()) + builder.Configuration.GetSection("TranslationEngines").Get() + ?? [EngineType.SmtTransfer, EngineType.Nmt]; + foreach (EngineType engineType in engineTypes.Distinct()) { switch (engineType) { - case TranslationEngineType.SmtTransfer: - builder.Services.AddSingleton(); - builder.Services.AddHostedService(); - builder.AddThotSmtModel().AddTransferEngine().AddUnigramTruecaser(); + case EngineType.SmtTransfer: + builder.AddThot(); builder.Services.AddScoped(); break; - case TranslationEngineType.Nmt: + case EngineType.Nmt: builder.Services.AddScoped(); break; + default: + throw new ArgumentOutOfRangeException(engineType.ToString()); + } + } + + return builder; + } + + public static IMachineBuilder AddServalWordAlignmentEngineService( + this IMachineBuilder builder, + string? connectionString = null, + IEnumerable? engineTypes = null + ) + { + builder.Services.AddGrpc(options => + { + options.Interceptors.Add(); + options.Interceptors.Add(); + options.Interceptors.Add(); + }); + + engineTypes ??= + builder.Configuration.GetSection("WordAlignmentEngines").Get() ?? 
[EngineType.Statistical]; + + foreach (EngineType engineType in engineTypes.Distinct()) + { + switch (engineType) + { + case EngineType.Statistical: + builder.Services.AddSingleton(); + builder.AddThot(); + builder.Services.AddScoped(); + break; + default: + throw new ArgumentOutOfRangeException(engineType.ToString()); } } return builder; } - public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, string? smtTransferEngineDir = null) + public static IMachineBuilder AddThot(this IMachineBuilder builder) { - builder.Services.AddScoped(); + try + { + builder.AddThotSmtModel().AddTransferEngine().AddUnigramTruecaser(); + } + catch (ArgumentException) + { + // if this has already been run, don't run it again + } + return builder; + } + + public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder) + { + builder.Services.AddScoped, TranslationBuildJobService>(); + builder.Services.AddScoped, BuildJobService>(); builder.Services.AddScoped(); builder.Services.AddScoped(); builder.Services.AddScoped(); + builder.Services.AddScoped(); + builder.Services.AddSingleton(); builder.Services.AddSingleton(x => x.GetRequiredService()); builder.Services.AddHostedService(p => p.GetRequiredService()); @@ -370,22 +511,24 @@ public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, s builder.Services.AddScoped(); builder.Services.AddScoped(); builder.Services.AddScoped(); - - if (smtTransferEngineDir is null) - { - var smtTransferEngineOptions = new SmtTransferEngineOptions(); - builder.Configuration.GetSection(SmtTransferEngineOptions.Key).Bind(smtTransferEngineOptions); - smtTransferEngineDir = smtTransferEngineOptions.EnginesDir; - } - string? 
driveLetter = Path.GetPathRoot(smtTransferEngineDir)?[..1]; - if (driveLetter is null) - throw new InvalidOperationException("SMT Engine directory is required"); + builder.Services.AddScoped(); + + var smtTransferEngineOptions = new SmtTransferEngineOptions(); + builder.Configuration.GetSection(SmtTransferEngineOptions.Key).Bind(smtTransferEngineOptions); + string? smtDriveLetter = Path.GetPathRoot(smtTransferEngineOptions.EnginesDir)?[..1]; + var statisticsEngineOptions = new WordAlignmentEngineOptions(); + builder.Configuration.GetSection(WordAlignmentEngineOptions.Key).Bind(statisticsEngineOptions); + string? statisticsDriveLetter = Path.GetPathRoot(statisticsEngineOptions.EnginesDir)?[..1]; + if (smtDriveLetter is null || statisticsDriveLetter is null) + throw new InvalidOperationException("SMT Engine and Statistical directory is required"); + if (smtDriveLetter != statisticsDriveLetter) + throw new InvalidOperationException("SMT Engine and Statistical directory must be on the same drive"); // add health check for disk storage capacity builder .Services.AddHealthChecks() .AddDiskStorageHealthCheck( - x => x.AddDrive(driveLetter, 1_000), // 1GB - "SMT Engine Storage Capacity", + x => x.AddDrive(smtDriveLetter, 1_000), // 1GB + "SMT and Statistical Engine Storage Capacity", HealthStatus.Degraded ); diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/IServiceCollectionExtensions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/IServiceCollectionExtensions.cs index 8fcaced4..9c3deb34 100644 --- a/src/Machine/src/Serval.Machine.Shared/Configuration/IServiceCollectionExtensions.cs +++ b/src/Machine/src/Serval.Machine.Shared/Configuration/IServiceCollectionExtensions.cs @@ -25,6 +25,7 @@ public static IMachineBuilder AddMachine(this IServiceCollection services, IConf builder.AddServiceOptions(configuration.GetSection(ServiceOptions.Key)); builder.AddSharedFileOptions(configuration.GetSection(SharedFileOptions.Key)); 
builder.AddSmtTransferEngineOptions(configuration.GetSection(SmtTransferEngineOptions.Key)); + builder.AddWordAlignmentEngineOptions(configuration.GetSection(WordAlignmentEngineOptions.Key)); builder.AddClearMLOptions(configuration.GetSection(ClearMLOptions.Key)); builder.AddDistributedReaderWriterLockOptions(configuration.GetSection(DistributedReaderWriterLockOptions.Key)); builder.AddBuildJobOptions(configuration.GetSection(BuildJobOptions.Key)); diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/WordAlignmentEngineOptions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/WordAlignmentEngineOptions.cs new file mode 100644 index 00000000..fd692afb --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Configuration/WordAlignmentEngineOptions.cs @@ -0,0 +1,12 @@ +namespace Serval.Machine.Shared.Configuration; + +public class WordAlignmentEngineOptions +{ + public const string Key = "WordAlignmentEngine"; + + public string EnginesDir { get; set; } = "word_alignment_engines"; + public TimeSpan EngineCommitFrequency { get; set; } = TimeSpan.FromMinutes(5); + public TimeSpan InactiveEngineTimeout { get; set; } = TimeSpan.FromMinutes(10); + public TimeSpan SaveModelTimeout { get; set; } = TimeSpan.FromMinutes(5); + public TimeSpan EngineCommitTimeout { get; set; } = TimeSpan.FromMinutes(2); +} diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/WordAlignmentModelOptions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/WordAlignmentModelOptions.cs new file mode 100644 index 00000000..ec0223df --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Configuration/WordAlignmentModelOptions.cs @@ -0,0 +1,14 @@ +namespace Serval.Machine.Shared.Configuration; + +public class WordAlignmentModelOptions +{ + public const string Key = "WordAlignmentModel"; + + public WordAlignmentModelOptions() + { + string installDir = Path.GetDirectoryName(Assembly.GetEntryAssembly()!.Location)!; + NewModelFile = Path.Combine(installDir, 
"thot-new-model.zip"); + } + + public string NewModelFile { get; set; } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Models/ITrainingEngine.cs b/src/Machine/src/Serval.Machine.Shared/Models/ITrainingEngine.cs new file mode 100644 index 00000000..3f7af125 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Models/ITrainingEngine.cs @@ -0,0 +1,11 @@ +namespace Serval.Machine.Shared.Models; + +public interface ITrainingEngine : IEntity +{ + public string EngineId { get; init; } + public EngineType Type { get; init; } + public string SourceLanguage { get; init; } + public string TargetLanguage { get; init; } + public int BuildRevision { get; init; } + public Build? CurrentBuild { get; init; } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Models/TranslationEngine.cs b/src/Machine/src/Serval.Machine.Shared/Models/TranslationEngine.cs index e3143a3c..53fa082b 100644 --- a/src/Machine/src/Serval.Machine.Shared/Models/TranslationEngine.cs +++ b/src/Machine/src/Serval.Machine.Shared/Models/TranslationEngine.cs @@ -1,11 +1,11 @@ namespace Serval.Machine.Shared.Models; -public record TranslationEngine : IEntity +public record TranslationEngine : ITrainingEngine { public string Id { get; set; } = ""; public int Revision { get; set; } = 1; public required string EngineId { get; init; } - public required TranslationEngineType Type { get; init; } + public required EngineType Type { get; init; } public required string SourceLanguage { get; init; } public required string TargetLanguage { get; init; } public required bool IsModelPersisted { get; init; } diff --git a/src/Machine/src/Serval.Machine.Shared/Models/WordAlignment.cs b/src/Machine/src/Serval.Machine.Shared/Models/WordAlignment.cs new file mode 100644 index 00000000..e8181457 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Models/WordAlignment.cs @@ -0,0 +1,12 @@ +namespace Serval.Machine.Shared.Models; + +public record WordAlignment +{ + public required string CorpusId { get; init; } + 
public required string TextId { get; init; } + public required IReadOnlyList Refs { get; init; } + public required IReadOnlyList SourceTokens { get; set; } + public required IReadOnlyList TargetTokens { get; set; } + public required IReadOnlyList Confidences { get; set; } + public required IReadOnlyList Alignment { get; set; } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Models/WordAlignmentEngine.cs b/src/Machine/src/Serval.Machine.Shared/Models/WordAlignmentEngine.cs new file mode 100644 index 00000000..7739e980 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Models/WordAlignmentEngine.cs @@ -0,0 +1,13 @@ +namespace Serval.Machine.Shared.Models; + +public record WordAlignmentEngine : ITrainingEngine +{ + public string Id { get; set; } = ""; + public int Revision { get; set; } = 1; + public required string EngineId { get; init; } + public required EngineType Type { get; init; } + public required string SourceLanguage { get; init; } + public required string TargetLanguage { get; init; } + public int BuildRevision { get; init; } + public Build? 
CurrentBuild { get; init; } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj b/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj index f9756293..0bf03089 100644 --- a/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj +++ b/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj @@ -39,6 +39,7 @@ + @@ -49,6 +50,7 @@ + diff --git a/src/Machine/src/Serval.Machine.Shared/Services/BuildJobService.cs b/src/Machine/src/Serval.Machine.Shared/Services/BuildJobService.cs index da670439..4b3bd5f4 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/BuildJobService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/BuildJobService.cs @@ -1,22 +1,24 @@ namespace Serval.Machine.Shared.Services; -public class BuildJobService(IEnumerable runners, IRepository engines) - : IBuildJobService +public class BuildJobService(IEnumerable runners, IRepository engines) + : IBuildJobService + where TEngine : ITrainingEngine { - private readonly Dictionary _runners = runners.ToDictionary(r => r.Type); - private readonly IRepository _engines = engines; + // TODO: make some sort of service to get the engine repos. 
+ protected readonly Dictionary Runners = runners.ToDictionary(r => r.Type); + protected readonly IRepository Engines = engines; public Task IsEngineBuilding(string engineId, CancellationToken cancellationToken = default) { - return _engines.ExistsAsync(e => e.EngineId == engineId && e.CurrentBuild != null, cancellationToken); + return Engines.ExistsAsync(e => e.EngineId == engineId && e.CurrentBuild != null, cancellationToken); } - public Task> GetBuildingEnginesAsync( + public async Task> GetBuildingEnginesAsync( BuildJobRunnerType runner, CancellationToken cancellationToken = default ) { - return _engines.GetAllAsync( + return await Engines.GetAllAsync( e => e.CurrentBuild != null && e.CurrentBuild.BuildJobRunner == runner, cancellationToken ); @@ -28,7 +30,7 @@ public Task> GetBuildingEnginesAsync( CancellationToken cancellationToken = default ) { - TranslationEngine? engine = await _engines.GetAsync( + TEngine? engine = await Engines.GetAsync( e => e.EngineId == engineId && e.CurrentBuild != null && e.CurrentBuild.BuildId == buildId, cancellationToken ); @@ -41,25 +43,25 @@ public async Task CreateEngineAsync( CancellationToken cancellationToken = default ) { - foreach (BuildJobRunnerType runnerType in _runners.Keys) + foreach (BuildJobRunnerType runnerType in Runners.Keys) { - IBuildJobRunner runner = _runners[runnerType]; + IBuildJobRunner runner = Runners[runnerType]; await runner.CreateEngineAsync(engineId, name, cancellationToken); } } public async Task DeleteEngineAsync(string engineId, CancellationToken cancellationToken = default) { - foreach (BuildJobRunnerType runnerType in _runners.Keys) + foreach (BuildJobRunnerType runnerType in Runners.Keys) { - IBuildJobRunner runner = _runners[runnerType]; + IBuildJobRunner runner = Runners[runnerType]; await runner.DeleteEngineAsync(engineId, cancellationToken); } } public async Task StartBuildJobAsync( BuildJobRunnerType runnerType, - TranslationEngineType engineType, + EngineType engineType, string engineId, 
string buildId, BuildStage stage, @@ -68,7 +70,7 @@ public async Task StartBuildJobAsync( CancellationToken cancellationToken = default ) { - IBuildJobRunner runner = _runners[runnerType]; + IBuildJobRunner runner = Runners[runnerType]; string jobId = await runner.CreateJobAsync( engineType, engineId, @@ -80,7 +82,7 @@ public async Task StartBuildJobAsync( ); try { - TranslationEngine? engine = await _engines.UpdateAsync( + TEngine? engine = await Engines.UpdateAsync( e => e.EngineId == engineId && ( @@ -121,18 +123,17 @@ public async Task StartBuildJobAsync( } } - public async Task<(string? BuildId, BuildJobState State)> CancelBuildJobAsync( + public virtual async Task<(string? BuildId, BuildJobState State)> CancelBuildJobAsync( string engineId, CancellationToken cancellationToken = default ) { // cancel a job that hasn't started yet - TranslationEngine? engine = await _engines.UpdateAsync( + TEngine? engine = await Engines.UpdateAsync( e => e.EngineId == engineId && e.CurrentBuild != null && e.CurrentBuild.JobState == BuildJobState.Pending, u => { u.Unset(b => b.CurrentBuild); - u.Set(e => e.CollectTrainSegmentPairs, false); }, returnOriginal: true, cancellationToken: cancellationToken @@ -140,20 +141,20 @@ public async Task StartBuildJobAsync( if (engine is not null && engine.CurrentBuild is not null) { // job will be deleted from the queue - IBuildJobRunner runner = _runners[engine.CurrentBuild.BuildJobRunner]; + IBuildJobRunner runner = Runners[engine.CurrentBuild.BuildJobRunner]; await runner.StopJobAsync(engine.CurrentBuild.JobId, CancellationToken.None); return (engine.CurrentBuild.BuildId, BuildJobState.None); } // cancel a job that is already running - engine = await _engines.UpdateAsync( + engine = await Engines.UpdateAsync( e => e.EngineId == engineId && e.CurrentBuild != null && e.CurrentBuild.JobState == BuildJobState.Active, u => u.Set(e => e.CurrentBuild!.JobState, BuildJobState.Canceling), cancellationToken: cancellationToken ); if (engine is not 
null && engine.CurrentBuild is not null) { - IBuildJobRunner runner = _runners[engine.CurrentBuild.BuildJobRunner]; + IBuildJobRunner runner = Runners[engine.CurrentBuild.BuildJobRunner]; await runner.StopJobAsync(engine.CurrentBuild.JobId, CancellationToken.None); return (engine.CurrentBuild.BuildId, BuildJobState.Canceling); } @@ -167,7 +168,7 @@ public async Task BuildJobStartedAsync( CancellationToken cancellationToken = default ) { - TranslationEngine? engine = await _engines.UpdateAsync( + TEngine? engine = await Engines.UpdateAsync( e => e.EngineId == engineId && e.CurrentBuild != null @@ -179,19 +180,18 @@ public async Task BuildJobStartedAsync( return engine is not null; } - public Task BuildJobFinishedAsync( + public virtual Task BuildJobFinishedAsync( string engineId, string buildId, bool buildComplete, CancellationToken cancellationToken = default ) { - return _engines.UpdateAsync( + return Engines.UpdateAsync( e => e.EngineId == engineId && e.CurrentBuild != null && e.CurrentBuild.BuildId == buildId, u => { u.Unset(e => e.CurrentBuild); - u.Set(e => e.CollectTrainSegmentPairs, false); if (buildComplete) u.Inc(e => e.BuildRevision); }, @@ -201,7 +201,7 @@ public Task BuildJobFinishedAsync( public Task BuildJobRestartingAsync(string engineId, string buildId, CancellationToken cancellationToken = default) { - return _engines.UpdateAsync( + return Engines.UpdateAsync( e => e.EngineId == engineId && e.CurrentBuild != null && e.CurrentBuild.BuildId == buildId, u => u.Set(e => e.CurrentBuild!.JobState, BuildJobState.Pending), cancellationToken: cancellationToken diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ClearMLBuildJobRunner.cs b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLBuildJobRunner.cs index 910dd957..5f148043 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ClearMLBuildJobRunner.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLBuildJobRunner.cs @@ -7,11 +7,12 @@ IOptionsMonitor options ) : 
IBuildJobRunner { private readonly IClearMLService _clearMLService = clearMLService; - private readonly Dictionary _buildJobFactories = + private readonly Dictionary _buildJobFactories = buildJobFactories.ToDictionary(f => f.EngineType); - private readonly Dictionary _options = - options.CurrentValue.ClearML.ToDictionary(o => o.TranslationEngineType); + private readonly Dictionary _options = options.CurrentValue.ClearML.ToDictionary(o => + Enum.Parse(o.EngineType) + ); public BuildJobRunnerType Type => BuildJobRunnerType.ClearML; @@ -32,7 +33,7 @@ public async Task DeleteEngineAsync(string engineId, CancellationToken cancellat } public async Task CreateJobAsync( - TranslationEngineType engineType, + EngineType engineType, string engineId, string buildId, BuildStage stage, @@ -74,7 +75,7 @@ public Task DeleteJobAsync(string jobId, CancellationToken cancellationTok public Task EnqueueJobAsync( string jobId, - TranslationEngineType engineType, + EngineType engineType, CancellationToken cancellationToken = default ) { diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ClearMLMonitorService.cs b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLMonitorService.cs index c14be661..b073befc 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ClearMLMonitorService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLMonitorService.cs @@ -26,45 +26,68 @@ ILogger logger private readonly ILogger _logger = logger; private readonly Dictionary _curBuildStatus = new(); - private readonly IReadOnlyDictionary _queuePerEngineType = - buildJobOptions.CurrentValue.ClearML.ToDictionary(x => x.TranslationEngineType, x => x.Queue); + private readonly IReadOnlyDictionary _queuePerEngineType = + buildJobOptions.CurrentValue.ClearML.ToDictionary(x => x.EngineType, x => x.Queue); - private readonly IDictionary _queueSizePerEngineType = new ConcurrentDictionary< - TranslationEngineType, - int - >(buildJobOptions.CurrentValue.ClearML.ToDictionary(x => 
x.TranslationEngineType, x => 0)); + private readonly IDictionary _queueSizePerEngineType = new ConcurrentDictionary( + buildJobOptions.CurrentValue.ClearML.ToDictionary(x => x.EngineType, x => 0) + ); - public int GetQueueSize(TranslationEngineType engineType) + public int GetQueueSize(EngineType engineType) { - return _queueSizePerEngineType[engineType]; + return _queueSizePerEngineType[engineType.ToString()]; } protected override async Task DoWorkAsync(IServiceScope scope, CancellationToken cancellationToken) + { + await MonitorClearMLTasksPerDomain(scope, cancellationToken); + } + + private async Task MonitorClearMLTasksPerDomain(IServiceScope scope, CancellationToken cancellationToken) { try { - var buildJobService = scope.ServiceProvider.GetRequiredService(); - IReadOnlyList trainingEngines = await buildJobService.GetBuildingEnginesAsync( - BuildJobRunnerType.ClearML, - cancellationToken - ); - if (trainingEngines.Count == 0) + var translationBuildJobService = scope.ServiceProvider.GetRequiredService< + IBuildJobService + >(); + var wordAlignmentBuildJobService = scope.ServiceProvider.GetRequiredService< + IBuildJobService + >(); + + Dictionary engineToBuildServiceDict = ( + await translationBuildJobService.GetBuildingEnginesAsync(BuildJobRunnerType.ClearML, cancellationToken) + ).ToDictionary(e => (ITrainingEngine)e, e => (IBuildJobServiceBase)translationBuildJobService); + + foreach ( + var engine in await wordAlignmentBuildJobService.GetBuildingEnginesAsync( + BuildJobRunnerType.ClearML, + cancellationToken + ) + ) + { + engineToBuildServiceDict[engine] = wordAlignmentBuildJobService; + } + + if (engineToBuildServiceDict.Count == 0) return; Dictionary tasks = ( await _clearMLService.GetTasksByIdAsync( - trainingEngines.Select(e => e.CurrentBuild!.JobId), + engineToBuildServiceDict.Select(e => e.Key.CurrentBuild!.JobId), cancellationToken ) ).ToDictionary(t => t.Id); - Dictionary> queuePositionsPerEngineType = new(); + Dictionary> 
queuePositionsPerEngineType = new(); - foreach ((TranslationEngineType engineType, string queueName) in _queuePerEngineType) + foreach ((string engineType, string queueName) in _queuePerEngineType) { var tasksPerEngineType = tasks .Where(kvp => - trainingEngines.Where(te => te.CurrentBuild?.JobId == kvp.Key).FirstOrDefault()?.Type - == engineType + engineToBuildServiceDict + .Where(te => te.Key.CurrentBuild?.JobId == kvp.Key) + .FirstOrDefault() + .Key?.Type + .ToString() == engineType ) .Select(kvp => kvp.Value) .UnionBy(await _clearMLService.GetTasksForQueueAsync(queueName, cancellationToken), t => t.Id) @@ -80,9 +103,12 @@ await _clearMLService.GetTasksByIdAsync( } var dataAccessContext = scope.ServiceProvider.GetRequiredService(); - var platformService = scope.ServiceProvider.GetRequiredService(); - foreach (TranslationEngine engine in trainingEngines) + var platformServices = scope.ServiceProvider.GetRequiredService>(); + foreach (ITrainingEngine engine in engineToBuildServiceDict.Keys) { + var platformService = platformServices.First(ps => + ps.EngineGroup == EngineTypeMethods.ToEngineGroup(engine.Type) + ); if (engine.CurrentBuild is null || !tasks.TryGetValue(engine.CurrentBuild.JobId, out ClearMLTask? 
task)) continue; @@ -96,7 +122,7 @@ await UpdateTrainJobStatus( engine.CurrentBuild.BuildId, new ProgressStatus(step: 0, percentCompleted: 0.0), //CurrentBuild.BuildId should always equal the corresponding task.Name - queuePositionsPerEngineType[engine.Type][engine.CurrentBuild.BuildId] + 1, + queuePositionsPerEngineType[engine.Type.ToString()][engine.CurrentBuild.BuildId] + 1, cancellationToken ); } @@ -114,7 +140,7 @@ or ClearMLTaskStatus.Completed { bool canceled = !await TrainJobStartedAsync( dataAccessContext, - buildJobService, + engineToBuildServiceDict[engine], platformService, engine.EngineId, engine.CurrentBuild.BuildId, @@ -153,7 +179,7 @@ await UpdateTrainJobStatus( cancellationToken ); bool canceling = !await TrainJobCompletedAsync( - buildJobService, + engineToBuildServiceDict[engine], engine.Type, engine.EngineId, engine.CurrentBuild.BuildId, @@ -166,7 +192,7 @@ await UpdateTrainJobStatus( { await TrainJobCanceledAsync( dataAccessContext, - buildJobService, + engineToBuildServiceDict[engine], platformService, engine.EngineId, engine.CurrentBuild.BuildId, @@ -180,7 +206,7 @@ await TrainJobCanceledAsync( { await TrainJobCanceledAsync( dataAccessContext, - buildJobService, + engineToBuildServiceDict[engine], platformService, engine.EngineId, engine.CurrentBuild.BuildId, @@ -193,7 +219,7 @@ await TrainJobCanceledAsync( { await TrainJobFaultedAsync( dataAccessContext, - buildJobService, + engineToBuildServiceDict[engine], platformService, engine.EngineId, engine.CurrentBuild.BuildId, @@ -214,7 +240,7 @@ await TrainJobFaultedAsync( private async Task TrainJobStartedAsync( IDataAccessContext dataAccessContext, - IBuildJobService buildJobService, + IBuildJobServiceBase buildJobService, IPlatformService platformService, string engineId, string buildId, @@ -237,8 +263,8 @@ private async Task TrainJobStartedAsync( } private async Task TrainJobCompletedAsync( - IBuildJobService buildJobService, - TranslationEngineType engineType, + IBuildJobServiceBase 
buildJobService, + EngineType engineType, string engineId, string buildId, int corpusSize, @@ -268,7 +294,7 @@ CancellationToken cancellationToken private async Task TrainJobFaultedAsync( IDataAccessContext dataAccessContext, - IBuildJobService buildJobService, + IBuildJobServiceBase buildJobService, IPlatformService platformService, string engineId, string buildId, @@ -301,7 +327,7 @@ await buildJobService.BuildJobFinishedAsync( private async Task TrainJobCanceledAsync( IDataAccessContext dataAccessContext, - IBuildJobService buildJobService, + IBuildJobServiceBase buildJobService, IPlatformService platformService, string engineId, string buildId, diff --git a/src/Machine/src/Serval.Machine.Shared/Services/EngineType.cs b/src/Machine/src/Serval.Machine.Shared/Services/EngineType.cs new file mode 100644 index 00000000..c206d27a --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/EngineType.cs @@ -0,0 +1,28 @@ +namespace Serval.Machine.Shared.Services; + +public enum EngineType +{ + SmtTransfer, + Nmt, + Statistical +} + +public enum EngineGroup +{ + Translation, + WordAlignment +} + +public static class EngineTypeMethods +{ + public static EngineGroup ToEngineGroup(EngineType engineType) + { + return engineType switch + { + EngineType.SmtTransfer => EngineGroup.Translation, + EngineType.Nmt => EngineGroup.Translation, + EngineType.Statistical => EngineGroup.WordAlignment, + _ => throw new ArgumentOutOfRangeException(nameof(engineType), engineType, null) + }; + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/HangfireBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/HangfireBuildJob.cs index 13fc9add..f9d6e442 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/HangfireBuildJob.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/HangfireBuildJob.cs @@ -1,12 +1,13 @@ namespace Serval.Machine.Shared.Services; -public abstract class HangfireBuildJob( +public abstract class HangfireBuildJob( IPlatformService 
platformService, - IRepository engines, + IRepository engines, IDataAccessContext dataAccessContext, - IBuildJobService buildJobService, - ILogger logger -) : HangfireBuildJob(platformService, engines, dataAccessContext, buildJobService, logger) + IBuildJobService buildJobService, + ILogger> logger +) : HangfireBuildJob(platformService, engines, dataAccessContext, buildJobService, logger) + where TEngine : ITrainingEngine { public virtual Task RunAsync( string engineId, @@ -19,24 +20,25 @@ CancellationToken cancellationToken } } -public abstract class HangfireBuildJob( +public abstract class HangfireBuildJob( IPlatformService platformService, - IRepository engines, + IRepository engines, IDataAccessContext dataAccessContext, - IBuildJobService buildJobService, - ILogger> logger + IBuildJobService buildJobService, + ILogger> logger ) + where TEngine : ITrainingEngine { protected IPlatformService PlatformService { get; } = platformService; - protected IRepository Engines { get; } = engines; + protected IRepository Engines { get; } = engines; protected IDataAccessContext DataAccessContext { get; } = dataAccessContext; - protected IBuildJobService BuildJobService { get; } = buildJobService; - protected ILogger> Logger { get; } = logger; + protected IBuildJobService BuildJobService { get; } = buildJobService; + protected ILogger> Logger { get; } = logger; public virtual async Task RunAsync( string engineId, string buildId, - T data, + TData data, string? buildOptions, CancellationToken cancellationToken ) @@ -56,7 +58,7 @@ CancellationToken cancellationToken catch (OperationCanceledException) { // Check if the cancellation was initiated by an API call or a shutdown. - TranslationEngine? engine = await Engines.GetAsync( + TEngine? 
engine = await Engines.GetAsync( e => e.EngineId == engineId && e.CurrentBuild != null && e.CurrentBuild.BuildId == buildId, CancellationToken.None ); @@ -123,7 +125,12 @@ await BuildJobService.BuildJobFinishedAsync( } } - protected virtual Task InitializeAsync(string engineId, string buildId, T data, CancellationToken cancellationToken) + protected virtual Task InitializeAsync( + string engineId, + string buildId, + TData data, + CancellationToken cancellationToken + ) { return Task.CompletedTask; } @@ -131,12 +138,17 @@ protected virtual Task InitializeAsync(string engineId, string buildId, T data, protected abstract Task DoWorkAsync( string engineId, string buildId, - T data, + TData data, string? buildOptions, CancellationToken cancellationToken ); - protected virtual Task CleanupAsync(string engineId, string buildId, T data, JobCompletionStatus completionStatus) + protected virtual Task CleanupAsync( + string engineId, + string buildId, + TData data, + JobCompletionStatus completionStatus + ) { return Task.CompletedTask; } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/HangfireBuildJobRunner.cs b/src/Machine/src/Serval.Machine.Shared/Services/HangfireBuildJobRunner.cs index d5be7f30..fc91536a 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/HangfireBuildJobRunner.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/HangfireBuildJobRunner.cs @@ -5,14 +5,15 @@ public class HangfireBuildJobRunner( IEnumerable buildJobFactories ) : IBuildJobRunner { - public static Job CreateJob( + public static Job CreateJob( string engineId, string buildId, string queue, object? data, string? 
buildOptions ) - where TJob : HangfireBuildJob + where TEngine : ITrainingEngine + where TJob : HangfireBuildJob { ArgumentNullException.ThrowIfNull(data); // Token "None" is used here because hangfire injects the proper cancellation token @@ -22,8 +23,9 @@ public static Job CreateJob( ); } - public static Job CreateJob(string engineId, string buildId, string queue, string? buildOptions) - where TJob : HangfireBuildJob + public static Job CreateJob(string engineId, string buildId, string queue, string? buildOptions) + where TEngine : ITrainingEngine + where TJob : HangfireBuildJob { // Token "None" is used here because hangfire injects the proper cancellation token return Job.FromExpression( @@ -33,7 +35,7 @@ public static Job CreateJob(string engineId, string buildId, string queue, } private readonly IBackgroundJobClient _jobClient = jobClient; - private readonly Dictionary _buildJobFactories = + private readonly Dictionary _buildJobFactories = buildJobFactories.ToDictionary(f => f.EngineType); public BuildJobRunnerType Type => BuildJobRunnerType.Hangfire; @@ -49,7 +51,7 @@ public Task DeleteEngineAsync(string engineId, CancellationToken cancellationTok } public Task CreateJobAsync( - TranslationEngineType engineType, + EngineType engineType, string engineId, string buildId, BuildStage stage, @@ -70,7 +72,7 @@ public Task DeleteJobAsync(string jobId, CancellationToken cancellationTok public Task EnqueueJobAsync( string jobId, - TranslationEngineType engineType, + EngineType engineType, CancellationToken cancellationToken = default ) { diff --git a/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobRunner.cs b/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobRunner.cs index 6f6d3696..0c04cbde 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobRunner.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobRunner.cs @@ -8,7 +8,7 @@ public interface IBuildJobRunner Task DeleteEngineAsync(string engineId, CancellationToken 
cancellationToken = default); Task CreateJobAsync( - TranslationEngineType engineType, + EngineType engineType, string engineId, string buildId, BuildStage stage, @@ -19,11 +19,7 @@ Task CreateJobAsync( Task DeleteJobAsync(string jobId, CancellationToken cancellationToken = default); - Task EnqueueJobAsync( - string jobId, - TranslationEngineType engineType, - CancellationToken cancellationToken = default - ); + Task EnqueueJobAsync(string jobId, EngineType engineType, CancellationToken cancellationToken = default); Task StopJobAsync(string jobId, CancellationToken cancellationToken = default); } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobService.cs b/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobService.cs index 61c6122e..f72ce755 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobService.cs @@ -1,42 +1,10 @@ namespace Serval.Machine.Shared.Services; -public interface IBuildJobService +public interface IBuildJobService : IBuildJobServiceBase + where TEngine : ITrainingEngine { - Task> GetBuildingEnginesAsync( + Task> GetBuildingEnginesAsync( BuildJobRunnerType runner, CancellationToken cancellationToken = default ); - - Task IsEngineBuilding(string engineId, CancellationToken cancellationToken = default); - - Task CreateEngineAsync(string engineId, string? name = null, CancellationToken cancellationToken = default); - - Task DeleteEngineAsync(string engineId, CancellationToken cancellationToken = default); - - Task StartBuildJobAsync( - BuildJobRunnerType runnerType, - TranslationEngineType engineType, - string engineId, - string buildId, - BuildStage stage, - object? data = default, - string? buildOptions = default, - CancellationToken cancellationToken = default - ); - - Task<(string? 
BuildId, BuildJobState State)> CancelBuildJobAsync( - string engineId, - CancellationToken cancellationToken = default - ); - - Task BuildJobStartedAsync(string engineId, string buildId, CancellationToken cancellationToken = default); - - Task BuildJobFinishedAsync( - string engineId, - string buildId, - bool buildComplete, - CancellationToken cancellationToken = default - ); - - Task BuildJobRestartingAsync(string engineId, string buildId, CancellationToken cancellationToken = default); } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobServiceBase.cs b/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobServiceBase.cs new file mode 100644 index 00000000..16e771ce --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/IBuildJobServiceBase.cs @@ -0,0 +1,37 @@ +namespace Serval.Machine.Shared.Services; + +public interface IBuildJobServiceBase +{ + Task IsEngineBuilding(string engineId, CancellationToken cancellationToken = default); + + Task CreateEngineAsync(string engineId, string? name = null, CancellationToken cancellationToken = default); + + Task DeleteEngineAsync(string engineId, CancellationToken cancellationToken = default); + + Task StartBuildJobAsync( + BuildJobRunnerType runnerType, + EngineType engineType, + string engineId, + string buildId, + BuildStage stage, + object? data = default, + string? buildOptions = default, + CancellationToken cancellationToken = default + ); + + Task<(string? 
BuildId, BuildJobState State)> CancelBuildJobAsync( + string engineId, + CancellationToken cancellationToken = default + ); + + Task BuildJobStartedAsync(string engineId, string buildId, CancellationToken cancellationToken = default); + + Task BuildJobFinishedAsync( + string engineId, + string buildId, + bool buildComplete, + CancellationToken cancellationToken = default + ); + + Task BuildJobRestartingAsync(string engineId, string buildId, CancellationToken cancellationToken = default); +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/IClearMLBuildJobFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/IClearMLBuildJobFactory.cs index bb5afc57..fe265fc6 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/IClearMLBuildJobFactory.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/IClearMLBuildJobFactory.cs @@ -2,7 +2,7 @@ public interface IClearMLBuildJobFactory { - TranslationEngineType EngineType { get; } + EngineType EngineType { get; } Task CreateJobScriptAsync( string engineId, diff --git a/src/Machine/src/Serval.Machine.Shared/Services/IClearMLQueueService.cs b/src/Machine/src/Serval.Machine.Shared/Services/IClearMLQueueService.cs index 1e2425a4..714f227d 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/IClearMLQueueService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/IClearMLQueueService.cs @@ -2,5 +2,5 @@ public interface IClearMLQueueService { - public int GetQueueSize(TranslationEngineType engineType); + public int GetQueueSize(EngineType engineType); } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/IHangfireBuildJobFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/IHangfireBuildJobFactory.cs index faabcfec..e57ac8c5 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/IHangfireBuildJobFactory.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/IHangfireBuildJobFactory.cs @@ -2,7 +2,7 @@ public interface IHangfireBuildJobFactory { - TranslationEngineType EngineType { 
get; } + EngineType EngineType { get; } Job CreateJob(string engineId, string buildId, BuildStage stage, object? data, string? buildOptions); } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/IModelFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/IModelFactory.cs new file mode 100644 index 00000000..61ec1489 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/IModelFactory.cs @@ -0,0 +1,15 @@ +namespace Serval.Machine.Shared.Services; + +public interface IModelFactory +{ + ITrainer CreateTrainer( + string engineDir, + IRangeTokenizer tokenizer, + IParallelTextCorpus corpus + ); + + void InitNew(string engineDir); + void Cleanup(string engineDir); + Task UpdateEngineFromAsync(string engineDir, Stream source, CancellationToken cancellationToken = default); + Task SaveEngineToAsync(string engineDir, Stream destination, CancellationToken cancellationToken = default); +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/IPlatformService.cs b/src/Machine/src/Serval.Machine.Shared/Services/IPlatformService.cs index 79b30f6b..71161ed1 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/IPlatformService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/IPlatformService.cs @@ -2,6 +2,7 @@ public interface IPlatformService { + EngineGroup EngineGroup { get; } Task IncrementTrainSizeAsync(string engineId, int count = 1, CancellationToken cancellationToken = default); Task UpdateBuildStatusAsync( @@ -21,10 +22,5 @@ Task BuildCompletedAsync( Task BuildCanceledAsync(string buildId, CancellationToken cancellationToken = default); Task BuildFaultedAsync(string buildId, string message, CancellationToken cancellationToken = default); Task BuildRestartingAsync(string buildId, CancellationToken cancellationToken = default); - - Task InsertPretranslationsAsync( - string engineId, - Stream pretranslationsStream, - CancellationToken cancellationToken = default - ); + Task InsertInferencesAsync(string engineId, Stream 
inferenceStream, CancellationToken cancellationToken = default); } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ITranslationEngineService.cs b/src/Machine/src/Serval.Machine.Shared/Services/ITranslationEngineService.cs index b9e64472..3d4f983a 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ITranslationEngineService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ITranslationEngineService.cs @@ -2,7 +2,7 @@ public interface ITranslationEngineService { - TranslationEngineType Type { get; } + EngineType Type { get; } Task CreateAsync( string engineId, diff --git a/src/Machine/src/Serval.Machine.Shared/Services/IWordAlignmentEngineService.cs b/src/Machine/src/Serval.Machine.Shared/Services/IWordAlignmentEngineService.cs new file mode 100644 index 00000000..feaf4f97 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/IWordAlignmentEngineService.cs @@ -0,0 +1,34 @@ +namespace Serval.Machine.Shared.Services; + +public interface IWordAlignmentEngineService +{ + EngineType Type { get; } + + Task CreateAsync( + string engineId, + string? engineName, + string sourceLanguage, + string targetLanguage, + CancellationToken cancellationToken = default + ); + Task DeleteAsync(string engineId, CancellationToken cancellationToken = default); + + Task GetBestPhraseAlignmentAsync( + string engineId, + string sourceSegment, + string targetSegment, + CancellationToken cancellationToken = default + ); + + Task StartBuildAsync( + string engineId, + string buildId, + string? 
buildOptions, + IReadOnlyList corpora, + CancellationToken cancellationToken = default + ); + + Task CancelBuildAsync(string engineId, CancellationToken cancellationToken = default); + + int GetQueueSize(); +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/IWordAlignmentModelFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/IWordAlignmentModelFactory.cs new file mode 100644 index 00000000..b1ebe197 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/IWordAlignmentModelFactory.cs @@ -0,0 +1,11 @@ +namespace Serval.Machine.Shared.Services; + +public interface IWordAlignmentModelFactory +{ + IWordAlignmentModel Create(string engineDir); + ITrainer CreateTrainer(string engineDir, ITokenizer tokenizer, IParallelTextCorpus corpus); + void InitNew(string engineDir); + void Cleanup(string engineDir); + Task UpdateEngineFromAsync(string engineDir, Stream source, CancellationToken cancellationToken = default); + Task SaveEngineToAsync(string engineDir, Stream destination, CancellationToken cancellationToken = default); +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ModelCleanupService.cs b/src/Machine/src/Serval.Machine.Shared/Services/ModelCleanupService.cs index 92b38d6a..da638014 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ModelCleanupService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ModelCleanupService.cs @@ -26,12 +26,12 @@ internal async Task CheckModelsAsync(IRepository engines, Can // Get all NMT engine ids from the database IReadOnlyList? allEngines = await engines.GetAllAsync(cancellationToken: cancellationToken); IEnumerable validNmtFilenames = allEngines - .Where(e => e.Type == TranslationEngineType.Nmt) + .Where(e => e.Type == EngineType.Nmt) .Select(e => NmtEngineService.GetModelPath(e.EngineId, e.BuildRevision)); // If there is a currently running build that creates and pushes a new file, but the database has not // updated yet, don't delete the new file. 
IEnumerable validNmtFilenamesForNextBuild = allEngines - .Where(e => e.Type == TranslationEngineType.Nmt) + .Where(e => e.Type == EngineType.Nmt) .Select(e => NmtEngineService.GetModelPath(e.EngineId, e.BuildRevision + 1)); var filenameFilter = validNmtFilenames.Concat(validNmtFilenamesForNextBuild).ToHashSet(); diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ModelFactoryBase.cs b/src/Machine/src/Serval.Machine.Shared/Services/ModelFactoryBase.cs new file mode 100644 index 00000000..8fea2fbb --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/ModelFactoryBase.cs @@ -0,0 +1,74 @@ +namespace Serval.Machine.Shared.Services; + +public abstract class ModelFactoryBase : IModelFactory +{ + public virtual ITrainer CreateTrainer( + string engineDir, + IRangeTokenizer tokenizer, + IParallelTextCorpus corpus + ) + { + throw new NotImplementedException(); + } + + public virtual void InitNew(string engineDir) + { + throw new NotImplementedException(); + } + + public void Cleanup(string engineDir) + { + if (!Directory.Exists(engineDir)) + return; + DirectoryHelper.DeleteDirectoryRobust(Path.Combine(engineDir, "lm")); + DirectoryHelper.DeleteDirectoryRobust(Path.Combine(engineDir, "tm")); + string smtConfigFileName = Path.Combine(engineDir, "smt.cfg"); + if (File.Exists(smtConfigFileName)) + File.Delete(smtConfigFileName); + if (!Directory.EnumerateFileSystemEntries(engineDir).Any()) + Directory.Delete(engineDir); + } + + public async Task UpdateEngineFromAsync( + string engineDir, + Stream source, + CancellationToken cancellationToken = default + ) + { + if (!Directory.Exists(engineDir)) + Directory.CreateDirectory(engineDir); + + await using MemoryStream memoryStream = new(); + await using (GZipStream gzipStream = new(source, CompressionMode.Decompress)) + { + await gzipStream.CopyToAsync(memoryStream, cancellationToken); + } + memoryStream.Seek(0, SeekOrigin.Begin); + await TarFile.ExtractToDirectoryAsync( + memoryStream, + engineDir, + 
overwriteFiles: true, + cancellationToken: cancellationToken + ); + } + + public async Task SaveEngineToAsync( + string engineDir, + Stream destination, + CancellationToken cancellationToken = default + ) + { + // create zip archive in memory stream + // This cannot be created directly to the shared stream because it all needs to be written at once + await using MemoryStream memoryStream = new(); + await TarFile.CreateFromDirectoryAsync( + engineDir, + memoryStream, + includeBaseDirectory: false, + cancellationToken: cancellationToken + ); + memoryStream.Seek(0, SeekOrigin.Begin); + await using GZipStream gzipStream = new(destination, CompressionMode.Compress); + await memoryStream.CopyToAsync(gzipStream, cancellationToken); + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/NmtClearMLBuildJobFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/NmtClearMLBuildJobFactory.cs index 4f465936..ee2b07bc 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/NmtClearMLBuildJobFactory.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/NmtClearMLBuildJobFactory.cs @@ -10,7 +10,7 @@ IRepository engines private readonly ILanguageTagService _languageTagService = languageTagService; private readonly IRepository _engines = engines; - public TranslationEngineType EngineType => TranslationEngineType.Nmt; + public EngineType EngineType => EngineType.Nmt; public async Task CreateJobScriptAsync( string engineId, diff --git a/src/Machine/src/Serval.Machine.Shared/Services/NmtEngineService.cs b/src/Machine/src/Serval.Machine.Shared/Services/NmtEngineService.cs index 0dd66544..dec045f9 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/NmtEngineService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/NmtEngineService.cs @@ -1,19 +1,21 @@ namespace Serval.Machine.Shared.Services; public class NmtEngineService( - IPlatformService platformService, + IEnumerable platformServices, IDataAccessContext dataAccessContext, IRepository engines, - 
IBuildJobService buildJobService, + IBuildJobService buildJobService, ILanguageTagService languageTagService, IClearMLQueueService clearMLQueueService, ISharedFileService sharedFileService ) : ITranslationEngineService { - private readonly IPlatformService _platformService = platformService; + private readonly IPlatformService _platformService = platformServices.First(ps => + ps.EngineGroup == EngineGroup.Translation + ); private readonly IDataAccessContext _dataAccessContext = dataAccessContext; private readonly IRepository _engines = engines; - private readonly IBuildJobService _buildJobService = buildJobService; + private readonly IBuildJobService _buildJobService = buildJobService; private readonly IClearMLQueueService _clearMLQueueService = clearMLQueueService; private readonly ILanguageTagService _languageTagService = languageTagService; private readonly ISharedFileService _sharedFileService = sharedFileService; @@ -24,7 +26,7 @@ public static string GetModelPath(string engineId, int buildRevision) return $"{ModelDirectory}{engineId}_{buildRevision}.tar.gz"; } - public TranslationEngineType Type => TranslationEngineType.Nmt; + public EngineType Type => EngineType.Nmt; private const int MinutesToExpire = 60; @@ -45,7 +47,7 @@ public async Task CreateAsync( EngineId = engineId, SourceLanguage = sourceLanguage, TargetLanguage = targetLanguage, - Type = TranslationEngineType.Nmt, + Type = EngineType.Nmt, IsModelPersisted = isModelPersisted ?? 
false // models are not persisted if not specified }; await _engines.InsertAsync(translationEngine, ct); @@ -75,7 +77,7 @@ public async Task StartBuildAsync( { bool building = !await _buildJobService.StartBuildJobAsync( BuildJobRunnerType.Hangfire, - TranslationEngineType.Nmt, + EngineType.Nmt, engineId, buildId, BuildStage.Preprocess, diff --git a/src/Machine/src/Serval.Machine.Shared/Services/NmtHangfireBuildJobFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/NmtHangfireBuildJobFactory.cs index 4d250188..bcfc5014 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/NmtHangfireBuildJobFactory.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/NmtHangfireBuildJobFactory.cs @@ -4,14 +4,14 @@ namespace Serval.Machine.Shared.Services; public class NmtHangfireBuildJobFactory : IHangfireBuildJobFactory { - public TranslationEngineType EngineType => TranslationEngineType.Nmt; + public EngineType EngineType => EngineType.Nmt; public Job CreateJob(string engineId, string buildId, BuildStage stage, object? data, string? buildOptions) { return stage switch { BuildStage.Preprocess - => CreateJob>( + => CreateJob>( engineId, buildId, "nmt", @@ -19,7 +19,13 @@ public Job CreateJob(string engineId, string buildId, BuildStage stage, object? 
buildOptions ), BuildStage.Postprocess - => CreateJob(engineId, buildId, "nmt", data, buildOptions), + => CreateJob, (int, double)>( + engineId, + buildId, + "nmt", + data, + buildOptions + ), _ => throw new ArgumentException("Unknown build stage.", nameof(stage)), }; } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/NmtPreprocessBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/NmtPreprocessBuildJob.cs index 2e79d09a..26eac7fc 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/NmtPreprocessBuildJob.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/NmtPreprocessBuildJob.cs @@ -1,17 +1,17 @@ namespace Serval.Machine.Shared.Services; public class NmtPreprocessBuildJob( - IPlatformService platformService, + IEnumerable platformServices, IRepository engines, IDataAccessContext dataAccessContext, ILogger logger, - IBuildJobService buildJobService, + IBuildJobService buildJobService, ISharedFileService sharedFileService, ILanguageTagService languageTagService, IParallelCorpusPreprocessingService parallelCorpusPreprocessingService ) - : PreprocessBuildJob( - platformService, + : PreprocessBuildJob( + platformServices.First(ps => ps.EngineGroup == EngineGroup.Translation), engines, dataAccessContext, logger, diff --git a/src/Machine/src/Serval.Machine.Shared/Services/PostprocessBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/PostprocessBuildJob.cs index c9922e41..7620dce5 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/PostprocessBuildJob.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/PostprocessBuildJob.cs @@ -1,14 +1,15 @@ namespace Serval.Machine.Shared.Services; -public class PostprocessBuildJob( +public class PostprocessBuildJob( IPlatformService platformService, - IRepository engines, + IRepository engines, IDataAccessContext dataAccessContext, - IBuildJobService buildJobService, - ILogger logger, + IBuildJobService buildJobService, + ILogger> logger, ISharedFileService sharedFileService, 
IOptionsMonitor options -) : HangfireBuildJob<(int, double)>(platformService, engines, dataAccessContext, buildJobService, logger) +) : HangfireBuildJob(platformService, engines, dataAccessContext, buildJobService, logger) + where TEngine : ITrainingEngine { protected ISharedFileService SharedFileService { get; } = sharedFileService; private readonly BuildJobOptions _buildJobOptions = options.CurrentValue; @@ -30,7 +31,7 @@ CancellationToken cancellationToken ) ) { - await PlatformService.InsertPretranslationsAsync(engineId, pretranslationsStream, cancellationToken); + await PlatformService.InsertInferencesAsync(engineId, pretranslationsStream, cancellationToken); } int additionalCorpusSize = await SaveModelAsync(engineId, buildId); diff --git a/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs index 2d2e06f7..7fdd646e 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/PreprocessBuildJob.cs @@ -1,30 +1,45 @@ namespace Serval.Machine.Shared.Services; -public class PreprocessBuildJob( - IPlatformService platformService, - IRepository engines, - IDataAccessContext dataAccessContext, - ILogger logger, - IBuildJobService buildJobService, - ISharedFileService sharedFileService, - IParallelCorpusPreprocessingService parallelCorpusPreprocessingService -) - : HangfireBuildJob>( - platformService, - engines, - dataAccessContext, - buildJobService, - logger - ) +public class PreprocessBuildJob : HangfireBuildJob> + where TEngine : ITrainingEngine { - private static readonly JsonWriterOptions PretranslateWriterOptions = new() { Indented = true }; + protected static readonly JsonWriterOptions InferenceWriterOptions = new() { Indented = true }; internal BuildJobRunnerType TrainJobRunnerType { get; init; } = BuildJobRunnerType.ClearML; - private readonly ISharedFileService _sharedFileService = 
sharedFileService; + protected readonly ISharedFileService SharedFileService; + protected readonly IParallelCorpusPreprocessingService ParallelCorpusPreprocessingService; + private int _seed = 1234; + private Random _random; + + public PreprocessBuildJob( + IPlatformService platformService, + IRepository engines, + IDataAccessContext dataAccessContext, + ILogger> logger, + IBuildJobService buildJobService, + ISharedFileService sharedFileService, + IParallelCorpusPreprocessingService parallelCorpusPreprocessingService + ) + : base(platformService, engines, dataAccessContext, buildJobService, logger) + { + SharedFileService = sharedFileService; + this.ParallelCorpusPreprocessingService = parallelCorpusPreprocessingService; + _random = new Random(_seed); + } - private readonly IParallelCorpusPreprocessingService _parallelCorpusPreprocessingService = - parallelCorpusPreprocessingService; + internal int Seed + { + get => _seed; + set + { + if (_seed != value) + { + _seed = value; + _random = new Random(_seed); + } + } + } protected override async Task DoWorkAsync( string engineId, @@ -34,7 +49,7 @@ protected override async Task DoWorkAsync( CancellationToken cancellationToken ) { - TranslationEngine? engine = await Engines.GetAsync(e => e.EngineId == engineId, cancellationToken); + TEngine? engine = await Engines.GetAsync(e => e.EngineId == engineId, cancellationToken); if (engine is null) throw new OperationCanceledException($"Engine {engineId} does not exist. 
Build canceled."); @@ -55,7 +70,7 @@ CancellationToken cancellationToken { "EngineId", engineId }, { "BuildId", buildId }, { "NumTrainRows", trainCount }, - { "NumPretranslateRows", pretranslateCount }, + { "NumInferenceRows", pretranslateCount }, { "SourceLanguageResolved", srcLang }, { "TargetLanguageResolved", trgLang } }; @@ -83,7 +98,7 @@ CancellationToken cancellationToken throw new OperationCanceledException(); } - private async Task<(int TrainCount, int PretranslateCount)> WriteDataFilesAsync( + protected virtual async Task<(int TrainCount, int InferenceCount)> WriteDataFilesAsync( string buildId, IReadOnlyList corpora, string? buildOptions, @@ -95,19 +110,20 @@ CancellationToken cancellationToken buildOptionsObject = JsonSerializer.Deserialize(buildOptions); await using StreamWriter sourceTrainWriter = - new(await _sharedFileService.OpenWriteAsync($"builds/{buildId}/train.src.txt", cancellationToken)); + new(await SharedFileService.OpenWriteAsync($"builds/{buildId}/train.src.txt", cancellationToken)); await using StreamWriter targetTrainWriter = - new(await _sharedFileService.OpenWriteAsync($"builds/{buildId}/train.trg.txt", cancellationToken)); - await using Stream pretranslateStream = await _sharedFileService.OpenWriteAsync( + new(await SharedFileService.OpenWriteAsync($"builds/{buildId}/train.trg.txt", cancellationToken)); + + await using Stream pretranslateStream = await SharedFileService.OpenWriteAsync( $"builds/{buildId}/pretranslate.src.json", cancellationToken ); - await using Utf8JsonWriter pretranslateWriter = new(pretranslateStream, PretranslateWriterOptions); + await using Utf8JsonWriter pretranslateWriter = new(pretranslateStream, InferenceWriterOptions); int trainCount = 0; int pretranslateCount = 0; pretranslateWriter.WriteStartArray(); - await _parallelCorpusPreprocessingService.PreprocessAsync( + await ParallelCorpusPreprocessingService.PreprocessAsync( corpora, async row => { @@ -156,7 +172,7 @@ JobCompletionStatus completionStatus { try 
{ - await _sharedFileService.DeleteAsync($"builds/{buildId}/"); + await SharedFileService.DeleteAsync($"builds/{buildId}/"); } catch (Exception e) { diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationEngineServiceV1.cs b/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationEngineServiceV1.cs index 336d98ae..97390562 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationEngineServiceV1.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationEngineServiceV1.cs @@ -8,8 +8,9 @@ public class ServalTranslationEngineServiceV1(IEnumerable _engineServices = - engineServices.ToDictionary(es => es.Type); + private readonly Dictionary _engineServices = engineServices.ToDictionary( + es => es.Type + ); public override async Task Create(CreateRequest request, ServerCallContext context) { @@ -172,15 +173,19 @@ private ITranslationEngineService GetEngineService(string engineTypeStr) { if (_engineServices.TryGetValue(GetEngineType(engineTypeStr), out ITranslationEngineService? 
service)) return service; - throw new RpcException(new Status(StatusCode.InvalidArgument, "The engine type is invalid.")); + throw new RpcException( + new Status(StatusCode.InvalidArgument, $"The engine type {engineTypeStr} is not supported.") + ); } - private static TranslationEngineType GetEngineType(string engineTypeStr) + private static EngineType GetEngineType(string engineTypeStr) { engineTypeStr = engineTypeStr[0].ToString().ToUpperInvariant() + engineTypeStr[1..]; - if (System.Enum.TryParse(engineTypeStr, out TranslationEngineType engineType)) + if (System.Enum.TryParse(engineTypeStr, out EngineType engineType)) return engineType; - throw new RpcException(new Status(StatusCode.InvalidArgument, "The engine type is invalid.")); + throw new RpcException( + new Status(StatusCode.InvalidArgument, $"The engine type {engineTypeStr} is not supported.") + ); } private static Translation.V1.TranslationResult Map(SIL.Machine.Translation.TranslationResult source) @@ -306,8 +311,8 @@ private static SIL.ServiceToolkit.Models.MonolingualCorpus Map(Translation.V1.Mo Files = source.Files.Select(Map).ToList(), TrainOnChapters = trainingFilter == FilterChoice.Chapters ? trainOnChapters : null, TrainOnTextIds = trainingFilter == FilterChoice.TextIds ? trainOnTextIds : null, - PretranslateChapters = pretranslateFilter == FilterChoice.Chapters ? pretranslateChapters : null, - PretranslateTextIds = pretranslateFilter == FilterChoice.TextIds ? pretranslateTextIds : null + InferenceChapters = pretranslateFilter == FilterChoice.Chapters ? pretranslateChapters : null, + InferenceTextIds = pretranslateFilter == FilterChoice.TextIds ? 
pretranslateTextIds : null }; } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformOutboxConstants.cs b/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationPlatformOutboxConstants.cs similarity index 54% rename from src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformOutboxConstants.cs rename to src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationPlatformOutboxConstants.cs index 493cb9ed..a089d371 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformOutboxConstants.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationPlatformOutboxConstants.cs @@ -1,14 +1,14 @@ namespace Serval.Machine.Shared.Services; -public static class ServalPlatformOutboxConstants +public static class ServalTranslationPlatformOutboxConstants { - public const string OutboxId = "ServalPlatform"; + public const string OutboxId = "ServalTranslationPlatform"; public const string BuildStarted = "BuildStarted"; public const string BuildCompleted = "BuildCompleted"; public const string BuildCanceled = "BuildCanceled"; public const string BuildFaulted = "BuildFaulted"; public const string BuildRestarting = "BuildRestarting"; - public const string InsertPretranslations = "InsertPretranslations"; - public const string IncrementTranslationEngineCorpusSize = "IncrementTranslationEngineCorpusSize"; + public const string IncrementTrainEngineCorpusSize = "IncrementTrainEngineCorpusSize"; + public const string InsertInferences = "InsertInferences"; } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformOutboxMessageHandler.cs b/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationPlatformOutboxMessageHandler.cs similarity index 74% rename from src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformOutboxMessageHandler.cs rename to src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationPlatformOutboxMessageHandler.cs index 41132504..4be48091 100644 --- 
a/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformOutboxMessageHandler.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationPlatformOutboxMessageHandler.cs @@ -2,14 +2,14 @@ namespace Serval.Machine.Shared.Services; -public class ServalPlatformOutboxMessageHandler(TranslationPlatformApi.TranslationPlatformApiClient client) +public class ServalTranslationPlatformOutboxMessageHandler(TranslationPlatformApi.TranslationPlatformApiClient client) : IOutboxMessageHandler { private readonly TranslationPlatformApi.TranslationPlatformApiClient _client = client; private static readonly JsonSerializerOptions JsonSerializerOptions = new() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }; - public string OutboxId => ServalPlatformOutboxConstants.OutboxId; + public string OutboxId => ServalTranslationPlatformOutboxConstants.OutboxId; public async Task HandleMessageAsync( string method, @@ -20,37 +20,37 @@ public async Task HandleMessageAsync( { switch (method) { - case ServalPlatformOutboxConstants.BuildStarted: + case ServalTranslationPlatformOutboxConstants.BuildStarted: await _client.BuildStartedAsync( JsonSerializer.Deserialize(content!), cancellationToken: cancellationToken ); break; - case ServalPlatformOutboxConstants.BuildCompleted: + case ServalTranslationPlatformOutboxConstants.BuildCompleted: await _client.BuildCompletedAsync( JsonSerializer.Deserialize(content!), cancellationToken: cancellationToken ); break; - case ServalPlatformOutboxConstants.BuildCanceled: + case ServalTranslationPlatformOutboxConstants.BuildCanceled: await _client.BuildCanceledAsync( JsonSerializer.Deserialize(content!), cancellationToken: cancellationToken ); break; - case ServalPlatformOutboxConstants.BuildFaulted: + case ServalTranslationPlatformOutboxConstants.BuildFaulted: await _client.BuildFaultedAsync( JsonSerializer.Deserialize(content!), cancellationToken: cancellationToken ); break; - case ServalPlatformOutboxConstants.BuildRestarting: + case 
ServalTranslationPlatformOutboxConstants.BuildRestarting: await _client.BuildRestartingAsync( JsonSerializer.Deserialize(content!), cancellationToken: cancellationToken ); break; - case ServalPlatformOutboxConstants.InsertPretranslations: + case ServalTranslationPlatformOutboxConstants.InsertInferences: IAsyncEnumerable pretranslations = JsonSerializer .DeserializeAsyncEnumerable( contentStream!, @@ -59,12 +59,12 @@ await _client.BuildRestartingAsync( ) .OfType(); - using (var call = _client.InsertPretranslations(cancellationToken: cancellationToken)) + using (var call = _client.InsertInferences(cancellationToken: cancellationToken)) { await foreach (Pretranslation pretranslation in pretranslations) { await call.RequestStream.WriteAsync( - new InsertPretranslationsRequest + new InsertInferencesRequest { EngineId = content!, CorpusId = pretranslation.CorpusId, @@ -79,9 +79,9 @@ await call.RequestStream.WriteAsync( await call; } break; - case ServalPlatformOutboxConstants.IncrementTranslationEngineCorpusSize: - await _client.IncrementTranslationEngineCorpusSizeAsync( - JsonSerializer.Deserialize(content!), + case ServalTranslationPlatformOutboxConstants.IncrementTrainEngineCorpusSize: + await _client.IncrementTrainEngineCorpusSizeAsync( + JsonSerializer.Deserialize(content!), cancellationToken: cancellationToken ); break; diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformService.cs b/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationPlatformService.cs similarity index 77% rename from src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformService.cs rename to src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationPlatformService.cs index 429fbb72..155dc4fb 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ServalPlatformService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ServalTranslationPlatformService.cs @@ -2,19 +2,20 @@ namespace Serval.Machine.Shared.Services; -public class 
ServalPlatformService( +public class ServalTranslationPlatformService( TranslationPlatformApi.TranslationPlatformApiClient client, IMessageOutboxService outboxService ) : IPlatformService { + EngineGroup IPlatformService.EngineGroup => EngineGroup.Translation; private readonly TranslationPlatformApi.TranslationPlatformApiClient _client = client; private readonly IMessageOutboxService _outboxService = outboxService; public async Task BuildStartedAsync(string buildId, CancellationToken cancellationToken = default) { await _outboxService.EnqueueMessageAsync( - ServalPlatformOutboxConstants.OutboxId, - ServalPlatformOutboxConstants.BuildStarted, + ServalTranslationPlatformOutboxConstants.OutboxId, + ServalTranslationPlatformOutboxConstants.BuildStarted, buildId, JsonSerializer.Serialize(new BuildStartedRequest { BuildId = buildId }), cancellationToken: cancellationToken @@ -29,8 +30,8 @@ public async Task BuildCompletedAsync( ) { await _outboxService.EnqueueMessageAsync( - ServalPlatformOutboxConstants.OutboxId, - ServalPlatformOutboxConstants.BuildCompleted, + ServalTranslationPlatformOutboxConstants.OutboxId, + ServalTranslationPlatformOutboxConstants.BuildCompleted, buildId, JsonSerializer.Serialize( new BuildCompletedRequest @@ -47,8 +48,8 @@ await _outboxService.EnqueueMessageAsync( public async Task BuildCanceledAsync(string buildId, CancellationToken cancellationToken = default) { await _outboxService.EnqueueMessageAsync( - ServalPlatformOutboxConstants.OutboxId, - ServalPlatformOutboxConstants.BuildCanceled, + ServalTranslationPlatformOutboxConstants.OutboxId, + ServalTranslationPlatformOutboxConstants.BuildCanceled, buildId, JsonSerializer.Serialize(new BuildCanceledRequest { BuildId = buildId }), cancellationToken: cancellationToken @@ -58,8 +59,8 @@ await _outboxService.EnqueueMessageAsync( public async Task BuildFaultedAsync(string buildId, string message, CancellationToken cancellationToken = default) { await _outboxService.EnqueueMessageAsync( - 
ServalPlatformOutboxConstants.OutboxId, - ServalPlatformOutboxConstants.BuildFaulted, + ServalTranslationPlatformOutboxConstants.OutboxId, + ServalTranslationPlatformOutboxConstants.BuildFaulted, buildId, JsonSerializer.Serialize(new BuildFaultedRequest { BuildId = buildId, Message = message }), cancellationToken: cancellationToken @@ -69,8 +70,8 @@ await _outboxService.EnqueueMessageAsync( public async Task BuildRestartingAsync(string buildId, CancellationToken cancellationToken = default) { await _outboxService.EnqueueMessageAsync( - ServalPlatformOutboxConstants.OutboxId, - ServalPlatformOutboxConstants.BuildRestarting, + ServalTranslationPlatformOutboxConstants.OutboxId, + ServalTranslationPlatformOutboxConstants.BuildRestarting, buildId, JsonSerializer.Serialize(new BuildRestartingRequest { BuildId = buildId }), cancellationToken: cancellationToken @@ -105,15 +106,15 @@ await _client.UpdateBuildStatusAsync( ); } - public async Task InsertPretranslationsAsync( + public async Task InsertInferencesAsync( string engineId, Stream pretranslationsStream, CancellationToken cancellationToken = default ) { await _outboxService.EnqueueMessageAsync( - ServalPlatformOutboxConstants.OutboxId, - ServalPlatformOutboxConstants.InsertPretranslations, + ServalTranslationPlatformOutboxConstants.OutboxId, + ServalTranslationPlatformOutboxConstants.InsertInferences, engineId, engineId, pretranslationsStream, @@ -128,12 +129,10 @@ public async Task IncrementTrainSizeAsync( ) { await _outboxService.EnqueueMessageAsync( - ServalPlatformOutboxConstants.OutboxId, - ServalPlatformOutboxConstants.IncrementTranslationEngineCorpusSize, + ServalTranslationPlatformOutboxConstants.OutboxId, + ServalTranslationPlatformOutboxConstants.IncrementTrainEngineCorpusSize, engineId, - JsonSerializer.Serialize( - new IncrementTranslationEngineCorpusSizeRequest { EngineId = engineId, Count = count } - ), + JsonSerializer.Serialize(new IncrementTrainEngineCorpusSizeRequest { EngineId = engineId, Count = 
count }), cancellationToken: cancellationToken ); } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentEngineServiceV1.cs b/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentEngineServiceV1.cs new file mode 100644 index 00000000..7330df08 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentEngineServiceV1.cs @@ -0,0 +1,215 @@ +using Google.Protobuf.WellKnownTypes; +using Serval.WordAlignment.V1; + +namespace Serval.Machine.Shared.Services; + +public class ServalWordAlignmentEngineServiceV1(IEnumerable engineServices) + : WordAlignmentEngineApi.WordAlignmentEngineApiBase +{ + private static readonly Empty Empty = new(); + + private readonly Dictionary _engineServices = engineServices.ToDictionary( + es => es.Type + ); + + public override async Task Create(CreateRequest request, ServerCallContext context) + { + IWordAlignmentEngineService engineService = GetEngineService(request.EngineType); + await engineService.CreateAsync( + request.EngineId, + request.HasEngineName ? 
request.EngineName : null, + request.SourceLanguage, + request.TargetLanguage, + cancellationToken: context.CancellationToken + ); + return Empty; + } + + public override async Task Delete(DeleteRequest request, ServerCallContext context) + { + IWordAlignmentEngineService engineService = GetEngineService(request.EngineType); + await engineService.DeleteAsync(request.EngineId, context.CancellationToken); + return Empty; + } + + public override async Task GetWordAlignment( + GetWordAlignmentRequest request, + ServerCallContext context + ) + { + IWordAlignmentEngineService engineService = GetEngineService(request.EngineType); + SIL.Machine.Translation.WordAlignmentResult result; + try + { + result = await engineService.GetBestPhraseAlignmentAsync( + request.EngineId, + request.SourceSegment, + request.TargetSegment, + context.CancellationToken + ); + } + catch (EngineNotBuiltException e) + { + throw new RpcException(new Status(StatusCode.Aborted, e.Message, e)); + } + + return new GetWordAlignmentResponse { Result = Map(result) }; + } + + public override async Task StartBuild(StartBuildRequest request, ServerCallContext context) + { + IWordAlignmentEngineService engineService = GetEngineService(request.EngineType); + SIL.ServiceToolkit.Models.ParallelCorpus[] corpora = request.Corpora.Select(Map).ToArray(); + try + { + await engineService.StartBuildAsync( + request.EngineId, + request.BuildId, + request.HasOptions ? 
request.Options : null, + corpora, + context.CancellationToken + ); + } + catch (InvalidOperationException e) + { + throw new RpcException(new Status(StatusCode.Aborted, e.Message, e)); + } + return Empty; + } + + public override async Task CancelBuild(CancelBuildRequest request, ServerCallContext context) + { + IWordAlignmentEngineService engineService = GetEngineService(request.EngineType); + try + { + await engineService.CancelBuildAsync(request.EngineId, context.CancellationToken); + } + catch (InvalidOperationException e) + { + throw new RpcException(new Status(StatusCode.Aborted, e.Message, e)); + } + return Empty; + } + + public override Task GetQueueSize(GetQueueSizeRequest request, ServerCallContext context) + { + IWordAlignmentEngineService engineService = GetEngineService(request.EngineType); + return Task.FromResult(new GetQueueSizeResponse { Size = engineService.GetQueueSize() }); + } + + private IWordAlignmentEngineService GetEngineService(string engineTypeStr) + { + if (_engineServices.TryGetValue(GetEngineType(engineTypeStr), out IWordAlignmentEngineService? 
service)) + return service; + throw new RpcException( + new Status(StatusCode.InvalidArgument, $"The engine type {engineTypeStr} is not supported.") + ); + } + + private static EngineType GetEngineType(string engineTypeStr) + { + engineTypeStr = engineTypeStr[0].ToString().ToUpperInvariant() + engineTypeStr[1..]; + if (System.Enum.TryParse(engineTypeStr, out EngineType engineType)) + return engineType; + throw new RpcException( + new Status(StatusCode.InvalidArgument, $"The engine type {engineTypeStr} is not supported.") + ); + } + + private static WordAlignment.V1.WordAlignmentResult Map(SIL.Machine.Translation.WordAlignmentResult source) + { + return new WordAlignment.V1.WordAlignmentResult + { + SourceTokens = { source.SourceTokens }, + TargetTokens = { source.TargetTokens }, + Alignment = { Map(source.Alignment) }, + Confidences = { source.Confidences } + }; + } + + private static IEnumerable Map(WordAlignmentMatrix source) + { + for (int i = 0; i < source.RowCount; i++) + { + for (int j = 0; j < source.ColumnCount; j++) + { + if (source[i, j]) + yield return new WordAlignment.V1.AlignedWordPair { SourceIndex = i, TargetIndex = j }; + } + } + } + + private static SIL.ServiceToolkit.Models.ParallelCorpus Map(WordAlignment.V1.ParallelCorpus source) + { + return new SIL.ServiceToolkit.Models.ParallelCorpus + { + Id = source.Id, + SourceCorpora = source.SourceCorpora.Select(Map).ToList(), + TargetCorpora = source.TargetCorpora.Select(Map).ToList() + }; + } + + private static SIL.ServiceToolkit.Models.MonolingualCorpus Map(WordAlignment.V1.MonolingualCorpus source) + { + var trainOnChapters = source.TrainOnChapters.ToDictionary( + kvp => kvp.Key, + kvp => kvp.Value.Chapters.ToHashSet() + ); + var trainOnTextIds = source.TrainOnTextIds.ToHashSet(); + FilterChoice trainingFilter = GetFilterChoice(trainOnChapters, trainOnTextIds, source.TrainOnAll); + + var wordAlignOnChapters = source.WordAlignOnChapters.ToDictionary( + kvp => kvp.Key, + kvp => 
kvp.Value.Chapters.ToHashSet() + ); + var wordAlignOnTextIds = source.WordAlignOnTextIds.ToHashSet(); + FilterChoice wordAlignOnFilter = GetFilterChoice( + wordAlignOnChapters, + wordAlignOnTextIds, + source.WordAlignOnAll + ); + + return new SIL.ServiceToolkit.Models.MonolingualCorpus + { + Id = source.Id, + Language = source.Language, + Files = source.Files.Select(Map).ToList(), + TrainOnChapters = trainingFilter == FilterChoice.Chapters ? trainOnChapters : null, + TrainOnTextIds = trainingFilter == FilterChoice.TextIds ? trainOnTextIds : null, + InferenceChapters = wordAlignOnFilter == FilterChoice.Chapters ? wordAlignOnChapters : null, + InferenceTextIds = wordAlignOnFilter == FilterChoice.TextIds ? wordAlignOnTextIds : null + }; + } + + private static SIL.ServiceToolkit.Models.CorpusFile Map(WordAlignment.V1.CorpusFile source) + { + return new SIL.ServiceToolkit.Models.CorpusFile + { + Location = source.Location, + Format = (SIL.ServiceToolkit.Models.FileFormat)source.Format, + TextId = source.TextId + }; + } + + private enum FilterChoice + { + Chapters, + TextIds, + None + } + + private static FilterChoice GetFilterChoice( + IReadOnlyDictionary> chapters, + HashSet textIds, + bool noFilter + ) + { + // Only either textIds or Scripture Range will be used at a time + // TextIds may be an empty array, so prefer that if both are empty (which applies to both scripture and text) + if (noFilter || (chapters is null && textIds is null)) + return FilterChoice.None; + if (chapters is null || chapters.Count == 0) + return FilterChoice.TextIds; + return FilterChoice.Chapters; + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentPlatformOutboxConstants.cs b/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentPlatformOutboxConstants.cs new file mode 100644 index 00000000..573a40d9 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentPlatformOutboxConstants.cs @@ -0,0 +1,14 @@ +namespace 
Serval.Machine.Shared.Services; + +public static class ServalWordAlignmentPlatformOutboxConstants +{ + public const string OutboxId = "ServalWordAlignmentPlatform"; + + public const string BuildStarted = "BuildStarted"; + public const string BuildCompleted = "BuildCompleted"; + public const string BuildCanceled = "BuildCanceled"; + public const string BuildFaulted = "BuildFaulted"; + public const string BuildRestarting = "BuildRestarting"; + public const string IncrementTrainEngineCorpusSize = "IncrementTrainEngineCorpusSize"; + public const string InsertInferences = "InsertInferences"; +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentPlatformOutboxMessageHandler.cs b/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentPlatformOutboxMessageHandler.cs new file mode 100644 index 00000000..b063b8f7 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentPlatformOutboxMessageHandler.cs @@ -0,0 +1,140 @@ +using Serval.WordAlignment.V1; + +namespace Serval.Machine.Shared.Services; + +public class ServalWordAlignmentPlatformOutboxMessageHandler( + WordAlignmentPlatformApi.WordAlignmentPlatformApiClient client +) : IOutboxMessageHandler +{ + private readonly WordAlignmentPlatformApi.WordAlignmentPlatformApiClient _client = client; + private static readonly JsonSerializerOptions JsonSerializerOptions = + new() { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }; + + public string OutboxId => ServalWordAlignmentPlatformOutboxConstants.OutboxId; + + public async Task HandleMessageAsync( + string method, + string? content, + Stream? 
contentStream, + CancellationToken cancellationToken = default + ) + { + switch (method) + { + case ServalWordAlignmentPlatformOutboxConstants.BuildStarted: + await _client.BuildStartedAsync( + JsonSerializer.Deserialize(content!), + cancellationToken: cancellationToken + ); + break; + case ServalWordAlignmentPlatformOutboxConstants.BuildCompleted: + await _client.BuildCompletedAsync( + JsonSerializer.Deserialize(content!), + cancellationToken: cancellationToken + ); + break; + case ServalWordAlignmentPlatformOutboxConstants.BuildCanceled: + await _client.BuildCanceledAsync( + JsonSerializer.Deserialize(content!), + cancellationToken: cancellationToken + ); + break; + case ServalWordAlignmentPlatformOutboxConstants.BuildFaulted: + await _client.BuildFaultedAsync( + JsonSerializer.Deserialize(content!), + cancellationToken: cancellationToken + ); + break; + case ServalWordAlignmentPlatformOutboxConstants.BuildRestarting: + await _client.BuildRestartingAsync( + JsonSerializer.Deserialize(content!), + cancellationToken: cancellationToken + ); + break; + case ServalWordAlignmentPlatformOutboxConstants.InsertInferences: + // Copy the shared options so the extra converter is registered only for this call. + var jsonSerializerOptions = new JsonSerializerOptions(JsonSerializerOptions); + jsonSerializerOptions.Converters.Add(new WordAlignmentJsonConverter()); + // Pass the local copy (not the static field) so the converter added above takes effect. + IAsyncEnumerable wordAlignments = JsonSerializer + .DeserializeAsyncEnumerable( + contentStream!, + jsonSerializerOptions, + cancellationToken + ) + .OfType(); + + using (var call = _client.InsertInferences(cancellationToken: cancellationToken)) + { + await foreach (Models.WordAlignment wordAlignment in wordAlignments) + { + await call.RequestStream.WriteAsync( + new InsertInferencesRequest + { + EngineId = content!, + CorpusId = wordAlignment.CorpusId, + TextId = wordAlignment.TextId, + Refs = { wordAlignment.Refs }, + SourceTokens = { wordAlignment.SourceTokens }, + TargetTokens = { wordAlignment.TargetTokens }, + Confidences = { wordAlignment.Confidences }, + Alignment = {
Map(wordAlignment.Alignment) } + }, + cancellationToken + ); + } + await call.RequestStream.CompleteAsync(); + await call; + } + break; + case ServalWordAlignmentPlatformOutboxConstants.IncrementTrainEngineCorpusSize: + await _client.IncrementTrainEngineCorpusSizeAsync( + JsonSerializer.Deserialize(content!), + cancellationToken: cancellationToken + ); + break; + default: + throw new InvalidOperationException($"Encountered a message with the unrecognized method '{method}'."); + } + } + + private static IEnumerable Map( + IEnumerable alignedWordPairs + ) + { + foreach (SIL.Machine.Corpora.AlignedWordPair alignedWordPair in alignedWordPairs) + { + yield return new WordAlignment.V1.AlignedWordPair + { + SourceIndex = alignedWordPair.SourceIndex, + TargetIndex = alignedWordPair.TargetIndex + }; + } + } +} + +public class WordAlignmentJsonConverter : JsonConverter +{ + public override object Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + switch (reader.TokenType) + { + case JsonTokenType.True: + return true; + case JsonTokenType.False: + return false; + case JsonTokenType.Number when reader.TryGetInt64(out long l): + return l; + case JsonTokenType.Number: + return reader.GetDouble(); + case JsonTokenType.String: + var str = reader.GetString(); + if (SIL.Machine.Corpora.AlignedWordPair.TryParse(str, out var alignedWordPair)) + return alignedWordPair; + return str!; + default: + throw new JsonException(); + } + } + + public override void Write(Utf8JsonWriter writer, object objectToWrite, JsonSerializerOptions options) => + JsonSerializer.Serialize(writer, objectToWrite, objectToWrite.GetType(), options); +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentPlatformService.cs b/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentPlatformService.cs new file mode 100644 index 00000000..23f611ee --- /dev/null +++ 
b/src/Machine/src/Serval.Machine.Shared/Services/ServalWordAlignmentPlatformService.cs @@ -0,0 +1,139 @@ +using Serval.WordAlignment.V1; + +namespace Serval.Machine.Shared.Services; + +public class ServalWordAlignmentPlatformService( + WordAlignmentPlatformApi.WordAlignmentPlatformApiClient client, + IMessageOutboxService outboxService +) : IPlatformService +{ + EngineGroup IPlatformService.EngineGroup => EngineGroup.WordAlignment; + private readonly WordAlignmentPlatformApi.WordAlignmentPlatformApiClient _client = client; + private readonly IMessageOutboxService _outboxService = outboxService; + + public async Task BuildStartedAsync(string buildId, CancellationToken cancellationToken = default) + { + await _outboxService.EnqueueMessageAsync( + ServalWordAlignmentPlatformOutboxConstants.OutboxId, + ServalWordAlignmentPlatformOutboxConstants.BuildStarted, + buildId, + JsonSerializer.Serialize(new BuildStartedRequest { BuildId = buildId }), + cancellationToken: cancellationToken + ); + } + + public async Task BuildCompletedAsync( + string buildId, + int trainSize, + double confidence, + CancellationToken cancellationToken = default + ) + { + await _outboxService.EnqueueMessageAsync( + ServalWordAlignmentPlatformOutboxConstants.OutboxId, + ServalWordAlignmentPlatformOutboxConstants.BuildCompleted, + buildId, + JsonSerializer.Serialize( + new BuildCompletedRequest + { + BuildId = buildId, + CorpusSize = trainSize, + Confidence = confidence + } + ), + cancellationToken: cancellationToken + ); + } + + public async Task BuildCanceledAsync(string buildId, CancellationToken cancellationToken = default) + { + await _outboxService.EnqueueMessageAsync( + ServalWordAlignmentPlatformOutboxConstants.OutboxId, + ServalWordAlignmentPlatformOutboxConstants.BuildCanceled, + buildId, + JsonSerializer.Serialize(new BuildCanceledRequest { BuildId = buildId }), + cancellationToken: cancellationToken + ); + } + + public async Task BuildFaultedAsync(string buildId, string message, 
CancellationToken cancellationToken = default) + { + await _outboxService.EnqueueMessageAsync( + ServalWordAlignmentPlatformOutboxConstants.OutboxId, + ServalWordAlignmentPlatformOutboxConstants.BuildFaulted, + buildId, + JsonSerializer.Serialize(new BuildFaultedRequest { BuildId = buildId, Message = message }), + cancellationToken: cancellationToken + ); + } + + public async Task BuildRestartingAsync(string buildId, CancellationToken cancellationToken = default) + { + await _outboxService.EnqueueMessageAsync( + ServalWordAlignmentPlatformOutboxConstants.OutboxId, + ServalWordAlignmentPlatformOutboxConstants.BuildRestarting, + buildId, + JsonSerializer.Serialize(new BuildRestartingRequest { BuildId = buildId }), + cancellationToken: cancellationToken + ); + } + + public async Task UpdateBuildStatusAsync( + string buildId, + ProgressStatus progressStatus, + int? queueDepth = null, + CancellationToken cancellationToken = default + ) + { + var request = new UpdateBuildStatusRequest { BuildId = buildId, Step = progressStatus.Step }; + if (progressStatus.PercentCompleted.HasValue) + request.PercentCompleted = progressStatus.PercentCompleted.Value; + if (progressStatus.Message is not null) + request.Message = progressStatus.Message; + if (queueDepth is not null) + request.QueueDepth = queueDepth.Value; + + // just try to send it - if it fails, it fails. + await _client.UpdateBuildStatusAsync(request, cancellationToken: cancellationToken); + } + + public async Task UpdateBuildStatusAsync(string buildId, int step, CancellationToken cancellationToken = default) + { + // just try to send it - if it fails, it fails. 
+ await _client.UpdateBuildStatusAsync( + new UpdateBuildStatusRequest { BuildId = buildId, Step = step }, + cancellationToken: cancellationToken + ); + } + + public async Task InsertInferencesAsync( + string engineId, + Stream wordAlignmentsStream, + CancellationToken cancellationToken = default + ) + { + await _outboxService.EnqueueMessageAsync( + ServalWordAlignmentPlatformOutboxConstants.OutboxId, + ServalWordAlignmentPlatformOutboxConstants.InsertInferences, + engineId, + engineId, + wordAlignmentsStream, + cancellationToken: cancellationToken + ); + } + + public async Task IncrementTrainSizeAsync( + string engineId, + int count = 1, + CancellationToken cancellationToken = default + ) + { + await _outboxService.EnqueueMessageAsync( + ServalWordAlignmentPlatformOutboxConstants.OutboxId, + ServalWordAlignmentPlatformOutboxConstants.IncrementTrainEngineCorpusSize, + engineId, + JsonSerializer.Serialize(new IncrementTrainEngineCorpusSizeRequest { EngineId = engineId, Count = count }), + cancellationToken: cancellationToken + ); + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferClearMLBuildJobFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferClearMLBuildJobFactory.cs index 6e0b6b9c..fe97eaeb 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferClearMLBuildJobFactory.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferClearMLBuildJobFactory.cs @@ -8,7 +8,7 @@ IRepository engines private readonly ISharedFileService _sharedFileService = sharedFileService; private readonly IRepository _engines = engines; - public TranslationEngineType EngineType => TranslationEngineType.SmtTransfer; + public EngineType EngineType => EngineType.SmtTransfer; public async Task CreateJobScriptAsync( string engineId, diff --git a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferEngineService.cs b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferEngineService.cs index 7c4f10b4..5b401c4f 100644 
--- a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferEngineService.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferEngineService.cs @@ -2,25 +2,27 @@ public class SmtTransferEngineService( IDistributedReaderWriterLockFactory lockFactory, - IPlatformService platformService, + IEnumerable platformServices, IDataAccessContext dataAccessContext, IRepository engines, IRepository trainSegmentPairs, SmtTransferEngineStateService stateService, - IBuildJobService buildJobService, + IBuildJobService buildJobService, IClearMLQueueService clearMLQueueService ) : ITranslationEngineService { private readonly IDistributedReaderWriterLockFactory _lockFactory = lockFactory; - private readonly IPlatformService _platformService = platformService; + private readonly IPlatformService _platformService = platformServices.First(ps => + ps.EngineGroup == EngineGroup.Translation + ); private readonly IDataAccessContext _dataAccessContext = dataAccessContext; private readonly IRepository _engines = engines; private readonly IRepository _trainSegmentPairs = trainSegmentPairs; private readonly SmtTransferEngineStateService _stateService = stateService; - private readonly IBuildJobService _buildJobService = buildJobService; + private readonly IBuildJobService _buildJobService = buildJobService; private readonly IClearMLQueueService _clearMLQueueService = clearMLQueueService; - public TranslationEngineType Type => TranslationEngineType.SmtTransfer; + public EngineType Type => EngineType.SmtTransfer; public async Task CreateAsync( string engineId, @@ -47,7 +49,7 @@ public async Task CreateAsync( EngineId = engineId, SourceLanguage = sourceLanguage, TargetLanguage = targetLanguage, - Type = TranslationEngineType.SmtTransfer, + Type = EngineType.SmtTransfer, IsModelPersisted = isModelPersisted ?? 
true // models are persisted if not specified }; await _engines.InsertAsync(translationEngine, ct); @@ -186,7 +188,7 @@ public async Task StartBuildAsync( { bool building = !await _buildJobService.StartBuildJobAsync( BuildJobRunnerType.Hangfire, - TranslationEngineType.SmtTransfer, + EngineType.SmtTransfer, engineId, buildId, BuildStage.Preprocess, diff --git a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferHangfireBuildJobFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferHangfireBuildJobFactory.cs index 71f2d09a..65e13e24 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferHangfireBuildJobFactory.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferHangfireBuildJobFactory.cs @@ -4,14 +4,14 @@ namespace Serval.Machine.Shared.Services; public class SmtTransferHangfireBuildJobFactory : IHangfireBuildJobFactory { - public TranslationEngineType EngineType => TranslationEngineType.SmtTransfer; + public EngineType EngineType => EngineType.SmtTransfer; public Job CreateJob(string engineId, string buildId, BuildStage stage, object? data, string? buildOptions) { return stage switch { BuildStage.Preprocess - => CreateJob>( + => CreateJob>( engineId, buildId, "smt_transfer", @@ -19,14 +19,20 @@ public Job CreateJob(string engineId, string buildId, BuildStage stage, object? 
buildOptions ), BuildStage.Postprocess - => CreateJob( + => CreateJob( engineId, buildId, "smt_transfer", data, buildOptions ), - BuildStage.Train => CreateJob(engineId, buildId, "smt_transfer", buildOptions), + BuildStage.Train + => CreateJob( + engineId, + buildId, + "smt_transfer", + buildOptions + ), _ => throw new ArgumentException("Unknown build stage.", nameof(stage)), }; } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferPostprocessBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferPostprocessBuildJob.cs index 8b30dc6d..5ad0c4bd 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferPostprocessBuildJob.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferPostprocessBuildJob.cs @@ -1,10 +1,10 @@ namespace Serval.Machine.Shared.Services; public class SmtTransferPostprocessBuildJob( - IPlatformService platformService, + IEnumerable platformServices, IRepository engines, IDataAccessContext dataAccessContext, - IBuildJobService buildJobService, + IBuildJobService buildJobService, ILogger logger, ISharedFileService sharedFileService, IDistributedReaderWriterLockFactory lockFactory, @@ -14,8 +14,8 @@ public class SmtTransferPostprocessBuildJob( IOptionsMonitor buildOptions, IOptionsMonitor engineOptions ) - : PostprocessBuildJob( - platformService, + : PostprocessBuildJob( + platformServices.First(ps => ps.EngineGroup == EngineGroup.Translation), engines, dataAccessContext, buildJobService, diff --git a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferPreprocessBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferPreprocessBuildJob.cs index 7e1627a6..7b377e1e 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferPreprocessBuildJob.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferPreprocessBuildJob.cs @@ -1,18 +1,18 @@ namespace Serval.Machine.Shared.Services; public class SmtTransferPreprocessBuildJob( - IPlatformService 
platformService, + IEnumerable platformServices, IRepository engines, IDataAccessContext dataAccessContext, - ILogger logger, - IBuildJobService buildJobService, + ILogger logger, + IBuildJobService buildJobService, ISharedFileService sharedFileService, IDistributedReaderWriterLockFactory lockFactory, IRepository trainSegmentPairs, IParallelCorpusPreprocessingService parallelCorpusPreprocessingService ) - : PreprocessBuildJob( - platformService, + : PreprocessBuildJob( + platformServices.First(ps => ps.EngineGroup == EngineGroup.Translation), engines, dataAccessContext, logger, diff --git a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferTrainBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferTrainBuildJob.cs index e81fc354..f1292c92 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferTrainBuildJob.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/SmtTransferTrainBuildJob.cs @@ -1,16 +1,23 @@ namespace Serval.Machine.Shared.Services; public class SmtTransferTrainBuildJob( - IPlatformService platformService, + IEnumerable platformServices, IRepository engines, IDataAccessContext dataAccessContext, - IBuildJobService buildJobService, + IBuildJobService buildJobService, ILogger logger, ISharedFileService sharedFileService, ITruecaserFactory truecaserFactory, ISmtModelFactory smtModelFactory, ITransferEngineFactory transferEngineFactory -) : HangfireBuildJob(platformService, engines, dataAccessContext, buildJobService, logger) +) + : HangfireBuildJob( + platformServices.First(ps => ps.EngineGroup == EngineGroup.Translation), + engines, + dataAccessContext, + buildJobService, + logger + ) { private static readonly JsonWriterOptions PretranslateWriterOptions = new() { Indented = true }; private static readonly JsonSerializerOptions JsonSerializerOptions = @@ -55,7 +62,7 @@ CancellationToken cancellationToken bool canceling = !await BuildJobService.StartBuildJobAsync( BuildJobRunnerType.Hangfire, - 
TranslationEngineType.SmtTransfer, + EngineType.SmtTransfer, engineId, buildId, BuildStage.Postprocess, diff --git a/src/Machine/src/Serval.Machine.Shared/Services/StatisticalEngineService.cs b/src/Machine/src/Serval.Machine.Shared/Services/StatisticalEngineService.cs new file mode 100644 index 00000000..e881485f --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/StatisticalEngineService.cs @@ -0,0 +1,192 @@ +namespace Serval.Machine.Shared.Services; + +public class StatisticalEngineService( + IDistributedReaderWriterLockFactory lockFactory, + IEnumerable platformServices, + IDataAccessContext dataAccessContext, + IRepository engines, + WordAlignmentEngineStateService stateService, + IBuildJobService buildJobService, + IClearMLQueueService clearMLQueueService +) : IWordAlignmentEngineService +{ + private readonly IDistributedReaderWriterLockFactory _lockFactory = lockFactory; + private readonly IPlatformService _platformService = platformServices.First(ps => + ps.EngineGroup == EngineGroup.WordAlignment + ); + private readonly IDataAccessContext _dataAccessContext = dataAccessContext; + private readonly IRepository _engines = engines; + private readonly WordAlignmentEngineStateService _stateService = stateService; + private readonly IBuildJobService _buildJobService = buildJobService; + private readonly IClearMLQueueService _clearMLQueueService = clearMLQueueService; + + public EngineType Type => EngineType.Statistical; + + public async Task CreateAsync( + string engineId, + string? 
engineName, + string sourceLanguage, + string targetLanguage, + CancellationToken cancellationToken = default + ) + { + WordAlignmentEngine wordAlignmentEngine = await _dataAccessContext.WithTransactionAsync( + async ct => + { + var waEngine = new WordAlignmentEngine + { + EngineId = engineId, + SourceLanguage = sourceLanguage, + TargetLanguage = targetLanguage, + Type = EngineType.Statistical, + }; + await _engines.InsertAsync(waEngine, ct); + await _buildJobService.CreateEngineAsync(engineId, engineName, ct); + return waEngine; + }, + cancellationToken: cancellationToken + ); + + WordAlignmentEngineState state = _stateService.Get(engineId); + state.InitNew(); + return wordAlignmentEngine; + } + + public async Task GetBestPhraseAlignmentAsync( + string engineId, + string sourceSegment, + string targetSegment, + CancellationToken cancellationToken = default + ) + { + WordAlignmentEngine engine = await GetBuiltEngineAsync(engineId, cancellationToken); + WordAlignmentEngineState state = _stateService.Get(engineId); + + IDistributedReaderWriterLock @lock = await _lockFactory.CreateAsync(engineId, cancellationToken); + WordAlignmentResult result = await @lock.ReaderLockAsync( + async ct => + { + IWordAlignmentEngine wordAlignmentEngine = await state.GetEngineAsync(engine.BuildRevision, ct); + LatinWordTokenizer tokenizer = new(); + + // there is no way to cancel this call + IReadOnlyList sourceTokens = tokenizer.Tokenize(sourceSegment).ToList(); + IReadOnlyList targetTokens = tokenizer.Tokenize(targetSegment).ToList(); + IReadOnlyCollection wordPairs = wordAlignmentEngine.GetBestAlignedWordPairs( + sourceTokens, + targetTokens + ); + wordAlignmentEngine.ComputeAlignedWordPairScores(sourceTokens, targetTokens, wordPairs); + return new WordAlignmentResult( + sourceTokens: sourceTokens, + targetTokens: targetTokens, + alignment: new WordAlignmentMatrix( + sourceTokens.Count, + targetTokens.Count, + wordPairs.Select(wp => (wp.SourceIndex, wp.TargetIndex)) + ), + 
confidences: wordPairs.Select(wp => wp.AlignmentScore * wp.TranslationScore).ToList() + ); + }, + cancellationToken: cancellationToken + ); + + state.Touch(); + return result; + } + + public async Task DeleteAsync(string engineId, CancellationToken cancellationToken = default) + { + // Cancel any build job for this engine before its record is deleted. + await CancelBuildJobAsync(engineId, cancellationToken); + + await _dataAccessContext.WithTransactionAsync( + async ct => + { + await _engines.DeleteAsync(e => e.EngineId == engineId, ct); + }, + cancellationToken: cancellationToken + ); + await _buildJobService.DeleteEngineAsync(engineId, CancellationToken.None); + + WordAlignmentEngineState state = _stateService.Get(engineId); + _stateService.Remove(engineId); + // there is no way to cancel this call + state.DeleteData(); + state.Dispose(); + await _lockFactory.DeleteAsync(engineId, CancellationToken.None); + } + + public async Task StartBuildAsync( + string engineId, + string buildId, + string? buildOptions, + IReadOnlyList corpora, + CancellationToken cancellationToken = default + ) + { + bool building = !await _buildJobService.StartBuildJobAsync( + BuildJobRunnerType.Hangfire, + EngineType.Statistical, + engineId, + buildId, + BuildStage.Preprocess, + corpora, + buildOptions, + cancellationToken + ); + // If there is a pending/running build, then no need to start a new one.
+ if (building) + throw new InvalidOperationException("The engine is already building or in the process of canceling."); + + WordAlignmentEngineState state = _stateService.Get(engineId); + state.Touch(); + } + + public async Task CancelBuildAsync(string engineId, CancellationToken cancellationToken = default) + { + bool building = await CancelBuildJobAsync(engineId, cancellationToken); + if (!building) + throw new InvalidOperationException("The engine is not currently building."); + + WordAlignmentEngineState state = _stateService.Get(engineId); + state.Touch(); + } + + public int GetQueueSize() + { + return _clearMLQueueService.GetQueueSize(Type); + } + + private async Task CancelBuildJobAsync(string engineId, CancellationToken cancellationToken) + { + string? buildId = null; + await _dataAccessContext.WithTransactionAsync( + async ct => + { + (buildId, BuildJobState jobState) = await _buildJobService.CancelBuildJobAsync(engineId, ct); + if (buildId is not null && jobState is BuildJobState.None) + await _platformService.BuildCanceledAsync(buildId, CancellationToken.None); + }, + cancellationToken: cancellationToken + ); + return buildId is not null; + } + + private async Task GetEngineAsync(string engineId, CancellationToken cancellationToken) + { + WordAlignmentEngine? 
engine = await _engines.GetAsync(e => e.EngineId == engineId, cancellationToken); + if (engine is null) + throw new InvalidOperationException($"The engine {engineId} does not exist."); + return engine; + } + + private async Task GetBuiltEngineAsync(string engineId, CancellationToken cancellationToken) + { + WordAlignmentEngine engine = await GetEngineAsync(engineId, cancellationToken); + if (engine.BuildRevision == 0) + throw new EngineNotBuiltException("The engine must be built first."); + return engine; + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/StatisticalHangfireBuildJobFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/StatisticalHangfireBuildJobFactory.cs new file mode 100644 index 00000000..7a22fef4 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/StatisticalHangfireBuildJobFactory.cs @@ -0,0 +1,39 @@ +using static Serval.Machine.Shared.Services.HangfireBuildJobRunner; + +namespace Serval.Machine.Shared.Services; + +public class StatisticalHangfireBuildJobFactory : IHangfireBuildJobFactory +{ + public EngineType EngineType => EngineType.Statistical; + + public Job CreateJob(string engineId, string buildId, BuildStage stage, object? data, string? 
buildOptions) + { + return stage switch + { + BuildStage.Preprocess + => CreateJob>( + engineId, + buildId, + "statistical", + data, + buildOptions + ), + BuildStage.Postprocess + => CreateJob( + engineId, + buildId, + "statistical", + data, + buildOptions + ), + BuildStage.Train + => CreateJob( + engineId, + buildId, + "statistical", + buildOptions + ), + _ => throw new ArgumentException("Unknown build stage.", nameof(stage)), + }; + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/StatisticalPostprocessBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/StatisticalPostprocessBuildJob.cs new file mode 100644 index 00000000..cfe8354b --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/StatisticalPostprocessBuildJob.cs @@ -0,0 +1,88 @@ +namespace Serval.Machine.Shared.Services; + +public class StatisticalPostprocessBuildJob( + IEnumerable platformServices, + IRepository engines, + IDataAccessContext dataAccessContext, + IBuildJobService buildJobService, + ILogger logger, + ISharedFileService sharedFileService, + IDistributedReaderWriterLockFactory lockFactory, + ISmtModelFactory smtModelFactory, + IOptionsMonitor buildOptions, + IOptionsMonitor engineOptions +) + : PostprocessBuildJob( + platformServices.First(ps => ps.EngineGroup == EngineGroup.WordAlignment), + engines, + dataAccessContext, + buildJobService, + logger, + sharedFileService, + buildOptions + ) +{ + private readonly ISmtModelFactory _smtModelFactory = smtModelFactory; + private readonly IOptionsMonitor _engineOptions = engineOptions; + private readonly IDistributedReaderWriterLockFactory _lockFactory = lockFactory; + + protected override async Task DoWorkAsync( + string engineId, + string buildId, + (int, double) data, + string? 
buildOptions, + CancellationToken cancellationToken + ) + { + (int corpusSize, double confidence) = data; + + await using ( + Stream wordAlignmentStream = await SharedFileService.OpenReadAsync( + $"builds/{buildId}/word_alignment_outputs.json", + cancellationToken + ) + ) + { + await PlatformService.InsertInferencesAsync(engineId, wordAlignmentStream, cancellationToken); + } + + int additionalCorpusSize = await SaveModelAsync(engineId, buildId); + await DataAccessContext.WithTransactionAsync( + async (ct) => + { + await PlatformService.BuildCompletedAsync( + buildId, + corpusSize + additionalCorpusSize, + Math.Round(confidence, 2, MidpointRounding.AwayFromZero), + ct + ); + await BuildJobService.BuildJobFinishedAsync(engineId, buildId, buildComplete: true, ct); + }, + cancellationToken: CancellationToken.None + ); + + Logger.LogInformation("Build completed ({0}).", buildId); + } + + protected override async Task SaveModelAsync(string engineId, string buildId) + { + IDistributedReaderWriterLock @lock = await _lockFactory.CreateAsync(engineId); + return await @lock.WriterLockAsync( + async ct => + { + await using ( + Stream engineStream = await SharedFileService.OpenReadAsync($"builds/{buildId}/model.tar.gz", ct) + ) + { + await _smtModelFactory.UpdateEngineFromAsync( + Path.Combine(_engineOptions.CurrentValue.EnginesDir, engineId), + engineStream, + ct + ); + } + return 0; + }, + _engineOptions.CurrentValue.SaveModelTimeout + ); + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/StatisticalTrainBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/StatisticalTrainBuildJob.cs new file mode 100644 index 00000000..d68376a0 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/StatisticalTrainBuildJob.cs @@ -0,0 +1,28 @@ +namespace Serval.Machine.Shared.Services; + +public class StatisticalTrainBuildJob( + IEnumerable platformServices, + IRepository engines, + IDataAccessContext dataAccessContext, + IBuildJobService buildJobService, + 
ILogger logger +) + : HangfireBuildJob( + platformServices.First(ps => ps.EngineGroup == EngineGroup.WordAlignment), + engines, + dataAccessContext, + buildJobService, + logger + ) +{ + protected override Task DoWorkAsync( + string engineId, + string buildId, + object? data, + string? buildOptions, + CancellationToken cancellationToken + ) + { + throw new NotImplementedException(); + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/StatsiticalClearMLBuildJobFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/StatsiticalClearMLBuildJobFactory.cs new file mode 100644 index 00000000..1e104b2c --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/StatsiticalClearMLBuildJobFactory.cs @@ -0,0 +1,49 @@ +namespace Serval.Machine.Shared.Services; + +public class StatisticalClearMLBuildJobFactory( + ISharedFileService sharedFileService, + IRepository engines +) : IClearMLBuildJobFactory +{ + private readonly ISharedFileService _sharedFileService = sharedFileService; + private readonly IRepository _engines = engines; + + public EngineType EngineType => EngineType.Statistical; + + public async Task CreateJobScriptAsync( + string engineId, + string buildId, + string modelType, + BuildStage stage, + object? data = null, + string? buildOptions = null, + CancellationToken cancellationToken = default + ) + { + if (stage == BuildStage.Train) + { + WordAlignmentEngine? 
engine = await _engines.GetAsync(e => e.EngineId == engineId, cancellationToken); + if (engine is null) + throw new InvalidOperationException("The engine does not exist."); + + Uri sharedFileUri = _sharedFileService.GetBaseUri(); + string baseUri = sharedFileUri.GetComponents(UriComponents.SchemeAndServer, UriFormat.Unescaped); + string folder = sharedFileUri.GetComponents(UriComponents.Path, UriFormat.Unescaped); + return "from machine.jobs.build_word_alignment_model import run\n" + + "args = {\n" + + $" 'model_type': '{modelType}',\n" + + $" 'engine_id': '{engineId}',\n" + + $" 'build_id': '{buildId}',\n" + + $" 'shared_file_uri': '{baseUri}',\n" + + $" 'shared_file_folder': '{folder}',\n" + + (buildOptions is not null ? $" 'build_options': '''{buildOptions}''',\n" : "") + + $" 'clearml': True\n" + + "}\n" + + "run(args)\n"; + } + else + { + throw new ArgumentException("Unknown build stage.", nameof(stage)); + } + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ThotSmtModelFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/ThotSmtModelFactory.cs index 03f4ab5d..b856a86a 100644 --- a/src/Machine/src/Serval.Machine.Shared/Services/ThotSmtModelFactory.cs +++ b/src/Machine/src/Serval.Machine.Shared/Services/ThotSmtModelFactory.cs @@ -1,6 +1,6 @@ namespace Serval.Machine.Shared.Services; -public class ThotSmtModelFactory(IOptionsMonitor options) : ISmtModelFactory +public class ThotSmtModelFactory(IOptionsMonitor options) : ModelFactoryBase, ISmtModelFactory { private readonly IOptionsMonitor _options = options; @@ -24,7 +24,7 @@ ITruecaser truecaser return model; } - public ITrainer CreateTrainer( + public override ITrainer CreateTrainer( string engineDir, IRangeTokenizer tokenizer, IParallelTextCorpus corpus @@ -41,66 +41,10 @@ IParallelTextCorpus corpus return trainer; } - public void InitNew(string engineDir) + public override void InitNew(string engineDir) { if (!Directory.Exists(engineDir)) Directory.CreateDirectory(engineDir); 
ZipFile.ExtractToDirectory(_options.CurrentValue.NewModelFile, engineDir); } - - public void Cleanup(string engineDir) - { - if (!Directory.Exists(engineDir)) - return; - DirectoryHelper.DeleteDirectoryRobust(Path.Combine(engineDir, "lm")); - DirectoryHelper.DeleteDirectoryRobust(Path.Combine(engineDir, "tm")); - string smtConfigFileName = Path.Combine(engineDir, "smt.cfg"); - if (File.Exists(smtConfigFileName)) - File.Delete(smtConfigFileName); - if (!Directory.EnumerateFileSystemEntries(engineDir).Any()) - Directory.Delete(engineDir); - } - - public async Task UpdateEngineFromAsync( - string engineDir, - Stream source, - CancellationToken cancellationToken = default - ) - { - if (!Directory.Exists(engineDir)) - Directory.CreateDirectory(engineDir); - - await using MemoryStream memoryStream = new(); - await using (GZipStream gzipStream = new(source, CompressionMode.Decompress)) - { - await gzipStream.CopyToAsync(memoryStream, cancellationToken); - } - memoryStream.Seek(0, SeekOrigin.Begin); - await TarFile.ExtractToDirectoryAsync( - memoryStream, - engineDir, - overwriteFiles: true, - cancellationToken: cancellationToken - ); - } - - public async Task SaveEngineToAsync( - string engineDir, - Stream destination, - CancellationToken cancellationToken = default - ) - { - // create zip archive in memory stream - // This cannot be created directly to the shared stream because it all needs to be written at once - await using MemoryStream memoryStream = new(); - await TarFile.CreateFromDirectoryAsync( - engineDir, - memoryStream, - includeBaseDirectory: false, - cancellationToken: cancellationToken - ); - memoryStream.Seek(0, SeekOrigin.Begin); - await using GZipStream gzipStream = new(destination, CompressionMode.Compress); - await memoryStream.CopyToAsync(gzipStream, cancellationToken); - } } diff --git a/src/Machine/src/Serval.Machine.Shared/Services/TranslationBuildJobService.cs b/src/Machine/src/Serval.Machine.Shared/Services/TranslationBuildJobService.cs new file 
mode 100644 index 00000000..8dd2c493 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/TranslationBuildJobService.cs @@ -0,0 +1,65 @@ +namespace Serval.Machine.Shared.Services; + +public class TranslationBuildJobService(IEnumerable runners, IRepository engines) + : BuildJobService(runners, engines) +{ + public override async Task<(string? BuildId, BuildJobState State)> CancelBuildJobAsync( + string engineId, + CancellationToken cancellationToken = default + ) + { + // cancel a job that hasn't started yet + TranslationEngine? engine = await Engines.UpdateAsync( + e => e.EngineId == engineId && e.CurrentBuild != null && e.CurrentBuild.JobState == BuildJobState.Pending, + u => + { + u.Unset(b => b.CurrentBuild); + u.Set(e => e.CollectTrainSegmentPairs, false); + }, + returnOriginal: true, + cancellationToken: cancellationToken + ); + if (engine is not null && engine.CurrentBuild is not null) + { + // job will be deleted from the queue + IBuildJobRunner runner = Runners[engine.CurrentBuild.BuildJobRunner]; + await runner.StopJobAsync(engine.CurrentBuild.JobId, CancellationToken.None); + return (engine.CurrentBuild.BuildId, BuildJobState.None); + } + + // cancel a job that is already running + engine = await Engines.UpdateAsync( + e => e.EngineId == engineId && e.CurrentBuild != null && e.CurrentBuild.JobState == BuildJobState.Active, + u => u.Set(e => e.CurrentBuild!.JobState, BuildJobState.Canceling), + cancellationToken: cancellationToken + ); + if (engine is not null && engine.CurrentBuild is not null) + { + IBuildJobRunner runner = Runners[engine.CurrentBuild.BuildJobRunner]; + await runner.StopJobAsync(engine.CurrentBuild.JobId, CancellationToken.None); + return (engine.CurrentBuild.BuildId, BuildJobState.Canceling); + } + + return (null, BuildJobState.None); + } + + public override Task BuildJobFinishedAsync( + string engineId, + string buildId, + bool buildComplete, + CancellationToken cancellationToken = default + ) + { + return 
Engines.UpdateAsync( + e => e.EngineId == engineId && e.CurrentBuild != null && e.CurrentBuild.BuildId == buildId, + u => + { + u.Unset(e => e.CurrentBuild); + u.Set(e => e.CollectTrainSegmentPairs, false); + if (buildComplete) + u.Inc(e => e.BuildRevision); + }, + cancellationToken: cancellationToken + ); + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/TranslationEngineType.cs b/src/Machine/src/Serval.Machine.Shared/Services/TranslationEngineType.cs deleted file mode 100644 index 61df1966..00000000 --- a/src/Machine/src/Serval.Machine.Shared/Services/TranslationEngineType.cs +++ /dev/null @@ -1,7 +0,0 @@ -namespace Serval.Machine.Shared.Services; - -public enum TranslationEngineType -{ - SmtTransfer, - Nmt -} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentEngineState.cs b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentEngineState.cs new file mode 100644 index 00000000..d075af95 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentEngineState.cs @@ -0,0 +1,94 @@ +using SIL.ObjectModel; + +namespace Serval.Machine.Shared.Services; + +public class WordAlignmentEngineState( + IWordAlignmentModelFactory wordAlignmentModelFactory, + IOptionsMonitor options, + string engineId +) : DisposableBase +{ + private readonly IWordAlignmentModelFactory _wordAlignmentModelFactory = wordAlignmentModelFactory; + private readonly IOptionsMonitor _options = options; + private readonly AsyncLock _lock = new(); + + private IWordAlignmentEngine? 
_wordAlignmentEngine; + + public string EngineId { get; } = engineId; + + public bool IsUpdated { get; set; } + public int CurrentBuildRevision { get; set; } = -1; + public DateTime LastUsedTime { get; private set; } = DateTime.UtcNow; + public bool IsLoaded => _wordAlignmentEngine != null; + + private string EngineDir => Path.Combine(_options.CurrentValue.EnginesDir, EngineId); + + public void InitNew() + { + _wordAlignmentModelFactory.InitNew(EngineDir); + } + + public async Task GetEngineAsync( + int buildRevision, + CancellationToken cancellationToken = default + ) + { + using (await _lock.LockAsync(cancellationToken)) + { + if (_wordAlignmentEngine is not null && CurrentBuildRevision != -1 && buildRevision != CurrentBuildRevision) + { + IsUpdated = false; + Unload(); + } + + _wordAlignmentEngine ??= _wordAlignmentModelFactory.Create(EngineDir); + CurrentBuildRevision = buildRevision; + return _wordAlignmentEngine; + } + } + + public void DeleteData() + { + Unload(); + _wordAlignmentModelFactory.Cleanup(EngineDir); + } + + public void Commit(int buildRevision, TimeSpan inactiveTimeout) + { + if (_wordAlignmentEngine is null) + return; + + if (CurrentBuildRevision == -1) + CurrentBuildRevision = buildRevision; + if (buildRevision != CurrentBuildRevision) + { + Unload(); + CurrentBuildRevision = buildRevision; + } + else if (DateTime.UtcNow - LastUsedTime > inactiveTimeout) + { + Unload(); + } + } + + public void Touch() + { + LastUsedTime = DateTime.UtcNow; + } + + private void Unload() + { + if (_wordAlignmentEngine is null) + return; + + _wordAlignmentEngine.Dispose(); + + _wordAlignmentEngine = null; + CurrentBuildRevision = -1; + } + + protected override void DisposeManagedResources() + { + Unload(); + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentEngineStateService.cs b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentEngineStateService.cs new file mode 100644 index 00000000..03f03038 --- /dev/null +++ 
b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentEngineStateService.cs @@ -0,0 +1,70 @@ +using SIL.ObjectModel; + +namespace Serval.Machine.Shared.Services; + +public class WordAlignmentEngineStateService( + IWordAlignmentModelFactory wordAlignmentModelFactory, + IOptionsMonitor options, + ILogger logger +) : DisposableBase +{ + private readonly IWordAlignmentModelFactory _wordAlignmentModelFactory = wordAlignmentModelFactory; + private readonly IOptionsMonitor _options = options; + private readonly ILogger _logger = logger; + + private readonly ConcurrentDictionary _engineStates = + new ConcurrentDictionary(); + + public WordAlignmentEngineState Get(string engineId) + { + return _engineStates.GetOrAdd(engineId, CreateState); + } + + public void Remove(string engineId) + { + _engineStates.TryRemove(engineId, out _); + } + + public async Task CommitAsync( + IDistributedReaderWriterLockFactory lockFactory, + IRepository engines, + TimeSpan inactiveTimeout, + CancellationToken cancellationToken = default + ) + { + foreach (WordAlignmentEngineState state in _engineStates.Values) + { + try + { + IDistributedReaderWriterLock @lock = await lockFactory.CreateAsync(state.EngineId, cancellationToken); + await @lock.WriterLockAsync( + async ct => + { + TranslationEngine? engine = await engines.GetAsync(state.EngineId, ct); + if (engine is not null && !(engine.CollectTrainSegmentPairs ?? 
false)) + // there is no way to cancel this call + state.Commit(engine.BuildRevision, inactiveTimeout); + }, + _options.CurrentValue.EngineCommitTimeout, + cancellationToken: cancellationToken + ); + } + catch (Exception e) + { + _logger.LogError(e, "Error occurred while committing SMT transfer engine {EngineId}.", state.EngineId); + } + } + } + + private WordAlignmentEngineState CreateState(string engineId) + { + return new WordAlignmentEngineState(_wordAlignmentModelFactory, _options, engineId); + } + + protected override void DisposeManagedResources() + { + foreach (WordAlignmentEngineState state in _engineStates.Values) + state.Dispose(); + _engineStates.Clear(); + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentModelFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentModelFactory.cs new file mode 100644 index 00000000..ac97d382 --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentModelFactory.cs @@ -0,0 +1,46 @@ +namespace Serval.Machine.Shared.Services; + +public class WordAlignmentModelFactory(IOptionsMonitor options) + : ModelFactoryBase, + IWordAlignmentModelFactory +{ + private readonly IOptionsMonitor _options = options; + + public IWordAlignmentModel Create(string engineDir) + { + var modelPath = Path.Combine(engineDir, "tm", "src_trg"); + var directModel = ThotWordAlignmentModel.Create(ThotWordAlignmentModelType.Hmm); + directModel.Load(modelPath + "_invswm"); + + var inverseModel = ThotWordAlignmentModel.Create(ThotWordAlignmentModelType.Hmm); + inverseModel.Load(modelPath + "_swm"); + + return new SymmetrizedWordAlignmentModel(directModel, inverseModel); + } + + public ITrainer CreateTrainer( + string engineDir, + ITokenizer tokenizer, + IParallelTextCorpus corpus + ) + { + var modelPath = Path.Combine(engineDir, "tm", "src_trg"); + var directModel = ThotWordAlignmentModel.Create(ThotWordAlignmentModelType.Hmm); + directModel.Load(modelPath + "_invswm"); + + var 
inverseModel = ThotWordAlignmentModel.Create(ThotWordAlignmentModelType.Hmm); + inverseModel.Load(modelPath + "_swm"); + + ITrainer directTrainer = directModel.CreateTrainer(corpus, tokenizer); + ITrainer inverseTrainer = inverseModel.CreateTrainer(corpus.Invert(), tokenizer); + + return new SymmetrizedWordAlignmentModelTrainer(directTrainer, inverseTrainer); + } + + public override void InitNew(string engineDir) + { + if (!Directory.Exists(engineDir)) + Directory.CreateDirectory(engineDir); + ZipFile.ExtractToDirectory(_options.CurrentValue.NewModelFile, engineDir); + } +} diff --git a/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentPreprocessBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentPreprocessBuildJob.cs new file mode 100644 index 00000000..be3147cb --- /dev/null +++ b/src/Machine/src/Serval.Machine.Shared/Services/WordAlignmentPreprocessBuildJob.cs @@ -0,0 +1,85 @@ +namespace Serval.Machine.Shared.Services; + +public class WordAlignmentPreprocessBuildJob( + IEnumerable platformServices, + IRepository engines, + IDataAccessContext dataAccessContext, + ILogger logger, + IBuildJobService buildJobService, + ISharedFileService sharedFileService, + IParallelCorpusPreprocessingService parallelCorpusPreprocessingService +) + : PreprocessBuildJob( + platformServices.First(ps => ps.EngineGroup == EngineGroup.WordAlignment), + engines, + dataAccessContext, + logger, + buildJobService, + sharedFileService, + parallelCorpusPreprocessingService + ) +{ + protected override async Task<(int TrainCount, int InferenceCount)> WriteDataFilesAsync( + string buildId, + IReadOnlyList corpora, + string? buildOptions, + CancellationToken cancellationToken + ) + { + JsonObject? 
buildOptionsObject = null; + if (buildOptions is not null) + buildOptionsObject = JsonSerializer.Deserialize(buildOptions); + + await using StreamWriter sourceTrainWriter = + new(await SharedFileService.OpenWriteAsync($"builds/{buildId}/train.src.txt", cancellationToken)); + await using StreamWriter targetTrainWriter = + new(await SharedFileService.OpenWriteAsync($"builds/{buildId}/train.trg.txt", cancellationToken)); + + await using Stream inferenceStream = await SharedFileService.OpenWriteAsync( + $"builds/{buildId}/word_alignment_inputs.json", + cancellationToken + ); + await using Utf8JsonWriter inferenceWriter = new(inferenceStream, InferenceWriterOptions); + + int trainCount = 0; + int inferenceCount = 0; + inferenceWriter.WriteStartArray(); + await ParallelCorpusPreprocessingService.PreprocessAsync( + corpora, + async row => + { + if (row.SourceSegment.Length > 0 || row.TargetSegment.Length > 0) + { + await sourceTrainWriter.WriteAsync($"{row.SourceSegment}\n"); + await targetTrainWriter.WriteAsync($"{row.TargetSegment}\n"); + } + if (row.SourceSegment.Length > 0 && row.TargetSegment.Length > 0) + trainCount++; + }, + async (row, corpus) => + { + if (row.SourceSegment.Length > 0 && row.TargetSegment.Length > 0) + { + inferenceWriter.WriteStartObject(); + inferenceWriter.WriteString("corpusId", corpus.Id); + inferenceWriter.WriteString("textId", row.TextId); + inferenceWriter.WriteStartArray("refs"); + foreach (object rowRef in row.Refs) + inferenceWriter.WriteStringValue(rowRef.ToString()); + inferenceWriter.WriteEndArray(); + inferenceWriter.WriteString("source", row.SourceSegment); + inferenceWriter.WriteString("target", row.TargetSegment); + inferenceWriter.WriteEndObject(); + inferenceCount++; + } + if (inferenceWriter.BytesPending > 1024 * 1024) + await inferenceWriter.FlushAsync(); + }, + (bool?)buildOptionsObject?["use_key_terms"] ?? 
true + ); + + inferenceWriter.WriteEndArray(); + + return (trainCount, inferenceCount); + } +} diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/ModelCleanupServiceTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/ModelCleanupServiceTests.cs index 49923372..88797059 100644 --- a/src/Machine/test/Serval.Machine.Shared.Tests/Services/ModelCleanupServiceTests.cs +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/ModelCleanupServiceTests.cs @@ -46,7 +46,7 @@ public TestEnvironment() { Id = "engine1", EngineId = "engineId1", - Type = TranslationEngineType.Nmt, + Type = EngineType.Nmt, SourceLanguage = "es", TargetLanguage = "en", BuildRevision = 1, @@ -58,7 +58,7 @@ public TestEnvironment() { Id = "engine2", EngineId = "engineId2", - Type = TranslationEngineType.Nmt, + Type = EngineType.Nmt, SourceLanguage = "es", TargetLanguage = "en", BuildRevision = 2, diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtClearMLBuildJobFactoryTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtClearMLBuildJobFactoryTests.cs index 439b8d7c..f5e5ceaa 100644 --- a/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtClearMLBuildJobFactoryTests.cs +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtClearMLBuildJobFactoryTests.cs @@ -81,7 +81,7 @@ public TestEnvironment() { Id = "engine1", EngineId = "engine1", - Type = TranslationEngineType.Nmt, + Type = EngineType.Nmt, SourceLanguage = "es", TargetLanguage = "en", BuildRevision = 1, diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtEngineServiceTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtEngineServiceTests.cs index f05a8cb3..de7be478 100644 --- a/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtEngineServiceTests.cs +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/NmtEngineServiceTests.cs @@ -88,7 +88,7 @@ public TestEnvironment() { Id = "engine1", EngineId = "engine1", - Type = 
TranslationEngineType.Nmt, + Type = EngineType.Nmt, SourceLanguage = "es", TargetLanguage = "en", BuildRevision = 1, @@ -98,6 +98,7 @@ public TestEnvironment() _memoryStorage = new Hangfire.InMemory.InMemoryStorage(); _jobClient = new BackgroundJobClient(_memoryStorage); PlatformService = Substitute.For(); + PlatformService.EngineGroup.Returns(EngineGroup.Translation); _lockFactory = new DistributedReaderWriterLockFactory( new OptionsWrapper(new ServiceOptions { ServiceId = "host" }), new OptionsWrapper(new DistributedReaderWriterLockOptions()), @@ -132,14 +133,14 @@ public TestEnvironment() [ new ClearMLBuildQueue() { - TranslationEngineType = TranslationEngineType.Nmt, + EngineType = EngineType.Nmt.ToString(), ModelType = "huggingface", DockerImage = "default", Queue = "default" }, new ClearMLBuildQueue() { - TranslationEngineType = TranslationEngineType.SmtTransfer, + EngineType = EngineType.SmtTransfer.ToString(), ModelType = "thot", DockerImage = "default", Queue = "default" @@ -147,7 +148,7 @@ public TestEnvironment() ] } ); - BuildJobService = new BuildJobService( + BuildJobService = new BuildJobService( [ new HangfireBuildJobRunner(_jobClient, [new NmtHangfireBuildJobFactory()]), new ClearMLBuildJobRunner( @@ -184,7 +185,7 @@ public TestEnvironment() public IPlatformService PlatformService { get; } public IClearMLService ClearMLService { get; } public ISharedFileService SharedFileService { get; } - public IBuildJobService BuildJobService { get; } + public IBuildJobService BuildJobService { get; } public void StopServer() { @@ -211,7 +212,7 @@ private BackgroundJobServer CreateJobServer() private NmtEngineService CreateService() { return new NmtEngineService( - PlatformService, + new[] { PlatformService }, new MemoryDataAccessContext(), Engines, BuildJobService, @@ -262,7 +263,7 @@ private async Task RunNormalTrainJob() await BuildJobService.StartBuildJobAsync( BuildJobRunnerType.Hangfire, - TranslationEngineType.Nmt, + EngineType.Nmt, "engine1", "build1", 
BuildStage.Postprocess, @@ -295,7 +296,7 @@ public override object ActivateJob(Type jobType) if (jobType == typeof(NmtPreprocessBuildJob)) { return new NmtPreprocessBuildJob( - _env.PlatformService, + new[] { _env.PlatformService }, _env.Engines, new MemoryDataAccessContext(), Substitute.For>(), @@ -305,16 +306,16 @@ public override object ActivateJob(Type jobType) new ParallelCorpusPreprocessingService(new CorpusService()) ); } - if (jobType == typeof(PostprocessBuildJob)) + if (jobType == typeof(PostprocessBuildJob)) { var buildJobOptions = Substitute.For>(); buildJobOptions.CurrentValue.Returns(new BuildJobOptions()); - return new PostprocessBuildJob( + return new PostprocessBuildJob( _env.PlatformService, _env.Engines, new MemoryDataAccessContext(), _env.BuildJobService, - Substitute.For>(), + Substitute.For>>(), _env.SharedFileService, buildJobOptions ); diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs index 470817cc..23c81750 100644 --- a/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs @@ -25,7 +25,7 @@ public async Task RunAsync_FilterOutEverything() public async Task RunAsync_TrainOnAll() { using TestEnvironment env = new(); - ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(trainOnTextIds: null, pretranslateTextIds: []); + ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(trainOnTextIds: null, inferenceTextIds: []); await env.RunBuildJobAsync(corpus1); @@ -43,7 +43,7 @@ public async Task RunAsync_TrainOnAll() public async Task RunAsync_TrainOnTextIds() { using TestEnvironment env = new(); - ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(trainOnTextIds: ["textId1"], pretranslateTextIds: []); + ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(trainOnTextIds: ["textId1"], 
inferenceTextIds: []); await env.RunBuildJobAsync(corpus1); @@ -61,7 +61,7 @@ public async Task RunAsync_TrainOnTextIds() public async Task RunAsync_TrainAndPretranslateAll() { using TestEnvironment env = new(); - ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(trainOnTextIds: null, pretranslateTextIds: null); + ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(trainOnTextIds: null, inferenceTextIds: null); await env.RunBuildJobAsync(corpus1); @@ -72,7 +72,7 @@ public async Task RunAsync_TrainAndPretranslateAll() public async Task RunAsync_PretranslateAll() { using TestEnvironment env = new(); - ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(trainOnTextIds: [], pretranslateTextIds: null); + ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(trainOnTextIds: [], inferenceTextIds: null); await env.RunBuildJobAsync(corpus1); @@ -80,10 +80,10 @@ public async Task RunAsync_PretranslateAll() } [Test] - public async Task RunAsync_PretranslateTextIds() + public async Task RunAsync_InferenceTextIds() { using TestEnvironment env = new(); - ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(pretranslateTextIds: ["textId1"], trainOnTextIds: null); + ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(inferenceTextIds: ["textId1"], trainOnTextIds: null); await env.RunBuildJobAsync(corpus1); @@ -91,11 +91,11 @@ public async Task RunAsync_PretranslateTextIds() } [Test] - public async Task RunAsync_PretranslateTextIdsOverlapWithTrainOnTextIds() + public async Task RunAsync_InferenceTextIdsOverlapWithTrainOnTextIds() { using TestEnvironment env = new(); ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus( - pretranslateTextIds: ["textId1"], + inferenceTextIds: ["textId1"], trainOnTextIds: ["textId1"] ); @@ -144,12 +144,12 @@ public async Task RunAsync_DisableKeyTerms() } [Test] - public async Task RunAsync_PretranslateChapters() + public async Task RunAsync_InferenceChapters() { using TestEnvironment env = new(); ParallelCorpus corpus1 = 
env.ParatextCorpus( trainOnChapters: [], - pretranslateChapters: new Dictionary> + inferenceChapters: new Dictionary> { { "1CH", @@ -179,7 +179,7 @@ public async Task RunAsync_TrainOnChapters() new HashSet { 1 } } }, - pretranslateChapters: [] + inferenceChapters: [] ); await env.RunBuildJobAsync(corpus1, useKeyTerms: false); @@ -250,7 +250,7 @@ public async Task RunAsync_UnknownLanguageTagsNoDataSmtTransfer() using TestEnvironment env = new(); ParallelCorpus corpus1 = TestEnvironment.TextFileCorpus(sourceLanguage: "xxx", targetLanguage: "zzz"); - await env.RunBuildJobAsync(corpus1, engineId: "engine2", engineType: TranslationEngineType.SmtTransfer); + await env.RunBuildJobAsync(corpus1, engineId: "engine2", engineType: EngineType.SmtTransfer); } [Test] @@ -265,7 +265,7 @@ public async Task RunAsync_RemoveFreestandingEllipses() new HashSet() { 2 } } }, - pretranslateChapters: new Dictionary> + inferenceChapters: new Dictionary> { { "MAT", @@ -295,10 +295,7 @@ public async Task RunAsync_RemoveFreestandingEllipses() public void RunAsync_OnlyParseSelectedBooks_NoBadBooks() { using TestEnvironment env = new(); - ParallelCorpus corpus = env.ParatextCorpus( - trainOnTextIds: new() { "LEV" }, - pretranslateTextIds: new() { "MRK" } - ); + ParallelCorpus corpus = env.ParatextCorpus(trainOnTextIds: new() { "LEV" }, inferenceTextIds: new() { "MRK" }); env.CorpusService = Substitute.For(); env.CorpusService.CreateTextCorpora(Arg.Any>()) @@ -318,10 +315,7 @@ public void RunAsync_OnlyParseSelectedBooks_NoBadBooks() public void RunAsync_OnlyParseSelectedBooks_TrainOnBadBook() { using TestEnvironment env = new(); - ParallelCorpus corpus = env.ParatextCorpus( - trainOnTextIds: new() { "MAT" }, - pretranslateTextIds: new() { "MRK" } - ); + ParallelCorpus corpus = env.ParatextCorpus(trainOnTextIds: new() { "MAT" }, inferenceTextIds: new() { "MRK" }); env.CorpusService = Substitute.For(); env.CorpusService.CreateTextCorpora(Arg.Any>()) .Returns( @@ -340,10 +334,7 @@ public void 
RunAsync_OnlyParseSelectedBooks_TrainOnBadBook() public void RunAsync_OnlyParseSelectedBooks_PretranslateOnBadBook() { using TestEnvironment env = new(); - ParallelCorpus corpus = env.ParatextCorpus( - trainOnTextIds: new() { "LEV" }, - pretranslateTextIds: new() { "MAT" } - ); + ParallelCorpus corpus = env.ParatextCorpus(trainOnTextIds: new() { "LEV" }, inferenceTextIds: new() { "MAT" }); env.CorpusService = Substitute.For(); env.CorpusService.CreateTextCorpora(Arg.Any>()) .Returns( @@ -385,7 +376,7 @@ public async Task ParallelCorpusAsync() new() { } } }, - PretranslateChapters = new() + InferenceChapters = new() { { "1CH", @@ -409,7 +400,7 @@ public async Task ParallelCorpusAsync() new() { } } }, - PretranslateChapters = new() + InferenceChapters = new() { { "1CH", @@ -535,7 +526,7 @@ public async Task ParallelCorpusAsync_UseKeyTerms() new() { } } }, - PretranslateChapters = new() + InferenceChapters = new() { { "1CH", @@ -559,7 +550,7 @@ public async Task ParallelCorpusAsync_UseKeyTerms() new() { } } }, - PretranslateChapters = new() { } + InferenceChapters = new() { } }, }, TargetCorpora = new List() @@ -667,7 +658,7 @@ public async Task ParallelCorpusAsync_UseKeyTerms_TextIds() Language = "en", Files = new List { env.ParatextFile("pt-source1") }, TrainOnTextIds = ["MAT", "LEV"], - PretranslateTextIds = ["1CH"] + InferenceTextIds = ["1CH"] }, new() { @@ -675,7 +666,7 @@ public async Task ParallelCorpusAsync_UseKeyTerms_TextIds() Language = "en", Files = new List { env.ParatextFile("pt-source2") }, TrainOnTextIds = ["MAT", "MRK"], - PretranslateTextIds = [] + InferenceTextIds = [] }, }, TargetCorpora = new List() @@ -767,7 +758,7 @@ private class TestEnvironment : DisposableBase public MemoryRepository Engines { get; } public MemoryRepository TrainSegmentPairs { get; } public IDistributedReaderWriterLockFactory LockFactory { get; } - public IBuildJobService BuildJobService { get; } + public IBuildJobService BuildJobService { get; } public IClearMLService 
ClearMLService { get; } public IOptionsMonitor BuildJobOptions { get; } @@ -799,7 +790,7 @@ public TestEnvironment() Language = "es", Files = [TextFile("source1")], TrainOnTextIds = [], - PretranslateTextIds = [] + InferenceTextIds = [] } }, TargetCorpora = new List() @@ -826,8 +817,8 @@ public TestEnvironment() Files = [TextFile("source1"), TextFile("source2")], TrainOnTextIds = null, TrainOnChapters = null, - PretranslateTextIds = null, - PretranslateChapters = null, + InferenceTextIds = null, + InferenceChapters = null, } }, TargetCorpora = new List() @@ -854,7 +845,7 @@ public TestEnvironment() Language = "es", Files = [ParatextFile("pt-source1")], TrainOnTextIds = null, - PretranslateTextIds = null + InferenceTextIds = null } }, TargetCorpora = new List() @@ -880,7 +871,7 @@ public TestEnvironment() Language = "es", Files = [ParatextFile("pt-source1")], TrainOnTextIds = null, - PretranslateTextIds = null + InferenceTextIds = null }, new() { @@ -888,7 +879,7 @@ public TestEnvironment() Language = "es", Files = [ParatextFile("pt-source2")], TrainOnTextIds = null, - PretranslateTextIds = null + InferenceTextIds = null } }, TargetCorpora = new List() @@ -909,7 +900,7 @@ public TestEnvironment() { Id = "engine1", EngineId = "engine1", - Type = TranslationEngineType.Nmt, + Type = EngineType.Nmt, SourceLanguage = "es", TargetLanguage = "en", BuildRevision = 1, @@ -929,7 +920,7 @@ public TestEnvironment() { Id = "engine2", EngineId = "engine2", - Type = TranslationEngineType.Nmt, + Type = EngineType.Nmt, SourceLanguage = "xxx", TargetLanguage = "zzz", BuildRevision = 1, @@ -949,7 +940,7 @@ public TestEnvironment() { Id = "engine2", EngineId = "engine2", - Type = TranslationEngineType.Nmt, + Type = EngineType.Nmt, SourceLanguage = "xxx", TargetLanguage = "zzz", BuildRevision = 1, @@ -967,6 +958,7 @@ public TestEnvironment() TrainSegmentPairs = new MemoryRepository(); CorpusService = new CorpusService(); PlatformService = Substitute.For(); + 
PlatformService.EngineGroup.Returns(EngineGroup.Translation); LockFactory = new DistributedReaderWriterLockFactory( new OptionsWrapper(new ServiceOptions { ServiceId = "host" }), new OptionsWrapper(new DistributedReaderWriterLockOptions()), @@ -981,14 +973,14 @@ public TestEnvironment() [ new ClearMLBuildQueue() { - TranslationEngineType = TranslationEngineType.Nmt, + EngineType = EngineType.Nmt.ToString(), ModelType = "huggingface", DockerImage = "default", Queue = "default" }, new ClearMLBuildQueue() { - TranslationEngineType = TranslationEngineType.SmtTransfer, + EngineType = EngineType.SmtTransfer.ToString(), ModelType = "thot", DockerImage = "default", Queue = "default" @@ -1016,7 +1008,7 @@ public TestEnvironment() ) .Returns(Task.FromResult("job1")); SharedFileService = new SharedFileService(Substitute.For()); - BuildJobService = new BuildJobService( + BuildJobService = new BuildJobService( [ new HangfireBuildJobRunner( Substitute.For(), @@ -1038,14 +1030,14 @@ [new NmtHangfireBuildJobFactory()] ); } - public PreprocessBuildJob GetBuildJob(TranslationEngineType engineType) + public PreprocessBuildJob GetBuildJob(EngineType engineType) { switch (engineType) { - case TranslationEngineType.Nmt: + case EngineType.Nmt: { return new NmtPreprocessBuildJob( - PlatformService, + new[] { PlatformService }, Engines, new MemoryDataAccessContext(), Substitute.For>(), @@ -1055,13 +1047,13 @@ public PreprocessBuildJob GetBuildJob(TranslationEngineType engineType) new ParallelCorpusPreprocessingService(CorpusService) ); } - case TranslationEngineType.SmtTransfer: + case EngineType.SmtTransfer: { return new SmtTransferPreprocessBuildJob( - PlatformService, + new[] { PlatformService }, Engines, new MemoryDataAccessContext(), - Substitute.For>(), + Substitute.For>(), BuildJobService, SharedFileService, LockFactory, @@ -1075,10 +1067,7 @@ public PreprocessBuildJob GetBuildJob(TranslationEngineType engineType) ; } - public static ParallelCorpus TextFileCorpus( - HashSet? 
trainOnTextIds, - HashSet? pretranslateTextIds - ) + public static ParallelCorpus TextFileCorpus(HashSet? trainOnTextIds, HashSet? inferenceTextIds) { return new() { @@ -1091,7 +1080,7 @@ public static ParallelCorpus TextFileCorpus( Language = "es", Files = [TextFile("source1")], TrainOnTextIds = trainOnTextIds, - PretranslateTextIds = pretranslateTextIds + InferenceTextIds = inferenceTextIds } }, TargetCorpora = new List() @@ -1120,7 +1109,7 @@ public static ParallelCorpus TextFileCorpus(string sourceLanguage, string target Language = sourceLanguage, Files = [TextFile("source1")], TrainOnTextIds = [], - PretranslateTextIds = [] + InferenceTextIds = [] } }, TargetCorpora = new List() @@ -1138,7 +1127,7 @@ public static ParallelCorpus TextFileCorpus(string sourceLanguage, string target public ParallelCorpus ParatextCorpus( Dictionary>? trainOnChapters, - Dictionary>? pretranslateChapters + Dictionary>? inferenceChapters ) { return new() @@ -1152,7 +1141,7 @@ public ParallelCorpus ParatextCorpus( Language = "es", Files = [ParatextFile("pt-source1")], TrainOnChapters = trainOnChapters, - PretranslateChapters = pretranslateChapters + InferenceChapters = inferenceChapters } }, TargetCorpora = new List() @@ -1168,7 +1157,7 @@ public ParallelCorpus ParatextCorpus( }; } - public ParallelCorpus ParatextCorpus(HashSet? trainOnTextIds, HashSet? pretranslateTextIds) + public ParallelCorpus ParatextCorpus(HashSet? trainOnTextIds, HashSet? inferenceTextIds) { return new() { @@ -1181,7 +1170,7 @@ public ParallelCorpus ParatextCorpus(HashSet? 
trainOnTextIds, HashSet() @@ -1201,7 +1190,7 @@ public Task RunBuildJobAsync( ParallelCorpus corpus, bool useKeyTerms = true, string engineId = "engine1", - TranslationEngineType engineType = TranslationEngineType.Nmt + EngineType engineType = EngineType.Nmt ) { return RunBuildJobAsync([corpus], useKeyTerms, engineId, engineType); @@ -1211,7 +1200,7 @@ public Task RunBuildJobAsync( IEnumerable corpora, bool useKeyTerms = true, string engineId = "engine1", - TranslationEngineType engineType = TranslationEngineType.Nmt + EngineType engineType = EngineType.Nmt ) { return GetBuildJob(engineType) diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/ServalPlatformOutboxMessageHandlerTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/ServalPlatformOutboxMessageHandlerTests.cs index 3bc63f98..eedd29ab 100644 --- a/src/Machine/test/Serval.Machine.Shared.Tests/Services/ServalPlatformOutboxMessageHandlerTests.cs +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/ServalPlatformOutboxMessageHandlerTests.cs @@ -15,7 +15,7 @@ public async Task HandleMessageAsync_BuildStarted() TestEnvironment env = new(); await env.Handler.HandleMessageAsync( - ServalPlatformOutboxConstants.BuildStarted, + ServalTranslationPlatformOutboxConstants.BuildStarted, JsonSerializer.Serialize(new BuildStartedRequest { BuildId = "C" }), null ); @@ -24,7 +24,7 @@ await env.Handler.HandleMessageAsync( } [Test] - public async Task HandleMessageAsync_InsertPretranslations() + public async Task HandleMessageAsync_InsertInferences() { TestEnvironment env = new(); await using (MemoryStream stream = new()) @@ -45,16 +45,16 @@ await JsonSerializer.SerializeAsync( ); stream.Seek(0, SeekOrigin.Begin); await env.Handler.HandleMessageAsync( - ServalPlatformOutboxConstants.InsertPretranslations, + ServalTranslationPlatformOutboxConstants.InsertInferences, "engine1", stream ); } - _ = env.Client.Received(1).InsertPretranslations(); + _ = env.Client.Received(1).InsertInferences(); _ = 
env.PretranslationWriter.Received(1) .WriteAsync( - new InsertPretranslationsRequest + new InsertInferencesRequest { EngineId = "engine1", CorpusId = "corpus1", @@ -76,11 +76,11 @@ public TestEnvironment() Client.BuildFaultedAsync(Arg.Any()).Returns(CreateEmptyUnaryCall()); Client.BuildCompletedAsync(Arg.Any()).Returns(CreateEmptyUnaryCall()); Client - .IncrementTranslationEngineCorpusSizeAsync(Arg.Any()) + .IncrementTrainEngineCorpusSizeAsync(Arg.Any()) .Returns(CreateEmptyUnaryCall()); - PretranslationWriter = Substitute.For>(); + PretranslationWriter = Substitute.For>(); Client - .InsertPretranslations(cancellationToken: Arg.Any()) + .InsertInferences(cancellationToken: Arg.Any()) .Returns( TestCalls.AsyncClientStreamingCall( PretranslationWriter, @@ -92,12 +92,12 @@ public TestEnvironment() ) ); - Handler = new ServalPlatformOutboxMessageHandler(Client); + Handler = new ServalTranslationPlatformOutboxMessageHandler(Client); } public TranslationPlatformApi.TranslationPlatformApiClient Client { get; } - public ServalPlatformOutboxMessageHandler Handler { get; } - public IClientStreamWriter PretranslationWriter { get; } + public ServalTranslationPlatformOutboxMessageHandler Handler { get; } + public IClientStreamWriter PretranslationWriter { get; } private static AsyncUnaryCall CreateEmptyUnaryCall() { diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/SmtTransferEngineServiceTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/SmtTransferEngineServiceTests.cs index 17c89ed4..70a8859a 100644 --- a/src/Machine/test/Serval.Machine.Shared.Tests/Services/SmtTransferEngineServiceTests.cs +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/SmtTransferEngineServiceTests.cs @@ -51,7 +51,7 @@ await env.Service.StartBuildAsync( Language = "es", Files = [], TrainOnTextIds = null, - PretranslateTextIds = null + InferenceTextIds = null } }, TargetCorpora = new List() @@ -238,7 +238,7 @@ public TestEnvironment(BuildJobRunnerType 
trainJobRunnerType = BuildJobRunnerTyp { Id = EngineId1, EngineId = EngineId1, - Type = TranslationEngineType.SmtTransfer, + Type = EngineType.SmtTransfer, SourceLanguage = "es", TargetLanguage = "en", BuildRevision = 1, @@ -249,6 +249,7 @@ public TestEnvironment(BuildJobRunnerType trainJobRunnerType = BuildJobRunnerTyp _memoryStorage = new Hangfire.InMemory.InMemoryStorage(); _jobClient = new BackgroundJobClient(_memoryStorage); PlatformService = Substitute.For(); + PlatformService.EngineGroup.Returns(EngineGroup.Translation); SmtModel = Substitute.For(); SmtBatchTrainer = Substitute.For(); SmtBatchTrainer.Stats.Returns( @@ -277,14 +278,14 @@ public TestEnvironment(BuildJobRunnerType trainJobRunnerType = BuildJobRunnerTyp [ new ClearMLBuildQueue() { - TranslationEngineType = TranslationEngineType.Nmt, + EngineType = EngineType.Nmt.ToString().ToString(), ModelType = "huggingface", DockerImage = "default", Queue = "default" }, new ClearMLBuildQueue() { - TranslationEngineType = TranslationEngineType.SmtTransfer, + EngineType = EngineType.SmtTransfer.ToString(), ModelType = "thot", DockerImage = "default", Queue = "default" @@ -319,7 +320,7 @@ public TestEnvironment(BuildJobRunnerType trainJobRunnerType = BuildJobRunnerTyp buildJobOptions, Substitute.For>() ); - BuildJobService = new BuildJobService( + BuildJobService = new BuildJobService( [ new HangfireBuildJobRunner(_jobClient, [new SmtTransferHangfireBuildJobFactory()]), new ClearMLBuildJobRunner( @@ -352,7 +353,7 @@ [new SmtTransferClearMLBuildJobFactory(SharedFileService, Engines)], public ISharedFileService SharedFileService { get; } - public IBuildJobService BuildJobService { get; } + public IBuildJobService BuildJobService { get; } public async Task CommitAsync(TimeSpan inactiveTimeout) { @@ -420,7 +421,7 @@ private SmtTransferEngineService CreateService() { return new SmtTransferEngineService( _lockFactory, - PlatformService, + new[] { PlatformService }, new MemoryDataAccessContext(), Engines, 
TrainSegmentPairs, @@ -659,7 +660,7 @@ private async Task RunTrainJob() await BuildJobService.StartBuildJobAsync( BuildJobRunnerType.Hangfire, - TranslationEngineType.SmtTransfer, + EngineType.SmtTransfer, EngineId1, BuildId1, BuildStage.Postprocess, @@ -681,10 +682,10 @@ public override object ActivateJob(Type jobType) if (jobType == typeof(SmtTransferPreprocessBuildJob)) { return new SmtTransferPreprocessBuildJob( - _env.PlatformService, + new[] { _env.PlatformService }, _env.Engines, new MemoryDataAccessContext(), - Substitute.For>(), + Substitute.For>(), _env.BuildJobService, _env.SharedFileService, _env._lockFactory, @@ -702,7 +703,7 @@ public override object ActivateJob(Type jobType) var buildJobOptions = Substitute.For>(); buildJobOptions.CurrentValue.Returns(new BuildJobOptions()); return new SmtTransferPostprocessBuildJob( - _env.PlatformService, + new[] { _env.PlatformService }, _env.Engines, new MemoryDataAccessContext(), _env.BuildJobService, @@ -719,7 +720,7 @@ public override object ActivateJob(Type jobType) if (jobType == typeof(SmtTransferTrainBuildJob)) { return new SmtTransferTrainBuildJob( - _env.PlatformService, + new[] { _env.PlatformService }, _env.Engines, new MemoryDataAccessContext(), _env.BuildJobService, diff --git a/src/Serval/src/Serval.ApiServer/Serval.ApiServer.csproj b/src/Serval/src/Serval.ApiServer/Serval.ApiServer.csproj index 6ae87abd..032976f8 100644 --- a/src/Serval/src/Serval.ApiServer/Serval.ApiServer.csproj +++ b/src/Serval/src/Serval.ApiServer/Serval.ApiServer.csproj @@ -46,6 +46,7 @@ + diff --git a/src/Serval/src/Serval.ApiServer/Startup.cs b/src/Serval/src/Serval.ApiServer/Startup.cs index 2f6e7549..759fd27f 100644 --- a/src/Serval/src/Serval.ApiServer/Startup.cs +++ b/src/Serval/src/Serval.ApiServer/Startup.cs @@ -79,10 +79,12 @@ public void ConfigureServices(IServiceCollection services) .AddMongoDataAccess(cfg => { cfg.AddTranslationRepositories(); + cfg.AddWordAlignmentRepositories(); cfg.AddDataFilesRepositories(); 
cfg.AddWebhooksRepositories(); }) .AddTranslation() + .AddWordAlignment() .AddDataFiles() .AddWebhooks(); services.AddTransient(); @@ -110,6 +112,7 @@ public void ConfigureServices(IServiceCollection services) services.AddMediator(cfg => { cfg.AddTranslationConsumers(); + cfg.AddWordAlignmentConsumers(); cfg.AddDataFilesConsumers(); cfg.AddWebhooksConsumers(); }); @@ -222,6 +225,7 @@ public void Configure(IApplicationBuilder app, IWebHostEnvironment env) { x.MapControllers(); x.MapServalTranslationServices(); + x.MapServalWordAlignmentServices(); x.MapHangfireDashboard(); }); diff --git a/src/Serval/src/Serval.ApiServer/appsettings.Development.json b/src/Serval/src/Serval.ApiServer/appsettings.Development.json index 0910ceed..218c579b 100644 --- a/src/Serval/src/Serval.ApiServer/appsettings.Development.json +++ b/src/Serval/src/Serval.ApiServer/appsettings.Development.json @@ -25,6 +25,18 @@ } ] }, + "WordAlignment": { + "Engines": [ + { + "Type": "EchoWordAlignment", + "Address": "http://localhost:8055" + }, + { + "Type": "Statistical", + "Address": "http://localhost:9000" + } + ] + }, "Logging": { "LogLevel": { "Default": "Information", diff --git a/src/Serval/src/Serval.Client/Client.g.cs b/src/Serval/src/Serval.Client/Client.g.cs index 91631ede..0f18973d 100644 --- a/src/Serval/src/Serval.Client/Client.g.cs +++ b/src/Serval/src/Serval.Client/Client.g.cs @@ -2494,6 +2494,7 @@ public partial interface ITranslationEnginesClient ///
///
The `"options"` parameter of the build config provides the ability to pass build configuration parameters as a JSON object. ///
See [nmt job settings documentation](https://github.com/sillsdev/serval/wiki/NMT-Build-Options) about configuring job parameters. + ///
See [smt-transfer job settings documentation](https://github.com/sillsdev/serval/wiki/SMT-Transfer-Build-Options) about configuring job parameters. ///
See [keyterms parsing documentation](https://github.com/sillsdev/serval/wiki/Paratext-Key-Terms-Parsing) on how to use keyterms for training. ///
///
When using a parallel corpus: @@ -5289,6 +5290,7 @@ public string BaseUrl ///
///
The `"options"` parameter of the build config provides the ability to pass build configuration parameters as a JSON object. ///
See [nmt job settings documentation](https://github.com/sillsdev/serval/wiki/NMT-Build-Options) about configuring job parameters. + ///
See [smt-transfer job settings documentation](https://github.com/sillsdev/serval/wiki/SMT-Transfer-Build-Options) about configuring job parameters. ///
See [keyterms parsing documentation](https://github.com/sillsdev/serval/wiki/Paratext-Key-Terms-Parsing) on how to use keyterms for training. ///
///
When using a parallel corpus: @@ -6991,99 +6993,3144 @@ private string ConvertToString(object? value, System.Globalization.CultureInfo c } } + [System.CodeDom.Compiler.GeneratedCode("NSwag", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial interface IWordAlignmentEnginesClient + { + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all word alignment engines + /// + /// The engines + /// A server side error occurred. + System.Threading.Tasks.Task> GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Create a new word alignment engine + /// + /// + /// ## Parameters + ///
* **name**: (optional) A name to help identify and distinguish the engine. + ///
* Recommendation: Create a multi-part name to distinguish between projects, uses, etc. + ///
* The name does not have to be unique, as the engine is uniquely identified by the auto-generated id + ///
* **sourceLanguage**: The source language code (a valid [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag) is recommended) + ///
* **targetLanguage**: The target language code (a valid IETF language tag is recommended) + ///
* **type**: **statistical** or **echo-word-alignment** + ///
### statistical + ///
The Statistical engine is based off of the [Thot library](https://github.com/sillsdev/thot) and contains IBM-1, IBM-2, IBM-3, IBM-4, FastAlign and HMM algorithms. + ///
### echo-word-alignment + ///
The echo-word-alignment engine has full coverage of all endpoints. Endpoints like create and build return empty responses. + ///
Endpoints like get-word-alignment echo the sent content back to the user in the proper format. This engine is useful for debugging and testing purposes. + ///
## Sample request: + ///
+ ///
{ + ///
"name": "myTeam:myProject:myEngine", + ///
"sourceLanguage": "el", + ///
"targetLanguage": "en", + ///
"type": "statistical" + ///
} + ///
+ /// The engine configuration (see above) + /// The new engine + /// A server side error occurred. + System.Threading.Tasks.Task CreateAsync(WordAlignmentEngineConfig engineConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get a word alignment engine by unique id + /// + /// The engine id + /// The engine + /// A server side error occurred. + System.Threading.Tasks.Task GetAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Delete a word alignment engine + /// + /// The engine id + /// The engine was successfully deleted. + /// A server side error occurred. + System.Threading.Tasks.Task DeleteAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Align words between a source and target segment + /// + /// The engine id + /// The source and target segment + /// The word alignment result + /// A server side error occurred. + System.Threading.Tasks.Task GetWordAlignmentAsync(string id, WordAlignmentRequest wordAlignmentRequest, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Add a parallel corpus to an engine + /// + /// + /// ## Parameters + ///
* **SourceCorpusIds**: The source corpora associated with the parallel corpus + ///
* **TargetCorpusIds**: The target corpora associated with the parallel corpus + ///
+ /// The engine id + /// The corpus configuration (see remarks) + /// The added corpus + /// A server side error occurred. + System.Threading.Tasks.Task AddParallelCorpusAsync(string id, WordAlignmentParallelCorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all parallel corpora for a engine + /// + /// The engine id + /// The parallel corpora + /// A server side error occurred. + System.Threading.Tasks.Task> GetAllParallelCorporaAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Update a parallel corpus with a new set of corpora + /// + /// + /// Will completely replace the parallel corpus' file associations. Will not affect jobs already queued or running. Will not affect existing word graphs until new build is complete. + /// + /// The engine id + /// The parallel corpus id + /// The corpus configuration + /// The corpus was updated successfully + /// A server side error occurred. + System.Threading.Tasks.Task UpdateParallelCorpusAsync(string id, string parallelCorpusId, WordAlignmentParallelCorpusUpdateConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get the configuration of a parallel corpus for a engine + /// + /// The engine id + /// The parallel corpus id + /// The parallel corpus configuration + /// A server side error occurred. 
+ System.Threading.Tasks.Task GetParallelCorpusAsync(string id, string parallelCorpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Remove a parallel corpus from a engine + /// + /// + /// Removing a parallel corpus will remove all word alignments associated with that corpus. + /// + /// The engine id + /// The parallel corpus id + /// The parallel corpus was deleted successfully. + /// A server side error occurred. + System.Threading.Tasks.Task DeleteParallelCorpusAsync(string id, string parallelCorpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all word alignments in a corpus of a engine + /// + /// + /// Word alignments are arranged in a list of dictionaries with the following fields per word alignment: + ///
* **TextId**: The TextId of the SourceFile defined when the corpus was created. + ///
* **Refs** (a list of strings): A list of references including: + ///
* The references defined in the SourceFile per line, if any. + ///
* An auto-generated reference of `[TextId]:[lineNumber]`, 1 indexed. + ///
* **SourceTokens**: the tokenized source segment + ///
* **TargetTokens**: the tokenized target segment + ///
* **Confidences**: the confidence of the alignment on a scale from 0 to 1 + ///
* **Alignment**: the word alignment, 0 indexed for source and target positions + ///
+ ///
Word alignments can be filtered by text id if provided. + ///
Only word alignments for the most recent successful build of the engine are returned. + ///
+ /// The engine id + /// The corpus id + /// The text id (optional) + /// The word alignments + /// A server side error occurred. + System.Threading.Tasks.Task> GetAllWordAlignmentsAsync(string id, string corpusId, string? textId = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all build jobs for a engine + /// + /// The engine id + /// The build jobs + /// A server side error occurred. + System.Threading.Tasks.Task> GetAllBuildsAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Starts a build job for a engine. + /// + /// + /// Specify the corpora and textIds to train on. If no "trainOn" field is provided, all corpora will be used. + ///
For Paratext projects, you may flag a subset of books for training by including their [abbreviations] + ///
Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. + ///
Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) + ///
All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + ///
+ ///
Specify the corpora or textIds to word align on. + ///
When a corpus or textId is selected for word align on, only text segments that are in both the source and the target will be aligned. + ///
+ ///
The `"options"` parameter of the build config provides the ability to pass build configuration parameters as a JSON object. + ///
See [statistical alignment job settings documentation](https://github.com/sillsdev/serval/wiki/Statistical-Alignment-Build-Options) about configuring job parameters. + ///
See [keyterms parsing documentation](https://github.com/sillsdev/serval/wiki/Paratext-Key-Terms-Parsing) on how to use keyterms for training. + ///
+ /// The engine id + /// The build config (see remarks) + /// The new build job + /// A server side error occurred. + System.Threading.Tasks.Task StartBuildAsync(string id, WordAlignmentBuildConfig buildConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get a build job + /// + /// + /// If the `minRevision` is not defined, the current build, at whatever state it is, + ///
will be immediately returned. If `minRevision` is defined, Serval will wait for + ///
up to 40 seconds for the engine to build to the `minRevision` specified, else + ///
will timeout. + ///
A use case is to actively query the state of the current build, where the subsequent + ///
request sets the `minRevision` to the returned `revision` + 1 and timeouts are handled gracefully. + ///
This method should use request throttling. + ///
Note: Within the returned build, percentCompleted is a value between 0 and 1. + ///
+ /// The engine id + /// The build job id + /// The minimum revision + /// The build job + /// A server side error occurred. + System.Threading.Tasks.Task GetBuildAsync(string id, string buildId, long? minRevision = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get the currently running build job for a engine + /// + /// + /// See documentation on endpoint /word-alignment/engines/{id}/builds/{id} - "Get a Build Job" for details on using `minRevision`. + /// + /// The engine id + /// The minimum revision + /// The build job + /// A server side error occurred. + System.Threading.Tasks.Task GetCurrentBuildAsync(string id, long? minRevision = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Cancel the current build job (whether pending or active) for a engine + /// + /// The engine id + /// The build job was cancelled successfully. + /// A server side error occurred. 
+ System.Threading.Tasks.Task CancelBuildAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + } + + [System.CodeDom.Compiler.GeneratedCode("NSwag", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class WordAlignmentEnginesClient : IWordAlignmentEnginesClient + { + #pragma warning disable 8618 + private string _baseUrl; + #pragma warning restore 8618 + + private System.Net.Http.HttpClient _httpClient; + private static System.Lazy _settings = new System.Lazy(CreateSerializerSettings, true); + private Newtonsoft.Json.JsonSerializerSettings _instanceSettings; + + #pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + public WordAlignmentEnginesClient(System.Net.Http.HttpClient httpClient) + #pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + { + BaseUrl = "/api/v1"; + _httpClient = httpClient; + Initialize(); + } + + private static Newtonsoft.Json.JsonSerializerSettings CreateSerializerSettings() + { + var settings = new Newtonsoft.Json.JsonSerializerSettings(); + UpdateJsonSerializerSettings(settings); + return settings; + } + + public string BaseUrl + { + get { return _baseUrl; } + set + { + _baseUrl = value; + if (!string.IsNullOrEmpty(_baseUrl) && !_baseUrl.EndsWith("/")) + _baseUrl += '/'; + } + } + + protected Newtonsoft.Json.JsonSerializerSettings JsonSerializerSettings { get { return _instanceSettings ?? 
_settings.Value; } } + + static partial void UpdateJsonSerializerSettings(Newtonsoft.Json.JsonSerializerSettings settings); + + partial void Initialize(); + + partial void PrepareRequest(System.Net.Http.HttpClient client, System.Net.Http.HttpRequestMessage request, string url); + partial void PrepareRequest(System.Net.Http.HttpClient client, System.Net.Http.HttpRequestMessage request, System.Text.StringBuilder urlBuilder); + partial void ProcessResponse(System.Net.Http.HttpClient client, System.Net.Http.HttpResponseMessage response); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all word alignment engines + /// + /// The engines + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task> GetAllAsync(System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("GET"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines" + urlBuilder_.Append("word-alignment/engines"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = 
item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? 
null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Create a new word alignment engine + /// + /// + /// ## Parameters + ///
* **name**: (optional) A name to help identify and distinguish the engine. + ///
* Recommendation: Create a multi-part name to distinguish between projects, uses, etc. + ///
* The name does not have to be unique, as the engine is uniquely identified by the auto-generated id + ///
* **sourceLanguage**: The source language code (a valid [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag) is recommended) + ///
* **targetLanguage**: The target language code (a valid IETF language tag is recommended) + ///
* **type**: **statistical** or **echo-word-alignment** + ///
### statistical + ///
The Statistical engine is based off of the [Thot library](https://github.com/sillsdev/thot) and contains IBM-1, IBM-2, IBM-3, IBM-4, FastAlign and HMM algorithms. + ///
### echo-word-alignment + ///
The echo-word-alignment engine has full coverage of all endpoints. Endpoints like create and build return empty responses. + ///
Endpoints like get-word-alignment echo the sent content back to the user in the proper format. This engine is useful for debugging and testing purposes. + ///
## Sample request: + ///
+ ///
{ + ///
"name": "myTeam:myProject:myEngine", + ///
"sourceLanguage": "el", + ///
"targetLanguage": "en", + ///
"type": "statistical" + ///
} + ///
+ /// The engine configuration (see above) + /// The new engine + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task CreateAsync(WordAlignmentEngineConfig engineConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (engineConfig == null) + throw new System.ArgumentNullException("engineConfig"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(engineConfig, JsonSerializerSettings); + var content_ = new System.Net.Http.StringContent(json_); + content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); + request_.Content = content_; + request_.Method = new System.Net.Http.HttpMethod("POST"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines" + urlBuilder_.Append("word-alignment/engines"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = 
(int)response_.StatusCode; + if (status_ == 201) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("Bad request. Is the engine type correct?", status_, responseText_, headers_, null); + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? 
null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get a word alignment engine by unique id + /// + /// The engine id + /// The engine + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task GetAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("GET"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines/{id}" + urlBuilder_.Append("word-alignment/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach 
(var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? 
null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Delete a word alignment engine + /// + /// The engine id + /// The engine was successfully deleted. + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task DeleteAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("DELETE"); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines/{id}" + urlBuilder_.Append("word-alignment/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content 
!= null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + return; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist and therefore cannot be deleted.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? 
null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Align words between a source and target segment + /// + /// The engine id + /// The source and target segment + /// The word alignment result + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task GetWordAlignmentAsync(string id, WordAlignmentRequest wordAlignmentRequest, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + if (wordAlignmentRequest == null) + throw new System.ArgumentNullException("wordAlignmentRequest"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(wordAlignmentRequest, JsonSerializerSettings); + var content_ = new System.Net.Http.StringContent(json_); + content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); + request_.Content = content_; + request_.Method = new System.Net.Http.HttpMethod("POST"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines/{id}/get-word-alignment" + urlBuilder_.Append("word-alignment/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, 
System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/get-word-alignment"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("Bad request", status_, responseText_, headers_, null); + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 405) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The method is not supported.", status_, responseText_, headers_, null); + } + else + if (status_ == 409) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine needs to be built before it can alignment segments.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. 
+ /// + /// Add a parallel corpus to an engine + /// + /// + /// ## Parameters + ///
* **SourceCorpusIds**: The source corpora associated with the parallel corpus + ///
* **TargetCorpusIds**: The target corpora associated with the parallel corpus + ///
+ /// The engine id + /// The corpus configuration (see remarks) + /// The added corpus + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task AddParallelCorpusAsync(string id, WordAlignmentParallelCorpusConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + if (corpusConfig == null) + throw new System.ArgumentNullException("corpusConfig"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(corpusConfig, JsonSerializerSettings); + var content_ = new System.Net.Http.StringContent(json_); + content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); + request_.Content = content_; + request_.Method = new System.Net.Http.HttpMethod("POST"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines/{id}/parallel-corpora" + urlBuilder_.Append("word-alignment/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/parallel-corpora"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new 
System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 201) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("Bad request", status_, responseText_, headers_, null); + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all parallel corpora for a engine + /// + /// The engine id + /// The parallel corpora + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task> GetAllParallelCorporaAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("GET"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines/{id}/parallel-corpora" + urlBuilder_.Append("word-alignment/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/parallel-corpora"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + 
request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the engine", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details. ", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Update a parallel corpus with a new set of corpora + /// + /// + /// Will completely replace the parallel corpus' file associations. Will not affect jobs already queued or running. Will not affect existing word graphs until new build is complete. + /// + /// The engine id + /// The parallel corpus id + /// The corpus configuration + /// The corpus was updated successfully + /// A server side error occurred. 
+ public virtual async System.Threading.Tasks.Task UpdateParallelCorpusAsync(string id, string parallelCorpusId, WordAlignmentParallelCorpusUpdateConfig corpusConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + if (parallelCorpusId == null) + throw new System.ArgumentNullException("parallelCorpusId"); + + if (corpusConfig == null) + throw new System.ArgumentNullException("corpusConfig"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(corpusConfig, JsonSerializerSettings); + var content_ = new System.Net.Http.StringContent(json_); + content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); + request_.Content = content_; + request_.Method = new System.Net.Http.HttpMethod("PATCH"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines/{id}/parallel-corpora/{parallelCorpusId}" + urlBuilder_.Append("word-alignment/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/parallel-corpora/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(parallelCorpusId, System.Globalization.CultureInfo.InvariantCulture))); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, 
System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("Bad request", status_, responseText_, headers_, null); + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine or corpus does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get the configuration of a parallel corpus for a engine + /// + /// The engine id + /// The parallel corpus id + /// The parallel corpus configuration + /// A server side error occurred. 
+ public virtual async System.Threading.Tasks.Task GetParallelCorpusAsync(string id, string parallelCorpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + if (parallelCorpusId == null) + throw new System.ArgumentNullException("parallelCorpusId"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("GET"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines/{id}/parallel-corpora/{parallelCorpusId}" + urlBuilder_.Append("word-alignment/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/parallel-corpora/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(parallelCorpusId, System.Globalization.CultureInfo.InvariantCulture))); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + 
ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine or parallel corpus does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? 
null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Remove a parallel corpus from a engine + /// + /// + /// Removing a parallel corpus will remove all word alignments associated with that corpus. + /// + /// The engine id + /// The parallel corpus id + /// The parallel corpus was deleted successfully. + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task DeleteParallelCorpusAsync(string id, string parallelCorpusId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + if (parallelCorpusId == null) + throw new System.ArgumentNullException("parallelCorpusId"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("DELETE"); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines/{id}/parallel-corpora/{parallelCorpusId}" + urlBuilder_.Append("word-alignment/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/parallel-corpora/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(parallelCorpusId, System.Globalization.CultureInfo.InvariantCulture))); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = 
urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + return; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine or parallel corpus does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. 
Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all word alignments in a corpus of a engine + /// + /// + /// Word alignments are arranged in a list of dictionaries with the following fields per word alignment: + ///
* **TextId**: The TextId of the SourceFile defined when the corpus was created. + ///
* **Refs** (a list of strings): A list of references including: + ///
* The references defined in the SourceFile per line, if any. + ///
* An auto-generated reference of `[TextId]:[lineNumber]`, 1 indexed. + ///
* **SourceTokens**: the tokenized source segment + ///
* **TargetTokens**: the tokenized target segment + ///
* **Confidences**: the confidence of the alignment on a scale from 0 to 1 + ///
* **Alignment**: the word alignment, 0 indexed for source and target positions + ///
+ ///
Word alignments can be filtered by text id if provided. + ///
Only word alignments for the most recent successful build of the engine are returned. + ///
+ /// The engine id + /// The corpus id + /// The text id (optional) + /// The word alignments + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task> GetAllWordAlignmentsAsync(string id, string corpusId, string? textId = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + if (corpusId == null) + throw new System.ArgumentNullException("corpusId"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("GET"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines/{id}/corpora/{corpusId}/word-alignments" + urlBuilder_.Append("word-alignment/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/corpora/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(corpusId, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/word-alignments"); + urlBuilder_.Append('?'); + if (textId != null) + { + urlBuilder_.Append(System.Uri.EscapeDataString("textId")).Append('=').Append(System.Uri.EscapeDataString(ConvertToString(textId, System.Globalization.CultureInfo.InvariantCulture))).Append('&'); + } + urlBuilder_.Length--; + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, 
System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine or corpus does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 409) + { + string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine needs to be built first.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get all build jobs for a engine + /// + /// The engine id + /// The build jobs + /// A server side error occurred. 
+ public virtual async System.Threading.Tasks.Task> GetAllBuildsAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("GET"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines/{id}/builds" + urlBuilder_.Append("word-alignment/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/builds"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync>(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null 
which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation or does not own the engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Starts a build job for a engine. + /// + /// + /// Specify the corpora and textIds to train on. 
If no "trainOn" field is provided, all corpora will be used. + ///
For Paratext projects, you may flag a subset of books for training by including their [abbreviations]. + ///
Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. + ///
Filters can also be supplied via the scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) for more information. + ///
All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + ///
+ ///
Specify the corpora or textIds to word align on. + ///
When a corpus or textId is selected for word alignment, only text segments that are in both the source and the target will be aligned. + ///
+ ///
The `"options"` parameter of the build config provides the ability to pass build configuration parameters as a JSON object. + ///
See [statistical alignment job settings documentation](https://github.com/sillsdev/serval/wiki/Statistical-Alignment-Build-Options) about configuring job parameters. + ///
See [keyterms parsing documentation](https://github.com/sillsdev/serval/wiki/Paratext-Key-Terms-Parsing) on how to use keyterms for training. + ///
+ /// The engine id + /// The build config (see remarks) + /// The new build job + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task StartBuildAsync(string id, WordAlignmentBuildConfig buildConfig, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + if (buildConfig == null) + throw new System.ArgumentNullException("buildConfig"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + var json_ = Newtonsoft.Json.JsonConvert.SerializeObject(buildConfig, JsonSerializerSettings); + var content_ = new System.Net.Http.StringContent(json_); + content_.Headers.ContentType = System.Net.Http.Headers.MediaTypeHeaderValue.Parse("application/json"); + request_.Content = content_; + request_.Method = new System.Net.Http.HttpMethod("POST"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines/{id}/builds" + urlBuilder_.Append("word-alignment/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/builds"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) 
+ headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 201) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The build configuration was invalid.", status_, responseText_, headers_, null); + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client does not own the engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 409) + { + string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("There is already an active or pending build or a build in the process of being canceled.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get a build job + /// + /// + /// If the `minRevision` is not defined, the current build, at whatever state it is, + ///
will be immediately returned. If `minRevision` is defined, Serval will wait for + ///
up to 40 seconds for the engine to build to the `minRevision` specified, else + ///
it will time out. + ///
A use case is to actively query the state of the current build, where the subsequent + ///
request sets the `minRevision` to the returned `revision` + 1 and timeouts are handled gracefully. + ///
This method should use request throttling. + ///
Note: Within the returned build, percentCompleted is a value between 0 and 1. + ///
+ /// The engine id + /// The build job id + /// The minimum revision + /// The build job + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task GetBuildAsync(string id, string buildId, long? minRevision = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + if (buildId == null) + throw new System.ArgumentNullException("buildId"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("GET"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines/{id}/builds/{buildId}" + urlBuilder_.Append("word-alignment/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/builds/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(buildId, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append('?'); + if (minRevision != null) + { + urlBuilder_.Append(System.Uri.EscapeDataString("minRevision")).Append('=').Append(System.Uri.EscapeDataString(ConvertToString(minRevision, System.Globalization.CultureInfo.InvariantCulture))).Append('&'); + } + urlBuilder_.Length--; + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + 
var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client does not own the engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine or build does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 408) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The long polling request timed out. 
This is expected behavior if you\'re using long-polling with the minRevision strategy specified in the docs.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get the currently running build job for a engine + /// + /// + /// See documentation on endpoint /word-alignment/engines/{id}/builds/{id} - "Get a Build Job" for details on using `minRevision`. + /// + /// The engine id + /// The minimum revision + /// The build job + /// A server side error occurred. + public virtual async System.Threading.Tasks.Task GetCurrentBuildAsync(string id, long? 
minRevision = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("GET"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines/{id}/current-build" + urlBuilder_.Append("word-alignment/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/current-build"); + urlBuilder_.Append('?'); + if (minRevision != null) + { + urlBuilder_.Append(System.Uri.EscapeDataString("minRevision")).Append('=').Append(System.Uri.EscapeDataString(ConvertToString(minRevision, System.Globalization.CultureInfo.InvariantCulture))).Append('&'); + } + urlBuilder_.Length--; + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = 
(int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 204) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("There is no build currently running.", status_, responseText_, headers_, null); + } + else + if (status_ == 400) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("Bad request", status_, responseText_, headers_, null); + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client does not own the engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 408) + { + string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The long polling request timed out. This is expected behavior if you\'re using long-polling with the minRevision strategy specified in the docs.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Cancel the current build job (whether pending or active) for a engine + /// + /// The engine id + /// The build job was cancelled successfully. + /// A server side error occurred. 
+ public virtual async System.Threading.Tasks.Task CancelBuildAsync(string id, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (id == null) + throw new System.ArgumentNullException("id"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Content = new System.Net.Http.StringContent(string.Empty, System.Text.Encoding.UTF8, "application/json"); + request_.Method = new System.Net.Http.HttpMethod("POST"); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engines/{id}/current-build/cancel" + urlBuilder_.Append("word-alignment/engines/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(id, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/current-build/cancel"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + return; + } + else + if (status_ == 204) + { + return; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? 
string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated.", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client does not own the engine.", status_, responseText_, headers_, null); + } + else + if (status_ == 404) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not exist.", status_, responseText_, headers_, null); + } + else + if (status_ == 405) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The engine does not support cancelling builds.", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details.", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? 
null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + protected struct ObjectResponseResult + { + public ObjectResponseResult(T responseObject, string responseText) + { + this.Object = responseObject; + this.Text = responseText; + } + + public T Object { get; } + + public string Text { get; } + } + + public bool ReadResponseAsString { get; set; } + + protected virtual async System.Threading.Tasks.Task> ReadObjectResponseAsync(System.Net.Http.HttpResponseMessage response, System.Collections.Generic.IReadOnlyDictionary> headers, System.Threading.CancellationToken cancellationToken) + { + if (response == null || response.Content == null) + { + return new ObjectResponseResult(default(T)!, string.Empty); + } + + if (ReadResponseAsString) + { + var responseText = await response.Content.ReadAsStringAsync().ConfigureAwait(false); + try + { + var typedBody = Newtonsoft.Json.JsonConvert.DeserializeObject(responseText, JsonSerializerSettings); + return new ObjectResponseResult(typedBody!, responseText); + } + catch (Newtonsoft.Json.JsonException exception) + { + var message = "Could not deserialize the response body string as " + typeof(T).FullName + "."; + throw new ServalApiException(message, (int)response.StatusCode, responseText, headers, exception); + } + } + else + { + try + { + using (var responseStream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false)) + using (var streamReader = new System.IO.StreamReader(responseStream)) + using (var jsonTextReader = new Newtonsoft.Json.JsonTextReader(streamReader)) + { + var serializer = Newtonsoft.Json.JsonSerializer.Create(JsonSerializerSettings); + var typedBody = 
serializer.Deserialize(jsonTextReader); + return new ObjectResponseResult(typedBody!, string.Empty); + } + } + catch (Newtonsoft.Json.JsonException exception) + { + var message = "Could not deserialize the response body stream as " + typeof(T).FullName + "."; + throw new ServalApiException(message, (int)response.StatusCode, string.Empty, headers, exception); + } + } + } + + private string ConvertToString(object? value, System.Globalization.CultureInfo cultureInfo) + { + if (value == null) + { + return ""; + } + + if (value is System.Enum) + { + var name = System.Enum.GetName(value.GetType(), value); + if (name != null) + { + var field = System.Reflection.IntrospectionExtensions.GetTypeInfo(value.GetType()).GetDeclaredField(name); + if (field != null) + { + var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) + as System.Runtime.Serialization.EnumMemberAttribute; + if (attribute != null) + { + return attribute.Value != null ? attribute.Value : name; + } + } + + var converted = System.Convert.ToString(System.Convert.ChangeType(value, System.Enum.GetUnderlyingType(value.GetType()), cultureInfo)); + return converted == null ? string.Empty : converted; + } + } + else if (value is bool) + { + return System.Convert.ToString((bool)value, cultureInfo).ToLowerInvariant(); + } + else if (value is byte[]) + { + return System.Convert.ToBase64String((byte[]) value); + } + else if (value is string[]) + { + return string.Join(",", (string[])value); + } + else if (value.GetType().IsArray) + { + var valueArray = (System.Array)value; + var valueTextArray = new string[valueArray.Length]; + for (var i = 0; i < valueArray.Length; i++) + { + valueTextArray[i] = ConvertToString(valueArray.GetValue(i), cultureInfo); + } + return string.Join(",", valueTextArray); + } + + var result = System.Convert.ToString(value, cultureInfo); + return result == null ? 
"" : result; + } + } + + [System.CodeDom.Compiler.GeneratedCode("NSwag", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial interface IWordAlignmentEngineTypesClient + { + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get queue information for a given engine type + /// + /// A valid engine type: statistical or echo-word-alignment + /// Queue information for the specified engine type + /// A server side error occurred. + System.Threading.Tasks.Task GetQueueAsync(string engineType, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + + } + + [System.CodeDom.Compiler.GeneratedCode("NSwag", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class WordAlignmentEngineTypesClient : IWordAlignmentEngineTypesClient + { + #pragma warning disable 8618 + private string _baseUrl; + #pragma warning restore 8618 + + private System.Net.Http.HttpClient _httpClient; + private static System.Lazy _settings = new System.Lazy(CreateSerializerSettings, true); + private Newtonsoft.Json.JsonSerializerSettings _instanceSettings; + + #pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. + public WordAlignmentEngineTypesClient(System.Net.Http.HttpClient httpClient) + #pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. 
+ { + BaseUrl = "/api/v1"; + _httpClient = httpClient; + Initialize(); + } + + private static Newtonsoft.Json.JsonSerializerSettings CreateSerializerSettings() + { + var settings = new Newtonsoft.Json.JsonSerializerSettings(); + UpdateJsonSerializerSettings(settings); + return settings; + } + + public string BaseUrl + { + get { return _baseUrl; } + set + { + _baseUrl = value; + if (!string.IsNullOrEmpty(_baseUrl) && !_baseUrl.EndsWith("/")) + _baseUrl += '/'; + } + } + + protected Newtonsoft.Json.JsonSerializerSettings JsonSerializerSettings { get { return _instanceSettings ?? _settings.Value; } } + + static partial void UpdateJsonSerializerSettings(Newtonsoft.Json.JsonSerializerSettings settings); + + partial void Initialize(); + + partial void PrepareRequest(System.Net.Http.HttpClient client, System.Net.Http.HttpRequestMessage request, string url); + partial void PrepareRequest(System.Net.Http.HttpClient client, System.Net.Http.HttpRequestMessage request, System.Text.StringBuilder urlBuilder); + partial void ProcessResponse(System.Net.Http.HttpClient client, System.Net.Http.HttpResponseMessage response); + + /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. + /// + /// Get queue information for a given engine type + /// + /// A valid engine type: statistical or echo-word-alignment + /// Queue information for the specified engine type + /// A server side error occurred. 
+ public virtual async System.Threading.Tasks.Task GetQueueAsync(string engineType, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + { + if (engineType == null) + throw new System.ArgumentNullException("engineType"); + + var client_ = _httpClient; + var disposeClient_ = false; + try + { + using (var request_ = new System.Net.Http.HttpRequestMessage()) + { + request_.Method = new System.Net.Http.HttpMethod("GET"); + request_.Headers.Accept.Add(System.Net.Http.Headers.MediaTypeWithQualityHeaderValue.Parse("application/json")); + + var urlBuilder_ = new System.Text.StringBuilder(); + if (!string.IsNullOrEmpty(_baseUrl)) urlBuilder_.Append(_baseUrl); + // Operation Path: "word-alignment/engine-types/{engineType}/queues" + urlBuilder_.Append("word-alignment/engine-types/"); + urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(engineType, System.Globalization.CultureInfo.InvariantCulture))); + urlBuilder_.Append("/queues"); + + PrepareRequest(client_, request_, urlBuilder_); + + var url_ = urlBuilder_.ToString(); + request_.RequestUri = new System.Uri(url_, System.UriKind.RelativeOrAbsolute); + + PrepareRequest(client_, request_, url_); + + var response_ = await client_.SendAsync(request_, System.Net.Http.HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false); + var disposeResponse_ = true; + try + { + var headers_ = new System.Collections.Generic.Dictionary>(); + foreach (var item_ in response_.Headers) + headers_[item_.Key] = item_.Value; + if (response_.Content != null && response_.Content.Headers != null) + { + foreach (var item_ in response_.Content.Headers) + headers_[item_.Key] = item_.Value; + } + + ProcessResponse(client_, response_); + + var status_ = (int)response_.StatusCode; + if (status_ == 200) + { + var objectResponse_ = await ReadObjectResponseAsync(response_, headers_, cancellationToken).ConfigureAwait(false); + if (objectResponse_.Object == null) + { + throw 
new ServalApiException("Response was null which was not expected.", status_, objectResponse_.Text, headers_, null); + } + return objectResponse_.Object; + } + else + if (status_ == 401) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The client is not authenticated", status_, responseText_, headers_, null); + } + else + if (status_ == 403) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The authenticated client cannot perform the operation", status_, responseText_, headers_, null); + } + else + if (status_ == 503) + { + string responseText_ = ( response_.Content == null ) ? string.Empty : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("A necessary service is currently unavailable. Check `/health` for more details. ", status_, responseText_, headers_, null); + } + else + { + var responseData_ = response_.Content == null ? 
null : await response_.Content.ReadAsStringAsync().ConfigureAwait(false); + throw new ServalApiException("The HTTP status code of the response was not expected (" + status_ + ").", status_, responseData_, headers_, null); + } + } + finally + { + if (disposeResponse_) + response_.Dispose(); + } + } + } + finally + { + if (disposeClient_) + client_.Dispose(); + } + } + + protected struct ObjectResponseResult + { + public ObjectResponseResult(T responseObject, string responseText) + { + this.Object = responseObject; + this.Text = responseText; + } + + public T Object { get; } + + public string Text { get; } + } + + public bool ReadResponseAsString { get; set; } + + protected virtual async System.Threading.Tasks.Task> ReadObjectResponseAsync(System.Net.Http.HttpResponseMessage response, System.Collections.Generic.IReadOnlyDictionary> headers, System.Threading.CancellationToken cancellationToken) + { + if (response == null || response.Content == null) + { + return new ObjectResponseResult(default(T)!, string.Empty); + } + + if (ReadResponseAsString) + { + var responseText = await response.Content.ReadAsStringAsync().ConfigureAwait(false); + try + { + var typedBody = Newtonsoft.Json.JsonConvert.DeserializeObject(responseText, JsonSerializerSettings); + return new ObjectResponseResult(typedBody!, responseText); + } + catch (Newtonsoft.Json.JsonException exception) + { + var message = "Could not deserialize the response body string as " + typeof(T).FullName + "."; + throw new ServalApiException(message, (int)response.StatusCode, responseText, headers, exception); + } + } + else + { + try + { + using (var responseStream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false)) + using (var streamReader = new System.IO.StreamReader(responseStream)) + using (var jsonTextReader = new Newtonsoft.Json.JsonTextReader(streamReader)) + { + var serializer = Newtonsoft.Json.JsonSerializer.Create(JsonSerializerSettings); + var typedBody = 
serializer.Deserialize(jsonTextReader); + return new ObjectResponseResult(typedBody!, string.Empty); + } + } + catch (Newtonsoft.Json.JsonException exception) + { + var message = "Could not deserialize the response body stream as " + typeof(T).FullName + "."; + throw new ServalApiException(message, (int)response.StatusCode, string.Empty, headers, exception); + } + } + } + + private string ConvertToString(object? value, System.Globalization.CultureInfo cultureInfo) + { + if (value == null) + { + return ""; + } + + if (value is System.Enum) + { + var name = System.Enum.GetName(value.GetType(), value); + if (name != null) + { + var field = System.Reflection.IntrospectionExtensions.GetTypeInfo(value.GetType()).GetDeclaredField(name); + if (field != null) + { + var attribute = System.Reflection.CustomAttributeExtensions.GetCustomAttribute(field, typeof(System.Runtime.Serialization.EnumMemberAttribute)) + as System.Runtime.Serialization.EnumMemberAttribute; + if (attribute != null) + { + return attribute.Value != null ? attribute.Value : name; + } + } + + var converted = System.Convert.ToString(System.Convert.ChangeType(value, System.Enum.GetUnderlyingType(value.GetType()), cultureInfo)); + return converted == null ? string.Empty : converted; + } + } + else if (value is bool) + { + return System.Convert.ToString((bool)value, cultureInfo).ToLowerInvariant(); + } + else if (value is byte[]) + { + return System.Convert.ToBase64String((byte[]) value); + } + else if (value is string[]) + { + return string.Join(",", (string[])value); + } + else if (value.GetType().IsArray) + { + var valueArray = (System.Array)value; + var valueTextArray = new string[valueArray.Length]; + for (var i = 0; i < valueArray.Length; i++) + { + valueTextArray[i] = ConvertToString(valueArray.GetValue(i), cultureInfo); + } + return string.Join(",", valueTextArray); + } + + var result = System.Convert.ToString(value, cultureInfo); + return result == null ? 
"" : result; + } + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class HealthReport + { + [Newtonsoft.Json.JsonProperty("status", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Status { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("totalDuration", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string TotalDuration { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("results", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IDictionary Results { get; set; } = new System.Collections.Generic.Dictionary(); + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class HealthReportEntry + { + [Newtonsoft.Json.JsonProperty("status", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Status { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("duration", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Duration { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("description", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? Description { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("exception", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? 
Exception { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("data", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IDictionary? Data { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class DeploymentInfo + { + [Newtonsoft.Json.JsonProperty("deploymentVersion", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string DeploymentVersion { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("aspNetCoreEnvironment", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string AspNetCoreEnvironment { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class Corpus + { + [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Id { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("revision", Required = Newtonsoft.Json.Required.Always)] + public int Revision { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("language", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Language { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? 
Name { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Url { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("files", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Files { get; set; } = new System.Collections.ObjectModel.Collection(); + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class CorpusFile + { + [Newtonsoft.Json.JsonProperty("file", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public ResourceLink File { get; set; } = new ResourceLink(); + + [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? TextId { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class ResourceLink + { + [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Id { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Url { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class CorpusConfig + { + [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? 
Name { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("language", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Language { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("files", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Files { get; set; } = new System.Collections.ObjectModel.Collection(); + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class CorpusFileConfig + { + [Newtonsoft.Json.JsonProperty("fileId", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string FileId { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? TextId { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class DataFile + { + [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Id { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Url { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? 
Name { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("format", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + [Newtonsoft.Json.JsonConverter(typeof(Newtonsoft.Json.Converters.StringEnumConverter))] + public FileFormat Format { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("revision", Required = Newtonsoft.Json.Required.Always)] + public int Revision { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public enum FileFormat + { + + [System.Runtime.Serialization.EnumMember(Value = @"Text")] + Text = 0, + + [System.Runtime.Serialization.EnumMember(Value = @"Paratext")] + Paratext = 1, + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationEngine + { + [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Id { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Url { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? 
Name { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceLanguage", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string SourceLanguage { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetLanguage", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string TargetLanguage { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("type", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Type { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("isModelPersisted", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public bool? IsModelPersisted { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("isBuilding", Required = Newtonsoft.Json.Required.Always)] + public bool IsBuilding { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("modelRevision", Required = Newtonsoft.Json.Required.Always)] + public int ModelRevision { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("confidence", Required = Newtonsoft.Json.Required.Always)] + public double Confidence { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("corpusSize", Required = Newtonsoft.Json.Required.Always)] + public int CorpusSize { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationEngineConfig + { + /// + /// The translation engine name. + /// + [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? Name { get; set; } = default!; + + /// + /// The source language tag. 
+ /// + [Newtonsoft.Json.JsonProperty("sourceLanguage", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string SourceLanguage { get; set; } = default!; + + /// + /// The target language tag. + /// + [Newtonsoft.Json.JsonProperty("targetLanguage", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string TargetLanguage { get; set; } = default!; + + /// + /// The translation engine type. + /// + [Newtonsoft.Json.JsonProperty("type", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Type { get; set; } = default!; + + /// + /// The model is saved when built and can be retrieved. + /// + [Newtonsoft.Json.JsonProperty("isModelPersisted", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public bool? 
IsModelPersisted { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationResult + { + [Newtonsoft.Json.JsonProperty("translation", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Translation { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceTokens", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList SourceTokens { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("targetTokens", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList TargetTokens { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("confidences", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Confidences { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("sources", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList> Sources { get; set; } = new System.Collections.ObjectModel.Collection>(); + + [Newtonsoft.Json.JsonProperty("alignment", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Alignment { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("phrases", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Phrases { get; set; } = new 
System.Collections.ObjectModel.Collection(); + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public enum TranslationSource + { + + [System.Runtime.Serialization.EnumMember(Value = @"Primary")] + Primary = 0, + + [System.Runtime.Serialization.EnumMember(Value = @"Secondary")] + Secondary = 1, + + [System.Runtime.Serialization.EnumMember(Value = @"Human")] + Human = 2, + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class AlignedWordPair + { + [Newtonsoft.Json.JsonProperty("sourceIndex", Required = Newtonsoft.Json.Required.Always)] + public int SourceIndex { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetIndex", Required = Newtonsoft.Json.Required.Always)] + public int TargetIndex { get; set; } = default!; + + } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class HealthReport + public partial class Phrase { - [Newtonsoft.Json.JsonProperty("status", Required = Newtonsoft.Json.Required.Always)] + [Newtonsoft.Json.JsonProperty("sourceSegmentStart", Required = Newtonsoft.Json.Required.Always)] + public int SourceSegmentStart { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceSegmentEnd", Required = Newtonsoft.Json.Required.Always)] + public int SourceSegmentEnd { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetSegmentCut", Required = Newtonsoft.Json.Required.Always)] + public int TargetSegmentCut { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class WordGraph + { + [Newtonsoft.Json.JsonProperty("sourceTokens", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public 
System.Collections.Generic.IList SourceTokens { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("initialStateScore", Required = Newtonsoft.Json.Required.Always)] + public float InitialStateScore { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("finalStates", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList FinalStates { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("arcs", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Arcs { get; set; } = new System.Collections.ObjectModel.Collection(); + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class WordGraphArc + { + [Newtonsoft.Json.JsonProperty("prevState", Required = Newtonsoft.Json.Required.Always)] + public int PrevState { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("nextState", Required = Newtonsoft.Json.Required.Always)] + public int NextState { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("score", Required = Newtonsoft.Json.Required.Always)] + public double Score { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetTokens", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList TargetTokens { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("confidences", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Confidences { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("sourceSegmentStart", Required = Newtonsoft.Json.Required.Always)] 
+ public int SourceSegmentStart { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceSegmentEnd", Required = Newtonsoft.Json.Required.Always)] + public int SourceSegmentEnd { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("alignment", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Alignment { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("sources", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList> Sources { get; set; } = new System.Collections.ObjectModel.Collection>(); + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class SegmentPair + { + [Newtonsoft.Json.JsonProperty("sourceSegment", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Status { get; set; } = default!; + public string SourceSegment { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("totalDuration", Required = Newtonsoft.Json.Required.Always)] + [Newtonsoft.Json.JsonProperty("targetSegment", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string TotalDuration { get; set; } = default!; + public string TargetSegment { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("results", Required = Newtonsoft.Json.Required.Always)] + [Newtonsoft.Json.JsonProperty("sentenceStart", Required = Newtonsoft.Json.Required.Always)] + public bool SentenceStart { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationCorpus + { + [Newtonsoft.Json.JsonProperty("id", 
Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Id { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Url { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("engine", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IDictionary Results { get; set; } = new System.Collections.Generic.Dictionary(); + public ResourceLink Engine { get; set; } = new ResourceLink(); + + [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? Name { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceLanguage", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string SourceLanguage { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetLanguage", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string TargetLanguage { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceFiles", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList SourceFiles { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("targetFiles", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList TargetFiles { get; set; } = new System.Collections.ObjectModel.Collection(); } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json 
v13.0.0.0))")] - public partial class HealthReportEntry + public partial class TranslationCorpusFile { - [Newtonsoft.Json.JsonProperty("status", Required = Newtonsoft.Json.Required.Always)] + [Newtonsoft.Json.JsonProperty("file", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public ResourceLink File { get; set; } = new ResourceLink(); + + [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? TextId { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationCorpusConfig + { + /// + /// The corpus name. + /// + [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? Name { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceLanguage", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Status { get; set; } = default!; + public string SourceLanguage { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("duration", Required = Newtonsoft.Json.Required.Always)] + [Newtonsoft.Json.JsonProperty("targetLanguage", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Duration { get; set; } = default!; + public string TargetLanguage { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("description", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public string? 
Description { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("sourceFiles", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList SourceFiles { get; set; } = new System.Collections.ObjectModel.Collection(); - [Newtonsoft.Json.JsonProperty("exception", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public string? Exception { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("targetFiles", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList TargetFiles { get; set; } = new System.Collections.ObjectModel.Collection(); - [Newtonsoft.Json.JsonProperty("data", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public System.Collections.Generic.IDictionary? Data { get; set; } = default!; + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationCorpusFileConfig + { + [Newtonsoft.Json.JsonProperty("fileId", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string FileId { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? 
TextId { get; set; } = default!; } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class DeploymentInfo + public partial class TranslationCorpusUpdateConfig { - [Newtonsoft.Json.JsonProperty("deploymentVersion", Required = Newtonsoft.Json.Required.Always)] + [Newtonsoft.Json.JsonProperty("sourceFiles", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? SourceFiles { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetFiles", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? TargetFiles { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationParallelCorpus + { + [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string DeploymentVersion { get; set; } = default!; + public string Id { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("aspNetCoreEnvironment", Required = Newtonsoft.Json.Required.Always)] + [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string AspNetCoreEnvironment { get; set; } = default!; + public string Url { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("engine", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public ResourceLink Engine { get; set; } = new ResourceLink(); + + [Newtonsoft.Json.JsonProperty("sourceCorpora", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public 
System.Collections.Generic.IList SourceCorpora { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("targetCorpora", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList TargetCorpora { get; set; } = new System.Collections.ObjectModel.Collection(); + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationParallelCorpusConfig + { + /// + /// The corpus name. + /// + [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? Name { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceCorpusIds", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList SourceCorpusIds { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("targetCorpusIds", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList TargetCorpusIds { get; set; } = new System.Collections.ObjectModel.Collection(); + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TranslationParallelCorpusUpdateConfig + { + [Newtonsoft.Json.JsonProperty("sourceCorpusIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? SourceCorpusIds { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetCorpusIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? 
TargetCorpusIds { get; set; } = default!; + + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class Pretranslation + { + [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string TextId { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("refs", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Refs { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("translation", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Translation { get; set; } = default!; } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class Corpus + public enum PretranslationUsfmTextOrigin { - [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Id { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("revision", Required = Newtonsoft.Json.Required.Always)] - public int Revision { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("language", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Language { get; set; } = default!; + [System.Runtime.Serialization.EnumMember(Value = @"PreferExisting")] + PreferExisting = 0, - [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public string? 
Name { get; set; } = default!; + [System.Runtime.Serialization.EnumMember(Value = @"PreferPretranslated")] + PreferPretranslated = 1, - [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Url { get; set; } = default!; + [System.Runtime.Serialization.EnumMember(Value = @"OnlyExisting")] + OnlyExisting = 2, - [Newtonsoft.Json.JsonProperty("files", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList Files { get; set; } = new System.Collections.ObjectModel.Collection(); + [System.Runtime.Serialization.EnumMember(Value = @"OnlyPretranslated")] + OnlyPretranslated = 3, } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class CorpusFile + public enum PretranslationUsfmTemplate { - [Newtonsoft.Json.JsonProperty("file", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public ResourceLink File { get; set; } = new ResourceLink(); - [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public string? 
TextId { get; set; } = default!; + [System.Runtime.Serialization.EnumMember(Value = @"Auto")] + Auto = 0, + + [System.Runtime.Serialization.EnumMember(Value = @"Source")] + Source = 1, + + [System.Runtime.Serialization.EnumMember(Value = @"Target")] + Target = 2, } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class ResourceLink + public partial class TranslationBuild { [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] @@ -7093,300 +10140,329 @@ public partial class ResourceLink [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] public string Url { get; set; } = default!; - } + [Newtonsoft.Json.JsonProperty("revision", Required = Newtonsoft.Json.Required.Always)] + public int Revision { get; set; } = default!; - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class CorpusConfig - { [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public string? 
Name { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("language", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Language { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("files", Required = Newtonsoft.Json.Required.Always)] + [Newtonsoft.Json.JsonProperty("engine", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList Files { get; set; } = new System.Collections.ObjectModel.Collection(); + public ResourceLink Engine { get; set; } = new ResourceLink(); - } + [Newtonsoft.Json.JsonProperty("trainOn", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? TrainOn { get; set; } = default!; - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class CorpusFileConfig - { - [Newtonsoft.Json.JsonProperty("fileId", Required = Newtonsoft.Json.Required.Always)] + [Newtonsoft.Json.JsonProperty("pretranslate", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? Pretranslate { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("step", Required = Newtonsoft.Json.Required.Always)] + public int Step { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("percentCompleted", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public double? PercentCompleted { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("message", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? 
Message { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("queueDepth", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public int? QueueDepth { get; set; } = default!; + + /// + /// The current build job state. + /// + [Newtonsoft.Json.JsonProperty("state", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string FileId { get; set; } = default!; + [Newtonsoft.Json.JsonConverter(typeof(Newtonsoft.Json.Converters.StringEnumConverter))] + public JobState State { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public string? TextId { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("dateFinished", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.DateTimeOffset? DateFinished { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("options", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public object? Options { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("deploymentVersion", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? 
DeploymentVersion { get; set; } = default!; } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class DataFile + public partial class TrainingCorpus { - [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Id { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] + public ResourceLink? Corpus { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Url { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] + public System.Collections.Generic.IList? TextIds { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public string? Name { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] + public string? 
ScriptureRange { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("format", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - [Newtonsoft.Json.JsonConverter(typeof(Newtonsoft.Json.Converters.StringEnumConverter))] - public FileFormat Format { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("parallelCorpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public ResourceLink? ParallelCorpus { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("revision", Required = Newtonsoft.Json.Required.Always)] - public int Revision { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? SourceFilters { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? TargetFilters { get; set; } = default!; } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public enum FileFormat + public partial class ParallelCorpusFilter { + [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public ResourceLink Corpus { get; set; } = new ResourceLink(); - [System.Runtime.Serialization.EnumMember(Value = @"Text")] - Text = 0, + [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? 
TextIds { get; set; } = default!; - [System.Runtime.Serialization.EnumMember(Value = @"Paratext")] - Paratext = 1, + [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? ScriptureRange { get; set; } = default!; } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TranslationEngine + public partial class PretranslateCorpus { - [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Id { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] + public ResourceLink? Corpus { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Url { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] + public System.Collections.Generic.IList? TextIds { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public string? Name { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] + public string? 
ScriptureRange { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("sourceLanguage", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string SourceLanguage { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("parallelCorpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public ResourceLink? ParallelCorpus { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("targetLanguage", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string TargetLanguage { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? SourceFilters { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("type", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Type { get; set; } = default!; + } - [Newtonsoft.Json.JsonProperty("isModelPersisted", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public bool? 
IsModelPersisted { get; set; } = default!; + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public enum JobState + { - [Newtonsoft.Json.JsonProperty("isBuilding", Required = Newtonsoft.Json.Required.Always)] - public bool IsBuilding { get; set; } = default!; + [System.Runtime.Serialization.EnumMember(Value = @"Pending")] + Pending = 0, - [Newtonsoft.Json.JsonProperty("modelRevision", Required = Newtonsoft.Json.Required.Always)] - public int ModelRevision { get; set; } = default!; + [System.Runtime.Serialization.EnumMember(Value = @"Active")] + Active = 1, - [Newtonsoft.Json.JsonProperty("confidence", Required = Newtonsoft.Json.Required.Always)] - public double Confidence { get; set; } = default!; + [System.Runtime.Serialization.EnumMember(Value = @"Completed")] + Completed = 2, - [Newtonsoft.Json.JsonProperty("corpusSize", Required = Newtonsoft.Json.Required.Always)] - public int CorpusSize { get; set; } = default!; + [System.Runtime.Serialization.EnumMember(Value = @"Faulted")] + Faulted = 3, + + [System.Runtime.Serialization.EnumMember(Value = @"Canceled")] + Canceled = 4, } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TranslationEngineConfig + public partial class TranslationBuildConfig { - /// - /// The translation engine name. - /// [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public string? Name { get; set; } = default!; - /// - /// The source language tag. 
- /// - [Newtonsoft.Json.JsonProperty("sourceLanguage", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string SourceLanguage { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("trainOn", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? TrainOn { get; set; } = default!; - /// - /// The target language tag. - /// - [Newtonsoft.Json.JsonProperty("targetLanguage", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string TargetLanguage { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("pretranslate", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? Pretranslate { get; set; } = default!; - /// - /// The translation engine type. - /// - [Newtonsoft.Json.JsonProperty("type", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Type { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("options", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public object? Options { get; set; } = default!; - /// - /// The model is saved when built and can be retrieved. - /// - [Newtonsoft.Json.JsonProperty("isModelPersisted", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public bool? 
IsModelPersisted { get; set; } = default!; + } + + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class TrainingCorpusConfig + { + [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] + public string? CorpusId { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] + public System.Collections.Generic.IList? TextIds { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] + public string? ScriptureRange { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("parallelCorpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? ParallelCorpusId { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? SourceFilters { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("targetFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? 
TargetFilters { get; set; } = default!; } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TranslationResult + public partial class ParallelCorpusFilterConfig { - [Newtonsoft.Json.JsonProperty("translation", Required = Newtonsoft.Json.Required.Always)] + [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Translation { get; set; } = default!; + public string CorpusId { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("sourceTokens", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList SourceTokens { get; set; } = new System.Collections.ObjectModel.Collection(); + [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? TextIds { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("targetTokens", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList TargetTokens { get; set; } = new System.Collections.ObjectModel.Collection(); + [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? 
ScriptureRange { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("confidences", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList Confidences { get; set; } = new System.Collections.ObjectModel.Collection(); + } - [Newtonsoft.Json.JsonProperty("sources", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList> Sources { get; set; } = new System.Collections.ObjectModel.Collection>(); + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public partial class PretranslateCorpusConfig + { + [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] + public string? CorpusId { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("alignment", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList Alignment { get; set; } = new System.Collections.ObjectModel.Collection(); + [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] + public System.Collections.Generic.IList? TextIds { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("phrases", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList Phrases { get; set; } = new System.Collections.ObjectModel.Collection(); + [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + [System.Obsolete] + public string? 
ScriptureRange { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("parallelCorpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? ParallelCorpusId { get; set; } = default!; + + [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? SourceFilters { get; set; } = default!; } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public enum TranslationSource + public partial class ModelDownloadUrl { + [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Url { get; set; } = default!; - [System.Runtime.Serialization.EnumMember(Value = @"Primary")] - Primary = 0, - - [System.Runtime.Serialization.EnumMember(Value = @"Secondary")] - Secondary = 1, + [Newtonsoft.Json.JsonProperty("modelRevision", Required = Newtonsoft.Json.Required.Always)] + public int ModelRevision { get; set; } = default!; - [System.Runtime.Serialization.EnumMember(Value = @"Human")] - Human = 2, + [Newtonsoft.Json.JsonProperty("expiresAt", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public System.DateTimeOffset ExpiresAt { get; set; } = default!; } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class AlignedWordPair + public partial class Queue { - [Newtonsoft.Json.JsonProperty("sourceIndex", Required = Newtonsoft.Json.Required.Always)] - public int SourceIndex { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("size", Required = Newtonsoft.Json.Required.Always)] + public int Size { get; set; } = default!; - 
[Newtonsoft.Json.JsonProperty("targetIndex", Required = Newtonsoft.Json.Required.Always)] - public int TargetIndex { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("engineType", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string EngineType { get; set; } = default!; } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class Phrase + public partial class LanguageInfo { - [Newtonsoft.Json.JsonProperty("sourceSegmentStart", Required = Newtonsoft.Json.Required.Always)] - public int SourceSegmentStart { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("engineType", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string EngineType { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("sourceSegmentEnd", Required = Newtonsoft.Json.Required.Always)] - public int SourceSegmentEnd { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("isNative", Required = Newtonsoft.Json.Required.Always)] + public bool IsNative { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("targetSegmentCut", Required = Newtonsoft.Json.Required.Always)] - public int TargetSegmentCut { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("internalCode", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public string? 
InternalCode { get; set; } = default!; } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class WordGraph + public partial class Webhook { - [Newtonsoft.Json.JsonProperty("sourceTokens", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList SourceTokens { get; set; } = new System.Collections.ObjectModel.Collection(); + [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Id { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("initialStateScore", Required = Newtonsoft.Json.Required.Always)] - public float InitialStateScore { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Url { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("finalStates", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList FinalStates { get; set; } = new System.Collections.ObjectModel.Collection(); + [Newtonsoft.Json.JsonProperty("payloadUrl", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string PayloadUrl { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("arcs", Required = Newtonsoft.Json.Required.Always)] + [Newtonsoft.Json.JsonProperty("events", Required = Newtonsoft.Json.Required.Always, ItemConverterType = typeof(Newtonsoft.Json.Converters.StringEnumConverter))] [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList Arcs { get; set; } = new System.Collections.ObjectModel.Collection(); + public System.Collections.Generic.IList Events 
{ get; set; } = new System.Collections.ObjectModel.Collection(); } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class WordGraphArc + public enum WebhookEvent { - [Newtonsoft.Json.JsonProperty("prevState", Required = Newtonsoft.Json.Required.Always)] - public int PrevState { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("nextState", Required = Newtonsoft.Json.Required.Always)] - public int NextState { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("score", Required = Newtonsoft.Json.Required.Always)] - public double Score { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("targetTokens", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList TargetTokens { get; set; } = new System.Collections.ObjectModel.Collection(); - - [Newtonsoft.Json.JsonProperty("confidences", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList Confidences { get; set; } = new System.Collections.ObjectModel.Collection(); - [Newtonsoft.Json.JsonProperty("sourceSegmentStart", Required = Newtonsoft.Json.Required.Always)] - public int SourceSegmentStart { get; set; } = default!; + [System.Runtime.Serialization.EnumMember(Value = @"TranslationBuildStarted")] + TranslationBuildStarted = 0, - [Newtonsoft.Json.JsonProperty("sourceSegmentEnd", Required = Newtonsoft.Json.Required.Always)] - public int SourceSegmentEnd { get; set; } = default!; + [System.Runtime.Serialization.EnumMember(Value = @"TranslationBuildFinished")] + TranslationBuildFinished = 1, - [Newtonsoft.Json.JsonProperty("alignment", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList Alignment { get; set; } = new System.Collections.ObjectModel.Collection(); + 
[System.Runtime.Serialization.EnumMember(Value = @"WordAlignmentBuildStarted")] + WordAlignmentBuildStarted = 2, - [Newtonsoft.Json.JsonProperty("sources", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList> Sources { get; set; } = new System.Collections.ObjectModel.Collection>(); + [System.Runtime.Serialization.EnumMember(Value = @"WordAlignmentBuildFinished")] + WordAlignmentBuildFinished = 3, } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class SegmentPair + public partial class WebhookConfig { - [Newtonsoft.Json.JsonProperty("sourceSegment", Required = Newtonsoft.Json.Required.Always)] + /// + /// The payload URL. + /// + [Newtonsoft.Json.JsonProperty("payloadUrl", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string SourceSegment { get; set; } = default!; + public string PayloadUrl { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("targetSegment", Required = Newtonsoft.Json.Required.Always)] + /// + /// The shared secret. + /// + [Newtonsoft.Json.JsonProperty("secret", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string TargetSegment { get; set; } = default!; + public string Secret { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("sentenceStart", Required = Newtonsoft.Json.Required.Always)] - public bool SentenceStart { get; set; } = default!; + /// + /// The webhook events. 
+ /// + [Newtonsoft.Json.JsonProperty("events", Required = Newtonsoft.Json.Required.Always, ItemConverterType = typeof(Newtonsoft.Json.Converters.StringEnumConverter))] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Events { get; set; } = new System.Collections.ObjectModel.Collection(); } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TranslationCorpus + public partial class WordAlignmentEngine { [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] @@ -7396,10 +10472,6 @@ public partial class TranslationCorpus [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] public string Url { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("engine", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public ResourceLink Engine { get; set; } = new ResourceLink(); - [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public string? 
Name { get; set; } = default!; @@ -7411,80 +10483,92 @@ public partial class TranslationCorpus [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] public string TargetLanguage { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("sourceFiles", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList SourceFiles { get; set; } = new System.Collections.ObjectModel.Collection(); + [Newtonsoft.Json.JsonProperty("type", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Type { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("targetFiles", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList TargetFiles { get; set; } = new System.Collections.ObjectModel.Collection(); + [Newtonsoft.Json.JsonProperty("isBuilding", Required = Newtonsoft.Json.Required.Always)] + public bool IsBuilding { get; set; } = default!; - } + [Newtonsoft.Json.JsonProperty("modelRevision", Required = Newtonsoft.Json.Required.Always)] + public int ModelRevision { get; set; } = default!; - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TranslationCorpusFile - { - [Newtonsoft.Json.JsonProperty("file", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public ResourceLink File { get; set; } = new ResourceLink(); + [Newtonsoft.Json.JsonProperty("confidence", Required = Newtonsoft.Json.Required.Always)] + public double Confidence { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public string? 
TextId { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("corpusSize", Required = Newtonsoft.Json.Required.Always)] + public int CorpusSize { get; set; } = default!; } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TranslationCorpusConfig + public partial class WordAlignmentEngineConfig { /// - /// The corpus name. + /// The word alignment engine name. /// [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public string? Name { get; set; } = default!; + /// + /// The source language tag. + /// [Newtonsoft.Json.JsonProperty("sourceLanguage", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] public string SourceLanguage { get; set; } = default!; + /// + /// The target language tag. + /// [Newtonsoft.Json.JsonProperty("targetLanguage", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] public string TargetLanguage { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("sourceFiles", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList SourceFiles { get; set; } = new System.Collections.ObjectModel.Collection(); - - [Newtonsoft.Json.JsonProperty("targetFiles", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList TargetFiles { get; set; } = new System.Collections.ObjectModel.Collection(); + /// + /// The translation engine type. 
+ /// + [Newtonsoft.Json.JsonProperty("type", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string Type { get; set; } = default!; } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TranslationCorpusFileConfig + public partial class WordAlignmentResult { - [Newtonsoft.Json.JsonProperty("fileId", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string FileId { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("sourceTokens", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList SourceTokens { get; set; } = new System.Collections.ObjectModel.Collection(); - [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public string? 
TextId { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("targetTokens", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList TargetTokens { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("confidences", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Confidences { get; set; } = new System.Collections.ObjectModel.Collection(); + + [Newtonsoft.Json.JsonProperty("alignment", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Alignment { get; set; } = new System.Collections.ObjectModel.Collection(); } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TranslationCorpusUpdateConfig + public partial class WordAlignmentRequest { - [Newtonsoft.Json.JsonProperty("sourceFiles", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public System.Collections.Generic.IList? SourceFiles { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("sourceSegment", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string SourceSegment { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("targetFiles", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public System.Collections.Generic.IList? 
TargetFiles { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("targetSegment", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] + public string TargetSegment { get; set; } = default!; } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TranslationParallelCorpus + public partial class WordAlignmentParallelCorpus { [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] @@ -7509,7 +10593,7 @@ public partial class TranslationParallelCorpus } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TranslationParallelCorpusConfig + public partial class WordAlignmentParallelCorpusConfig { /// /// The corpus name. @@ -7528,7 +10612,7 @@ public partial class TranslationParallelCorpusConfig } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TranslationParallelCorpusUpdateConfig + public partial class WordAlignmentParallelCorpusUpdateConfig { [Newtonsoft.Json.JsonProperty("sourceCorpusIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public System.Collections.Generic.IList? 
SourceCorpusIds { get; set; } = default!; @@ -7539,7 +10623,7 @@ public partial class TranslationParallelCorpusUpdateConfig } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class Pretranslation + public partial class WordAlignment { [Newtonsoft.Json.JsonProperty("textId", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] @@ -7549,47 +10633,26 @@ public partial class Pretranslation [System.ComponentModel.DataAnnotations.Required] public System.Collections.Generic.IList Refs { get; set; } = new System.Collections.ObjectModel.Collection(); - [Newtonsoft.Json.JsonProperty("translation", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Translation { get; set; } = default!; - - } - - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public enum PretranslationUsfmTextOrigin - { - - [System.Runtime.Serialization.EnumMember(Value = @"PreferExisting")] - PreferExisting = 0, - - [System.Runtime.Serialization.EnumMember(Value = @"PreferPretranslated")] - PreferPretranslated = 1, - - [System.Runtime.Serialization.EnumMember(Value = @"OnlyExisting")] - OnlyExisting = 2, - - [System.Runtime.Serialization.EnumMember(Value = @"OnlyPretranslated")] - OnlyPretranslated = 3, - - } - - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public enum PretranslationUsfmTemplate - { + [Newtonsoft.Json.JsonProperty("sourceTokens", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList SourceTokens { get; set; } = new System.Collections.ObjectModel.Collection(); - [System.Runtime.Serialization.EnumMember(Value = @"Auto")] - Auto 
= 0, + [Newtonsoft.Json.JsonProperty("targetTokens", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList TargetTokens { get; set; } = new System.Collections.ObjectModel.Collection(); - [System.Runtime.Serialization.EnumMember(Value = @"Source")] - Source = 1, + [Newtonsoft.Json.JsonProperty("confidences", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Confidences { get; set; } = new System.Collections.ObjectModel.Collection(); - [System.Runtime.Serialization.EnumMember(Value = @"Target")] - Target = 2, + [Newtonsoft.Json.JsonProperty("alignment", Required = Newtonsoft.Json.Required.Always)] + [System.ComponentModel.DataAnnotations.Required] + public System.Collections.Generic.IList Alignment { get; set; } = new System.Collections.ObjectModel.Collection(); } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TranslationBuild + public partial class WordAlignmentBuild { [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] @@ -7610,10 +10673,10 @@ public partial class TranslationBuild public ResourceLink Engine { get; set; } = new ResourceLink(); [Newtonsoft.Json.JsonProperty("trainOn", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public System.Collections.Generic.IList? TrainOn { get; set; } = default!; + public System.Collections.Generic.IList? TrainOn { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("pretranslate", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public System.Collections.Generic.IList? 
Pretranslate { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("wordAlignOn", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? WordAlignOn { get; set; } = default!; [Newtonsoft.Json.JsonProperty("step", Required = Newtonsoft.Json.Required.Always)] public int Step { get; set; } = default!; @@ -7647,33 +10710,21 @@ public partial class TranslationBuild } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TrainingCorpus + public partial class TrainingCorpus2 { - [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - [System.Obsolete] - public ResourceLink? Corpus { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - [System.Obsolete] - public System.Collections.Generic.IList? TextIds { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - [System.Obsolete] - public string? ScriptureRange { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("parallelCorpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public ResourceLink? ParallelCorpus { get; set; } = default!; [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public System.Collections.Generic.IList? SourceFilters { get; set; } = default!; + public System.Collections.Generic.IList? 
SourceFilters { get; set; } = default!; [Newtonsoft.Json.JsonProperty("targetFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public System.Collections.Generic.IList? TargetFilters { get; set; } = default!; + public System.Collections.Generic.IList? TargetFilters { get; set; } = default!; } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class ParallelCorpusFilter + public partial class ParallelCorpusFilter2 { [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required] @@ -7688,60 +10739,16 @@ public partial class ParallelCorpusFilter } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class PretranslateCorpus - { - [Newtonsoft.Json.JsonProperty("corpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - [System.Obsolete] - public ResourceLink? Corpus { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - [System.Obsolete] - public System.Collections.Generic.IList? TextIds { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - [System.Obsolete] - public string? ScriptureRange { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("parallelCorpus", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public ResourceLink? 
ParallelCorpus { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public System.Collections.Generic.IList? SourceFilters { get; set; } = default!; - - } - - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public enum JobState - { - - [System.Runtime.Serialization.EnumMember(Value = @"Pending")] - Pending = 0, - - [System.Runtime.Serialization.EnumMember(Value = @"Active")] - Active = 1, - - [System.Runtime.Serialization.EnumMember(Value = @"Completed")] - Completed = 2, - - [System.Runtime.Serialization.EnumMember(Value = @"Faulted")] - Faulted = 3, - - [System.Runtime.Serialization.EnumMember(Value = @"Canceled")] - Canceled = 4, - - } - - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TranslationBuildConfig + public partial class WordAlignmentBuildConfig { [Newtonsoft.Json.JsonProperty("name", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public string? Name { get; set; } = default!; [Newtonsoft.Json.JsonProperty("trainOn", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public System.Collections.Generic.IList? TrainOn { get; set; } = default!; + public System.Collections.Generic.IList? TrainOn { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("pretranslate", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public System.Collections.Generic.IList? 
Pretranslate { get; set; } = default!; + [Newtonsoft.Json.JsonProperty("wordAlignOn", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] + public System.Collections.Generic.IList? WordAlignOn { get; set; } = default!; [Newtonsoft.Json.JsonProperty("options", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public object? Options { get; set; } = default!; @@ -7749,33 +10756,21 @@ public partial class TranslationBuildConfig } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class TrainingCorpusConfig + public partial class TrainingCorpusConfig2 { - [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - [System.Obsolete] - public string? CorpusId { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - [System.Obsolete] - public System.Collections.Generic.IList? TextIds { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - [System.Obsolete] - public string? ScriptureRange { get; set; } = default!; - [Newtonsoft.Json.JsonProperty("parallelCorpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] public string? ParallelCorpusId { get; set; } = default!; [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public System.Collections.Generic.IList? SourceFilters { get; set; } = default!; + public System.Collections.Generic.IList? 
SourceFilters { get; set; } = default!; [Newtonsoft.Json.JsonProperty("targetFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public System.Collections.Generic.IList? TargetFilters { get; set; } = default!; + public System.Collections.Generic.IList? TargetFilters { get; set; } = default!; } [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class ParallelCorpusFilterConfig + public partial class ParallelCorpusFilterConfig2 { [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Always)] [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] @@ -7789,131 +10784,6 @@ public partial class ParallelCorpusFilterConfig } - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class PretranslateCorpusConfig - { - [Newtonsoft.Json.JsonProperty("corpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - [System.Obsolete] - public string? CorpusId { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("textIds", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - [System.Obsolete] - public System.Collections.Generic.IList? TextIds { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("scriptureRange", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - [System.Obsolete] - public string? ScriptureRange { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("parallelCorpusId", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public string? 
ParallelCorpusId { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("sourceFilters", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public System.Collections.Generic.IList? SourceFilters { get; set; } = default!; - - } - - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class ModelDownloadUrl - { - [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Url { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("modelRevision", Required = Newtonsoft.Json.Required.Always)] - public int ModelRevision { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("expiresAt", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public System.DateTimeOffset ExpiresAt { get; set; } = default!; - - } - - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class Queue - { - [Newtonsoft.Json.JsonProperty("size", Required = Newtonsoft.Json.Required.Always)] - public int Size { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("engineType", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string EngineType { get; set; } = default!; - - } - - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class LanguageInfo - { - [Newtonsoft.Json.JsonProperty("engineType", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string EngineType { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("isNative", 
Required = Newtonsoft.Json.Required.Always)] - public bool IsNative { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("internalCode", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)] - public string? InternalCode { get; set; } = default!; - - } - - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class Webhook - { - [Newtonsoft.Json.JsonProperty("id", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Id { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("url", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Url { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("payloadUrl", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string PayloadUrl { get; set; } = default!; - - [Newtonsoft.Json.JsonProperty("events", Required = Newtonsoft.Json.Required.Always, ItemConverterType = typeof(Newtonsoft.Json.Converters.StringEnumConverter))] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList Events { get; set; } = new System.Collections.ObjectModel.Collection(); - - } - - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public enum WebhookEvent - { - - [System.Runtime.Serialization.EnumMember(Value = @"TranslationBuildStarted")] - TranslationBuildStarted = 0, - - [System.Runtime.Serialization.EnumMember(Value = @"TranslationBuildFinished")] - TranslationBuildFinished = 1, - - } - - [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] - public partial class WebhookConfig - { - 
/// - /// The payload URL. - /// - [Newtonsoft.Json.JsonProperty("payloadUrl", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string PayloadUrl { get; set; } = default!; - - /// - /// The shared secret. - /// - [Newtonsoft.Json.JsonProperty("secret", Required = Newtonsoft.Json.Required.Always)] - [System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)] - public string Secret { get; set; } = default!; - - /// - /// The webhook events. - /// - [Newtonsoft.Json.JsonProperty("events", Required = Newtonsoft.Json.Required.Always, ItemConverterType = typeof(Newtonsoft.Json.Converters.StringEnumConverter))] - [System.ComponentModel.DataAnnotations.Required] - public System.Collections.Generic.IList Events { get; set; } = new System.Collections.ObjectModel.Collection(); - - } - [System.CodeDom.Compiler.GeneratedCode("NSwag", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class FileParameter { diff --git a/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs b/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs index 546eed93..47bc811e 100644 --- a/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs +++ b/src/Serval/src/Serval.DataFiles/Controllers/CorporaController.cs @@ -166,18 +166,18 @@ private async Task MapAsync(CorpusConfigDto corpusConfig, string id, Can }; } - private async Task> MapAsync( + private async Task> MapAsync( IReadOnlyList files, CancellationToken cancellationToken ) { - var dataFiles = new List(); + var dataFiles = new List(); foreach (CorpusFileConfigDto file in files) { DataFile? 
dataFile = await _dataFileService.GetAsync(file.FileId, cancellationToken); if (dataFile == null) throw new InvalidOperationException($"DataFile with id {file.FileId} does not exist."); - dataFiles.Add(new CorpusFile { FileRef = file.FileId, TextId = file.TextId }); + dataFiles.Add(new Models.CorpusFile { FileRef = file.FileId, TextId = file.TextId }); } return dataFiles; } @@ -195,7 +195,7 @@ private CorpusDto Map(Corpus source) }; } - private CorpusFileDto Map(CorpusFile source) + private CorpusFileDto Map(Models.CorpusFile source) { return new CorpusFileDto { diff --git a/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs b/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs index 35dbad08..684b678f 100644 --- a/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs +++ b/src/Serval/src/Serval.DataFiles/Services/CorpusService.cs @@ -21,7 +21,7 @@ public async Task GetAsync(string id, string owner, CancellationToken ca public async Task UpdateAsync( string id, - IReadOnlyList files, + IReadOnlyList files, CancellationToken cancellationToken = default ) { diff --git a/src/Serval/src/Serval.DataFiles/Services/ICorpusService.cs b/src/Serval/src/Serval.DataFiles/Services/ICorpusService.cs index a4f0e242..d5aa3645 100644 --- a/src/Serval/src/Serval.DataFiles/Services/ICorpusService.cs +++ b/src/Serval/src/Serval.DataFiles/Services/ICorpusService.cs @@ -6,6 +6,10 @@ public interface ICorpusService Task GetAsync(string id, CancellationToken cancellationToken = default); Task GetAsync(string id, string owner, CancellationToken cancellationToken = default); Task CreateAsync(Corpus corpus, CancellationToken cancellationToken = default); - Task UpdateAsync(string id, IReadOnlyList files, CancellationToken cancellationToken = default); + Task UpdateAsync( + string id, + IReadOnlyList files, + CancellationToken cancellationToken = default + ); Task DeleteAsync(string id, CancellationToken cancellationToken = default); } diff --git 
a/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/platform.proto b/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/platform.proto index 84b24ab1..39e0772c 100644 --- a/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/platform.proto +++ b/src/Serval/src/Serval.Grpc/Protos/serval/translation/v1/platform.proto @@ -12,8 +12,8 @@ service TranslationPlatformApi { rpc BuildFaulted(BuildFaultedRequest) returns (google.protobuf.Empty); rpc BuildRestarting(BuildRestartingRequest) returns (google.protobuf.Empty); - rpc IncrementTranslationEngineCorpusSize(IncrementTranslationEngineCorpusSizeRequest) returns (google.protobuf.Empty); - rpc InsertPretranslations(stream InsertPretranslationsRequest) returns (google.protobuf.Empty); + rpc IncrementTrainEngineCorpusSize(IncrementTrainEngineCorpusSizeRequest) returns (google.protobuf.Empty); + rpc InsertInferences(stream InsertInferencesRequest) returns (google.protobuf.Empty); } message UpdateBuildStatusRequest { @@ -47,12 +47,12 @@ message BuildRestartingRequest { string build_id = 1; } -message IncrementTranslationEngineCorpusSizeRequest { +message IncrementTrainEngineCorpusSizeRequest { string engine_id = 1; int32 count = 2; } -message InsertPretranslationsRequest { +message InsertInferencesRequest { string engine_id = 1; string corpus_id = 2; string text_id = 3; diff --git a/src/Serval/src/Serval.Grpc/Protos/serval/word_alignment/v1/engine.proto b/src/Serval/src/Serval.Grpc/Protos/serval/word_alignment/v1/engine.proto new file mode 100644 index 00000000..8fae85e1 --- /dev/null +++ b/src/Serval/src/Serval.Grpc/Protos/serval/word_alignment/v1/engine.proto @@ -0,0 +1,104 @@ +syntax = "proto3"; + +package serval.word_alignment.v1; + +import "google/protobuf/empty.proto"; + +service WordAlignmentEngineApi { + rpc Create(CreateRequest) returns (google.protobuf.Empty); + rpc Delete(DeleteRequest) returns (google.protobuf.Empty); + rpc GetWordAlignment(GetWordAlignmentRequest) returns (GetWordAlignmentResponse); 
+ rpc StartBuild(StartBuildRequest) returns (google.protobuf.Empty); + rpc CancelBuild(CancelBuildRequest) returns (google.protobuf.Empty); + rpc GetQueueSize(GetQueueSizeRequest) returns (GetQueueSizeResponse); +} + +message CreateRequest { + string engine_type = 1; + string engine_id = 2; + optional string engine_name = 3; + string source_language = 4; + string target_language = 5; +} + +message DeleteRequest { + string engine_type = 1; + string engine_id = 2; +} + +message GetWordAlignmentRequest { + string engine_type = 1; + string engine_id = 2; + string source_segment = 3; + string target_segment = 4; +} + +message GetWordAlignmentResponse { + WordAlignmentResult result = 1; +} + +message StartBuildRequest { + string engine_type = 1; + string engine_id = 2; + string build_id = 3; + optional string options = 4; + repeated ParallelCorpus corpora = 5; +} + +message CancelBuildRequest { + string engine_type = 1; + string engine_id = 2; +} + +message GetQueueSizeRequest { + string engine_type = 1; +} + +message GetQueueSizeResponse { + int32 size = 1; +} + +message AlignedWordPair { + int32 source_index = 1; + int32 target_index = 2; +} + +message WordAlignmentResult { + repeated string source_tokens = 1; + repeated string target_tokens = 2; + repeated double confidences = 3; + repeated AlignedWordPair alignment = 4; +} + +message ParallelCorpus { + string id = 1; + repeated MonolingualCorpus source_corpora = 2; + repeated MonolingualCorpus target_corpora = 3; +} + +message MonolingualCorpus { + string id = 1; + string language = 2; + bool train_on_all = 3; + bool word_align_on_all = 4; + map train_on_chapters = 5; + map word_align_on_chapters = 6; + repeated string train_on_text_ids = 7; + repeated string word_align_on_text_ids = 8; + repeated CorpusFile files = 9; +} + +message ScriptureChapters { + repeated int32 chapters = 1; +} + +message CorpusFile { + string location = 1; + FileFormat format = 2; + string text_id = 3; +} + +enum FileFormat { + 
FILE_FORMAT_TEXT = 0; + FILE_FORMAT_PARATEXT = 1; +} diff --git a/src/Serval/src/Serval.Grpc/Protos/serval/word_alignment/v1/platform.proto b/src/Serval/src/Serval.Grpc/Protos/serval/word_alignment/v1/platform.proto new file mode 100644 index 00000000..7db42106 --- /dev/null +++ b/src/Serval/src/Serval.Grpc/Protos/serval/word_alignment/v1/platform.proto @@ -0,0 +1,66 @@ +syntax = "proto3"; + +package serval.word_alignment.v1; + +import "google/protobuf/empty.proto"; +import "Protos/serval/word_alignment/v1/engine.proto"; + + +service WordAlignmentPlatformApi { + rpc UpdateBuildStatus(UpdateBuildStatusRequest) returns (google.protobuf.Empty); + rpc BuildStarted(BuildStartedRequest) returns (google.protobuf.Empty); + rpc BuildCompleted(BuildCompletedRequest) returns (google.protobuf.Empty); + rpc BuildCanceled(BuildCanceledRequest) returns (google.protobuf.Empty); + rpc BuildFaulted(BuildFaultedRequest) returns (google.protobuf.Empty); + rpc BuildRestarting(BuildRestartingRequest) returns (google.protobuf.Empty); + + rpc IncrementTrainEngineCorpusSize(IncrementTrainEngineCorpusSizeRequest) returns (google.protobuf.Empty); + rpc InsertInferences(stream InsertInferencesRequest) returns (google.protobuf.Empty); +} + +message UpdateBuildStatusRequest { + string build_id = 1; + int32 step = 2; + optional double percent_completed = 3; + optional string message = 4; + optional int32 queue_depth = 5; +} + +message BuildStartedRequest { + string build_id = 1; +} + +message BuildCompletedRequest { + string build_id = 1; + int32 corpus_size = 2; + double confidence = 3; +} + +message BuildCanceledRequest { + string build_id = 1; +} + +message BuildFaultedRequest { + string build_id = 1; + string message = 2; +} + +message BuildRestartingRequest { + string build_id = 1; +} + +message IncrementTrainEngineCorpusSizeRequest { + string engine_id = 1; + int32 count = 2; +} + +message InsertInferencesRequest { + string engine_id = 1; + string corpus_id = 2; + string text_id = 3; + 
repeated string refs = 4; + repeated string source_tokens = 5; + repeated string target_tokens = 6; + repeated double confidences = 7; + repeated AlignedWordPair alignment = 8; +} diff --git a/src/Serval/src/Serval.Translation/Contracts/AlignedWordPairDto.cs b/src/Serval/src/Serval.Shared/Contracts/AlignedWordPairDto.cs similarity index 76% rename from src/Serval/src/Serval.Translation/Contracts/AlignedWordPairDto.cs rename to src/Serval/src/Serval.Shared/Contracts/AlignedWordPairDto.cs index 4cd0dd66..5b9c63f6 100644 --- a/src/Serval/src/Serval.Translation/Contracts/AlignedWordPairDto.cs +++ b/src/Serval/src/Serval.Shared/Contracts/AlignedWordPairDto.cs @@ -1,4 +1,4 @@ -namespace Serval.Translation.Contracts; +namespace Serval.Shared.Contracts; public record AlignedWordPairDto { diff --git a/src/Serval/src/Serval.Translation/Models/ParallelCorpusFilter.cs b/src/Serval/src/Serval.Shared/Contracts/ParallelCorpusFilter.cs similarity index 82% rename from src/Serval/src/Serval.Translation/Models/ParallelCorpusFilter.cs rename to src/Serval/src/Serval.Shared/Contracts/ParallelCorpusFilter.cs index 1cb311e8..184aee25 100644 --- a/src/Serval/src/Serval.Translation/Models/ParallelCorpusFilter.cs +++ b/src/Serval/src/Serval.Shared/Contracts/ParallelCorpusFilter.cs @@ -1,4 +1,4 @@ -namespace Serval.Translation.Contracts; +namespace Serval.Shared.Contracts; public record ParallelCorpusFilter { diff --git a/src/Serval/src/Serval.Translation/Contracts/QueueDto.cs b/src/Serval/src/Serval.Shared/Contracts/QueueDto.cs similarity index 75% rename from src/Serval/src/Serval.Translation/Contracts/QueueDto.cs rename to src/Serval/src/Serval.Shared/Contracts/QueueDto.cs index 51c75357..ade49773 100644 --- a/src/Serval/src/Serval.Translation/Contracts/QueueDto.cs +++ b/src/Serval/src/Serval.Shared/Contracts/QueueDto.cs @@ -1,4 +1,4 @@ -namespace Serval.Translation.Contracts; +namespace Serval.Shared.Contracts; public record QueueDto { diff --git 
a/src/Serval/src/Serval.Shared/Contracts/WordAlignmentBuildFinished.cs b/src/Serval/src/Serval.Shared/Contracts/WordAlignmentBuildFinished.cs new file mode 100644 index 00000000..44886a44 --- /dev/null +++ b/src/Serval/src/Serval.Shared/Contracts/WordAlignmentBuildFinished.cs @@ -0,0 +1,11 @@ +namespace Serval.Shared.Contracts; + +public record WordAlignmentBuildFinished +{ + public required string BuildId { get; init; } + public required string EngineId { get; init; } + public required string Owner { get; init; } + public required JobState BuildState { get; init; } + public required string Message { get; init; } + public required DateTime DateFinished { get; init; } +} diff --git a/src/Serval/src/Serval.Shared/Contracts/WordAlignmentBuildFinishedDto.cs b/src/Serval/src/Serval.Shared/Contracts/WordAlignmentBuildFinishedDto.cs new file mode 100644 index 00000000..f7ec9936 --- /dev/null +++ b/src/Serval/src/Serval.Shared/Contracts/WordAlignmentBuildFinishedDto.cs @@ -0,0 +1,10 @@ +namespace Serval.Shared.Contracts; + +public record WordAlignmentBuildFinishedDto +{ + public required ResourceLinkDto Build { get; init; } + public required ResourceLinkDto Engine { get; init; } + public required JobState BuildState { get; init; } + public required string Message { get; init; } + public required DateTime DateFinished { get; init; } +} diff --git a/src/Serval/src/Serval.Shared/Contracts/WordAlignmentBuildStarted.cs b/src/Serval/src/Serval.Shared/Contracts/WordAlignmentBuildStarted.cs new file mode 100644 index 00000000..08a6375f --- /dev/null +++ b/src/Serval/src/Serval.Shared/Contracts/WordAlignmentBuildStarted.cs @@ -0,0 +1,8 @@ +namespace Serval.Shared.Contracts; + +public record WordAlignmentBuildStarted +{ + public required string BuildId { get; init; } + public required string EngineId { get; init; } + public required string Owner { get; init; } +} diff --git a/src/Serval/src/Serval.Shared/Contracts/WordAlignmentBuildStartedDto.cs 
b/src/Serval/src/Serval.Shared/Contracts/WordAlignmentBuildStartedDto.cs new file mode 100644 index 00000000..00f1c178 --- /dev/null +++ b/src/Serval/src/Serval.Shared/Contracts/WordAlignmentBuildStartedDto.cs @@ -0,0 +1,7 @@ +namespace Serval.Shared.Contracts; + +public record WordAlignmentBuildStartedDto +{ + public required ResourceLinkDto Build { get; init; } + public required ResourceLinkDto Engine { get; init; } +} diff --git a/src/Serval/src/Serval.Shared/Controllers/Endpoints.cs b/src/Serval/src/Serval.Shared/Controllers/Endpoints.cs index c12582cc..5b63830c 100644 --- a/src/Serval/src/Serval.Shared/Controllers/Endpoints.cs +++ b/src/Serval/src/Serval.Shared/Controllers/Endpoints.cs @@ -8,6 +8,9 @@ public static class Endpoints public const string GetTranslationCorpus = "GetTranslationCorpus"; public const string GetParallelTranslationCorpus = "GetParallelTranslationCorpus"; public const string GetTranslationBuild = "GetTranslationBuild"; + public const string GetWordAlignmentEngine = "GetWordAlignmentEngine"; + public const string GetParallelWordAlignmentCorpus = "GetParallelWordAlignmentCorpus"; + public const string GetWordAlignmentBuild = "GetWordAlignmentBuild"; public const string GetWebhook = "GetWebhook"; diff --git a/src/Serval/src/Serval.Shared/Controllers/Scopes.cs b/src/Serval/src/Serval.Shared/Controllers/Scopes.cs index c88f06b3..42ea7591 100644 --- a/src/Serval/src/Serval.Shared/Controllers/Scopes.cs +++ b/src/Serval/src/Serval.Shared/Controllers/Scopes.cs @@ -7,6 +7,11 @@ public static class Scopes public const string UpdateTranslationEngines = "update:translation_engines"; public const string DeleteTranslationEngines = "delete:translation_engines"; + public const string CreateWordAlignmentEngines = "create:word_alignment_engines"; + public const string ReadWordAlignmentEngines = "read:word_alignment_engines"; + public const string UpdateWordAlignmentEngines = "update:word_alignment_engines"; + public const string DeleteWordAlignmentEngines 
= "delete:word_alignment_engines"; + public const string CreateHooks = "create:hooks"; public const string ReadHooks = "read:hooks"; public const string DeleteHooks = "delete:hooks"; @@ -24,6 +29,10 @@ public static class Scopes ReadTranslationEngines, UpdateTranslationEngines, DeleteTranslationEngines, + CreateWordAlignmentEngines, + ReadWordAlignmentEngines, + UpdateWordAlignmentEngines, + DeleteWordAlignmentEngines, CreateHooks, ReadHooks, DeleteHooks, diff --git a/src/Serval/src/Serval.Translation/Models/AlignedWordPair.cs b/src/Serval/src/Serval.Shared/Models/AlignedWordPair.cs similarity index 77% rename from src/Serval/src/Serval.Translation/Models/AlignedWordPair.cs rename to src/Serval/src/Serval.Shared/Models/AlignedWordPair.cs index 5e367495..550abeb0 100644 --- a/src/Serval/src/Serval.Translation/Models/AlignedWordPair.cs +++ b/src/Serval/src/Serval.Shared/Models/AlignedWordPair.cs @@ -1,4 +1,4 @@ -namespace Serval.Translation.Models; +namespace Serval.Shared.Models; public record AlignedWordPair { diff --git a/src/Serval/src/Serval.Translation/Models/CorpusFile.cs b/src/Serval/src/Serval.Shared/Models/CorpusFile.cs similarity index 84% rename from src/Serval/src/Serval.Translation/Models/CorpusFile.cs rename to src/Serval/src/Serval.Shared/Models/CorpusFile.cs index 2672ba56..2739e605 100644 --- a/src/Serval/src/Serval.Translation/Models/CorpusFile.cs +++ b/src/Serval/src/Serval.Shared/Models/CorpusFile.cs @@ -1,4 +1,4 @@ -namespace Serval.Translation.Models; +namespace Serval.Shared.Models; public record CorpusFile { diff --git a/src/Serval/src/Serval.Translation/Models/MonolingualCorpus.cs b/src/Serval/src/Serval.Shared/Models/MonolingualCorpus.cs similarity index 86% rename from src/Serval/src/Serval.Translation/Models/MonolingualCorpus.cs rename to src/Serval/src/Serval.Shared/Models/MonolingualCorpus.cs index 0762e878..f9c58fb4 100644 --- a/src/Serval/src/Serval.Translation/Models/MonolingualCorpus.cs +++ 
b/src/Serval/src/Serval.Shared/Models/MonolingualCorpus.cs @@ -1,4 +1,4 @@ -namespace Serval.Translation.Models; +namespace Serval.Shared.Models; public record MonolingualCorpus { diff --git a/src/Serval/src/Serval.Translation/Models/ParallelCorpus.cs b/src/Serval/src/Serval.Shared/Models/ParallelCorpus.cs similarity index 87% rename from src/Serval/src/Serval.Translation/Models/ParallelCorpus.cs rename to src/Serval/src/Serval.Shared/Models/ParallelCorpus.cs index 0fd059c7..9b554110 100644 --- a/src/Serval/src/Serval.Translation/Models/ParallelCorpus.cs +++ b/src/Serval/src/Serval.Shared/Models/ParallelCorpus.cs @@ -1,4 +1,4 @@ -namespace Serval.Translation.Models; +namespace Serval.Shared.Models; public record ParallelCorpus { diff --git a/src/Serval/src/Serval.Translation/Models/Queue.cs b/src/Serval/src/Serval.Shared/Models/Queue.cs similarity index 76% rename from src/Serval/src/Serval.Translation/Models/Queue.cs rename to src/Serval/src/Serval.Shared/Models/Queue.cs index 3e269b2c..7a3272a8 100644 --- a/src/Serval/src/Serval.Translation/Models/Queue.cs +++ b/src/Serval/src/Serval.Shared/Models/Queue.cs @@ -1,4 +1,4 @@ -namespace Serval.Translation.Models; +namespace Serval.Shared.Models; public record Queue { diff --git a/src/Serval/src/Serval.Shared/Usings.cs b/src/Serval/src/Serval.Shared/Usings.cs index 3e84144f..fb72d2d0 100644 --- a/src/Serval/src/Serval.Shared/Usings.cs +++ b/src/Serval/src/Serval.Shared/Usings.cs @@ -12,6 +12,7 @@ global using Microsoft.Extensions.Logging; global using Microsoft.Extensions.Options; global using Serval.Shared.Configuration; +global using Serval.Shared.Contracts; global using Serval.Shared.Models; global using Serval.Shared.Services; global using Serval.Shared.Utils; diff --git a/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs b/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs index 47358472..eeb5555d 100644 --- 
a/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs +++ b/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs @@ -1023,6 +1023,7 @@ CancellationToken cancellationToken /// /// The `"options"` parameter of the build config provides the ability to pass build configuration parameters as a JSON object. /// See [nmt job settings documentation](https://github.com/sillsdev/serval/wiki/NMT-Build-Options) about configuring job parameters. + /// See [smt-transfer job settings documentation](https://github.com/sillsdev/serval/wiki/SMT-Transfer-Build-Options) about configuring job parameters. /// See [keyterms parsing documentation](https://github.com/sillsdev/serval/wiki/Paratext-Key-Terms-Parsing) on how to use keyterms for training. /// /// When using a parallel corpus: diff --git a/src/Serval/src/Serval.Translation/Services/EngineService.cs b/src/Serval/src/Serval.Translation/Services/EngineService.cs index b0cfb5be..59214306 100644 --- a/src/Serval/src/Serval.Translation/Services/EngineService.cs +++ b/src/Serval/src/Serval.Translation/Services/EngineService.cs @@ -143,10 +143,15 @@ public override async Task CreateAsync(Engine engine, CancellationToken { engine.DateCreated = DateTime.UtcNow; await Entities.InsertAsync(engine, cancellationToken); - TranslationEngineApi.TranslationEngineApiClient? client = - _grpcClientFactory.CreateClient(engine.Type); - if (client is null) + TranslationEngineApi.TranslationEngineApiClient? client; + try + { + client = _grpcClientFactory.CreateClient(engine.Type); + } + catch (InvalidOperationException) + { throw new InvalidOperationException($"'{engine.Type}' is an invalid engine type."); + } var request = new CreateRequest { EngineType = engine.Type, @@ -252,7 +257,7 @@ public async Task StartBuildAsync(Build build, CancellationToken cancellationTok Dictionary? pretranslate = build.Pretranslate?.ToDictionary(c => c.ParallelCorpusRef! 
); - IReadOnlyList parallelCorpora = engine + IReadOnlyList parallelCorpora = engine .ParallelCorpora.Where(pc => trainOn == null || trainOn.ContainsKey(pc.Id) @@ -418,8 +423,8 @@ public Task AddCorpusAsync(string engineId, Models.Corpus corpus, CancellationTo public async Task UpdateCorpusAsync( string engineId, string corpusId, - IReadOnlyList? sourceFiles, - IReadOnlyList? targetFiles, + IReadOnlyList? sourceFiles, + IReadOnlyList? targetFiles, CancellationToken cancellationToken = default ) { @@ -484,7 +489,7 @@ string id in originalEngine.Corpora.SelectMany(c => public Task AddParallelCorpusAsync( string engineId, - Models.ParallelCorpus corpus, + Shared.Models.ParallelCorpus corpus, CancellationToken cancellationToken = default ) { @@ -495,11 +500,11 @@ public Task AddParallelCorpusAsync( ); } - public async Task UpdateParallelCorpusAsync( + public async Task UpdateParallelCorpusAsync( string engineId, string parallelCorpusId, - IReadOnlyList? sourceCorpora, - IReadOnlyList? targetCorpora, + IReadOnlyList? sourceCorpora, + IReadOnlyList? 
targetCorpora, CancellationToken cancellationToken = default ) { @@ -563,8 +568,8 @@ public Task DeleteAllCorpusFilesAsync(string dataFileId, CancellationToken cance c.SourceFiles.Any(f => f.Id == dataFileId) || c.TargetFiles.Any(f => f.Id == dataFileId) ) || e.ParallelCorpora.Any(c => - c.SourceCorpora.Any(mc => mc.Files.Any(f => f.Id == dataFileId)) - || c.TargetCorpora.Any(mc => mc.Files.Any(f => f.Id == dataFileId)) + c.SourceCorpora.Any(sc => sc.Files.Any(f => f.Id == dataFileId)) + || c.TargetCorpora.Any(tc => tc.Files.Any(f => f.Id == dataFileId)) ), u => { @@ -595,8 +600,8 @@ public Task UpdateDataFileFilenameFilesAsync( c.SourceFiles.Any(f => f.Id == dataFileId) || c.TargetFiles.Any(f => f.Id == dataFileId) ) || e.ParallelCorpora.Any(c => - c.SourceCorpora.Any(mc => mc.Files.Any(f => f.Id == dataFileId)) - || c.TargetCorpora.Any(mc => mc.Files.Any(f => f.Id == dataFileId)) + c.SourceCorpora.Any(sc => sc.Files.Any(f => f.Id == dataFileId)) + || c.TargetCorpora.Any(tc => tc.Files.Any(f => f.Id == dataFileId)) ), u => { @@ -631,14 +636,14 @@ public Task UpdateDataFileFilenameFilesAsync( public Task UpdateCorpusFilesAsync( string corpusId, - IReadOnlyList files, + IReadOnlyList files, CancellationToken cancellationToken = default ) { return Entities.UpdateAllAsync( e => e.ParallelCorpora.Any(c => - c.SourceCorpora.Any(mc => mc.Id == corpusId) || c.TargetCorpora.Any(mc => mc.Id == corpusId) + c.SourceCorpora.Any(sc => sc.Id == corpusId) || c.TargetCorpora.Any(tc => tc.Id == corpusId) ), u => { @@ -709,9 +714,9 @@ private Models.TranslationResult Map(V1.TranslationResult source) return source.Values.Cast().ToHashSet(); } - private Models.AlignedWordPair Map(V1.AlignedWordPair source) + private Shared.Models.AlignedWordPair Map(V1.AlignedWordPair source) { - return new Models.AlignedWordPair { SourceIndex = source.SourceIndex, TargetIndex = source.TargetIndex }; + return new Shared.Models.AlignedWordPair { SourceIndex = source.SourceIndex, TargetIndex = 
source.TargetIndex }; } private Models.Phrase Map(V1.Phrase source) @@ -862,7 +867,7 @@ pretranslateCorpus is not null } private V1.ParallelCorpus Map( - Models.ParallelCorpus source, + Shared.Models.ParallelCorpus source, TrainingCorpus? trainingCorpus, PretranslateCorpus? pretranslateCorpus, bool trainOnAllCorpora, @@ -916,7 +921,7 @@ bool pretranslateOnAllCorpora } private V1.MonolingualCorpus Map( - Models.MonolingualCorpus inputCorpus, + Shared.Models.MonolingualCorpus inputCorpus, ParallelCorpusFilter? trainingFilter, ParallelCorpusFilter? pretranslateFilter, string? referenceFileLocation, @@ -969,6 +974,17 @@ pretranslateFilter is not null Files = { inputCorpus.Files.Select(Map) } }; + if ( + trainingFilter is not null + && trainingFilter.TextIds is not null + && trainingFilter.ScriptureRange is not null + ) + { + throw new InvalidOperationException( + "Cannot specify both TextIds and ScriptureRange in the training filter." + ); + } + if ( trainOnAll || (trainingFilter is not null && trainingFilter.TextIds is null && trainingFilter.ScriptureRange is null) @@ -984,6 +1000,17 @@ pretranslateFilter is not null returnCorpus.TrainOnTextIds.Add(trainingFilter.TextIds); } + if ( + pretranslateFilter is not null + && pretranslateFilter.TextIds is not null + && pretranslateFilter.ScriptureRange is not null + ) + { + throw new InvalidOperationException( + "Cannot specify both TextIds and ScriptureRange in the pretranslation filter." 
+ ); + } + if ( pretranslateOnAll || ( @@ -1006,7 +1033,7 @@ pretranslateFilter is not null return returnCorpus; } - private V1.CorpusFile Map(Models.CorpusFile source) + private V1.CorpusFile Map(Shared.Models.CorpusFile source) { return new V1.CorpusFile { diff --git a/src/Serval/src/Serval.Translation/Services/TranslationPlatformServiceV1.cs b/src/Serval/src/Serval.Translation/Services/TranslationPlatformServiceV1.cs index 615e8c89..ddd9370d 100644 --- a/src/Serval/src/Serval.Translation/Services/TranslationPlatformServiceV1.cs +++ b/src/Serval/src/Serval.Translation/Services/TranslationPlatformServiceV1.cs @@ -265,8 +265,8 @@ await _builds.UpdateAsync( return Empty; } - public override async Task IncrementTranslationEngineCorpusSize( - IncrementTranslationEngineCorpusSizeRequest request, + public override async Task IncrementTrainEngineCorpusSize( + IncrementTrainEngineCorpusSizeRequest request, ServerCallContext context ) { @@ -278,8 +278,8 @@ await _engines.UpdateAsync( return Empty; } - public override async Task InsertPretranslations( - IAsyncStreamReader requestStream, + public override async Task InsertInferences( + IAsyncStreamReader requestStream, ServerCallContext context ) { @@ -287,7 +287,7 @@ ServerCallContext context int nextModelRevision = 0; var batch = new List(); - await foreach (InsertPretranslationsRequest request in requestStream.ReadAllAsync(context.CancellationToken)) + await foreach (InsertInferencesRequest request in requestStream.ReadAllAsync(context.CancellationToken)) { if (request.EngineId != engineId) { diff --git a/src/Serval/src/Serval.Webhooks/Configuration/IMediatorRegistrationConfiguratorExtensions.cs b/src/Serval/src/Serval.Webhooks/Configuration/IMediatorRegistrationConfiguratorExtensions.cs index 7239bf06..9e7dc5ef 100644 --- a/src/Serval/src/Serval.Webhooks/Configuration/IMediatorRegistrationConfiguratorExtensions.cs +++ b/src/Serval/src/Serval.Webhooks/Configuration/IMediatorRegistrationConfiguratorExtensions.cs @@ -8,6 
+8,8 @@ this IMediatorRegistrationConfigurator configurator { configurator.AddConsumer(); configurator.AddConsumer(); + configurator.AddConsumer(); + configurator.AddConsumer(); return configurator; } } diff --git a/src/Serval/src/Serval.Webhooks/Consumers/WordAlignmentBuildFinishedConsumer.cs b/src/Serval/src/Serval.Webhooks/Consumers/WordAlignmentBuildFinishedConsumer.cs new file mode 100644 index 00000000..de604125 --- /dev/null +++ b/src/Serval/src/Serval.Webhooks/Consumers/WordAlignmentBuildFinishedConsumer.cs @@ -0,0 +1,36 @@ +namespace Serval.Webhooks.Consumers; + +public class WordAlignmentBuildFinishedConsumer(IWebhookService webhookService, IUrlService urlService) + : IConsumer +{ + private readonly IWebhookService _webhookService = webhookService; + private readonly IUrlService _urlService = urlService; + + public async Task Consume(ConsumeContext context) + { + await _webhookService.SendEventAsync( + WebhookEvent.WordAlignmentBuildFinished, + context.Message.Owner, + new WordAlignmentBuildFinishedDto + { + Build = new ResourceLinkDto + { + Id = context.Message.BuildId, + Url = _urlService.GetUrl( + Endpoints.GetWordAlignmentBuild, + new { id = context.Message.EngineId, buildId = context.Message.BuildId } + ) + }, + Engine = new ResourceLinkDto + { + Id = context.Message.EngineId, + Url = _urlService.GetUrl(Endpoints.GetWordAlignmentEngine, new { id = context.Message.EngineId })! 
+ }, + BuildState = context.Message.BuildState, + Message = context.Message.Message, + DateFinished = context.Message.DateFinished + }, + context.CancellationToken + ); + } +} diff --git a/src/Serval/src/Serval.Webhooks/Consumers/WordAlignmentBuildStartedConsumer.cs b/src/Serval/src/Serval.Webhooks/Consumers/WordAlignmentBuildStartedConsumer.cs new file mode 100644 index 00000000..5fb11b41 --- /dev/null +++ b/src/Serval/src/Serval.Webhooks/Consumers/WordAlignmentBuildStartedConsumer.cs @@ -0,0 +1,33 @@ +namespace Serval.Webhooks.Consumers; + +public class WordAlignmentBuildStartedConsumer(IWebhookService webhookService, IUrlService urlService) + : IConsumer +{ + private readonly IWebhookService _webhookService = webhookService; + private readonly IUrlService _urlService = urlService; + + public async Task Consume(ConsumeContext context) + { + await _webhookService.SendEventAsync( + WebhookEvent.WordAlignmentBuildStarted, + context.Message.Owner, + new WordAlignmentBuildStartedDto + { + Build = new ResourceLinkDto + { + Id = context.Message.BuildId, + Url = _urlService.GetUrl( + Endpoints.GetWordAlignmentBuild, + new { id = context.Message.EngineId, buildId = context.Message.BuildId } + ) + }, + Engine = new ResourceLinkDto + { + Id = context.Message.EngineId, + Url = _urlService.GetUrl(Endpoints.GetWordAlignmentEngine, new { id = context.Message.EngineId }) + } + }, + context.CancellationToken + ); + } +} diff --git a/src/Serval/src/Serval.Webhooks/Contracts/WebhookEvent.cs b/src/Serval/src/Serval.Webhooks/Contracts/WebhookEvent.cs index 771d0ff8..3b23f6a6 100644 --- a/src/Serval/src/Serval.Webhooks/Contracts/WebhookEvent.cs +++ b/src/Serval/src/Serval.Webhooks/Contracts/WebhookEvent.cs @@ -4,4 +4,7 @@ public enum WebhookEvent { TranslationBuildStarted, TranslationBuildFinished, + + WordAlignmentBuildStarted, + WordAlignmentBuildFinished } diff --git a/src/Serval/src/Serval.WordAlignment/Configuration/IEndpointRouteBuilderExtensions.cs 
b/src/Serval/src/Serval.WordAlignment/Configuration/IEndpointRouteBuilderExtensions.cs new file mode 100644 index 00000000..84d4d6a5 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Configuration/IEndpointRouteBuilderExtensions.cs @@ -0,0 +1,11 @@ +namespace Microsoft.AspNetCore.Builder; + +public static class IEndpointRouteBuilderExtensions +{ + public static IEndpointRouteBuilder MapServalWordAlignmentServices(this IEndpointRouteBuilder builder) + { + builder.MapGrpcService(); + + return builder; + } +} diff --git a/src/Serval/src/Serval.WordAlignment/Configuration/IMediatorRegistrationConfiguratorExtensions.cs b/src/Serval/src/Serval.WordAlignment/Configuration/IMediatorRegistrationConfiguratorExtensions.cs new file mode 100644 index 00000000..72db9d99 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Configuration/IMediatorRegistrationConfiguratorExtensions.cs @@ -0,0 +1,14 @@ +namespace Microsoft.Extensions.DependencyInjection; + +public static class IMediatorRegistrationConfiguratorExtensions +{ + public static IMediatorRegistrationConfigurator AddWordAlignmentConsumers( + this IMediatorRegistrationConfigurator configurator + ) + { + configurator.AddConsumer(); + configurator.AddConsumer(); + configurator.AddConsumer(); + return configurator; + } +} diff --git a/src/Serval/src/Serval.WordAlignment/Configuration/IMemoryDataAccessConfiguratorExtensions.cs b/src/Serval/src/Serval.WordAlignment/Configuration/IMemoryDataAccessConfiguratorExtensions.cs new file mode 100644 index 00000000..3edb081f --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Configuration/IMemoryDataAccessConfiguratorExtensions.cs @@ -0,0 +1,14 @@ +namespace Microsoft.Extensions.DependencyInjection; + +public static class IMemoryDataAccessConfiguratorExtensions +{ + public static IMemoryDataAccessConfigurator AddWordAlignmentRepositories( + this IMemoryDataAccessConfigurator configurator + ) + { + configurator.AddRepository(); + configurator.AddRepository(); + 
configurator.AddRepository(); + return configurator; + } +} diff --git a/src/Serval/src/Serval.WordAlignment/Configuration/IMongoDataAccessConfiguratorExtensions.cs b/src/Serval/src/Serval.WordAlignment/Configuration/IMongoDataAccessConfiguratorExtensions.cs new file mode 100644 index 00000000..aababccb --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Configuration/IMongoDataAccessConfiguratorExtensions.cs @@ -0,0 +1,70 @@ +using MongoDB.Driver; + +namespace Microsoft.Extensions.DependencyInjection; + +public static class IMongoDataAccessConfiguratorExtensions +{ + public static IMongoDataAccessConfigurator AddWordAlignmentRepositories( + this IMongoDataAccessConfigurator configurator + ) + { + configurator.AddRepository( + "word_alignment.engines", + init: async c => + { + await c.Indexes.CreateOrUpdateAsync( + new CreateIndexModel(Builders.IndexKeys.Ascending(e => e.Owner)) + ); + await c.Indexes.CreateOrUpdateAsync( + new CreateIndexModel(Builders.IndexKeys.Ascending(e => e.DateCreated)) + ); + } + ); + configurator.AddRepository( + "word_alignment.builds", + init: async c => + { + await c.Indexes.CreateOrUpdateAsync( + new CreateIndexModel(Builders.IndexKeys.Ascending(b => b.EngineRef)) + ); + await c.Indexes.CreateOrUpdateAsync( + new CreateIndexModel(Builders.IndexKeys.Ascending(b => b.DateCreated)) + ); + } + ); + configurator.AddRepository( + "word_alignment.word_alignments", + init: async c => + { + await c.Indexes.CreateOrUpdateAsync( + new CreateIndexModel( + Builders.IndexKeys.Ascending(pt => pt.ModelRevision) + ) + ); + await c.Indexes.CreateOrUpdateAsync( + new CreateIndexModel(Builders.IndexKeys.Ascending(pt => pt.CorpusRef)) + ); + await c.Indexes.CreateOrUpdateAsync( + new CreateIndexModel(Builders.IndexKeys.Ascending(pt => pt.TextId)) + ); + await c.Indexes.CreateOrUpdateAsync( + new CreateIndexModel( + Builders + .IndexKeys.Ascending(pt => pt.EngineRef) + .Ascending(pt => pt.ModelRevision) + ) + ); + await c.Indexes.CreateOrUpdateAsync( + 
new CreateIndexModel( + Builders + .IndexKeys.Ascending(pt => pt.EngineRef) + .Ascending(pt => pt.CorpusRef) + .Ascending(pt => pt.ModelRevision) + .Ascending(pt => pt.TextId) + ) + ); + } + ); + return configurator; + } +} diff --git a/src/Serval/src/Serval.WordAlignment/Configuration/IServalBuilderExtensions.cs b/src/Serval/src/Serval.WordAlignment/Configuration/IServalBuilderExtensions.cs new file mode 100644 index 00000000..6ef12711 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Configuration/IServalBuilderExtensions.cs @@ -0,0 +1,38 @@ +using Serval.Health.V1; +using Serval.WordAlignment.V1; + +namespace Microsoft.Extensions.DependencyInjection; + +public static class IServalBuilderExtensions +{ + public static IServalBuilder AddWordAlignment(this IServalBuilder builder) + { + builder.AddApiOptions(builder.Configuration.GetSection(ApiOptions.Key)); + builder.AddDataFileOptions(builder.Configuration.GetSection(DataFileOptions.Key)); + + builder.Services.AddScoped(); + builder.Services.AddScoped(); + builder.Services.AddScoped(); + + builder.Services.AddSingleton(); + builder.Services.AddSingleton(); + + var wordAlignmentOptions = new WordAlignmentOptions(); + builder.Configuration.GetSection(WordAlignmentOptions.Key).Bind(wordAlignmentOptions); + + foreach (EngineInfo engine in wordAlignmentOptions.Engines) + { + builder.Services.AddGrpcClient( + engine.Type, + o => o.Address = new Uri(engine.Address) + ); + builder.Services.AddGrpcClient( + $"{engine.Type}-Health", + o => o.Address = new Uri(engine.Address) + ); + builder.Services.AddHealthChecks().AddCheck(engine.Type); + } + + return builder; + } +} diff --git a/src/Serval/src/Serval.WordAlignment/Configuration/WordAlignmentOptions.cs b/src/Serval/src/Serval.WordAlignment/Configuration/WordAlignmentOptions.cs new file mode 100644 index 00000000..ac6cf3e3 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Configuration/WordAlignmentOptions.cs @@ -0,0 +1,14 @@ +namespace 
Serval.WordAlignment.Configuration; + +public class WordAlignmentOptions +{ + public const string Key = "WordAlignment"; + + public List Engines { get; set; } = new List(); +} + +public class EngineInfo +{ + public string Type { get; set; } = ""; + public string Address { get; set; } = ""; +} diff --git a/src/Serval/src/Serval.WordAlignment/Consumers/CorpusUpdatedConsumer.cs b/src/Serval/src/Serval.WordAlignment/Consumers/CorpusUpdatedConsumer.cs new file mode 100644 index 00000000..43633f9e --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Consumers/CorpusUpdatedConsumer.cs @@ -0,0 +1,26 @@ +namespace Serval.WordAlignment.Consumers; + +public class CorpusUpdatedConsumer(IEngineService engineService) : IConsumer +{ + private readonly IEngineService _engineService = engineService; + + public async Task Consume(ConsumeContext context) + { + await _engineService.UpdateCorpusFilesAsync( + context.Message.CorpusId, + context.Message.Files.Select(Map).ToList(), + context.CancellationToken + ); + } + + private static CorpusFile Map(CorpusFileResult corpusFile) + { + return new CorpusFile + { + Id = corpusFile.File.DataFileId, + TextId = corpusFile.TextId ?? 
corpusFile.File.Name, + Filename = corpusFile.File.Filename, + Format = corpusFile.File.Format, + }; + } +} diff --git a/src/Serval/src/Serval.WordAlignment/Consumers/DataFileDeletedConsumer.cs b/src/Serval/src/Serval.WordAlignment/Consumers/DataFileDeletedConsumer.cs new file mode 100644 index 00000000..c6ab6f3a --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Consumers/DataFileDeletedConsumer.cs @@ -0,0 +1,11 @@ +namespace Serval.WordAlignment.Consumers; + +public class DataFileDeletedConsumer(IEngineService engineService) : IConsumer +{ + private readonly IEngineService _engineService = engineService; + + public async Task Consume(ConsumeContext context) + { + await _engineService.DeleteAllCorpusFilesAsync(context.Message.DataFileId, context.CancellationToken); + } +} diff --git a/src/Serval/src/Serval.WordAlignment/Consumers/DataFileUpdatedConsumer.cs b/src/Serval/src/Serval.WordAlignment/Consumers/DataFileUpdatedConsumer.cs new file mode 100644 index 00000000..49145dbf --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Consumers/DataFileUpdatedConsumer.cs @@ -0,0 +1,15 @@ +namespace Serval.WordAlignment.Consumers; + +public class DataFileUpdatedConsumer(IEngineService engineService) : IConsumer +{ + private readonly IEngineService _engineService = engineService; + + public async Task Consume(ConsumeContext context) + { + await _engineService.UpdateDataFileFilenameFilesAsync( + context.Message.DataFileId, + context.Message.Filename, + context.CancellationToken + ); + } +} diff --git a/src/Serval/src/Serval.WordAlignment/Contracts/ParallelCorpusFilterConfigDto.cs b/src/Serval/src/Serval.WordAlignment/Contracts/ParallelCorpusFilterConfigDto.cs new file mode 100644 index 00000000..c1286abf --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Contracts/ParallelCorpusFilterConfigDto.cs @@ -0,0 +1,8 @@ +namespace Serval.WordAlignment.Contracts; + +public record ParallelCorpusFilterConfigDto +{ + public required string CorpusId { get; init; } + public 
IReadOnlyList? TextIds { get; init; } + public string? ScriptureRange { get; init; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Contracts/ParallelCorpusFilterDto.cs b/src/Serval/src/Serval.WordAlignment/Contracts/ParallelCorpusFilterDto.cs new file mode 100644 index 00000000..0824fe95 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Contracts/ParallelCorpusFilterDto.cs @@ -0,0 +1,8 @@ +namespace Serval.WordAlignment.Contracts; + +public record ParallelCorpusFilterDto +{ + public required ResourceLinkDto Corpus { get; init; } + public IReadOnlyList? TextIds { get; init; } + public string? ScriptureRange { get; init; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Contracts/TrainingCorpusConfigDto.cs b/src/Serval/src/Serval.WordAlignment/Contracts/TrainingCorpusConfigDto.cs new file mode 100644 index 00000000..bfe1c3e5 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Contracts/TrainingCorpusConfigDto.cs @@ -0,0 +1,8 @@ +namespace Serval.WordAlignment.Contracts; + +public record TrainingCorpusConfigDto +{ + public string? ParallelCorpusId { get; init; } + public IReadOnlyList? SourceFilters { get; init; } + public IReadOnlyList? TargetFilters { get; init; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Contracts/TrainingCorpusDto.cs b/src/Serval/src/Serval.WordAlignment/Contracts/TrainingCorpusDto.cs new file mode 100644 index 00000000..de110ceb --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Contracts/TrainingCorpusDto.cs @@ -0,0 +1,8 @@ +namespace Serval.WordAlignment.Contracts; + +public record TrainingCorpusDto +{ + public ResourceLinkDto? ParallelCorpus { get; init; } + public IReadOnlyList? SourceFilters { get; init; } + public IReadOnlyList? 
TargetFilters { get; init; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentBuildConfigDto.cs b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentBuildConfigDto.cs new file mode 100644 index 00000000..3a79b0e7 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentBuildConfigDto.cs @@ -0,0 +1,15 @@ +namespace Serval.WordAlignment.Contracts; + +public record WordAlignmentBuildConfigDto +{ + public string? Name { get; init; } + public IReadOnlyList? TrainOn { get; init; } + public IReadOnlyList? WordAlignOn { get; init; } + + /// + /// { + /// "property" : "value" + /// } + /// + public object? Options { get; init; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentBuildDto.cs b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentBuildDto.cs new file mode 100644 index 00000000..9fc55652 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentBuildDto.cs @@ -0,0 +1,31 @@ +namespace Serval.WordAlignment.Contracts; + +public record WordAlignmentBuildDto +{ + public required string Id { get; init; } + public required string Url { get; init; } + public required int Revision { get; init; } + public string? Name { get; init; } + public required ResourceLinkDto Engine { get; init; } + public IReadOnlyList? TrainOn { get; init; } + public IReadOnlyList? WordAlignOn { get; init; } + public required int Step { get; init; } + public double? PercentCompleted { get; init; } + public string? Message { get; init; } + + public int? QueueDepth { get; init; } + + /// + /// The current build job state. + /// + public required JobState State { get; init; } + public DateTime? DateFinished { get; init; } + + /// + /// { + /// "property" : "value" + /// } + /// + public object? Options { get; init; } + public string? 
DeploymentVersion { get; init; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentDto.cs b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentDto.cs new file mode 100644 index 00000000..01da4cc7 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentDto.cs @@ -0,0 +1,11 @@ +namespace Serval.WordAlignment.Contracts; + +public record WordAlignmentDto +{ + public required string TextId { get; init; } + public required IReadOnlyList Refs { get; init; } + public required IReadOnlyList SourceTokens { get; init; } + public required IReadOnlyList TargetTokens { get; init; } + public required IReadOnlyList Confidences { get; init; } + public required IReadOnlyList Alignment { get; init; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentEngineConfigDto.cs b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentEngineConfigDto.cs new file mode 100644 index 00000000..54a242b0 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentEngineConfigDto.cs @@ -0,0 +1,24 @@ +namespace Serval.WordAlignment.Contracts; + +public record WordAlignmentEngineConfigDto +{ + /// + /// The word alignment engine name. + /// + public string? Name { get; init; } + + /// + /// The source language tag. + /// + public required string SourceLanguage { get; init; } + + /// + /// The target language tag. + /// + public required string TargetLanguage { get; init; } + + /// + /// The word alignment engine type.
+ /// + public required string Type { get; init; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentEngineDto.cs b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentEngineDto.cs new file mode 100644 index 00000000..ebccc8f4 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentEngineDto.cs @@ -0,0 +1,15 @@ +namespace Serval.WordAlignment.Contracts; + +public record WordAlignmentEngineDto +{ + public required string Id { get; init; } + public required string Url { get; init; } + public string? Name { get; init; } + public required string SourceLanguage { get; init; } + public required string TargetLanguage { get; init; } + public required string Type { get; init; } + public required bool IsBuilding { get; init; } + public required int ModelRevision { get; init; } + public required double Confidence { get; init; } + public required int CorpusSize { get; init; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentParallelCorpusConfigDto.cs b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentParallelCorpusConfigDto.cs new file mode 100644 index 00000000..6aa341c4 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentParallelCorpusConfigDto.cs @@ -0,0 +1,12 @@ +namespace Serval.WordAlignment.Contracts; + +public record WordAlignmentParallelCorpusConfigDto +{ + /// + /// The corpus name. + /// + public string? 
Name { get; init; } + + public required IReadOnlyList SourceCorpusIds { get; init; } = new List(); + public required IReadOnlyList TargetCorpusIds { get; init; } = new List(); +} diff --git a/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentParallelCorpusDto.cs b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentParallelCorpusDto.cs new file mode 100644 index 00000000..53f00302 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentParallelCorpusDto.cs @@ -0,0 +1,10 @@ +namespace Serval.WordAlignment.Contracts; + +public record WordAlignmentParallelCorpusDto +{ + public required string Id { get; init; } + public required string Url { get; init; } + public required ResourceLinkDto Engine { get; init; } + public required IReadOnlyList SourceCorpora { get; init; } + public required IReadOnlyList TargetCorpora { get; init; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentParallelCorpusUpdateDto.cs b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentParallelCorpusUpdateDto.cs new file mode 100644 index 00000000..5a966947 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentParallelCorpusUpdateDto.cs @@ -0,0 +1,23 @@ +using System.ComponentModel.DataAnnotations; + +namespace Serval.WordAlignment.Contracts; + +public record WordAlignmentParallelCorpusUpdateConfigDto : IValidatableObject +{ + public IReadOnlyList? SourceCorpusIds { get; init; } + + public IReadOnlyList? 
TargetCorpusIds { get; init; } + + public IEnumerable Validate( + ValidationContext validationContext + ) + { + if (SourceCorpusIds is null && TargetCorpusIds is null) + { + yield return new System.ComponentModel.DataAnnotations.ValidationResult( + "At least one field must be specified.", + [nameof(SourceCorpusIds), nameof(TargetCorpusIds)] + ); + } + } +} diff --git a/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentRequestDto.cs b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentRequestDto.cs new file mode 100644 index 00000000..0ad1fa0b --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentRequestDto.cs @@ -0,0 +1,7 @@ +namespace Serval.WordAlignment.Contracts; + +public record WordAlignmentRequestDto +{ + public required string SourceSegment { get; init; } + public required string TargetSegment { get; init; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentResultDto.cs b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentResultDto.cs new file mode 100644 index 00000000..c3be0152 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Contracts/WordAlignmentResultDto.cs @@ -0,0 +1,9 @@ +namespace Serval.WordAlignment.Contracts; + +public record WordAlignmentResultDto +{ + public required IReadOnlyList SourceTokens { get; init; } + public required IReadOnlyList TargetTokens { get; init; } + public required IReadOnlyList Confidences { get; init; } + public required IReadOnlyList Alignment { get; init; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Controllers/WordAlignmentEngineTypesController.cs b/src/Serval/src/Serval.WordAlignment/Controllers/WordAlignmentEngineTypesController.cs new file mode 100644 index 00000000..87162909 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Controllers/WordAlignmentEngineTypesController.cs @@ -0,0 +1,45 @@ +namespace Serval.WordAlignment.Controllers; + +[ApiVersion(1.0)] +[Route("api/v{version:apiVersion}/word-alignment/engine-types")] 
+[OpenApiTag("Word Alignment Engines")] +public class WordAlignmentEngineTypesController(IAuthorizationService authService, IEngineService engineService) + : ServalControllerBase(authService) +{ + private readonly IEngineService _engineService = engineService; + + /// + /// Get queue information for a given engine type + /// + /// A valid engine type: statistical or echo-word-alignment + /// + /// Queue information for the specified engine type + /// The client is not authenticated + /// The authenticated client cannot perform the operation + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.ReadWordAlignmentEngines)] + [HttpGet("{engineType}/queues")] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> GetQueueAsync( + [NotNull] string engineType, + CancellationToken cancellationToken + ) + { + try + { + return Map( + await _engineService.GetQueueAsync(engineType.ToPascalCase(), cancellationToken: cancellationToken) + ); + } + catch (InvalidOperationException ioe) + { + return BadRequest(ioe.Message); + } + } + + private static QueueDto Map(Queue source) => + new() { Size = source.Size, EngineType = source.EngineType.ToKebabCase() }; +} diff --git a/src/Serval/src/Serval.WordAlignment/Controllers/WordAlignmentEnginesController.cs b/src/Serval/src/Serval.WordAlignment/Controllers/WordAlignmentEnginesController.cs new file mode 100644 index 00000000..22d6b57e --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Controllers/WordAlignmentEnginesController.cs @@ -0,0 +1,958 @@ +namespace Serval.WordAlignment.Controllers; + +[ApiVersion(1.0)] +[Route("api/v{version:apiVersion}/word-alignment/engines")] +[OpenApiTag("Word Alignment Engines")] +public class 
WordAlignmentEnginesController( + IAuthorizationService authService, + IEngineService engineService, + IBuildService buildService, + IWordAlignmentService wordAlignmentService, + IOptionsMonitor apiOptions, + IUrlService urlService, + ILogger logger +) : ServalControllerBase(authService) +{ + private static readonly JsonSerializerOptions ObjectJsonSerializerOptions = + new() { Converters = { new ObjectToInferredTypesConverter() } }; + + private readonly IEngineService _engineService = engineService; + private readonly IBuildService _buildService = buildService; + private readonly IWordAlignmentService _wordAlignmentService = wordAlignmentService; + private readonly IOptionsMonitor _apiOptions = apiOptions; + private readonly IUrlService _urlService = urlService; + private readonly ILogger _logger = logger; + + /// + /// Get all word alignment engines + /// + /// + /// The engines + /// The client is not authenticated. + /// The authenticated client cannot perform the operation. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.ReadWordAlignmentEngines)] + [HttpGet] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> GetAllAsync(CancellationToken cancellationToken) + { + return (await _engineService.GetAllAsync(Owner, cancellationToken)).Select(Map); + } + + /// + /// Get a word alignment engine by unique id + /// + /// The engine id + /// + /// The engine + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the engine. + /// The engine does not exist. + /// A necessary service is currently unavailable. Check `/health` for more details. 
+ + [Authorize(Scopes.ReadWordAlignmentEngines)] + [HttpGet("{id}", Name = Endpoints.GetWordAlignmentEngine)] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> GetAsync( + [NotNull] string id, + CancellationToken cancellationToken + ) + { + Engine engine = await _engineService.GetAsync(id, cancellationToken); + await AuthorizeAsync(engine); + return Ok(Map(engine)); + } + + /// + /// Create a new word alignment engine + /// + /// + /// ## Parameters + /// * **name**: (optional) A name to help identify and distinguish the engine. + /// * Recommendation: Create a multi-part name to distinguish between projects, uses, etc. + /// * The name does not have to be unique, as the engine is uniquely identified by the auto-generated id + /// * **sourceLanguage**: The source language code (a valid [IETF language tag](https://en.wikipedia.org/wiki/IETF_language_tag) is recommended) + /// * **targetLanguage**: The target language code (a valid IETF language tag is recommended) + /// * **type**: **statistical** or **echo-word-alignment** + /// ### statistical + /// The Statistical engine is based off of the [Thot library](https://github.com/sillsdev/thot) and contains IBM-1, IBM-2, IBM-3, IBM-4, FastAlign and HMM algorithms. + /// ### echo-word-alignment + /// The echo-word-alignment engine has full coverage of all endpoints. Endpoints like create and build return empty responses. + /// Endpoints like get-word-alignment echo the sent content back to the user in the proper format. This engine is useful for debugging and testing purposes.
+ /// ## Sample request: + /// + /// { + /// "name": "myTeam:myProject:myEngine", + /// "sourceLanguage": "el", + /// "targetLanguage": "en", + /// "type": "statistical" + /// } + /// + /// + /// The engine configuration (see above) + /// + /// The new engine + /// Bad request. Is the engine type correct? + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the engine. + /// The engine does not exist. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.CreateWordAlignmentEngines)] + [HttpPost] + [ProducesResponseType(StatusCodes.Status201Created)] + [ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> CreateAsync( + [FromBody] WordAlignmentEngineConfigDto engineConfig, + CancellationToken cancellationToken + ) + { + Engine engine = Map(engineConfig); + Engine updatedEngine = await _engineService.CreateAsync(engine, cancellationToken); + WordAlignmentEngineDto dto = Map(updatedEngine); + return Created(dto.Url, dto); + } + + /// + /// Delete a word alignment engine + /// + /// The engine id + /// + /// The engine was successfully deleted. + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the engine. + /// The engine does not exist and therefore cannot be deleted. + /// A necessary service is currently unavailable. Check `/health` for more details. 
+ [Authorize(Scopes.DeleteWordAlignmentEngines)] + [HttpDelete("{id}")] + [ProducesResponseType(typeof(void), StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task DeleteAsync([NotNull] string id, CancellationToken cancellationToken) + { + await AuthorizeAsync(id, cancellationToken); + await _engineService.DeleteAsync(id, cancellationToken); + return Ok(); + } + + /// + /// Align words between a source and target segment + /// + /// The engine id + /// The source and target segment + /// + /// The word alignment result + /// Bad request + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the engine. + /// The engine does not exist. + /// The method is not supported. + /// The engine needs to be built before it can align segments. + /// A necessary service is currently unavailable. Check `/health` for more details.
+ [Authorize(Scopes.ReadWordAlignmentEngines)] + [HttpPost("{id}/get-word-alignment")] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status405MethodNotAllowed)] + [ProducesResponseType(typeof(void), StatusCodes.Status409Conflict)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> GetWordAlignmentAsync( + [NotNull] string id, + [FromBody] WordAlignmentRequestDto wordAlignmentRequest, + CancellationToken cancellationToken + ) + { + await AuthorizeAsync(id, cancellationToken); + WordAlignmentResult result = await _engineService.GetWordAlignmentAsync( + id, + wordAlignmentRequest.SourceSegment, + wordAlignmentRequest.TargetSegment, + cancellationToken + ); + _logger.LogInformation("Got word alignment for engine {EngineId}", id); + return Ok(Map(result)); + } + + /// + /// Add a parallel corpus to an engine + /// + /// + /// ## Parameters + /// * **SourceCorpusIds**: The source corpora associated with the parallel corpus + /// * **TargetCorpusIds**: The target corpora associated with the parallel corpus + /// + /// The engine id + /// The corpus configuration (see remarks) + /// + /// + /// + /// The added corpus + /// Bad request + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the engine. + /// The engine does not exist. + /// A necessary service is currently unavailable. Check `/health` for more details. 
+ [Authorize(Scopes.UpdateWordAlignmentEngines)] + [HttpPost("{id}/parallel-corpora")] + [ProducesResponseType(StatusCodes.Status201Created)] + [ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> AddParallelCorpusAsync( + [NotNull] string id, + [FromBody] WordAlignmentParallelCorpusConfigDto corpusConfig, + [FromServices] IRequestClient getCorpusClient, + [FromServices] IIdGenerator idGenerator, + CancellationToken cancellationToken + ) + { + Engine engine = await _engineService.GetAsync(id, cancellationToken); + await AuthorizeAsync(engine); + ParallelCorpus corpus = await MapAsync( + getCorpusClient, + idGenerator.GenerateId(), + corpusConfig, + cancellationToken + ); + await _engineService.AddParallelCorpusAsync(id, corpus, cancellationToken); + WordAlignmentParallelCorpusDto dto = Map(id, corpus); + return Created(dto.Url, dto); + } + + /// + /// Update a parallel corpus with a new set of corpora + /// + /// + /// Will completely replace the parallel corpus' file associations. Will not affect jobs already queued or running. Will not affect existing word graphs until new build is complete. + /// + /// The engine id + /// The parallel corpus id + /// The corpus configuration + /// The data file client + /// + /// The corpus was updated successfully + /// Bad request + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the engine. + /// The engine or corpus does not exist. + /// A necessary service is currently unavailable. Check `/health` for more details. 
+ [Authorize(Scopes.UpdateWordAlignmentEngines)] + [HttpPatch("{id}/parallel-corpora/{parallelCorpusId}")] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> UpdateParallelCorpusAsync( + [NotNull] string id, + [NotNull] string parallelCorpusId, + [FromBody] WordAlignmentParallelCorpusUpdateConfigDto corpusConfig, + [FromServices] IRequestClient getCorpusClient, + CancellationToken cancellationToken + ) + { + await AuthorizeAsync(id, cancellationToken); + ParallelCorpus parallelCorpus = await _engineService.UpdateParallelCorpusAsync( + id, + parallelCorpusId, + corpusConfig.SourceCorpusIds is null + ? null + : await MapAsync(getCorpusClient, corpusConfig.SourceCorpusIds, cancellationToken), + corpusConfig.TargetCorpusIds is null + ? null + : await MapAsync(getCorpusClient, corpusConfig.TargetCorpusIds, cancellationToken), + cancellationToken + ); + return Ok(Map(id, parallelCorpus)); + } + + /// + /// Get all parallel corpora for a engine + /// + /// The engine id + /// + /// The parallel corpora + /// The client is not authenticated + /// The authenticated client cannot perform the operation or does not own the engine + /// The engine does not exist + /// A necessary service is currently unavailable. Check `/health` for more details. 
+ [Authorize(Scopes.ReadWordAlignmentEngines)] + [HttpGet("{id}/parallel-corpora")] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task>> GetAllParallelCorporaAsync( + [NotNull] string id, + CancellationToken cancellationToken + ) + { + Engine engine = await _engineService.GetAsync(id, cancellationToken); + await AuthorizeAsync(engine); + return Ok(engine.ParallelCorpora.Select(c => Map(id, c))); + } + + /// + /// Get the configuration of a parallel corpus for an engine + /// + /// The engine id + /// The parallel corpus id + /// + /// The parallel corpus configuration + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the engine. + /// The engine or parallel corpus does not exist. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.ReadWordAlignmentEngines)] + [HttpGet("{id}/parallel-corpora/{parallelCorpusId}", Name = Endpoints.GetParallelWordAlignmentCorpus)] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> GetParallelCorpusAsync( + [NotNull] string id, + [NotNull] string parallelCorpusId, + CancellationToken cancellationToken + ) + { + Engine engine = await _engineService.GetAsync(id, cancellationToken); + await AuthorizeAsync(engine); + ParallelCorpus? 
corpus = engine.ParallelCorpora.FirstOrDefault(f => f.Id == parallelCorpusId); + if (corpus == null) + return NotFound(); + return Ok(Map(id, corpus)); + } + + /// + /// Remove a parallel corpus from an engine + /// + /// + /// Removing a parallel corpus will remove all word alignments associated with that corpus. + /// + /// The engine id + /// The parallel corpus id + /// + /// The parallel corpus was deleted successfully. + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the engine. + /// The engine or parallel corpus does not exist. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.UpdateWordAlignmentEngines)] + [HttpDelete("{id}/parallel-corpora/{parallelCorpusId}")] + [ProducesResponseType(typeof(void), StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task DeleteParallelCorpusAsync( + [NotNull] string id, + [NotNull] string parallelCorpusId, + CancellationToken cancellationToken + ) + { + await AuthorizeAsync(id, cancellationToken); + await _engineService.DeleteParallelCorpusAsync(id, parallelCorpusId, cancellationToken); + return Ok(); + } + + /// + /// Get all word alignments in a corpus of an engine + /// + /// + /// Word alignments are arranged in a list of dictionaries with the following fields per word alignment: + /// * **TextId**: The TextId of the SourceFile defined when the corpus was created. + /// * **Refs** (a list of strings): A list of references including: + /// * The references defined in the SourceFile per line, if any. + /// * An auto-generated reference of `[TextId]:[lineNumber]`, 1 indexed. 
+ /// * **SourceTokens**: the tokenized source segment + /// * **TargetTokens**: the tokenized target segment + /// * **Confidences**: the confidence of the alignment on a scale from 0 to 1 + /// * **Alignment**: the word alignment, 0 indexed for source and target positions + /// + /// Word alignments can be filtered by text id if provided. + /// Only word alignments for the most recent successful build of the engine are returned. + /// + /// The engine id + /// The corpus id + /// The text id (optional) + /// + /// The word alignments + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the engine. + /// The engine or corpus does not exist. + /// The engine needs to be built first. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.ReadWordAlignmentEngines)] + [HttpGet("{id}/corpora/{corpusId}/word-alignments")] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status409Conflict)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task>> GetAllWordAlignmentsAsync( + [NotNull] string id, + [NotNull] string corpusId, + [FromQuery] string? 
textId, + CancellationToken cancellationToken + ) + { + Engine engine = await _engineService.GetAsync(id, cancellationToken); + await AuthorizeAsync(engine); + if (!engine.ParallelCorpora.Any(c => c.Id == corpusId)) + return NotFound(); + if (engine.ModelRevision == 0) + return Conflict(); + + IEnumerable wordAlignments = await _wordAlignmentService.GetAllAsync( + id, + engine.ModelRevision, + corpusId, + textId, + cancellationToken + ); + _logger.LogInformation( + "Returning {Count} word alignments for engine {EngineId}, corpus {CorpusId}, and query {TextId}", + wordAlignments.Count(), + id, + corpusId, + textId + ); + return Ok(wordAlignments.Select(Map)); + } + + /// + /// Get all build jobs for a engine + /// + /// The engine id + /// + /// The build jobs + /// The client is not authenticated. + /// The authenticated client cannot perform the operation or does not own the engine. + /// The engine does not exist. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.ReadWordAlignmentEngines)] + [HttpGet("{id}/builds")] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task>> GetAllBuildsAsync( + [NotNull] string id, + CancellationToken cancellationToken + ) + { + await AuthorizeAsync(id, cancellationToken); + return Ok((await _buildService.GetAllAsync(id, cancellationToken)).Select(Map)); + } + + /// + /// Get a build job + /// + /// + /// If the `minRevision` is not defined, the current build, at whatever state it is, + /// will be immediately returned. If `minRevision` is defined, Serval will wait for + /// up to 40 seconds for the engine to build to the `minRevision` specified, else + /// will timeout. 
+ /// A use case is to actively query the state of the current build, where the subsequent + /// request sets the `minRevision` to the returned `revision` + 1 and timeouts are handled gracefully. + /// This method should use request throttling. + /// Note: Within the returned build, percentCompleted is a value between 0 and 1. + /// + /// The engine id + /// The build job id + /// The minimum revision + /// + /// The build job + /// The client is not authenticated. + /// The authenticated client does not own the engine. + /// The engine or build does not exist. + /// The long polling request timed out. This is expected behavior if you're using long-polling with the minRevision strategy specified in the docs. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.ReadWordAlignmentEngines)] + [HttpGet("{id}/builds/{buildId}", Name = Endpoints.GetWordAlignmentBuild)] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status408RequestTimeout)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> GetBuildAsync( + [NotNull] string id, + [NotNull] string buildId, + [FromQuery] long? 
minRevision, + CancellationToken cancellationToken + ) + { + await AuthorizeAsync(id, cancellationToken); + if (minRevision != null) + { + (_, EntityChange change) = await TaskEx.Timeout( + ct => _buildService.GetNewerRevisionAsync(buildId, minRevision.Value, ct), + _apiOptions.CurrentValue.LongPollTimeout, + cancellationToken: cancellationToken + ); + return change.Type switch + { + EntityChangeType.None => StatusCode(StatusCodes.Status408RequestTimeout), + EntityChangeType.Delete => NotFound(), + _ => Ok(Map(change.Entity!)), + }; + } + else + { + Build build = await _buildService.GetAsync(buildId, cancellationToken); + return Ok(Map(build)); + } + } + + /// + /// Starts a build job for a engine. + /// + /// + /// Specify the corpora and textIds to train on. If no "trainOn" field is provided, all corpora will be used. + /// Paratext Projects, you may flag a subset of books for training by including their [abbreviations] + /// Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. + /// Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) + /// All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + /// + /// Specify the corpora or textIds to word align on. + /// When a corpus or textId is selected for word align on, only text segments that are in both the source and the target will be aligned. + /// + /// The `"options"` parameter of the build config provides the ability to pass build configuration parameters as a JSON object. + /// See [statistical alignment job settings documentation](https://github.com/sillsdev/serval/wiki/Statistical-Alignment-Build-Options) about configuring job parameters. 
+ /// See [keyterms parsing documentation](https://github.com/sillsdev/serval/wiki/Paratext-Key-Terms-Parsing) on how to use keyterms for training. + /// + /// The engine id + /// The build config (see remarks) + /// + /// The new build job + /// The build configuration was invalid. + /// The client is not authenticated. + /// The authenticated client does not own the engine. + /// The engine does not exist. + /// There is already an active or pending build or a build in the process of being canceled. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.UpdateWordAlignmentEngines)] + [HttpPost("{id}/builds")] + [ProducesResponseType(StatusCodes.Status201Created)] + [ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status409Conflict)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> StartBuildAsync( + [NotNull] string id, + [FromBody] WordAlignmentBuildConfigDto buildConfig, + CancellationToken cancellationToken + ) + { + Engine engine = await _engineService.GetAsync(id, cancellationToken); + await AuthorizeAsync(engine); + Build build = Map(engine, buildConfig); + await _engineService.StartBuildAsync(build, cancellationToken); + + WordAlignmentBuildDto dto = Map(build); + return Created(dto.Url, dto); + } + + /// + /// Get the currently running build job for a engine + /// + /// + /// See documentation on endpoint /word-alignment/engines/{id}/builds/{id} - "Get a Build Job" for details on using `minRevision`. + /// + /// The engine id + /// The minimum revision + /// + /// The build job + /// There is no build currently running. 
+ /// Bad request + /// The client is not authenticated. + /// The authenticated client does not own the engine. + /// The engine does not exist. + /// The long polling request timed out. This is expected behavior if you're using long-polling with the minRevision strategy specified in the docs. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.ReadWordAlignmentEngines)] + [HttpGet("{id}/current-build")] + [ProducesResponseType(StatusCodes.Status200OK)] + [ProducesResponseType(StatusCodes.Status204NoContent)] + [ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status408RequestTimeout)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task> GetCurrentBuildAsync( + [NotNull] string id, + [FromQuery] long? minRevision, + CancellationToken cancellationToken + ) + { + await AuthorizeAsync(id, cancellationToken); + if (minRevision != null) + { + (_, EntityChange change) = await TaskEx.Timeout( + ct => _buildService.GetActiveNewerRevisionAsync(id, minRevision.Value, ct), + _apiOptions.CurrentValue.LongPollTimeout, + cancellationToken: cancellationToken + ); + return change.Type switch + { + EntityChangeType.None => StatusCode(StatusCodes.Status408RequestTimeout), + EntityChangeType.Delete => NoContent(), + _ => Ok(Map(change.Entity!)), + }; + } + else + { + Build? build = await _buildService.GetActiveAsync(id, cancellationToken); + if (build == null) + return NoContent(); + + return Ok(Map(build)); + } + } + + /// + /// Cancel the current build job (whether pending or active) for a engine + /// + /// + /// + /// The engine id + /// + /// The build job was cancelled successfully. 
+ /// There is no active build job. + /// The client is not authenticated. + /// The authenticated client does not own the engine. + /// The engine does not exist. + /// The engine does not support cancelling builds. + /// A necessary service is currently unavailable. Check `/health` for more details. + [Authorize(Scopes.UpdateWordAlignmentEngines)] + [HttpPost("{id}/current-build/cancel")] + [ProducesResponseType(typeof(void), StatusCodes.Status200OK)] + [ProducesResponseType(typeof(void), StatusCodes.Status204NoContent)] + [ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)] + [ProducesResponseType(typeof(void), StatusCodes.Status403Forbidden)] + [ProducesResponseType(typeof(void), StatusCodes.Status404NotFound)] + [ProducesResponseType(typeof(void), StatusCodes.Status405MethodNotAllowed)] + [ProducesResponseType(typeof(void), StatusCodes.Status503ServiceUnavailable)] + public async Task CancelBuildAsync([NotNull] string id, CancellationToken cancellationToken) + { + await AuthorizeAsync(id, cancellationToken); + if (!await _engineService.CancelBuildAsync(id, cancellationToken)) + return NoContent(); + return Ok(); + } + + private async Task AuthorizeAsync(string id, CancellationToken cancellationToken) + { + Engine engine = await _engineService.GetAsync(id, cancellationToken); + await AuthorizeAsync(engine); + } + + private async Task MapAsync( + IRequestClient getDataFileClient, + string corpusId, + WordAlignmentParallelCorpusConfigDto source, + CancellationToken cancellationToken + ) + { + return new ParallelCorpus + { + Id = corpusId, + SourceCorpora = await MapAsync(getDataFileClient, source.SourceCorpusIds, cancellationToken), + TargetCorpora = await MapAsync(getDataFileClient, source.TargetCorpusIds, cancellationToken) + }; + } + + private async Task> MapAsync( + IRequestClient getCorpusClient, + IEnumerable corpusIds, + CancellationToken cancellationToken + ) + { + var corpora = new List(); + foreach (string corpusId in corpusIds) + { + 
Response response = await getCorpusClient.GetResponse< + CorpusResult, + CorpusNotFound + >(new GetCorpus { CorpusId = corpusId, Owner = Owner }, cancellationToken); + if (response.Is(out Response? result)) + { + corpora.Add( + new MonolingualCorpus + { + Id = corpusId, + Name = result.Message.Name ?? "", + Language = result.Message.Language, + Files = result + .Message.Files.Select(f => new CorpusFile + { + Id = f.File.DataFileId, + Filename = f.File.Filename, + Format = f.File.Format, + TextId = f.TextId + }) + .ToList(), + } + ); + } + else if (response.Is(out Response? _)) + { + throw new InvalidOperationException($"The corpus {corpusId} cannot be found."); + } + } + return corpora; + } + + private WordAlignmentParallelCorpusDto Map(string engineId, ParallelCorpus source) + { + return new WordAlignmentParallelCorpusDto + { + Id = source.Id, + // Self link: this controller's named parallel-corpus route, which takes id and parallelCorpusId. + Url = _urlService.GetUrl(Endpoints.GetParallelWordAlignmentCorpus, new { id = engineId, parallelCorpusId = source.Id }), + Engine = new ResourceLinkDto + { + Id = engineId, + // Owning engine link: same named route that Map(Engine) uses for its own Url. + Url = _urlService.GetUrl(Endpoints.GetWordAlignmentEngine, new { id = engineId }) + }, + SourceCorpora = source + .SourceCorpora.Select(c => new ResourceLinkDto + { + Id = c.Id, + Url = _urlService.GetUrl(Endpoints.GetCorpus, new { Id = c.Id }) + }) + .ToList(), + TargetCorpora = source + .TargetCorpora.Select(c => new ResourceLinkDto + { + Id = c.Id, + Url = _urlService.GetUrl(Endpoints.GetCorpus, new { Id = c.Id }) + }) + .ToList() + }; + } + + private static Build Map(Engine engine, WordAlignmentBuildConfigDto source) + { + return new Build + { + EngineRef = engine.Id, + Name = source.Name, + WordAlignOn = Map(engine, source.WordAlignOn), + TrainOn = Map(engine, source.TrainOn), + Options = Map(source.Options) + }; + } + + private static List? Map(Engine engine, IReadOnlyList? 
source) + { + if (source is null) + return null; + + var corpusIds = new HashSet(engine.ParallelCorpora.Select(c => c.Id)); + var trainingCorpora = new List(); + foreach (TrainingCorpusConfigDto cc in source) + { + if (cc.ParallelCorpusId == null) + { + throw new InvalidOperationException($"One of ParallelCorpusId and CorpusId must be set."); + } + if (!corpusIds.Contains(cc.ParallelCorpusId)) + { + throw new InvalidOperationException( + $"The parallel corpus {cc.ParallelCorpusId} is not valid: This parallel corpus does not exist for engine {engine.Id}." + ); + } + trainingCorpora.Add( + new TrainingCorpus + { + ParallelCorpusRef = cc.ParallelCorpusId, + SourceFilters = cc.SourceFilters?.Select(Map).ToList(), + TargetFilters = cc.TargetFilters?.Select(Map).ToList() + } + ); + } + return trainingCorpora; + } + + private static ParallelCorpusFilter Map(ParallelCorpusFilterConfigDto source) + { + if (source.TextIds != null && source.ScriptureRange != null) + { + throw new InvalidOperationException( + $"The parallel corpus filter for corpus {source.CorpusId} is not valid: At most, one of TextIds and ScriptureRange can be set." + ); + } + return new ParallelCorpusFilter + { + CorpusRef = source.CorpusId, + TextIds = source.TextIds, + ScriptureRange = source.ScriptureRange + }; + } + + private static Dictionary? Map(object? source) + { + try + { + return JsonSerializer.Deserialize>( + source?.ToString() ?? 
"{}", + ObjectJsonSerializerOptions + ); + } + catch (Exception e) + { + throw new InvalidOperationException($"Unable to parse field 'options' : {e.Message}", e); + } + } + + private WordAlignmentEngineDto Map(Engine source) + { + return new WordAlignmentEngineDto + { + Id = source.Id, + Url = _urlService.GetUrl(Endpoints.GetWordAlignmentEngine, new { id = source.Id }), + Name = source.Name, + SourceLanguage = source.SourceLanguage, + TargetLanguage = source.TargetLanguage, + Type = source.Type.ToKebabCase(), + IsBuilding = source.IsBuilding, + ModelRevision = source.ModelRevision, + Confidence = Math.Round(source.Confidence, 8), + CorpusSize = source.CorpusSize + }; + } + + private WordAlignmentBuildDto Map(Build source) + { + return new WordAlignmentBuildDto + { + Id = source.Id, + // Self link: this controller's named build route, which takes id and buildId. + Url = _urlService.GetUrl(Endpoints.GetWordAlignmentBuild, new { id = source.EngineRef, buildId = source.Id }), + Revision = source.Revision, + Name = source.Name, + Engine = new ResourceLinkDto + { + Id = source.EngineRef, + // Owning engine link: same named route that Map(Engine) uses for its own Url. + Url = _urlService.GetUrl(Endpoints.GetWordAlignmentEngine, new { id = source.EngineRef }) + }, + TrainOn = source.TrainOn?.Select(s => Map(source.EngineRef, s)).ToList(), + WordAlignOn = source.WordAlignOn?.Select(s => Map(source.EngineRef, s)).ToList(), + Step = source.Step, + PercentCompleted = source.PercentCompleted, + Message = source.Message, + QueueDepth = source.QueueDepth, + State = source.State, + DateFinished = source.DateFinished, + Options = source.Options, + DeploymentVersion = source.DeploymentVersion + }; + } + + private TrainingCorpusDto Map(string engineId, TrainingCorpus source) + { + return new TrainingCorpusDto + { + ParallelCorpus = + source.ParallelCorpusRef != null + ? 
new ResourceLinkDto + { + Id = source.ParallelCorpusRef, + // Parallel corpus link: this controller's named parallel-corpus route. + Url = _urlService.GetUrl( + Endpoints.GetParallelWordAlignmentCorpus, + new { id = engineId, parallelCorpusId = source.ParallelCorpusRef } + ) + } + : null, + SourceFilters = source.SourceFilters?.Select(Map).ToList(), + TargetFilters = source.TargetFilters?.Select(Map).ToList() + }; + } + + private ParallelCorpusFilterDto Map(ParallelCorpusFilter source) + { + return new ParallelCorpusFilterDto + { + Corpus = new ResourceLinkDto + { + Id = source.CorpusRef, + Url = _urlService.GetUrl(Endpoints.GetCorpus, new { id = source.CorpusRef }) + }, + TextIds = source.TextIds, + ScriptureRange = source.ScriptureRange + }; + } + + private WordAlignmentResultDto Map(WordAlignmentResult source) + { + return new WordAlignmentResultDto + { + SourceTokens = source.SourceTokens.ToList(), + TargetTokens = source.TargetTokens.ToList(), + Confidences = source.Confidences.Select(c => Math.Round(c, 8)).ToList(), + Alignment = source.Alignment.Select(Map).ToList(), + }; + } + + private AlignedWordPairDto Map(AlignedWordPair source) + { + return new AlignedWordPairDto() { SourceIndex = source.SourceIndex, TargetIndex = source.TargetIndex }; + } + + private static WordAlignmentDto Map(Models.WordAlignment source) + { + return new WordAlignmentDto + { + TextId = source.TextId, + Refs = source.Refs, + SourceTokens = source.SourceTokens.ToList(), + TargetTokens = source.TargetTokens.ToList(), + Confidences = source.Confidences.Select(c => Math.Round(c, 8)).ToList(), + Alignment = source + .Alignment.Select(c => new AlignedWordPairDto() + { + SourceIndex = c.SourceIndex, + TargetIndex = c.TargetIndex + }) + .ToList(), + }; + } + + private Engine Map(WordAlignmentEngineConfigDto source) + { + return new Engine + { + Name = source.Name, + SourceLanguage = source.SourceLanguage, + TargetLanguage = source.TargetLanguage, + Type = source.Type.ToPascalCase(), + Owner = Owner, + 
ParallelCorpora = [], + IsInitialized = false + }; + } +} diff 
--git a/src/Serval/src/Serval.WordAlignment/Models/Build.cs b/src/Serval/src/Serval.WordAlignment/Models/Build.cs new file mode 100644 index 00000000..b20e871c --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Models/Build.cs @@ -0,0 +1,21 @@ +namespace Serval.WordAlignment.Models; + +public record Build : IInitializableEntity +{ + public string Id { get; set; } = ""; + public int Revision { get; set; } = 1; + public string? Name { get; init; } + public required string EngineRef { get; init; } + public IReadOnlyList? TrainOn { get; init; } + public IReadOnlyList? WordAlignOn { get; init; } + public int Step { get; init; } + public double? PercentCompleted { get; init; } + public string? Message { get; init; } + public int? QueueDepth { get; init; } + public JobState State { get; init; } = JobState.Pending; + public DateTime? DateFinished { get; init; } + public IReadOnlyDictionary? Options { get; init; } + public string? DeploymentVersion { get; init; } + public bool? IsInitialized { get; set; } + public DateTime? DateCreated { get; set; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Models/Engine.cs b/src/Serval/src/Serval.WordAlignment/Models/Engine.cs new file mode 100644 index 00000000..9985ec58 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Models/Engine.cs @@ -0,0 +1,19 @@ +namespace Serval.WordAlignment.Models; + +public record Engine : IOwnedEntity, IInitializableEntity +{ + public string Id { get; set; } = ""; + public int Revision { get; set; } = 1; + public string? Name { get; init; } + public required string SourceLanguage { get; init; } + public required string TargetLanguage { get; init; } + public required string Type { get; init; } + public required string Owner { get; init; } + public required IReadOnlyList ParallelCorpora { get; init; } + public bool IsBuilding { get; init; } + public int ModelRevision { get; init; } + public double Confidence { get; init; } + public int CorpusSize { get; init; } + public bool? 
IsInitialized { get; set; } + public DateTime? DateCreated { get; set; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Models/TrainingCorpus.cs b/src/Serval/src/Serval.WordAlignment/Models/TrainingCorpus.cs new file mode 100644 index 00000000..fd9c06a6 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Models/TrainingCorpus.cs @@ -0,0 +1,8 @@ +namespace Serval.WordAlignment.Models; + +public record TrainingCorpus +{ + public string? ParallelCorpusRef { get; set; } + public IReadOnlyList? SourceFilters { get; set; } + public IReadOnlyList? TargetFilters { get; set; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Models/WordAlignment.cs b/src/Serval/src/Serval.WordAlignment/Models/WordAlignment.cs new file mode 100644 index 00000000..0e9725f0 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Models/WordAlignment.cs @@ -0,0 +1,16 @@ +namespace Serval.WordAlignment.Models; + +public class WordAlignment : IEntity +{ + public string Id { get; set; } = ""; + public int Revision { get; set; } = 1; + public required string EngineRef { get; init; } + public int ModelRevision { get; init; } + public required string CorpusRef { get; init; } + public required string TextId { get; init; } + public required IReadOnlyList Refs { get; init; } + public required IReadOnlyList SourceTokens { get; set; } + public required IReadOnlyList TargetTokens { get; set; } + public required IReadOnlyList Confidences { get; set; } + public required IReadOnlyList Alignment { get; set; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Models/WordAlignmentResult.cs b/src/Serval/src/Serval.WordAlignment/Models/WordAlignmentResult.cs new file mode 100644 index 00000000..6bce82fb --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Models/WordAlignmentResult.cs @@ -0,0 +1,9 @@ +namespace Serval.WordAlignment.Models; + +public record WordAlignmentResult +{ + public required IReadOnlyList SourceTokens { get; set; } + public required IReadOnlyList TargetTokens { get; set; } + 
public required IReadOnlyList Confidences { get; set; } + public required IReadOnlyList Alignment { get; set; } +} diff --git a/src/Serval/src/Serval.WordAlignment/Serval.WordAlignment.csproj b/src/Serval/src/Serval.WordAlignment/Serval.WordAlignment.csproj new file mode 100644 index 00000000..79613a01 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Serval.WordAlignment.csproj @@ -0,0 +1,27 @@ + + + + net8.0 + enable + enable + true + true + true + $(NoWarn);CS1591;CS1573 + + + + + + + + + + + + + + + + + diff --git a/src/Serval/src/Serval.WordAlignment/Services/BuildCleanupService.cs b/src/Serval/src/Serval.WordAlignment/Services/BuildCleanupService.cs new file mode 100644 index 00000000..902fb411 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Services/BuildCleanupService.cs @@ -0,0 +1,7 @@ +namespace Serval.WordAlignment.Services; + +public class BuildCleanupService( + IServiceProvider services, + ILogger logger, + TimeSpan? timeout = null +) : UninitializedCleanupService(services, logger, timeout) { } diff --git a/src/Serval/src/Serval.WordAlignment/Services/BuildService.cs b/src/Serval/src/Serval.WordAlignment/Services/BuildService.cs new file mode 100644 index 00000000..c3069135 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Services/BuildService.cs @@ -0,0 +1,63 @@ +namespace Serval.WordAlignment.Services; + +public class BuildService(IRepository builds) : EntityServiceBase(builds), IBuildService +{ + public async Task> GetAllAsync(string parentId, CancellationToken cancellationToken = default) + { + return await Entities.GetAllAsync(e => e.EngineRef == parentId, cancellationToken); + } + + public Task GetActiveAsync(string parentId, CancellationToken cancellationToken = default) + { + return Entities.GetAsync( + b => b.EngineRef == parentId && (b.State == JobState.Active || b.State == JobState.Pending), + cancellationToken + ); + } + + public Task> GetNewerRevisionAsync( + string id, + long minRevision, + CancellationToken 
cancellationToken = default + ) + { + return GetNewerRevisionAsync(e => e.Id == id, minRevision, cancellationToken); + } + + public Task> GetActiveNewerRevisionAsync( + string parentId, + long minRevision, + CancellationToken cancellationToken = default + ) + { + return GetNewerRevisionAsync( + b => b.EngineRef == parentId && (b.State == JobState.Active || b.State == JobState.Pending), + minRevision, + cancellationToken + ); + } + + private async Task> GetNewerRevisionAsync( + Expression> filter, + long minRevision, + CancellationToken cancellationToken = default + ) + { + using ISubscription subscription = await Entities.SubscribeAsync(filter, cancellationToken); + EntityChange curChange = subscription.Change; + if (curChange.Type == EntityChangeType.Delete && minRevision > 1) + return curChange; + while (true) + { + if (curChange.Entity is not null) + { + if (curChange.Type != EntityChangeType.Delete && minRevision <= curChange.Entity.Revision) + return curChange; + } + await subscription.WaitForChangeAsync(cancellationToken: cancellationToken); + curChange = subscription.Change; + if (curChange.Type == EntityChangeType.Delete) + return curChange; + } + } +} diff --git a/src/Serval/src/Serval.WordAlignment/Services/EngineCleanupService.cs b/src/Serval/src/Serval.WordAlignment/Services/EngineCleanupService.cs new file mode 100644 index 00000000..c6b7faf0 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Services/EngineCleanupService.cs @@ -0,0 +1,7 @@ +namespace Serval.WordAlignment.Services; + +public class EngineCleanupService( + IServiceProvider services, + ILogger logger, + TimeSpan? 
timeout = null +) : UninitializedCleanupService(services, logger, timeout) { } diff --git a/src/Serval/src/Serval.WordAlignment/Services/EngineService.cs b/src/Serval/src/Serval.WordAlignment/Services/EngineService.cs new file mode 100644 index 00000000..731bc41a --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Services/EngineService.cs @@ -0,0 +1,628 @@ +using Serval.WordAlignment.V1; + +namespace Serval.WordAlignment.Services; + +public class EngineService( + IRepository engines, + IRepository builds, + IRepository wordAlignments, + GrpcClientFactory grpcClientFactory, + IOptionsMonitor dataFileOptions, + IDataAccessContext dataAccessContext, + ILoggerFactory loggerFactory, + IScriptureDataFileService scriptureDataFileService +) : OwnedEntityServiceBase(engines), IEngineService +{ + private readonly IRepository _builds = builds; + private readonly IRepository _wordAlignments = wordAlignments; + private readonly GrpcClientFactory _grpcClientFactory = grpcClientFactory; + private readonly IOptionsMonitor _dataFileOptions = dataFileOptions; + private readonly IDataAccessContext _dataAccessContext = dataAccessContext; + private readonly ILogger _logger = loggerFactory.CreateLogger(); + private readonly IScriptureDataFileService _scriptureDataFileService = scriptureDataFileService; + + public override async Task GetAsync(string id, CancellationToken cancellationToken = default) + { + Engine engine = await base.GetAsync(id, cancellationToken); + if (!(engine.IsInitialized ?? 
true)) + throw new EntityNotFoundException($"Could not find the {typeof(Engine).Name} '{id}'."); + return engine; + } + + public override async Task> GetAllAsync( + string owner, + CancellationToken cancellationToken = default + ) + { + return await Entities.GetAllAsync( + e => e.Owner == owner && (e.IsInitialized == null || e.IsInitialized.Value), + cancellationToken + ); + } + + public async Task GetWordAlignmentAsync( + string engineId, + string sourceSegment, + string targetSegment, + CancellationToken cancellationToken = default + ) + { + Engine engine = await GetAsync(engineId, cancellationToken); + + WordAlignmentEngineApi.WordAlignmentEngineApiClient client = + _grpcClientFactory.CreateClient(engine.Type); + GetWordAlignmentResponse response = await client.GetWordAlignmentAsync( + new GetWordAlignmentRequest + { + EngineType = engine.Type, + EngineId = engine.Id, + SourceSegment = sourceSegment, + TargetSegment = targetSegment + }, + cancellationToken: cancellationToken + ); + return Map(response.Result); + } + + public override async Task CreateAsync(Engine engine, CancellationToken cancellationToken = default) + { + try + { + engine.DateCreated = DateTime.UtcNow; + await Entities.InsertAsync(engine, cancellationToken); + WordAlignmentEngineApi.WordAlignmentEngineApiClient client; + try + { + client = _grpcClientFactory.CreateClient( + engine.Type + ); + } + catch (InvalidOperationException) + { + throw new InvalidOperationException($"'{engine.Type}' is an invalid engine type."); + } + var request = new CreateRequest + { + EngineType = engine.Type, + EngineId = engine.Id, + SourceLanguage = engine.SourceLanguage, + TargetLanguage = engine.TargetLanguage + }; + + if (engine.Name is not null) + request.EngineName = engine.Name; + await client.CreateAsync(request, cancellationToken: cancellationToken); + } + catch (RpcException rpcex) + { + await Entities.DeleteAsync(engine, CancellationToken.None); + if (rpcex.StatusCode == StatusCode.InvalidArgument) + { + 
throw new InvalidOperationException( + $"Unable to create engine {engine.Id} because of an invalid argument: {rpcex.Status.Detail}", + rpcex + ); + } + throw; + } + catch + { + await Entities.DeleteAsync(engine, CancellationToken.None); + throw; + } + return engine; + } + + public override async Task DeleteAsync(string engineId, CancellationToken cancellationToken = default) + { + Engine? engine = await Entities.GetAsync(engineId, cancellationToken); + if (engine is null) + throw new EntityNotFoundException($"Could not find the Engine '{engineId}'."); + + WordAlignmentEngineApi.WordAlignmentEngineApiClient client = + _grpcClientFactory.CreateClient(engine.Type); + await client.DeleteAsync( + new DeleteRequest { EngineType = engine.Type, EngineId = engine.Id }, + cancellationToken: cancellationToken + ); + + await _dataAccessContext.WithTransactionAsync( + async (ct) => + { + await Entities.DeleteAsync(engineId, ct); + await _builds.DeleteAllAsync(b => b.EngineRef == engineId, ct); + await _wordAlignments.DeleteAllAsync(pt => pt.EngineRef == engineId, ct); + }, + CancellationToken.None + ); + } + + private Dictionary> GetChapters(string fileLocation, string scriptureRange) + { + try + { + return ScriptureRangeParser.GetChapters( + scriptureRange, + _scriptureDataFileService.GetParatextProjectSettings(fileLocation).Versification + ); + } + catch (ArgumentException ae) + { + throw new InvalidOperationException($"The scripture range {scriptureRange} is not valid: {ae.Message}"); + } + } + + public async Task StartBuildAsync(Build build, CancellationToken cancellationToken = default) + { + build.DateCreated = DateTime.UtcNow; + Engine engine = await GetAsync(build.EngineRef, cancellationToken); + await _builds.InsertAsync(build, cancellationToken); + + WordAlignmentEngineApi.WordAlignmentEngineApiClient client = + _grpcClientFactory.CreateClient(engine.Type); + + try + { + StartBuildRequest request; + Dictionary? 
trainOn = build.TrainOn?.ToDictionary(c => c.ParallelCorpusRef!); + Dictionary? wordAlignOn = build.WordAlignOn?.ToDictionary(c => + c.ParallelCorpusRef! + ); + IReadOnlyList parallelCorpora = engine + .ParallelCorpora.Where(pc => + trainOn == null + || trainOn.ContainsKey(pc.Id) + || wordAlignOn == null + || wordAlignOn.ContainsKey(pc.Id) + ) + .ToList(); + + request = new StartBuildRequest + { + EngineType = engine.Type, + EngineId = engine.Id, + BuildId = build.Id, + Corpora = + { + parallelCorpora.Select(c => + Map( + c, + trainOn?.GetValueOrDefault(c.Id), + wordAlignOn?.GetValueOrDefault(c.Id), + trainOn is null, + wordAlignOn is null + ) + ) + } + }; + + if (build.Options is not null) + request.Options = JsonSerializer.Serialize(build.Options); + + // Log the build request summary + try + { + var buildRequestSummary = (JsonObject)JsonNode.Parse(JsonSerializer.Serialize(request))!; + // correct build options parsing + buildRequestSummary.Remove("Options"); + try + { + buildRequestSummary.Add("Options", JsonNode.Parse(request.Options)); + } + catch (JsonException) + { + buildRequestSummary.Add( + "Options", + "Build \"Options\" failed parsing: " + (request.Options ?? 
"null") + ); + } + buildRequestSummary.Add("Event", "BuildRequest"); + buildRequestSummary.Add("ModelRevision", engine.ModelRevision); + buildRequestSummary.Add("ClientId", engine.Owner); + _logger.LogInformation("{request}", buildRequestSummary.ToJsonString()); + } + catch (JsonException) + { + _logger.LogInformation("Error parsing build request summary."); + _logger.LogInformation("{request}", JsonSerializer.Serialize(request)); + } + await client.StartBuildAsync(request, cancellationToken: cancellationToken); + await _builds.UpdateAsync( + b => b.Id == build.Id, + u => u.Set(e => e.IsInitialized, true), + cancellationToken: CancellationToken.None + ); + } + catch + { + await _builds.DeleteAsync(build, CancellationToken.None); + throw; + } + } + + public async Task CancelBuildAsync(string engineId, CancellationToken cancellationToken = default) + { + Engine? engine = await GetAsync(engineId, cancellationToken); + if (engine is null) + throw new EntityNotFoundException($"Could not find the Engine '{engineId}'."); + + WordAlignmentEngineApi.WordAlignmentEngineApiClient client = + _grpcClientFactory.CreateClient(engine.Type); + try + { + await client.CancelBuildAsync( + new CancelBuildRequest { EngineType = engine.Type, EngineId = engine.Id }, + cancellationToken: cancellationToken + ); + } + catch (RpcException re) + { + if (re.StatusCode is StatusCode.Aborted) + return false; + throw; + } + return true; + } + + public Task AddParallelCorpusAsync( + string engineId, + Shared.Models.ParallelCorpus corpus, + CancellationToken cancellationToken = default + ) + { + return Entities.UpdateAsync( + e => e.Id == engineId && (e.IsInitialized == null || e.IsInitialized.Value), + u => u.Add(e => e.ParallelCorpora, corpus), + cancellationToken: cancellationToken + ); + } + + public async Task UpdateParallelCorpusAsync( + string engineId, + string parallelCorpusId, + IReadOnlyList? sourceCorpora, + IReadOnlyList? 
targetCorpora, + CancellationToken cancellationToken = default + ) + { + Engine? engine = await Entities.UpdateAsync( + e => + e.Id == engineId + && (e.IsInitialized == null || e.IsInitialized.Value) + && e.ParallelCorpora.Any(c => c.Id == parallelCorpusId), + u => + { + if (sourceCorpora is not null) + u.Set(c => c.ParallelCorpora[ArrayPosition.FirstMatching].SourceCorpora, sourceCorpora); + if (targetCorpora is not null) + u.Set(c => c.ParallelCorpora[ArrayPosition.FirstMatching].TargetCorpora, targetCorpora); + }, + cancellationToken: cancellationToken + ); + if (engine is null) + { + throw new EntityNotFoundException( + $"Could not find the Corpus '{parallelCorpusId}' in Engine '{engineId}'." + ); + } + + return engine.ParallelCorpora.First(c => c.Id == parallelCorpusId); + } + + public async Task DeleteParallelCorpusAsync( + string engineId, + string parallelCorpusId, + CancellationToken cancellationToken = default + ) + { + Engine? originalEngine = null; + await _dataAccessContext.WithTransactionAsync( + async (ct) => + { + originalEngine = await Entities.UpdateAsync( + e => e.Id == engineId && (e.IsInitialized == null || e.IsInitialized.Value), + u => u.RemoveAll(e => e.ParallelCorpora, c => c.Id == parallelCorpusId), + returnOriginal: true, + cancellationToken: ct + ); + if (originalEngine is null || !originalEngine.ParallelCorpora.Any(c => c.Id == parallelCorpusId)) + { + throw new EntityNotFoundException( + $"Could not find the Corpus '{parallelCorpusId}' in Engine '{engineId}'." 
+ ); + } + await _wordAlignments.DeleteAllAsync(pt => pt.CorpusRef == parallelCorpusId, ct); + }, + cancellationToken: cancellationToken + ); + } + + public Task DeleteAllCorpusFilesAsync(string dataFileId, CancellationToken cancellationToken = default) + { + return Entities.UpdateAllAsync( + e => + e.ParallelCorpora.Any(c => + c.SourceCorpora.Any(sc => sc.Files.Any(f => f.Id == dataFileId)) + || c.TargetCorpora.Any(tc => tc.Files.Any(f => f.Id == dataFileId)) + ), + u => + { + u.RemoveAll( + e => e.ParallelCorpora[ArrayPosition.All].SourceCorpora[ArrayPosition.All].Files, + f => f.Id == dataFileId + ); + u.RemoveAll( + e => e.ParallelCorpora[ArrayPosition.All].TargetCorpora[ArrayPosition.All].Files, + f => f.Id == dataFileId + ); + }, + cancellationToken: cancellationToken + ); + } + + public Task UpdateDataFileFilenameFilesAsync( + string dataFileId, + string filename, + CancellationToken cancellationToken = default + ) + { + return Entities.UpdateAllAsync( + e => + e.ParallelCorpora.Any(c => + c.SourceCorpora.Any(cs => cs.Files.Any(f => f.Id == dataFileId)) + || c.TargetCorpora.Any(tc => tc.Files.Any(f => f.Id == dataFileId)) + ), + u => + { + u.SetAll( + e => e.ParallelCorpora[ArrayPosition.All].SourceCorpora[ArrayPosition.All].Files, + f => f.Filename, + filename, + f => f.Id == dataFileId + ); + u.SetAll( + e => e.ParallelCorpora[ArrayPosition.All].TargetCorpora[ArrayPosition.All].Files, + f => f.Filename, + filename, + f => f.Id == dataFileId + ); + }, + cancellationToken: cancellationToken + ); + } + + public Task UpdateCorpusFilesAsync( + string corpusId, + IReadOnlyList files, + CancellationToken cancellationToken = default + ) + { + return Entities.UpdateAllAsync( + e => + e.ParallelCorpora.Any(c => + c.SourceCorpora.Any(sc => sc.Id == corpusId) || c.TargetCorpora.Any(tc => tc.Id == corpusId) + ), + u => + { + u.SetAll( + e => e.ParallelCorpora[ArrayPosition.All].SourceCorpora, + mc => mc.Files, + files, + mc => mc.Id == corpusId + ); + u.SetAll( + e => 
e.ParallelCorpora[ArrayPosition.All].TargetCorpora, + mc => mc.Files, + files, + mc => mc.Id == corpusId + ); + }, + cancellationToken: cancellationToken + ); + } + + public async Task GetQueueAsync(string engineType, CancellationToken cancellationToken = default) + { + WordAlignmentEngineApi.WordAlignmentEngineApiClient client = + _grpcClientFactory.CreateClient(engineType); + GetQueueSizeResponse response = await client.GetQueueSizeAsync( + new GetQueueSizeRequest { EngineType = engineType }, + cancellationToken: cancellationToken + ); + return new Queue { Size = response.Size, EngineType = engineType }; + } + + private Models.WordAlignmentResult Map(V1.WordAlignmentResult source) + { + return new Models.WordAlignmentResult + { + SourceTokens = source.SourceTokens.ToList(), + TargetTokens = source.TargetTokens.ToList(), + Confidences = source.Confidences.ToList(), + Alignment = source.Alignment.Select(Map).ToList(), + }; + } + + private Shared.Models.AlignedWordPair Map(V1.AlignedWordPair source) + { + return new Shared.Models.AlignedWordPair { SourceIndex = source.SourceIndex, TargetIndex = source.TargetIndex }; + } + + private V1.ParallelCorpus Map( + Shared.Models.ParallelCorpus source, + TrainingCorpus? trainingCorpus, + TrainingCorpus? wordAlignmentCorpus, + bool trainOnAllCorpora, + bool wordAlignOnAllCorpora + ) + { + string? referenceFileLocation = + source.TargetCorpora.Count > 0 && source.TargetCorpora[0].Files.Count > 0 + ? 
Map(source.TargetCorpora[0].Files[0]).Location + : null; + + bool trainOnAllSources = + trainOnAllCorpora || (trainingCorpus is not null && trainingCorpus.SourceFilters is null); + bool wordAlignAllSources = + wordAlignOnAllCorpora || (wordAlignmentCorpus is not null && wordAlignmentCorpus.SourceFilters is null); + + bool trainOnAllTargets = + trainOnAllCorpora || (trainingCorpus is not null && trainingCorpus.TargetFilters is null); + bool wordAlignAllTargets = + wordAlignOnAllCorpora || (wordAlignmentCorpus is not null && wordAlignmentCorpus.TargetFilters is null); + + return new V1.ParallelCorpus + { + Id = source.Id, + SourceCorpora = + { + source.SourceCorpora.Select(sc => + Map( + sc, + trainingCorpus?.SourceFilters?.Where(sf => sf.CorpusRef == sc.Id).FirstOrDefault(), + wordAlignmentCorpus?.SourceFilters?.Where(sf => sf.CorpusRef == sc.Id).FirstOrDefault(), + referenceFileLocation, + trainOnAllSources, + wordAlignAllSources + ) + ) + }, + TargetCorpora = + { + source.TargetCorpora.Select(tc => + Map( + tc, + trainingCorpus?.TargetFilters?.Where(sf => sf.CorpusRef == tc.Id).FirstOrDefault(), + null, + referenceFileLocation, + trainOnAllTargets, + wordAlignAllTargets + ) + ) + } + }; + } + + private V1.MonolingualCorpus Map( + Shared.Models.MonolingualCorpus inputCorpus, + ParallelCorpusFilter? trainingFilter, + ParallelCorpusFilter? wordAlignmentFilter, + string? referenceFileLocation, + bool trainOnAll, + bool wordAlignOnAll + ) + { + Dictionary? trainOnChapters = null; + if ( + trainingFilter is not null + && trainingFilter.ScriptureRange is not null + && referenceFileLocation is not null + ) + { + trainOnChapters = GetChapters(referenceFileLocation, trainingFilter.ScriptureRange) + .Select( + (kvp) => + { + var scriptureChapters = new ScriptureChapters(); + scriptureChapters.Chapters.Add(kvp.Value); + return (kvp.Key, scriptureChapters); + } + ) + .ToDictionary(); + } + + Dictionary? 
wordAlignmentChapters = null; + if ( + wordAlignmentFilter is not null + && wordAlignmentFilter.ScriptureRange is not null + && referenceFileLocation is not null + ) + { + wordAlignmentChapters = GetChapters(referenceFileLocation, wordAlignmentFilter.ScriptureRange) + .Select( + (kvp) => + { + var scriptureChapters = new ScriptureChapters(); + scriptureChapters.Chapters.Add(kvp.Value); + return (kvp.Key, scriptureChapters); + } + ) + .ToDictionary(); + } + + var returnCorpus = new V1.MonolingualCorpus + { + Id = inputCorpus.Id, + Language = inputCorpus.Language, + Files = { inputCorpus.Files.Select(Map) } + }; + + if ( + trainingFilter is not null + && trainingFilter.TextIds is not null + && trainingFilter.ScriptureRange is not null + ) + { + throw new InvalidOperationException( + "Cannot specify both TextIds and ScriptureRange in the training filter." + ); + } + + if ( + trainOnAll + || (trainingFilter is not null && trainingFilter.TextIds is null && trainingFilter.ScriptureRange is null) + ) + { + returnCorpus.TrainOnAll = true; + } + else + { + if (trainOnChapters is not null) + returnCorpus.TrainOnChapters.Add(trainOnChapters); + if (trainingFilter?.TextIds is not null) + returnCorpus.TrainOnTextIds.Add(trainingFilter.TextIds); + } + + if ( + wordAlignmentFilter is not null + && wordAlignmentFilter.TextIds is not null + && wordAlignmentFilter.ScriptureRange is not null + ) + { + throw new InvalidOperationException( + "Cannot specify both TextIds and ScriptureRange in the word alignment filter." 
+ ); + } + + if ( + wordAlignOnAll + || ( + wordAlignmentFilter is not null + && wordAlignmentFilter.TextIds is null + && wordAlignmentFilter.ScriptureRange is null + ) + ) + { + returnCorpus.WordAlignOnAll = true; + } + else + { + if (wordAlignmentChapters is not null) + returnCorpus.WordAlignOnChapters.Add(wordAlignmentChapters); + if (wordAlignmentFilter?.TextIds is not null) + returnCorpus.WordAlignOnTextIds.Add(wordAlignmentFilter.TextIds); + } + + return returnCorpus; + } + + private V1.CorpusFile Map(Shared.Models.CorpusFile source) + { + return new V1.CorpusFile + { + TextId = source.TextId, + Format = (V1.FileFormat)source.Format, + Location = Path.Combine(_dataFileOptions.CurrentValue.FilesDirectory, source.Filename) + }; + } +} diff --git a/src/Serval/src/Serval.WordAlignment/Services/IBuildService.cs b/src/Serval/src/Serval.WordAlignment/Services/IBuildService.cs new file mode 100644 index 00000000..f7cbcabb --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Services/IBuildService.cs @@ -0,0 +1,18 @@ +namespace Serval.WordAlignment.Services; + +public interface IBuildService +{ + Task> GetAllAsync(string parentId, CancellationToken cancellationToken = default); + Task GetAsync(string id, CancellationToken cancellationToken = default); + Task GetActiveAsync(string parentId, CancellationToken cancellationToken = default); + Task> GetNewerRevisionAsync( + string id, + long minRevision, + CancellationToken cancellationToken = default + ); + Task> GetActiveNewerRevisionAsync( + string parentId, + long minRevision, + CancellationToken cancellationToken = default + ); +} diff --git a/src/Serval/src/Serval.WordAlignment/Services/IEngineService.cs b/src/Serval/src/Serval.WordAlignment/Services/IEngineService.cs new file mode 100644 index 00000000..e242b904 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Services/IEngineService.cs @@ -0,0 +1,51 @@ +namespace Serval.WordAlignment.Services; + +public interface IEngineService +{ + Task> 
GetAllAsync(string owner, CancellationToken cancellationToken = default); + Task GetAsync(string engineId, CancellationToken cancellationToken = default); + + Task CreateAsync(Engine engine, CancellationToken cancellationToken = default); + Task DeleteAsync(string engineId, CancellationToken cancellationToken = default); + + Task GetWordAlignmentAsync( + string engineId, + string sourceSegment, + string targetSegment, + CancellationToken cancellationToken = default + ); + + Task StartBuildAsync(Build build, CancellationToken cancellationToken = default); + + Task CancelBuildAsync(string engineId, CancellationToken cancellationToken = default); + + Task AddParallelCorpusAsync(string engineId, ParallelCorpus corpus, CancellationToken cancellationToken = default); + Task UpdateParallelCorpusAsync( + string engineId, + string parallelCorpusId, + IReadOnlyList? sourceCorpora, + IReadOnlyList? targetCorpora, + CancellationToken cancellationToken = default + ); + Task DeleteParallelCorpusAsync( + string engineId, + string parallelCorpusId, + CancellationToken cancellationToken = default + ); + + Task DeleteAllCorpusFilesAsync(string dataFileId, CancellationToken cancellationToken = default); + + Task UpdateDataFileFilenameFilesAsync( + string dataFileId, + string filename, + CancellationToken cancellationToken = default + ); + + Task UpdateCorpusFilesAsync( + string corpusId, + IReadOnlyList files, + CancellationToken cancellationToken = default + ); + + Task GetQueueAsync(string engineType, CancellationToken cancellationToken = default); +} diff --git a/src/Serval/src/Serval.WordAlignment/Services/IWordAlignmentService.cs b/src/Serval/src/Serval.WordAlignment/Services/IWordAlignmentService.cs new file mode 100644 index 00000000..fd94879f --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Services/IWordAlignmentService.cs @@ -0,0 +1,12 @@ +namespace Serval.WordAlignment.Services; + +public interface IWordAlignmentService +{ + Task> GetAllAsync( + string engineId, + 
int modelRevision, + string corpusId, + string? textId = null, + CancellationToken cancellationToken = default + ); +} diff --git a/src/Serval/src/Serval.WordAlignment/Services/WordAlignmentPlatformServiceV1.cs b/src/Serval/src/Serval.WordAlignment/Services/WordAlignmentPlatformServiceV1.cs new file mode 100644 index 00000000..e45cae0e --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Services/WordAlignmentPlatformServiceV1.cs @@ -0,0 +1,331 @@ +using Google.Protobuf.WellKnownTypes; +using Serval.WordAlignment.V1; + +namespace Serval.WordAlignment.Services; + +public class WordAlignmentPlatformServiceV1( + IRepository builds, + IRepository engines, + IRepository wordAlignments, + IDataAccessContext dataAccessContext, + IPublishEndpoint publishEndpoint +) : WordAlignmentPlatformApi.WordAlignmentPlatformApiBase +{ + private const int WordAlignmentInsertBatchSize = 128; + private static readonly Empty Empty = new(); + + private readonly IRepository _builds = builds; + private readonly IRepository _engines = engines; + private readonly IRepository _wordAlignments = wordAlignments; + private readonly IDataAccessContext _dataAccessContext = dataAccessContext; + private readonly IPublishEndpoint _publishEndpoint = publishEndpoint; + + public override async Task BuildStarted(BuildStartedRequest request, ServerCallContext context) + { + await _dataAccessContext.WithTransactionAsync( + async (ct) => + { + Build? build = await _builds.UpdateAsync( + request.BuildId, + u => u.Set(b => b.State, JobState.Active), + cancellationToken: ct + ); + if (build is null) + throw new RpcException(new Status(StatusCode.NotFound, "The build does not exist.")); + + Engine? 
engine = await _engines.UpdateAsync( + build.EngineRef, + u => u.Set(e => e.IsBuilding, true), + cancellationToken: ct + ); + if (engine is null) + throw new RpcException(new Status(StatusCode.NotFound, "The engine does not exist.")); + + await _publishEndpoint.Publish( + new TranslationBuildStarted + { + BuildId = build.Id, + EngineId = engine.Id, + Owner = engine.Owner + }, + ct + ); + }, + cancellationToken: context.CancellationToken + ); + return Empty; + } + + public override async Task BuildCompleted(BuildCompletedRequest request, ServerCallContext context) + { + await _dataAccessContext.WithTransactionAsync( + async (ct) => + { + Build? build = await _builds.UpdateAsync( + request.BuildId, + u => + u.Set(b => b.State, JobState.Completed) + .Set(b => b.Message, "Completed") + .Set(b => b.DateFinished, DateTime.UtcNow), + cancellationToken: ct + ); + if (build is null) + throw new RpcException(new Status(StatusCode.NotFound, "The build does not exist.")); + + Engine? engine = await _engines.UpdateAsync( + build.EngineRef, + u => + u.Set(e => e.Confidence, request.Confidence) + .Set(e => e.CorpusSize, request.CorpusSize) + .Set(e => e.IsBuilding, false) + .Inc(e => e.ModelRevision), + cancellationToken: ct + ); + if (engine is null) + throw new RpcException(new Status(StatusCode.NotFound, "The engine does not exist.")); + + // delete pretranslations created by the previous build + await _wordAlignments.DeleteAllAsync( + p => p.EngineRef == engine.Id && p.ModelRevision < engine.ModelRevision, + ct + ); + + await _publishEndpoint.Publish( + new TranslationBuildFinished + { + BuildId = build.Id, + EngineId = engine.Id, + Owner = engine.Owner, + BuildState = build.State, + Message = build.Message!, + DateFinished = build.DateFinished!.Value + }, + ct + ); + }, + cancellationToken: context.CancellationToken + ); + + return Empty; + } + + public override async Task BuildCanceled(BuildCanceledRequest request, ServerCallContext context) + { + await 
_dataAccessContext.WithTransactionAsync( + async (ct) => + { + Build? build = await _builds.UpdateAsync( + request.BuildId, + u => + u.Set(b => b.Message, "Canceled") + .Set(b => b.DateFinished, DateTime.UtcNow) + .Set(b => b.State, JobState.Canceled), + cancellationToken: ct + ); + if (build is null) + throw new RpcException(new Status(StatusCode.NotFound, "The build does not exist.")); + + Engine? engine = await _engines.UpdateAsync( + build.EngineRef, + u => u.Set(e => e.IsBuilding, false), + cancellationToken: ct + ); + if (engine is null) + throw new RpcException(new Status(StatusCode.NotFound, "The engine does not exist.")); + + // delete pretranslations that might have been created during the build + await _wordAlignments.DeleteAllAsync( + p => p.EngineRef == engine.Id && p.ModelRevision > engine.ModelRevision, + ct + ); + + await _publishEndpoint.Publish( + new TranslationBuildFinished + { + BuildId = build.Id, + EngineId = engine.Id, + Owner = engine.Owner, + BuildState = build.State, + Message = build.Message!, + DateFinished = build.DateFinished!.Value + }, + ct + ); + }, + cancellationToken: context.CancellationToken + ); + + return Empty; + } + + public override async Task BuildFaulted(BuildFaultedRequest request, ServerCallContext context) + { + await _dataAccessContext.WithTransactionAsync( + async (ct) => + { + Build? build = await _builds.UpdateAsync( + request.BuildId, + u => + u.Set(b => b.State, JobState.Faulted) + .Set(b => b.Message, request.Message) + .Set(b => b.DateFinished, DateTime.UtcNow), + cancellationToken: ct + ); + if (build is null) + throw new RpcException(new Status(StatusCode.NotFound, "The build does not exist.")); + + Engine? 
engine = await _engines.UpdateAsync( + build.EngineRef, + u => u.Set(e => e.IsBuilding, false), + cancellationToken: ct + ); + if (engine is null) + throw new RpcException(new Status(StatusCode.NotFound, "The engine does not exist.")); + + // delete pretranslations that might have been created during the build + await _wordAlignments.DeleteAllAsync( + p => p.EngineRef == engine.Id && p.ModelRevision > engine.ModelRevision, + ct + ); + + await _publishEndpoint.Publish( + new TranslationBuildFinished + { + BuildId = build.Id, + EngineId = engine.Id, + Owner = engine.Owner, + BuildState = build.State, + Message = build.Message!, + DateFinished = build.DateFinished!.Value + }, + ct + ); + }, + cancellationToken: context.CancellationToken + ); + + return Empty; + } + + public override async Task BuildRestarting(BuildRestartingRequest request, ServerCallContext context) + { + await _dataAccessContext.WithTransactionAsync( + async (ct) => + { + Build? build = await _builds.UpdateAsync( + request.BuildId, + u => + u.Set(b => b.Message, "Restarting") + .Set(b => b.Step, 0) + .Set(b => b.PercentCompleted, 0) + .Set(b => b.State, JobState.Pending), + cancellationToken: ct + ); + if (build is null) + throw new RpcException(new Status(StatusCode.NotFound, "The build does not exist.")); + + Engine? 
engine = await _engines.GetAsync(build.EngineRef, ct); + if (engine is null) + throw new RpcException(new Status(StatusCode.NotFound, "The engine does not exist.")); + + // delete pretranslations that might have been created during the build + await _wordAlignments.DeleteAllAsync( + p => p.EngineRef == engine.Id && p.ModelRevision > engine.ModelRevision, + ct + ); + }, + cancellationToken: context.CancellationToken + ); + + return Empty; + } + + public override async Task UpdateBuildStatus(UpdateBuildStatusRequest request, ServerCallContext context) + { + await _builds.UpdateAsync( + b => b.Id == request.BuildId && (b.State == JobState.Active || b.State == JobState.Pending), + u => + { + u.Set(b => b.Step, request.Step); + if (request.HasPercentCompleted) + { + u.Set( + b => b.PercentCompleted, + Math.Round(request.PercentCompleted, 4, MidpointRounding.AwayFromZero) + ); + } + if (request.HasMessage) + u.Set(b => b.Message, request.Message); + if (request.HasQueueDepth) + u.Set(b => b.QueueDepth, request.QueueDepth); + }, + cancellationToken: context.CancellationToken + ); + + return Empty; + } + + public override async Task IncrementTrainEngineCorpusSize( + IncrementTrainEngineCorpusSizeRequest request, + ServerCallContext context + ) + { + await _engines.UpdateAsync( + request.EngineId, + u => u.Inc(e => e.CorpusSize, request.Count), + cancellationToken: context.CancellationToken + ); + return Empty; + } + + public override async Task InsertInferences( + IAsyncStreamReader requestStream, + ServerCallContext context + ) + { + string engineId = ""; + int nextModelRevision = 0; + + var batch = new List(); + await foreach (InsertInferencesRequest request in requestStream.ReadAllAsync(context.CancellationToken)) + { + if (request.EngineId != engineId) + { + Engine? 
engine = await _engines.GetAsync(request.EngineId, context.CancellationToken); + if (engine is null) + throw new RpcException(new Status(StatusCode.NotFound, "The engine does not exist.")); + nextModelRevision = engine.ModelRevision + 1; + engineId = request.EngineId; + } + batch.Add( + new Models.WordAlignment + { + EngineRef = request.EngineId, + ModelRevision = nextModelRevision, + CorpusRef = request.CorpusId, + TextId = request.TextId, + Refs = request.Refs.ToList(), + SourceTokens = request.SourceTokens.ToList(), + TargetTokens = request.TargetTokens.ToList(), + Confidences = request.Confidences.ToList(), + Alignment = request + .Alignment.Select(a => new Shared.Models.AlignedWordPair + { + SourceIndex = a.SourceIndex, + TargetIndex = a.TargetIndex + }) + .ToList() + } + ); + if (batch.Count == WordAlignmentInsertBatchSize) + { + await _wordAlignments.InsertAllAsync(batch, context.CancellationToken); + batch.Clear(); + } + } + if (batch.Count > 0) + await _wordAlignments.InsertAllAsync(batch, CancellationToken.None); + + return Empty; + } +} diff --git a/src/Serval/src/Serval.WordAlignment/Services/WordAlignmentService.cs b/src/Serval/src/Serval.WordAlignment/Services/WordAlignmentService.cs new file mode 100644 index 00000000..09549609 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/Services/WordAlignmentService.cs @@ -0,0 +1,24 @@ +namespace Serval.WordAlignment.Services; + +public class WordAlignmentService(IRepository wordAlignments) + : EntityServiceBase(wordAlignments), + IWordAlignmentService +{ + public async Task> GetAllAsync( + string engineId, + int modelRevision, + string corpusId, + string? 
textId = null, + CancellationToken cancellationToken = default + ) + { + return await Entities.GetAllAsync( + pt => + pt.EngineRef == engineId + && pt.ModelRevision == modelRevision + && pt.CorpusRef == corpusId + && (textId == null || pt.TextId == textId), + cancellationToken + ); + } +} diff --git a/src/Serval/src/Serval.WordAlignment/using.cs b/src/Serval/src/Serval.WordAlignment/using.cs new file mode 100644 index 00000000..19fe0b79 --- /dev/null +++ b/src/Serval/src/Serval.WordAlignment/using.cs @@ -0,0 +1,30 @@ +global using System.Diagnostics.CodeAnalysis; +global using System.Linq.Expressions; +global using System.Text.Json; +global using System.Text.Json.Nodes; +global using Asp.Versioning; +global using CaseExtensions; +global using Grpc.Core; +global using Grpc.Net.ClientFactory; +global using MassTransit; +global using Microsoft.AspNetCore.Authorization; +global using Microsoft.AspNetCore.Http; +global using Microsoft.AspNetCore.Mvc; +global using Microsoft.AspNetCore.Routing; +global using Microsoft.Extensions.Configuration; +global using Microsoft.Extensions.Logging; +global using Microsoft.Extensions.Options; +global using NSwag.Annotations; +global using Serval.Shared.Configuration; +global using Serval.Shared.Contracts; +global using Serval.Shared.Controllers; +global using Serval.Shared.Models; +global using Serval.Shared.Services; +global using Serval.Shared.Utils; +global using Serval.WordAlignment.Configuration; +global using Serval.WordAlignment.Consumers; +global using Serval.WordAlignment.Contracts; +global using Serval.WordAlignment.Models; +global using Serval.WordAlignment.Services; +global using SIL.DataAccess; +global using SIL.ServiceToolkit.Utils; diff --git a/src/Serval/test/Serval.ApiServer.IntegrationTests/StatusTests.cs b/src/Serval/test/Serval.ApiServer.IntegrationTests/StatusTests.cs index cf9f64d6..b5c97b2e 100644 --- a/src/Serval/test/Serval.ApiServer.IntegrationTests/StatusTests.cs +++ 
b/src/Serval/test/Serval.ApiServer.IntegrationTests/StatusTests.cs @@ -27,7 +27,7 @@ public async Task GetHealthAsync(IEnumerable scope, int expectedStatusCo HealthReport healthReport = await client.GetHealthAsync(); Assert.That(healthReport, Is.Not.Null); Assert.That(healthReport.Status.ToString(), Is.Not.EqualTo("Healthy")); - Assert.That(healthReport.Results, Has.Count.EqualTo(6)); + Assert.That(healthReport.Results, Has.Count.EqualTo(8)); break; case 403: ex = Assert.ThrowsAsync(async () => diff --git a/src/Serval/test/Serval.ApiServer.IntegrationTests/WordAlignmentEngineTests.cs b/src/Serval/test/Serval.ApiServer.IntegrationTests/WordAlignmentEngineTests.cs new file mode 100644 index 00000000..276608ec --- /dev/null +++ b/src/Serval/test/Serval.ApiServer.IntegrationTests/WordAlignmentEngineTests.cs @@ -0,0 +1,1726 @@ +using Google.Protobuf.WellKnownTypes; +using Serval.WordAlignment.Models; +using Serval.WordAlignment.V1; +using static Serval.ApiServer.Utils; + +namespace Serval.ApiServer; + +#pragma warning disable CS0612 // Type or member is obsolete + +[TestFixture] +[Category("Integration")] +public class WordAlignmentEngineTests +{ + private static readonly WordAlignmentParallelCorpusConfig TestParallelCorpusConfig = + new() + { + Name = "TestCorpus", + SourceCorpusIds = [SOURCE_CORPUS_ID_1], + TargetCorpusIds = [TARGET_CORPUS_ID], + }; + + private static readonly WordAlignmentParallelCorpusConfig TestMixedParallelCorpusConfig = + new() + { + Name = "TestCorpus", + SourceCorpusIds = [SOURCE_CORPUS_ID_1, SOURCE_CORPUS_ID_2], + TargetCorpusIds = [TARGET_CORPUS_ID], + }; + + private static readonly WordAlignmentParallelCorpusConfig TestParallelCorpusConfigScripture = + new() + { + Name = "TestCorpus", + SourceCorpusIds = [SOURCE_CORPUS_ZIP_ID], + TargetCorpusIds = [TARGET_CORPUS_ZIP_ID], + }; + + private const string ECHO_ENGINE1_ID = "e00000000000000000000001"; + private const string ECHO_ENGINE2_ID = "e00000000000000000000002"; + private const string 
ECHO_ENGINE3_ID = "e00000000000000000000003"; + private const string STATISTICAL_ENGINE_ID = "be0000000000000000000001"; + private const string FILE1_SRC_ID = "f00000000000000000000001"; + private const string FILE1_FILENAME = "file_a"; + private const string FILE2_TRG_ID = "f00000000000000000000002"; + private const string FILE2_FILENAME = "file_b"; + private const string FILE3_SRC_ZIP_ID = "f00000000000000000000003"; + private const string FILE3_FILENAME = "file_c"; + private const string FILE4_TRG_ZIP_ID = "f00000000000000000000004"; + private const string FILE4_FILENAME = "file_d"; + private const string SOURCE_CORPUS_ID_1 = "cc0000000000000000000001"; + private const string SOURCE_CORPUS_ID_2 = "cc0000000000000000000002"; + private const string TARGET_CORPUS_ID = "cc0000000000000000000003"; + private const string SOURCE_CORPUS_ZIP_ID = "cc0000000000000000000004"; + private const string TARGET_CORPUS_ZIP_ID = "cc0000000000000000000005"; + + private const string DOES_NOT_EXIST_ENGINE_ID = "e00000000000000000000004"; + private const string DOES_NOT_EXIST_CORPUS_ID = "c00000000000000000000001"; + + private TestEnvironment _env; + + [SetUp] + public async Task SetUp() + { + _env = new TestEnvironment(); + var e0 = new Engine + { + Id = ECHO_ENGINE1_ID, + Name = "e0", + SourceLanguage = "en", + TargetLanguage = "en", + Type = "EchoWordAlignment", + Owner = "client1", + ParallelCorpora = [] + }; + var e1 = new Engine + { + Id = ECHO_ENGINE2_ID, + Name = "e1", + SourceLanguage = "en", + TargetLanguage = "en", + Type = "EchoWordAlignment", + Owner = "client1", + ParallelCorpora = [] + }; + var e2 = new Engine + { + Id = ECHO_ENGINE3_ID, + Name = "e2", + SourceLanguage = "en", + TargetLanguage = "en", + Type = "EchoWordAlignment", + Owner = "client2", + ParallelCorpora = [] + }; + var se0 = new Engine + { + Id = STATISTICAL_ENGINE_ID, + Name = "se0", + SourceLanguage = "en", + TargetLanguage = "es", + Type = "Statistical", + Owner = "client1", + ParallelCorpora = [] + 
}; + + await _env.Engines.InsertAllAsync([e0, e1, e2, se0]); + + var srcFile = new DataFiles.Models.DataFile + { + Id = FILE1_SRC_ID, + Owner = "client1", + Name = "src.txt", + Filename = FILE1_FILENAME, + Format = Shared.Contracts.FileFormat.Text + }; + var trgFile = new DataFiles.Models.DataFile + { + Id = FILE2_TRG_ID, + Owner = "client1", + Name = "trg.txt", + Filename = FILE2_FILENAME, + Format = Shared.Contracts.FileFormat.Text + }; + var srcParatextFile = new DataFiles.Models.DataFile + { + Id = FILE3_SRC_ZIP_ID, + Owner = "client1", + Name = "src.zip", + Filename = FILE3_FILENAME, + Format = Shared.Contracts.FileFormat.Paratext + }; + var trgParatextFile = new DataFiles.Models.DataFile + { + Id = FILE4_TRG_ZIP_ID, + Owner = "client1", + Name = "trg.zip", + Filename = FILE4_FILENAME, + Format = Shared.Contracts.FileFormat.Paratext + }; + await _env.DataFiles.InsertAllAsync([srcFile, trgFile, srcParatextFile, trgParatextFile]); + + var srcCorpus = new DataFiles.Models.Corpus + { + Id = SOURCE_CORPUS_ID_1, + Language = "en", + Owner = "client1", + Files = [new() { FileRef = srcFile.Id, TextId = "all" }] + }; + var srcCorpus2 = new DataFiles.Models.Corpus + { + Id = SOURCE_CORPUS_ID_2, + Language = "en", + Owner = "client1", + Files = [new() { FileRef = srcFile.Id, TextId = "all" }] + }; + var trgCorpus = new DataFiles.Models.Corpus + { + Id = TARGET_CORPUS_ID, + Language = "en", + Owner = "client1", + Files = [new() { FileRef = trgFile.Id, TextId = "all" }] + }; + var srcScriptureCorpus = new DataFiles.Models.Corpus + { + Id = SOURCE_CORPUS_ZIP_ID, + Language = "en", + Owner = "client1", + Files = [new() { FileRef = trgParatextFile.Id, TextId = "all" }] + }; + var trgScriptureCorpus = new DataFiles.Models.Corpus + { + Id = TARGET_CORPUS_ZIP_ID, + Language = "en", + Owner = "client1", + Files = [new() { FileRef = srcParatextFile.Id, TextId = "all" }] + }; + + await _env.Corpora.InsertAllAsync([srcCorpus, srcCorpus2, trgCorpus, srcScriptureCorpus, 
trgScriptureCorpus]); + } + + [Test] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 200)] + [TestCase(new[] { Scopes.ReadFiles }, 403)] //Arbitrary unrelated privilege + public async Task GetAllAsync(IEnumerable scope, int expectedStatusCode) + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(scope); + switch (expectedStatusCode) + { + case 200: + ICollection results = await client.GetAllAsync(); + Assert.That(results, Has.Count.EqualTo(4)); + Assert.That(results.All(eng => eng.SourceLanguage.Equals("en"))); + break; + case 403: + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.GetAllAsync(); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(expectedStatusCode)); + break; + default: + Assert.Fail("Unanticipated expectedStatusCode. Check test case for typo."); + break; + } + } + + [Test] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 200, ECHO_ENGINE1_ID)] + [TestCase(new[] { Scopes.ReadFiles }, 403, ECHO_ENGINE1_ID)] //Arbitrary unrelated privilege + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 403, ECHO_ENGINE3_ID)] //Engine is not owned + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 404, DOES_NOT_EXIST_ENGINE_ID)] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 404, "phony_id")] + public async Task GetByIdAsync(IEnumerable scope, int expectedStatusCode, string engineId) + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(scope); + switch (expectedStatusCode) + { + case 200: + WordAlignmentEngine result = await client.GetAsync(engineId); + Assert.That(result.Name, Is.EqualTo("e0")); + break; + case 403: + case 404: + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.GetAsync(engineId); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(expectedStatusCode)); + break; + default: + Assert.Fail("Unanticipated expectedStatusCode. 
Check test case for typo."); + break; + } + } + + [Test] + [TestCase(new[] { Scopes.CreateWordAlignmentEngines, Scopes.ReadWordAlignmentEngines }, 201, "EchoWordAlignment")] + [TestCase(new[] { Scopes.CreateWordAlignmentEngines }, 400, "NotARealKindOfMT")] + [TestCase(new[] { Scopes.ReadFiles }, 403, "EchoWordAlignment")] //Arbitrary unrelated privilege + public async Task CreateEngineAsync(IEnumerable scope, int expectedStatusCode, string engineType) + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(scope); + switch (expectedStatusCode) + { + case 201: + WordAlignmentEngine result = await client.CreateAsync( + new WordAlignmentEngineConfig + { + Name = "test", + SourceLanguage = "en", + TargetLanguage = "en", + Type = engineType + } + ); + Assert.That(result.Name, Is.EqualTo("test")); + WordAlignmentEngine? engine = await client.GetAsync(result.Id); + Assert.That(engine, Is.Not.Null); + Assert.That(engine.Name, Is.EqualTo("test")); + break; + case 400: + { + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.CreateAsync( + new WordAlignmentEngineConfig + { + Name = "test", + SourceLanguage = "en", + TargetLanguage = "es", + Type = engineType + } + ); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(expectedStatusCode)); + break; + } + case 403: + { + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.CreateAsync( + new WordAlignmentEngineConfig + { + Name = "test", + SourceLanguage = "en", + TargetLanguage = "en", + Type = engineType + } + ); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(expectedStatusCode)); + break; + } + default: + Assert.Fail("Unanticipated expectedStatusCode. 
Check test case for typo."); + break; + } + } + + [Test] + [TestCase(new[] { Scopes.DeleteWordAlignmentEngines, Scopes.ReadWordAlignmentEngines }, 200, ECHO_ENGINE1_ID)] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 403, ECHO_ENGINE1_ID)] //Arbitrary unrelated privilege + [TestCase(new[] { Scopes.DeleteWordAlignmentEngines }, 404, DOES_NOT_EXIST_ENGINE_ID)] + public async Task DeleteEngineByIdAsync(IEnumerable scope, int expectedStatusCode, string engineId) + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(scope); + switch (expectedStatusCode) + { + case 200: + await client.DeleteAsync(engineId); + ICollection results = await client.GetAllAsync(); + Assert.That(results, Has.Count.EqualTo(3)); + Assert.That(results.All(eng => eng.SourceLanguage.Equals("en"))); + break; + case 403: + case 404: + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.DeleteAsync(engineId); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(expectedStatusCode)); + break; + default: + Assert.Fail("Unanticipated expectedStatusCode. 
Check test case for typo."); + break; + } + } + + [Test] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines, Scopes.UpdateWordAlignmentEngines }, 200, ECHO_ENGINE1_ID)] + [TestCase( + new[] { Scopes.ReadWordAlignmentEngines, Scopes.UpdateWordAlignmentEngines }, + 404, + DOES_NOT_EXIST_ENGINE_ID + )] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines, Scopes.UpdateWordAlignmentEngines }, 409, ECHO_ENGINE1_ID)] + [TestCase(new[] { Scopes.ReadFiles }, 403, ECHO_ENGINE1_ID)] //Arbitrary unrelated privilege + public async Task TranslateSegmentWithEngineByIdAsync( + IEnumerable scope, + int expectedStatusCode, + string engineId + ) + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(scope); + switch (expectedStatusCode) + { + case 200: + await _env.Builds.InsertAsync( + new Build { EngineRef = engineId, State = Shared.Contracts.JobState.Completed } + ); + Client.WordAlignmentResult result = await client.GetWordAlignmentAsync( + engineId, + new WordAlignmentRequest { SourceSegment = "This is a test.", TargetSegment = "This is a test." }, + Arg.Any() + ); + Assert.That(result.SourceTokens, Is.EqualTo("This is a test .".Split())); + Assert.That(result.TargetTokens, Is.EqualTo("This is a test .".Split())); + break; + case 409: + { + _env.EchoClient.GetWordAlignmentAsync(Arg.Any()) + .Returns(CreateAsyncUnaryCall(StatusCode.Aborted)); + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.GetWordAlignmentAsync( + engineId, + new WordAlignmentRequest + { + SourceSegment = "This is a test.", + TargetSegment = "This is a test." + } + ); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(expectedStatusCode)); + break; + } + case 403: + case 404: + { + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.GetWordAlignmentAsync( + engineId, + new WordAlignmentRequest + { + SourceSegment = "This is a test.", + TargetSegment = "This is a test." 
+ } + ); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(expectedStatusCode)); + break; + } + default: + Assert.Fail("Unanticipated expectedStatusCode. Check test case for typo."); + break; + } + } + + [Test] + public async Task AddParallelCorpusToEngineByIdAsync() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient( + new[] { Scopes.UpdateWordAlignmentEngines } + ); + WordAlignmentParallelCorpus result = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + Assert.Multiple(() => + { + Assert.That(result.SourceCorpora.First().Id, Is.EqualTo(SOURCE_CORPUS_ID_1)); + Assert.That(result.TargetCorpora.First().Id, Is.EqualTo(TARGET_CORPUS_ID)); + }); + Engine? engine = await _env.Engines.GetAsync(ECHO_ENGINE1_ID); + if (engine == null) + { + Assert.Fail("Engine not found"); + return; + } + Assert.Multiple(() => + { + Assert.That(engine.ParallelCorpora[0].SourceCorpora[0].Files[0].Filename, Is.EqualTo(FILE1_FILENAME)); + Assert.That(engine.ParallelCorpora[0].TargetCorpora[0].Files[0].Filename, Is.EqualTo(FILE2_FILENAME)); + }); + } + + // Adding a parallel corpus to an engine id that does not exist must fail with 404. + [Test] + public void AddParallelCorpusToEngineById_NoSuchEngine() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient( + new[] { Scopes.UpdateWordAlignmentEngines } + ); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.AddParallelCorpusAsync(DOES_NOT_EXIST_ENGINE_ID, TestParallelCorpusConfig); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void AddParallelCorpusToEngineById_NotAuthorized() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(new[] { Scopes.ReadFiles }); + + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + await client.AddParallelCorpusAsync(ECHO_ENGINE1_ID, TestParallelCorpusConfig); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(403)); + } + + [Test] + public async Task UpdateParallelCorpusByIdForEngineByIdAsync() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + + WordAlignmentParallelCorpus result = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + var updateConfig = new WordAlignmentParallelCorpusUpdateConfig + { + SourceCorpusIds = [SOURCE_CORPUS_ID_1], + TargetCorpusIds = [TARGET_CORPUS_ID] + }; + await client.UpdateParallelCorpusAsync(ECHO_ENGINE1_ID, result.Id, updateConfig); + Engine? engine = await _env.Engines.GetAsync(ECHO_ENGINE1_ID); + if (engine == null) + { + Assert.Fail("Engine not found"); + return; + } + Assert.Multiple(() => + { + Assert.That(engine.ParallelCorpora[0].SourceCorpora[0].Files[0].Filename, Is.EqualTo(FILE1_FILENAME)); + Assert.That(engine.ParallelCorpora[0].TargetCorpora[0].Files[0].Filename, Is.EqualTo(FILE2_FILENAME)); + }); + } + + [Test] + public void UpdateParallelCorpusByIdForEngineById_NoSuchCorpus() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + var updateConfig = new WordAlignmentParallelCorpusUpdateConfig + { + SourceCorpusIds = [SOURCE_CORPUS_ID_1], + TargetCorpusIds = [TARGET_CORPUS_ID] + }; + await client.UpdateParallelCorpusAsync(ECHO_ENGINE1_ID, DOES_NOT_EXIST_CORPUS_ID, updateConfig); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void UpdateParallelCorpusByIdForEngineById_NoSuchEngine() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + var updateConfig = new WordAlignmentParallelCorpusUpdateConfig + { + SourceCorpusIds = [SOURCE_CORPUS_ID_1], + TargetCorpusIds = [TARGET_CORPUS_ID] + }; + await client.UpdateParallelCorpusAsync(DOES_NOT_EXIST_ENGINE_ID, SOURCE_CORPUS_ID_1, updateConfig); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void UpdateParallelCorpusByIdForEngineById_NotAuthorized() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(new[] { Scopes.ReadFiles }); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + var updateConfig = new WordAlignmentParallelCorpusUpdateConfig + { + SourceCorpusIds = [SOURCE_CORPUS_ID_1], + TargetCorpusIds = [TARGET_CORPUS_ID] + }; + await client.UpdateParallelCorpusAsync(ECHO_ENGINE1_ID, DOES_NOT_EXIST_CORPUS_ID, updateConfig); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(403)); + } + + [Test] + public async Task GetAllParallelCorporaForEngineByIdAsync() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + + WordAlignmentParallelCorpus result = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + WordAlignmentParallelCorpus resultAfterAdd = (await client.GetAllParallelCorporaAsync(ECHO_ENGINE1_ID)).First(); + Assert.Multiple(() => + { + Assert.That(resultAfterAdd.Id, Is.EqualTo(result.Id)); + Assert.That(resultAfterAdd.SourceCorpora.First().Id, Is.EqualTo(result.SourceCorpora.First().Id)); + }); + } + + [Test] + public void GetAllParallelCorporaForEngineById_NoSuchEngine() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + WordAlignmentParallelCorpus result = ( + await client.GetAllParallelCorporaAsync(DOES_NOT_EXIST_ENGINE_ID) + ).First(); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void GetAllParallelCorporaForEngineById_NotAuthorized() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(new[] { Scopes.ReadFiles }); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + WordAlignmentParallelCorpus result = (await client.GetAllParallelCorporaAsync(ECHO_ENGINE1_ID)).First(); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(403)); + } + + [Test] + public async Task GetParallelCorpusByIdForEngineByIdAsync() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + WordAlignmentParallelCorpus result = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + + Assert.That(result, Is.Not.Null); + WordAlignmentParallelCorpus resultAfterAdd = await client.GetParallelCorpusAsync(ECHO_ENGINE1_ID, result.Id); + Assert.Multiple(() => + { + Assert.That(resultAfterAdd.Id, Is.EqualTo(result.Id)); + Assert.That(resultAfterAdd.SourceCorpora[0].Id, Is.EqualTo(result.SourceCorpora[0].Id)); + }); + } + + [Test] + public void GetParallelCorpusByIdForEngineById_NoCorpora() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + WordAlignmentParallelCorpus result_afterAdd = await client.GetParallelCorpusAsync( + ECHO_ENGINE1_ID, + DOES_NOT_EXIST_CORPUS_ID + ); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void GetParallelCorpusByIdForEngineById_NoSuchEngine() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + WordAlignmentParallelCorpus result_afterAdd = await client.GetParallelCorpusAsync( + DOES_NOT_EXIST_ENGINE_ID, + SOURCE_CORPUS_ID_1 + ); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public async Task GetParallelCorpusByIdForEngineById_NoSuchCorpus() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + WordAlignmentParallelCorpus result = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + WordAlignmentParallelCorpus result_afterAdd = await client.GetParallelCorpusAsync( + ECHO_ENGINE1_ID, + DOES_NOT_EXIST_CORPUS_ID + ); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void GetParallelCorpusByIdForEngineById_NotAuthorized() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(new[] { Scopes.ReadFiles }); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + WordAlignmentParallelCorpus result_afterAdd = await client.GetParallelCorpusAsync( + ECHO_ENGINE1_ID, + DOES_NOT_EXIST_CORPUS_ID + ); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(403)); + } + + [Test] + public async Task DeleteParallelCorpusByIdForEngineByIdAsync() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + + WordAlignmentParallelCorpus result = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + await client.DeleteParallelCorpusAsync(ECHO_ENGINE1_ID, result.Id); + ICollection resultsAfterDelete = await client.GetAllParallelCorporaAsync( + ECHO_ENGINE1_ID + ); + Assert.That(resultsAfterDelete, Has.Count.EqualTo(0)); + } + + [Test] + public void DeleteParallelCorpusByIdForEngineById_NoSuchCorpus() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + await client.DeleteParallelCorpusAsync(ECHO_ENGINE1_ID, DOES_NOT_EXIST_CORPUS_ID); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void DeleteParallelCorpusByIdForEngineById_NoSuchEngine() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.DeleteParallelCorpusAsync(DOES_NOT_EXIST_ENGINE_ID, SOURCE_CORPUS_ID_1); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void DeleteParallelCorpusByIdForEngineById_NotAuthorized() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(new[] { Scopes.ReadFiles }); + + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.DeleteParallelCorpusAsync(ECHO_ENGINE1_ID, SOURCE_CORPUS_ID_1); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(403)); + } + + [Test] + public async Task GetAllWordAlignmentsAsync_Exists() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + WordAlignmentParallelCorpus addedCorpus = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + + await _env.Engines.UpdateAsync(ECHO_ENGINE1_ID, u => u.Set(e => e.ModelRevision, 1)); + var wordAlignment = new WordAlignment.Models.WordAlignment + { + CorpusRef = addedCorpus.Id, + TextId = "all", + EngineRef = ECHO_ENGINE1_ID, + Refs = ["ref1", "ref2"], + SourceTokens = ["This", "is", "a", "test", "."], + TargetTokens = ["This", "is", "a", "test", "."], + Alignment = CreateNAlignedWordPair(5), + Confidences = [1, 1, 1, 1, 1], + ModelRevision = 1 + }; + await _env.WordAlignments.InsertAsync(wordAlignment); + + ICollection results = await client.GetAllWordAlignmentsAsync( + ECHO_ENGINE1_ID, + addedCorpus.Id + ); + Assert.That(results.All(p => p.TextId == "all"), Is.True); + } + + [Test] + public void GetAllWordAlignmentsAsync_EngineDoesNotExist() + { + 
WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + + ServalApiException? ex = Assert.ThrowsAsync( + () => client.GetAllWordAlignmentsAsync(DOES_NOT_EXIST_ENGINE_ID, "cccccccccccccccccccccccc") + ); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public void GetAllWordAlignmentsAsync_CorpusDoesNotExist() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + + ServalApiException? ex = Assert.ThrowsAsync( + () => client.GetAllWordAlignmentsAsync(ECHO_ENGINE1_ID, "cccccccccccccccccccccccc") + ); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + [Test] + public async Task GetAllWordAlignmentsAsync_EngineNotBuilt() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + WordAlignmentParallelCorpus addedCorpus = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + + ServalApiException? ex = Assert.ThrowsAsync( + () => client.GetAllWordAlignmentsAsync(ECHO_ENGINE1_ID, addedCorpus.Id) + ); + Assert.That(ex?.StatusCode, Is.EqualTo(409)); + } + + [Test] + public async Task GetAllWordAlignmentsAsync_TextIdExists() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + WordAlignmentParallelCorpus addedCorpus = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + + await _env.Engines.UpdateAsync(ECHO_ENGINE1_ID, u => u.Set(e => e.ModelRevision, 1)); + var wordAlignment = new WordAlignment.Models.WordAlignment + { + CorpusRef = addedCorpus.Id, + TextId = "all", + EngineRef = ECHO_ENGINE1_ID, + Refs = ["ref1", "ref2"], + SourceTokens = ["This", "is", "a", "test", "."], + TargetTokens = ["This", "is", "a", "test", "."], + Alignment = CreateNAlignedWordPair(5), + Confidences = [1, 1, 1, 1, 1], + ModelRevision = 1 + }; + await _env.WordAlignments.InsertAsync(wordAlignment); + + ICollection results = await client.GetAllWordAlignmentsAsync( + ECHO_ENGINE1_ID, + 
addedCorpus.Id, + "all" + ); + Assert.That(results.All(p => p.TextId == "all"), Is.True); + } + + [Test] + public async Task GetAllWordAlignmentsAsync_TextIdDoesNotExist() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + WordAlignmentParallelCorpus addedCorpus = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + + await _env.Engines.UpdateAsync(ECHO_ENGINE1_ID, u => u.Set(e => e.ModelRevision, 1)); + var wordAlignment = new WordAlignment.Models.WordAlignment + { + CorpusRef = addedCorpus.Id, + TextId = "all", + EngineRef = ECHO_ENGINE1_ID, + Refs = ["ref1", "ref2"], + SourceTokens = ["This", "is", "a", "test", "."], + TargetTokens = ["This", "is", "a", "test", "."], + Alignment = CreateNAlignedWordPair(5), + Confidences = [1, 1, 1, 1, 1], + ModelRevision = 1 + }; + await _env.WordAlignments.InsertAsync(wordAlignment); + ICollection results = await client.GetAllWordAlignmentsAsync( + ECHO_ENGINE1_ID, + addedCorpus.Id, + "not_the_right_id" + ); + Assert.That(results, Is.Empty); + } + + [Test] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 200, STATISTICAL_ENGINE_ID)] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 404, DOES_NOT_EXIST_ENGINE_ID, false)] + [TestCase(new[] { Scopes.ReadFiles }, 403, ECHO_ENGINE1_ID)] //Arbitrary unrelated privilege + public async Task GetAllBuildsForEngineByIdAsync( + IEnumerable scope, + int expectedStatusCode, + string engineId, + bool addBuild = true + ) + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(scope); + Build? 
build = null; + if (addBuild) + { + build = new Build { EngineRef = engineId }; + await _env.Builds.InsertAsync(build); + } + switch (expectedStatusCode) + { + case 200: + ICollection results = await client.GetAllBuildsAsync(engineId); + Assert.That(results, Is.Not.Empty); + Assert.Multiple(() => + { + Assert.That(results.First().Revision, Is.EqualTo(1)); + Assert.That(results.First().Id, Is.EqualTo(build?.Id)); + Assert.That(results.First().State, Is.EqualTo(JobState.Pending)); + }); + break; + case 403: + case 404: + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.GetAllBuildsAsync(engineId); + }); + // The failure must carry the status code this test case expects, not just any API error. + Assert.That(ex?.StatusCode, Is.EqualTo(expectedStatusCode)); + break; + default: + Assert.Fail("Unanticipated expectedStatusCode. Check test case for typo."); + break; + } + } + + [Test] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 200, STATISTICAL_ENGINE_ID)] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 408, STATISTICAL_ENGINE_ID, true)] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 404, DOES_NOT_EXIST_ENGINE_ID, false)] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 404, STATISTICAL_ENGINE_ID, false)] + [TestCase(new[] { Scopes.ReadFiles }, 403, ECHO_ENGINE1_ID)] //Arbitrary unrelated privilege + public async Task GetBuildByIdForEngineByIdAsync( + IEnumerable scope, + int expectedStatusCode, + string engineId, + bool addBuild = true + ) + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(scope); + Build? 
build = null; + if (addBuild) + { + build = new Build { EngineRef = engineId }; + await _env.Builds.InsertAsync(build); + } + + switch (expectedStatusCode) + { + case 200: + { + Assert.That(build, Is.Not.Null); + WordAlignmentBuild result = await client.GetBuildAsync(engineId, build.Id); + Assert.Multiple(() => + { + Assert.That(result.Revision, Is.EqualTo(1)); + Assert.That(result.Id, Is.EqualTo(build.Id)); + Assert.That(result.State, Is.EqualTo(JobState.Pending)); + }); + break; + } + case 403: + case 404: + { + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + await client.GetBuildAsync(engineId, "bbbbbbbbbbbbbbbbbbbbbbbb"); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(expectedStatusCode)); + break; + } + case 408: + { + Assert.That(build, Is.Not.Null); + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.GetBuildAsync(engineId, build.Id, 3); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(expectedStatusCode)); + break; + } + default: + Assert.Fail("Unanticipated expectedStatusCode. Check test case for typo."); + break; + } + } + + [Test] + [TestCase( + new[] { Scopes.UpdateWordAlignmentEngines, Scopes.CreateWordAlignmentEngines, Scopes.ReadWordAlignmentEngines }, + 201, + ECHO_ENGINE1_ID + )] + [TestCase( + new[] { Scopes.UpdateWordAlignmentEngines, Scopes.CreateWordAlignmentEngines, Scopes.ReadWordAlignmentEngines }, + 404, + DOES_NOT_EXIST_ENGINE_ID + )] + [TestCase( + new[] { Scopes.UpdateWordAlignmentEngines, Scopes.CreateWordAlignmentEngines, Scopes.ReadWordAlignmentEngines }, + 400, + ECHO_ENGINE1_ID + )] + [TestCase(new[] { Scopes.ReadFiles }, 403, ECHO_ENGINE1_ID)] //Arbitrary unrelated privilege + public async Task StartBuildForEngineByIdAsync(IEnumerable scope, int expectedStatusCode, string engineId) + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(scope); + TrainingCorpusConfig2 tcc; + WordAlignmentBuildConfig tbc; + switch (expectedStatusCode) + { + case 201: + WordAlignmentParallelCorpus addedCorpus = await client.AddParallelCorpusAsync( + engineId, + TestParallelCorpusConfig + ); + tcc = new TrainingCorpusConfig2 + { + ParallelCorpusId = addedCorpus.Id, + SourceFilters = [new ParallelCorpusFilterConfig2 { TextIds = ["all"] }], + TargetFilters = [new ParallelCorpusFilterConfig2 { TextIds = ["all"] }] + }; + tbc = new WordAlignmentBuildConfig + { + WordAlignOn = [tcc], + TrainOn = [tcc], + Options = """ + {"max_steps":10, + "use_key_terms":false, + "some_double":10.5, + "some_nested": {"more_nested": 
{"other_double":10.5}}, + "some_string":"string"} + """ + }; + WordAlignmentBuild resultAfterStart; + Assert.ThrowsAsync(async () => + { + resultAfterStart = await client.GetCurrentBuildAsync(engineId); + }); + + WordAlignmentBuild build = await client.StartBuildAsync(engineId, tbc); + Assert.That(build, Is.Not.Null); + + build = await client.GetCurrentBuildAsync(engineId); + Assert.That(build, Is.Not.Null); + + Assert.That(build.DeploymentVersion, Is.Not.Null); + + break; + case 400: + case 403: + case 404: + + tcc = new TrainingCorpusConfig2 + { + ParallelCorpusId = "cccccccccccccccccccccccc", + SourceFilters = [new ParallelCorpusFilterConfig2 { TextIds = ["all"] }], + TargetFilters = [new ParallelCorpusFilterConfig2 { TextIds = ["all"] }] + }; + tbc = new WordAlignmentBuildConfig { WordAlignOn = [tcc], TrainOn = [tcc] }; + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.StartBuildAsync(engineId, tbc); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(expectedStatusCode)); + break; + default: + Assert.Fail("Unanticipated expectedStatusCode. 
Check test case for typo."); + break; + } + } + + [TestCase] + public async Task StartBuildForEngineAsync_UnparsableOptions() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + WordAlignmentParallelCorpus addedCorpus = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfig + ); + TrainingCorpusConfig2 tcc = + new() + { + ParallelCorpusId = addedCorpus.Id, + SourceFilters = [new ParallelCorpusFilterConfig2 { TextIds = ["all"] }], + TargetFilters = [new ParallelCorpusFilterConfig2 { TextIds = ["all"] }] + }; + WordAlignmentBuildConfig tbc = + new() + { + WordAlignOn = [tcc], + TrainOn = [tcc], + Options = "unparsable json" + }; + WordAlignmentBuild resultAfterStart; + Assert.ThrowsAsync(async () => + { + resultAfterStart = await client.GetCurrentBuildAsync(ECHO_ENGINE1_ID); + }); + + Assert.That( + () => client.StartBuildAsync(ECHO_ENGINE1_ID, tbc), + Throws.TypeOf().With.Message.Contains("Unable to parse field 'options'") + ); + } + + [Test] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 200, ECHO_ENGINE1_ID)] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 408, ECHO_ENGINE1_ID)] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 204, ECHO_ENGINE1_ID, false)] + [TestCase(new[] { Scopes.ReadWordAlignmentEngines }, 404, DOES_NOT_EXIST_ENGINE_ID, false)] + [TestCase(new[] { Scopes.ReadFiles }, 403, ECHO_ENGINE1_ID, false)] //Arbitrary unrelated privilege + public async Task GetCurrentBuildForEngineByIdAsync( + IEnumerable scope, + int expectedStatusCode, + string engineId, + bool addBuild = true + ) + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(scope); + Build? 
build = null; + if (addBuild) + { + build = new Build { EngineRef = engineId }; + await _env.Builds.InsertAsync(build); + } + + switch (expectedStatusCode) + { + case 200: + { + Assert.That(build, Is.Not.Null); + WordAlignmentBuild result = await client.GetCurrentBuildAsync(engineId); + Assert.That(result.Id, Is.EqualTo(build.Id)); + break; + } + case 204: + case 403: + case 404: + { + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.GetCurrentBuildAsync(engineId); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(expectedStatusCode)); + break; + } + case 408: + { + ServalApiException? ex = Assert.ThrowsAsync(async () => + { + await client.GetCurrentBuildAsync(engineId, minRevision: 3); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(expectedStatusCode)); + break; + } + default: + Assert.Fail("Unanticipated expectedStatusCode. Check test case for typo."); + break; + } + } + + [Test] + [TestCase(new[] { Scopes.UpdateWordAlignmentEngines }, 200, ECHO_ENGINE1_ID)] + [TestCase(new[] { Scopes.UpdateWordAlignmentEngines }, 404, DOES_NOT_EXIST_ENGINE_ID, false)] + [TestCase(new[] { Scopes.UpdateWordAlignmentEngines }, 204, ECHO_ENGINE1_ID, false)] + [TestCase(new[] { Scopes.ReadFiles }, 403, ECHO_ENGINE1_ID, false)] //Arbitrary unrelated privilege + public async Task CancelCurrentBuildForEngineByIdAsync( + IEnumerable scope, + int expectedStatusCode, + string engineId, + bool addBuild = true + ) + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(scope); + if (!addBuild) + { + var build = new Build { EngineRef = engineId }; + await _env.Builds.InsertAsync(build); + _env.StatisticalClient.CancelBuildAsync( + Arg.Any(), + null, + null, + Arg.Any() + ) + .Returns(CreateAsyncUnaryCall(StatusCode.Aborted)); + } + + switch (expectedStatusCode) + { + case 200: + case 204: + await client.CancelBuildAsync(engineId); + break; + case 403: + case 404: + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + await client.CancelBuildAsync(engineId); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(expectedStatusCode)); + break; + default: + Assert.Fail("Unanticipated expectedStatusCode. Check test case for typo."); + break; + } + } + + [Test] + public async Task StartBuild_ParallelCorpus() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + WordAlignmentParallelCorpus addedCorpus = await client.AddParallelCorpusAsync( + STATISTICAL_ENGINE_ID, + TestParallelCorpusConfig + ); + TrainingCorpusConfig2 tcc = + new() + { + ParallelCorpusId = addedCorpus.Id, + SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID_1, TextIds = ["all"] }], + TargetFilters = [new() { CorpusId = TARGET_CORPUS_ID, TextIds = ["all"] }] + }; + ; + WordAlignmentBuildConfig tbc = new WordAlignmentBuildConfig + { + WordAlignOn = [tcc], + TrainOn = [tcc], + Options = """ + {"max_steps":10, + "use_key_terms":false, + "some_double":10.5, + "some_nested": {"more_nested": {"other_double":10.5}}, + "some_string":"string"} + """ + }; + WordAlignmentBuild resultAfterStart; + Assert.ThrowsAsync(async () => + { + resultAfterStart = await client.GetCurrentBuildAsync(STATISTICAL_ENGINE_ID); + }); + + WordAlignmentBuild build = await client.StartBuildAsync(STATISTICAL_ENGINE_ID, tbc); + Assert.That(build, Is.Not.Null); + + build = await client.GetCurrentBuildAsync(STATISTICAL_ENGINE_ID); + Assert.That(build, Is.Not.Null); + } + + [Test] + public async Task StartBuildAsync_ParallelCorpus() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + WordAlignmentParallelCorpus addedCorpus = await client.AddParallelCorpusAsync( + STATISTICAL_ENGINE_ID, + TestParallelCorpusConfig + ); + TrainingCorpusConfig2 tcc = + new() + { + ParallelCorpusId = addedCorpus.Id, + SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID_1, TextIds = ["all"] }], + TargetFilters = [new() { CorpusId = TARGET_CORPUS_ID, TextIds = ["all"] }] + }; + ; 
+ WordAlignmentBuildConfig tbc = new WordAlignmentBuildConfig + { + WordAlignOn = [tcc], + TrainOn = [tcc], + Options = """ + {"max_steps":10, + "use_key_terms":false, + "some_double":10.5, + "some_nested": {"more_nested": {"other_double":10.5}}, + "some_string":"string"} + """ + }; + WordAlignmentBuild resultAfterStart; + Assert.ThrowsAsync(async () => + { + resultAfterStart = await client.GetCurrentBuildAsync(STATISTICAL_ENGINE_ID); + }); + + WordAlignmentBuild build = await client.StartBuildAsync(STATISTICAL_ENGINE_ID, tbc); + Assert.That(build, Is.Not.Null); + + build = await client.GetCurrentBuildAsync(STATISTICAL_ENGINE_ID); + Assert.That(build, Is.Not.Null); + } + + [Test] + public async Task StartBuildAsync_ParallelCorpus_NoFilter() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + WordAlignmentParallelCorpus addedCorpus = await client.AddParallelCorpusAsync( + STATISTICAL_ENGINE_ID, + TestParallelCorpusConfig + ); + TrainingCorpusConfig2 tcc = + new() + { + ParallelCorpusId = addedCorpus.Id, + SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID_1 }], + TargetFilters = [new() { CorpusId = TARGET_CORPUS_ID }] + }; + ; + WordAlignmentBuildConfig tbc = new WordAlignmentBuildConfig + { + WordAlignOn = [tcc], + TrainOn = [tcc], + Options = """ + {"max_steps":10, + "use_key_terms":false, + "some_double":10.5, + "some_nested": {"more_nested": {"other_double":10.5}}, + "some_string":"string"} + """ + }; + WordAlignmentBuild resultAfterStart; + Assert.ThrowsAsync(async () => + { + resultAfterStart = await client.GetCurrentBuildAsync(STATISTICAL_ENGINE_ID); + }); + + WordAlignmentBuild build = await client.StartBuildAsync(STATISTICAL_ENGINE_ID, tbc); + Assert.That(build, Is.Not.Null); + Assert.That(build.TrainOn, Is.Not.Null); + Assert.That(build.TrainOn.Count, Is.EqualTo(1)); + Assert.That(build.TrainOn[0].SourceFilters, Is.Null); + Assert.That(build.TrainOn[0].TargetFilters, Is.Null); + Assert.That(build.WordAlignOn, 
Is.Not.Null); + Assert.That(build.WordAlignOn.Count, Is.EqualTo(1)); + Assert.That(build.WordAlignOn[0].SourceFilters, Is.Null); + Assert.That(build.WordAlignOn[0].TargetFilters, Is.Null); + + build = await client.GetCurrentBuildAsync(STATISTICAL_ENGINE_ID); + Assert.That(build, Is.Not.Null); + } + + [Test] + public async Task StartBuildAsync_ParallelCorpus_PretranslateNoCorpusSpecified() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + WordAlignmentParallelCorpus addedParallelCorpus = await client.AddParallelCorpusAsync( + STATISTICAL_ENGINE_ID, + TestMixedParallelCorpusConfig + ); + TrainingCorpusConfig2 wacc = new() { }; + TrainingCorpusConfig2 tcc = new() { ParallelCorpusId = addedParallelCorpus.Id }; + WordAlignmentBuildConfig tbc = new WordAlignmentBuildConfig { WordAlignOn = [wacc], TrainOn = [tcc] }; + WordAlignmentBuild resultAfterStart; + Assert.ThrowsAsync(async () => + { + resultAfterStart = await client.StartBuildAsync(STATISTICAL_ENGINE_ID, tbc); + }); + } + + [Test] + public async Task StartBuildAsync_ParallelCorpus_PretranslateFilterOnMultipleSources() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + WordAlignmentParallelCorpus addedParallelCorpus = await client.AddParallelCorpusAsync( + STATISTICAL_ENGINE_ID, + TestParallelCorpusConfig + ); + TrainingCorpusConfig2 wacc = + new() + { + ParallelCorpusId = addedParallelCorpus.Id, + SourceFilters = + [ + new ParallelCorpusFilterConfig2() { CorpusId = SOURCE_CORPUS_ID_1 }, + new ParallelCorpusFilterConfig2() { CorpusId = SOURCE_CORPUS_ID_2 } + ] + }; + TrainingCorpusConfig2 tcc = new() { ParallelCorpusId = addedParallelCorpus.Id }; + WordAlignmentBuildConfig tbc = new WordAlignmentBuildConfig { WordAlignOn = [wacc], TrainOn = [tcc] }; + Assert.ThrowsAsync(async () => + { + await client.StartBuildAsync(STATISTICAL_ENGINE_ID, tbc); + }); + } + + [Test] + public async Task StartBuildAsync_ParallelCorpus_TrainOnNoCorpusSpecified() + { + 
WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + WordAlignmentParallelCorpus addedParallelCorpus = await client.AddParallelCorpusAsync( + STATISTICAL_ENGINE_ID, + TestParallelCorpusConfig + ); + TrainingCorpusConfig2 wacc = new() { ParallelCorpusId = addedParallelCorpus.Id }; + TrainingCorpusConfig2 tcc = new() { }; + WordAlignmentBuildConfig tbc = new WordAlignmentBuildConfig { WordAlignOn = [wacc], TrainOn = [tcc] }; + WordAlignmentBuild resultAfterStart; + Assert.ThrowsAsync(async () => + { + resultAfterStart = await client.StartBuildAsync(STATISTICAL_ENGINE_ID, tbc); + }); + } + + [Test] + public async Task TryToQueueMultipleBuildsPerSingleUser() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + string engineId = STATISTICAL_ENGINE_ID; + int expectedStatusCode = 409; + WordAlignmentParallelCorpus addedCorpus = await client.AddParallelCorpusAsync( + engineId, + TestParallelCorpusConfig + ); + TrainingCorpusConfig2 wacc = new() { ParallelCorpusId = addedCorpus.Id }; + var tbc = new WordAlignmentBuildConfig { WordAlignOn = [wacc] }; + WordAlignmentBuild build = await client.StartBuildAsync(engineId, tbc); + _env.StatisticalClient.StartBuildAsync(Arg.Any(), null, null, Arg.Any()) + .Returns(CreateAsyncUnaryCall(StatusCode.Aborted)); + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + build = await client.StartBuildAsync(engineId, tbc); + }); + Assert.That(ex, Is.Not.Null); + Assert.That(ex.StatusCode, Is.EqualTo(expectedStatusCode)); + } + + [Test] + public async Task GetWordAlignmentsByTextId() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + WordAlignmentParallelCorpus addedCorpus = await client.AddParallelCorpusAsync( + ECHO_ENGINE1_ID, + TestParallelCorpusConfigScripture + ); + + await _env.Engines.UpdateAsync(ECHO_ENGINE1_ID, u => u.Set(e => e.ModelRevision, 1)); + var wordAlignment = new WordAlignment.Models.WordAlignment + { + CorpusRef = addedCorpus.Id, + TextId = "MAT", + EngineRef = ECHO_ENGINE1_ID, + Refs = ["MAT 1:1"], + SourceTokens = ["This", "is", "a", "test", "."], + TargetTokens = ["This", "is", "a", "test", "."], + Alignment = CreateNAlignedWordPair(5), + Confidences = [1, 1, 1, 1, 1], + ModelRevision = 1 + }; + await _env.WordAlignments.InsertAsync(wordAlignment); + + IList wordAlignments = await client.GetAllWordAlignmentsAsync( + ECHO_ENGINE1_ID, + addedCorpus.Id, + "MAT" + ); + Assert.That(wordAlignments, Has.Count.EqualTo(1)); + Assert.That(wordAlignments[0].SourceTokens, Is.EqualTo(new[] { "This", "is", "a", "test", "." })); + } + + [Test] + public void GetWordAlignmentsByTextId_EngineDoesNotExist() + { + WordAlignmentEnginesClient client = _env.CreateWordAlignmentEnginesClient(); + ServalApiException? 
ex = Assert.ThrowsAsync(async () => + { + await client.GetAllWordAlignmentsAsync(DOES_NOT_EXIST_ENGINE_ID, DOES_NOT_EXIST_CORPUS_ID, "MAT"); + }); + Assert.That(ex?.StatusCode, Is.EqualTo(404)); + } + + // [Test] + // [TestCase("Nmt")] + // [TestCase("EchoWordAlignment")] + // public async Task GetQueueAsync(string engineType) + // { + // TranslationEngineTypesClient client = _env.CreateTranslationEngineTypesClient(); + // Client.Queue queue = await client.GetQueueAsync(engineType); + // Assert.That(queue.Size, Is.EqualTo(0)); + // } + + // [Test] + // public void GetQueueAsync_NotAuthorized() + // { + // TranslationEngineTypesClient client = _env.CreateTranslationEngineTypesClient([Scopes.ReadFiles]); + // ServalApiException? ex = Assert.ThrowsAsync(async () => + // { + // Client.Queue queue = await client.GetQueueAsync("EchoWordAlignment"); + // }); + // Assert.That(ex, Is.Not.Null); + // Assert.That(ex.StatusCode, Is.EqualTo(403)); + // } + + [Test] + public async Task DataFileUpdate_Propagated() + { + WordAlignmentEnginesClient translationClient = _env.CreateWordAlignmentEnginesClient(); + DataFilesClient dataFilesClient = _env.CreateDataFilesClient(); + CorporaClient corporaClient = _env.CreateCorporaClient(); + await translationClient.AddParallelCorpusAsync(ECHO_ENGINE1_ID, TestParallelCorpusConfig); + + // Get the original files + DataFile orgFileFromClient = await dataFilesClient.GetAsync(FILE1_SRC_ID); + DataFiles.Models.DataFile orgFileFromRepo = (await _env.DataFiles.GetAsync(FILE1_SRC_ID))!; + DataFiles.Models.Corpus orgCorpusFromRepo = (await _env.Corpora.GetAsync(TARGET_CORPUS_ID))!; + Assert.That(orgFileFromClient.Name, Is.EqualTo(orgFileFromRepo.Name)); + Assert.That(orgCorpusFromRepo.Files[0].FileRef, Is.EqualTo(FILE2_TRG_ID)); + + // Update the file + await dataFilesClient.UpdateAsync(FILE1_SRC_ID, new FileParameter(new MemoryStream([1, 2, 3]), "test.txt")); + await corporaClient.UpdateAsync( + TARGET_CORPUS_ID, + [new CorpusFileConfig { FileId = 
FILE4_TRG_ZIP_ID, TextId = "all" }] + ); + + // Confirm the change is propagated everywhere + DataFiles.Models.DataFile newFileFromRepo = (await _env.DataFiles.GetAsync(FILE1_SRC_ID))!; + Assert.That(newFileFromRepo.Filename, Is.Not.EqualTo(orgFileFromRepo.Filename)); + + Engine newEngine = (await _env.Engines.GetAsync(ECHO_ENGINE1_ID))!; + + // Updated parallel corpus file filename + Assert.That( + newEngine.ParallelCorpora[0].SourceCorpora[0].Files[0].Filename, + Is.EqualTo(newFileFromRepo.Filename) + ); + + // Updated set of new corpus files + Assert.That(newEngine.ParallelCorpora[0].TargetCorpora[0].Id, Is.EqualTo(TARGET_CORPUS_ID)); + Assert.That(newEngine.ParallelCorpora[0].TargetCorpora[0].Files[0].Id, Is.EqualTo(FILE4_TRG_ZIP_ID)); + Assert.That(newEngine.ParallelCorpora[0].TargetCorpora[0].Files[0].Filename, Is.EqualTo(FILE4_FILENAME)); + Assert.That(newEngine.ParallelCorpora[0].TargetCorpora[0].Files.Count, Is.EqualTo(1)); + } + + [TearDown] + public void TearDown() + { + _env.Dispose(); + } + + private static IReadOnlyList CreateNAlignedWordPair(int numberOfAlignedWords) + { + var alignedWordPairs = new List(); + for (int i = 0; i < numberOfAlignedWords; i++) + { + alignedWordPairs.Add(new Shared.Models.AlignedWordPair { SourceIndex = i, TargetIndex = i }); + } + return alignedWordPairs; + } + + private class TestEnvironment : DisposableBase + { + private readonly IServiceScope _scope; + private readonly MongoClient _mongoClient; + + public TestEnvironment() + { + var clientSettings = new MongoClientSettings { LinqProvider = LinqProvider.V2 }; + _mongoClient = new MongoClient(clientSettings); + ResetDatabases(); + + Factory = new ServalWebApplicationFactory(); + _scope = Factory.Services.CreateScope(); + Engines = _scope.ServiceProvider.GetRequiredService>(); + DataFiles = _scope.ServiceProvider.GetRequiredService>(); + Corpora = _scope.ServiceProvider.GetRequiredService>(); + WordAlignments = _scope.ServiceProvider.GetRequiredService< + IRepository + 
>(); + Builds = _scope.ServiceProvider.GetRequiredService>(); + + EchoClient = Substitute.For(); + EchoClient + .CreateAsync(Arg.Any(), null, null, Arg.Any()) + .Returns(CreateAsyncUnaryCall(new Empty())); + EchoClient + .DeleteAsync(Arg.Any(), null, null, Arg.Any()) + .Returns(CreateAsyncUnaryCall(new Empty())); + EchoClient + .StartBuildAsync(Arg.Any(), null, null, Arg.Any()) + .Returns(CreateAsyncUnaryCall(new Empty())); + EchoClient + .CancelBuildAsync(Arg.Any(), null, null, Arg.Any()) + .Returns(CreateAsyncUnaryCall(new Empty())); + var wordAlignmentResult = new WordAlignment.V1.WordAlignmentResult + { + SourceTokens = { "This is a test .".Split() }, + TargetTokens = { "This is a test .".Split() }, + Confidences = { 1.0, 1.0, 1.0, 1.0, 1.0 }, + Alignment = + { + new WordAlignment.V1.AlignedWordPair { SourceIndex = 0, TargetIndex = 0 }, + new WordAlignment.V1.AlignedWordPair { SourceIndex = 1, TargetIndex = 1 }, + new WordAlignment.V1.AlignedWordPair { SourceIndex = 2, TargetIndex = 2 }, + new WordAlignment.V1.AlignedWordPair { SourceIndex = 3, TargetIndex = 3 }, + new WordAlignment.V1.AlignedWordPair { SourceIndex = 4, TargetIndex = 4 } + } + }; + var wordAlignmentResponse = new GetWordAlignmentResponse { Result = wordAlignmentResult }; + EchoClient + .GetWordAlignmentAsync(Arg.Any(), null, null, Arg.Any()) + .Returns(CreateAsyncUnaryCall(wordAlignmentResponse)); + EchoClient + .GetQueueSizeAsync(Arg.Any(), null, null, Arg.Any()) + .Returns(CreateAsyncUnaryCall(new GetQueueSizeResponse() { Size = 0 })); + + StatisticalClient = Substitute.For(); + StatisticalClient + .CreateAsync(Arg.Any(), null, null, Arg.Any()) + .Returns(CreateAsyncUnaryCall(new Empty())); + StatisticalClient + .DeleteAsync(Arg.Any(), null, null, Arg.Any()) + .Returns(CreateAsyncUnaryCall(new Empty())); + StatisticalClient + .StartBuildAsync(Arg.Any(), null, null, Arg.Any()) + .Returns(CreateAsyncUnaryCall(new Empty())); + StatisticalClient + .CancelBuildAsync(Arg.Any(), null, null, 
Arg.Any()) + .Returns(CreateAsyncUnaryCall(new Empty())); + StatisticalClient + .GetWordAlignmentAsync(Arg.Any(), null, null, Arg.Any()) + .Returns(CreateAsyncUnaryCall(StatusCode.Unimplemented)); + } + + public ServalWebApplicationFactory Factory { get; } + public IRepository Engines { get; } + public IRepository DataFiles { get; } + public IRepository Corpora { get; } + public IRepository WordAlignments { get; } + public IRepository Builds { get; } + public WordAlignmentEngineApi.WordAlignmentEngineApiClient EchoClient { get; } + public WordAlignmentEngineApi.WordAlignmentEngineApiClient StatisticalClient { get; } + + public WordAlignmentEnginesClient CreateWordAlignmentEnginesClient(IEnumerable? scope = null) + { + scope ??= + [ + Scopes.CreateWordAlignmentEngines, + Scopes.ReadWordAlignmentEngines, + Scopes.UpdateWordAlignmentEngines, + Scopes.DeleteWordAlignmentEngines + ]; + HttpClient httpClient = Factory + .WithWebHostBuilder(builder => + { + builder.ConfigureTestServices(services => + { + GrpcClientFactory grpcClientFactory = Substitute.For(); + grpcClientFactory + .CreateClient("EchoWordAlignment") + .Returns(EchoClient); + grpcClientFactory + .CreateClient("Statistical") + .Returns(StatisticalClient); + services.AddSingleton(grpcClientFactory); + services.AddTransient(CreateFileSystem); + }); + }) + .CreateClient(); + httpClient.DefaultRequestHeaders.Add("Scope", string.Join(" ", scope)); + return new WordAlignmentEnginesClient(httpClient); + } + + public DataFilesClient CreateDataFilesClient() + { + IEnumerable scope = [Scopes.DeleteFiles, Scopes.ReadFiles, Scopes.UpdateFiles, Scopes.CreateFiles]; + HttpClient httpClient = Factory + .WithWebHostBuilder(builder => + { + builder.ConfigureTestServices(services => + { + services.AddTransient(CreateFileSystem); + }); + }) + .CreateClient(); + if (scope is not null) + httpClient.DefaultRequestHeaders.Add("Scope", string.Join(" ", scope)); + return new DataFilesClient(httpClient); + } + + public CorporaClient 
CreateCorporaClient() + { + IEnumerable scope = [Scopes.DeleteFiles, Scopes.ReadFiles, Scopes.UpdateFiles, Scopes.CreateFiles]; + HttpClient httpClient = Factory.WithWebHostBuilder(_ => { }).CreateClient(); + if (scope is not null) + httpClient.DefaultRequestHeaders.Add("Scope", string.Join(" ", scope)); + return new CorporaClient(httpClient); + } + + public void ResetDatabases() + { + _mongoClient.DropDatabase("serval_test"); + _mongoClient.DropDatabase("serval_test_jobs"); + } + + private static IFileSystem CreateFileSystem(IServiceProvider sp) + { + IFileSystem fileSystem = Substitute.For(); + IOptionsMonitor dataFileOptions = sp.GetRequiredService< + IOptionsMonitor + >(); + fileSystem + .OpenZipFile(GetFilePath(dataFileOptions, FILE3_FILENAME)) + .Returns(ci => + { + IZipContainer source = CreateZipContainer("SRC"); + source.EntryExists("MATSRC.SFM").Returns(true); + string usfm = + $@"\id MAT - SRC +\h Matthew +\c 1 +\p +\v 1 Chapter one, verse one. +\v 2 Chapter one, verse two. +"; + source.OpenEntry("MATSRC.SFM").Returns(ci => new MemoryStream(Encoding.UTF8.GetBytes(usfm))); + return source; + }); + fileSystem + .OpenZipFile(GetFilePath(dataFileOptions, FILE4_FILENAME)) + .Returns(ci => + { + IZipContainer target = CreateZipContainer("TRG"); + target.EntryExists("MATTRG.SFM").Returns(false); + return target; + }); + fileSystem.OpenWrite(Arg.Any()).Returns(ci => new MemoryStream()); + return fileSystem; + } + + private static IZipContainer CreateZipContainer(string name) + { + IZipContainer container = Substitute.For(); + container.EntryExists("Settings.xml").Returns(true); + XElement settingsXml = + new( + "ScriptureText", + new XElement("StyleSheet", "usfm.sty"), + new XElement("Name", name), + new XElement("FullName", name), + new XElement("Encoding", "65001"), + new XElement( + "Naming", + new XAttribute("PrePart", ""), + new XAttribute("PostPart", $"{name}.SFM"), + new XAttribute("BookNameForm", "MAT") + ), + new XElement("BiblicalTermsListSetting", 
"Major::BiblicalTerms.xml") + ); + container + .OpenEntry("Settings.xml") + .Returns(new MemoryStream(Encoding.UTF8.GetBytes(settingsXml.ToString()))); + container.EntryExists("custom.vrs").Returns(false); + container.EntryExists("usfm.sty").Returns(false); + container.EntryExists("custom.sty").Returns(false); + return container; + } + + private static string GetFilePath(IOptionsMonitor dataFileOptions, string fileName) + { + return Path.Combine(dataFileOptions.CurrentValue.FilesDirectory, fileName); + } + + protected override void DisposeManagedResources() + { + _scope.Dispose(); + Factory.Dispose(); + ResetDatabases(); + } + } +} + +#pragma warning restore CS0612 // Type or member is obsolete diff --git a/src/Serval/test/Serval.E2ETests/ServalApiTests.cs b/src/Serval/test/Serval.E2ETests/ServalApiTests.cs index 2fb9f86a..f8c8bd09 100644 --- a/src/Serval/test/Serval.E2ETests/ServalApiTests.cs +++ b/src/Serval/test/Serval.E2ETests/ServalApiTests.cs @@ -48,6 +48,30 @@ public async Task GetEchoPretranslate() Assert.That(pretranslations, Has.Count.GreaterThan(1)); } + [Test] + public async Task GetEchoWordAlignment() + { + string engineId = await _helperClient.CreateNewEngineAsync("EchoWordAlignment", "es", "es", "Echo3"); + string[] books = ["1JN.txt", "2JN.txt", "3JN.txt"]; + ParallelCorpusConfig train_corpus = await _helperClient.MakeParallelTextCorpus(books, "es", "en", false); + await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, train_corpus, false); + await _helperClient.BuildEngineAsync(engineId); + WordAlignmentResult tResult = await _helperClient.WordAlignmentEnginesClient.GetWordAlignmentAsync( + engineId, + new WordAlignmentRequest() { SourceSegment = "espíritu verdad", TargetSegment = "espíritu verdad" } + ); + Assert.That( + tResult.Alignment, + Is.EqualTo( + new List + { + new() { SourceIndex = 0, TargetIndex = 0 }, + new() { SourceIndex = 1, TargetIndex = 1 } + } + ) + ); + } + [Test] public async Task GetSmtTranslation() { @@ -65,7 +89,7 
@@ public async Task GetSmtTranslation() [Test] public async Task GetSmtAddSegment() { - string engineId = await _helperClient.CreateNewEngineAsync("smt-transfer", "es", "en", "SMT3"); + string engineId = await _helperClient.CreateNewEngineAsync("SmtTransfer", "es", "en", "SMT3"); string[] books = ["1JN.txt", "2JN.txt", "3JN.txt"]; await _helperClient.AddTextCorpusToEngineAsync(engineId, books, "es", "en", false); await _helperClient.BuildEngineAsync(engineId); @@ -146,13 +170,8 @@ public async Task NmtQueueMultiple() const int NUM_WORKERS = 8; string[] engineIds = new string[NUM_ENGINES]; string[] books = ["MAT.txt", "1JN.txt", "2JN.txt"]; - TranslationParallelCorpusConfig train_corpus = await _helperClient.MakeParallelTextCorpus( - books, - "es", - "en", - false - ); - TranslationParallelCorpusConfig pretranslate_corpus = await _helperClient.MakeParallelTextCorpus( + ParallelCorpusConfig train_corpus = await _helperClient.MakeParallelTextCorpus(books, "es", "en", false); + ParallelCorpusConfig pretranslate_corpus = await _helperClient.MakeParallelTextCorpus( ["3JN.txt"], "es", "en", @@ -165,7 +184,7 @@ public async Task NmtQueueMultiple() string engineId = engineIds[i]; await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, train_corpus, false); await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, pretranslate_corpus, true); - await _helperClient.StartBuildAsync(engineId); + await _helperClient.StartTranslationBuildAsync(engineId); //Ensure that tasks are enqueued roughly in order await Task.Delay(1_000); } @@ -227,13 +246,8 @@ public async Task NmtLargeBatchAndDownload() TranslationEngine engine = await _helperClient.TranslationEnginesClient.GetAsync(engineId); Assert.That(engine.IsModelPersisted, Is.True); string[] books = ["bible_LARGEFILE.txt"]; - TranslationParallelCorpusConfig train_corpus = await _helperClient.MakeParallelTextCorpus( - books, - "es", - "en", - false - ); - TranslationParallelCorpusConfig pretranslate_corpus = await 
_helperClient.MakeParallelTextCorpus( + ParallelCorpusConfig train_corpus = await _helperClient.MakeParallelTextCorpus(books, "es", "en", false); + ParallelCorpusConfig pretranslate_corpus = await _helperClient.MakeParallelTextCorpus( ["3JN.txt"], "es", "en", @@ -316,7 +330,7 @@ public async Task CircuitousRouteTranslateTopNAsync() const int N = 3; //Create engine - string engineId = await _helperClient.CreateNewEngineAsync("smt-transfer", "en", "fa", "SMT6"); + string engineId = await _helperClient.CreateNewEngineAsync("SmtTransfer", "en", "fa", "SMT6"); //Retrieve engine TranslationEngine engine = await _helperClient.TranslationEnginesClient.GetAsync(engineId); @@ -355,7 +369,7 @@ public async Task CircuitousRouteTranslateTopNAsync() [Test] public async Task GetSmtCancelAndRestartBuild() { - string engineId = await _helperClient.CreateNewEngineAsync("smt-transfer", "es", "en", "SMT7"); + string engineId = await _helperClient.CreateNewEngineAsync("SmtTransfer", "es", "en", "SMT7"); string[] books = ["1JN.txt", "2JN.txt", "3JN.txt"]; await _helperClient.AddTextCorpusToEngineAsync(engineId, books, "es", "en", false); @@ -370,7 +384,7 @@ public async Task GetSmtCancelAndRestartBuild() async Task StartAndCancelTwice(string engineId) { // start and first job - TranslationBuild build = await _helperClient.StartBuildAsync(engineId); + TranslationBuild build = await _helperClient.StartTranslationBuildAsync(engineId); await Task.Delay(1000); build = await _helperClient.TranslationEnginesClient.GetBuildAsync(engineId, build.Id); Assert.That(build.State == JobState.Active || build.State == JobState.Pending); @@ -381,7 +395,7 @@ async Task StartAndCancelTwice(string engineId) Assert.That(build.State == JobState.Canceled); // do a second job normally and make sure it works. 
- build = await _helperClient.StartBuildAsync(engineId); + build = await _helperClient.StartTranslationBuildAsync(engineId); await Task.Delay(1000); build = await _helperClient.TranslationEnginesClient.GetBuildAsync(engineId, build.Id); Assert.That(build.State == JobState.Active || build.State == JobState.Pending); @@ -460,6 +474,30 @@ public async Task ParatextProjectNmtJobAsync() Assert.That(usfm, Does.Contain("\\v 1")); } + [Test] + public async Task GetWordAlignment() + { + string engineId = await _helperClient.CreateNewEngineAsync("Statistical", "es", "en", "STAT1"); + string[] books = ["1JN.txt", "2JN.txt", "3JN.txt"]; + ParallelCorpusConfig train_corpus = await _helperClient.MakeParallelTextCorpus(books, "es", "en", false); + await _helperClient.AddParallelTextCorpusToEngineAsync(engineId, train_corpus, false); + await _helperClient.BuildEngineAsync(engineId); + WordAlignmentResult tResult = await _helperClient.WordAlignmentEnginesClient.GetWordAlignmentAsync( + engineId, + new WordAlignmentRequest() { SourceSegment = "espíritu verdad", TargetSegment = "spirit truth" } + ); + Assert.That( + tResult.Alignment, + Is.EqualTo( + new List + { + new() { SourceIndex = 0, TargetIndex = 0 }, + new() { SourceIndex = 1, TargetIndex = 1 } + } + ) + ); + } + [TearDown] public async Task TearDown() { diff --git a/src/Serval/test/Serval.E2ETests/ServalClientHelper.cs b/src/Serval/test/Serval.E2ETests/ServalClientHelper.cs index 87f54a13..4320231f 100644 --- a/src/Serval/test/Serval.E2ETests/ServalClientHelper.cs +++ b/src/Serval/test/Serval.E2ETests/ServalClientHelper.cs @@ -2,15 +2,113 @@ namespace Serval.E2ETests; #pragma warning disable CS0612 // Type or member is obsolete +public enum EngineGroup +{ + Translation, + WordAlignment +} + +public record Build +{ + public string Id { get; set; } + public int Revision { get; set; } + public JobState State { get; set; } + + public Build(TranslationBuild translationBuild) + { + Id = translationBuild.Id; + Revision = 
translationBuild.Revision; + State = translationBuild.State; + } + + public Build(WordAlignmentBuild wordAlignmentBuild) + { + Id = wordAlignmentBuild.Id; + Revision = wordAlignmentBuild.Revision; + State = wordAlignmentBuild.State; + } +} + +public record ParallelCorpus +{ + public string Id { get; set; } + public string Url { get; set; } + public ResourceLink Engine { get; set; } + public IList SourceCorpora { get; set; } + public IList TargetCorpora { get; set; } + + public ParallelCorpus(TranslationParallelCorpus translationParallelCorpus) + { + Id = translationParallelCorpus.Id; + Url = translationParallelCorpus.Url; + Engine = translationParallelCorpus.Engine; + SourceCorpora = translationParallelCorpus.SourceCorpora; + TargetCorpora = translationParallelCorpus.TargetCorpora; + } + + public ParallelCorpus(WordAlignmentParallelCorpus wordAlignmentParallelCorpus) + { + Id = wordAlignmentParallelCorpus.Id; + Url = wordAlignmentParallelCorpus.Url; + Engine = wordAlignmentParallelCorpus.Engine; + SourceCorpora = wordAlignmentParallelCorpus.SourceCorpora; + TargetCorpora = wordAlignmentParallelCorpus.TargetCorpora; + } +} + +public record ParallelCorpusConfig +{ + public string? 
Name { get; set; } + public IList SourceCorpusIds { get; set; } + public IList TargetCorpusIds { get; set; } + + public TranslationParallelCorpusConfig ToTranslationParallelCorpusConfig() + { + return new TranslationParallelCorpusConfig + { + Name = Name, + SourceCorpusIds = SourceCorpusIds, + TargetCorpusIds = TargetCorpusIds + }; + } + + public WordAlignmentParallelCorpusConfig ToWordAlignmentParallelCorpusConfig() + { + return new WordAlignmentParallelCorpusConfig + { + Name = Name, + SourceCorpusIds = SourceCorpusIds, + TargetCorpusIds = TargetCorpusIds + }; + } + + public ParallelCorpusConfig(TranslationParallelCorpusConfig translationParallelCorpusConfig) + { + Name = translationParallelCorpusConfig.Name; + SourceCorpusIds = translationParallelCorpusConfig.SourceCorpusIds; + TargetCorpusIds = translationParallelCorpusConfig.TargetCorpusIds; + } + + public ParallelCorpusConfig(WordAlignmentParallelCorpusConfig wordAlignmentParallelCorpusConfig) + { + Name = wordAlignmentParallelCorpusConfig.Name; + SourceCorpusIds = wordAlignmentParallelCorpusConfig.SourceCorpusIds; + TargetCorpusIds = wordAlignmentParallelCorpusConfig.TargetCorpusIds; + } +} + public class ServalClientHelper : IAsyncDisposable { public DataFilesClient DataFilesClient { get; } public CorporaClient CorporaClient { get; } public TranslationEnginesClient TranslationEnginesClient { get; } + public WordAlignmentEnginesClient WordAlignmentEnginesClient { get; } public TranslationEngineTypesClient TranslationEngineTypesClient { get; } public TranslationBuildConfig TranslationBuildConfig { get; set; } + public WordAlignmentBuildConfig WordAlignmentBuildConfig { get; set; } + private IDictionary EngineIdToEngineGroup { get; } = new Dictionary(); private string _authToken = ""; private readonly HttpClient _httpClient; private readonly string _prefix; @@ -40,6 +138,8 @@ public ServalClientHelper(string audience, string prefix = "SCE_", bool ignoreSS TranslationEngineTypesClient = new 
TranslationEngineTypesClient(_httpClient); _prefix = prefix; TranslationBuildConfig = InitTranslationBuildConfig(); + WordAlignmentEnginesClient = new WordAlignmentEnginesClient(_httpClient); + WordAlignmentBuildConfig = InitWordAlignmentBuildConfig(); } public async Task InitAsync() @@ -65,6 +165,7 @@ public async Task InitAsync() public void Setup() { InitTranslationBuildConfig(); + InitWordAlignmentBuildConfig(); } public TranslationBuildConfig InitTranslationBuildConfig() @@ -87,6 +188,17 @@ public TranslationBuildConfig InitTranslationBuildConfig() return TranslationBuildConfig; } + public WordAlignmentBuildConfig InitWordAlignmentBuildConfig() + { + WordAlignmentBuildConfig = new WordAlignmentBuildConfig + { + WordAlignOn = [], + TrainOn = null, + Options = null + }; + return WordAlignmentBuildConfig; + } + public async Task ClearEnginesAsync() { IList existingTranslationEngines = await TranslationEnginesClient.GetAllAsync(); @@ -95,48 +207,91 @@ public async Task ClearEnginesAsync() if (translationEngine.Name?.Contains(_prefix) ?? false) await TranslationEnginesClient.DeleteAsync(translationEngine.Id); } + IList existingWordAlignmentEngines = await WordAlignmentEnginesClient.GetAllAsync(); + foreach (WordAlignmentEngine wordAlignmentEngine in existingWordAlignmentEngines) + { + if (wordAlignmentEngine.Name?.Contains(_prefix) ?? false) + await WordAlignmentEnginesClient.DeleteAsync(wordAlignmentEngine.Id); + } } public async Task CreateNewEngineAsync( - string engineTypeString, - string source_language, - string target_language, + string engineType, + string sourceLanguage, + string targetLanguage, string name = "", bool? 
isModelPersisted = null ) { - TranslationEngine engine = await TranslationEnginesClient.CreateAsync( - new TranslationEngineConfig - { - Name = _prefix + name, - SourceLanguage = source_language, - TargetLanguage = target_language, - Type = engineTypeString, - IsModelPersisted = isModelPersisted - } - ); - return engine.Id; + EngineGroup engineGroup = GetEngineGroup(engineType); + if (engineGroup == EngineGroup.Translation) + { + TranslationEngine engine = await TranslationEnginesClient.CreateAsync( + new TranslationEngineConfig + { + Name = name, + SourceLanguage = sourceLanguage, + TargetLanguage = targetLanguage, + Type = engineType, + IsModelPersisted = isModelPersisted + } + ); + EngineIdToEngineGroup[engine.Id] = engineGroup; + return engine.Id; + } + else + { + WordAlignmentEngine engine = await WordAlignmentEnginesClient.CreateAsync( + new WordAlignmentEngineConfig + { + Name = name, + SourceLanguage = sourceLanguage, + TargetLanguage = targetLanguage, + Type = engineType, + } + ); + EngineIdToEngineGroup[engine.Id] = engineGroup; + return engine.Id; + } } - public async Task StartBuildAsync(string engineId) + public async Task StartTranslationBuildAsync(string engineId) { return await TranslationEnginesClient.StartBuildAsync(engineId, TranslationBuildConfig); } public async Task BuildEngineAsync(string engineId) { - TranslationBuild newJob = await StartBuildAsync(engineId); - int revision = newJob.Revision; - await TranslationEnginesClient.GetBuildAsync(engineId, newJob.Id, newJob.Revision); + EngineGroup engineGroup = EngineIdToEngineGroup[engineId]; + Build newJob; + int revision; + if (engineGroup == EngineGroup.Translation) + { + newJob = new Build(await StartTranslationBuildAsync(engineId)); + revision = newJob.Revision; + await TranslationEnginesClient.GetBuildAsync(engineId, newJob.Id, newJob.Revision); + } + else + { + newJob = new Build(await WordAlignmentEnginesClient.StartBuildAsync(engineId, WordAlignmentBuildConfig)); + revision = 
newJob.Revision; + await WordAlignmentEnginesClient.GetBuildAsync(engineId, newJob.Id, newJob.Revision); + } while (true) { try { - TranslationBuild result = await TranslationEnginesClient.GetBuildAsync( - engineId, - newJob.Id, - revision + 1 - ); + Build result; + if (engineGroup == EngineGroup.Translation) + { + result = new Build(await TranslationEnginesClient.GetBuildAsync(engineId, newJob.Id, revision + 1)); + } + else + { + result = new Build( + await WordAlignmentEnginesClient.GetBuildAsync(engineId, newJob.Id, revision + 1) + ); + } if (!(result.State == JobState.Active || result.State == JobState.Pending)) // build completed break; @@ -155,12 +310,28 @@ public async Task BuildEngineAsync(string engineId) public async Task CancelBuildAsync(string engineId, string buildId, int timeoutSeconds = 20) { - await TranslationEnginesClient.CancelBuildAsync(engineId); + EngineGroup engineGroup = EngineIdToEngineGroup[engineId]; + if (engineGroup == EngineGroup.Translation) + { + await TranslationEnginesClient.CancelBuildAsync(engineId); + } + else + { + await WordAlignmentEnginesClient.CancelBuildAsync(engineId); + } int pollIntervalMs = 1000; int tries = 1; while (true) { - TranslationBuild build = await TranslationEnginesClient.GetBuildAsync(engineId, buildId); + Build build; + if (engineGroup == EngineGroup.Translation) + { + build = new Build(await TranslationEnginesClient.GetBuildAsync(engineId, buildId)); + } + else + { + build = new Build(await WordAlignmentEnginesClient.GetBuildAsync(engineId, buildId)); + } if (build.State != JobState.Pending && build.State != JobState.Active) break; if (tries++ > timeoutSeconds) @@ -176,9 +347,13 @@ public async Task AddTextCorpusToEngineAsync( string[] filesToAdd, string sourceLanguage, string targetLanguage, - bool pretranslate + bool inference ) { + EngineGroup engineGroup = EngineIdToEngineGroup[engineId]; + if (engineGroup == EngineGroup.WordAlignment) + throw new ArgumentException("Word alignment engines do not 
support non-parallel corpora."); + List sourceFiles = await UploadFilesAsync( filesToAdd, FileFormat.Text, @@ -187,7 +362,7 @@ bool pretranslate ); var targetFileConfig = new List(); - if (!pretranslate) + if (!inference) { List targetFiles = await UploadFilesAsync( filesToAdd, @@ -205,11 +380,20 @@ bool pretranslate var sourceFileConfig = new List(); - for (int i = 0; i < sourceFiles.Count; i++) + if (sourceLanguage == targetLanguage && !inference) { - sourceFileConfig.Add( - new TranslationCorpusFileConfig { FileId = sourceFiles[i].Id, TextId = filesToAdd[i] } - ); + // if it's the same language, and we are not pretranslating, do nothing (echo for suggestions) + // if pretranslating, we need to upload the source separately + // if different languages, we are not echoing. + } + else + { + for (int i = 0; i < sourceFiles.Count; i++) + { + sourceFileConfig.Add( + new TranslationCorpusFileConfig { FileId = sourceFiles[i].Id, TextId = filesToAdd[i] } + ); + } } TranslationCorpus response = await TranslationEnginesClient.AddCorpusAsync( @@ -224,7 +408,7 @@ bool pretranslate } ); - if (pretranslate) + if (inference) { TranslationBuildConfig.Pretranslate!.Add( new PretranslateCorpusConfig { CorpusId = response.Id, TextIds = filesToAdd.ToList() } @@ -234,11 +418,11 @@ bool pretranslate return response.Id; } - public async Task MakeParallelTextCorpus( + public async Task MakeParallelTextCorpus( string[] filesToAdd, string sourceLanguage, string targetLanguage, - bool pretranslate + bool inference ) { List sourceFiles = await UploadFilesAsync( @@ -249,7 +433,7 @@ bool pretranslate ); var targetFileConfig = new List(); - if (!pretranslate) + if (!inference) { List targetFiles = await UploadFilesAsync( filesToAdd, @@ -275,9 +459,18 @@ bool pretranslate var sourceFileConfig = new List(); - for (int i = 0; i < sourceFiles.Count; i++) + if (sourceLanguage == targetLanguage && !inference) { - sourceFileConfig.Add(new CorpusFileConfig { FileId = sourceFiles[i].Id, TextId = 
filesToAdd[i] }); + // if it's the same language, and we are not pretranslating, do nothing (echo for suggestions) + // if pretranslating, we need to upload the source separately + // if different languages, we are not echoing. + } + else + { + for (int i = 0; i < sourceFiles.Count; i++) + { + sourceFileConfig.Add(new CorpusFileConfig { FileId = sourceFiles[i].Id, TextId = filesToAdd[i] }); + } } CorpusConfig sourceCorpusConfig = @@ -293,22 +486,46 @@ bool pretranslate TranslationParallelCorpusConfig parallelCorpusConfig = new() { SourceCorpusIds = { sourceCorpus.Id }, TargetCorpusIds = { targetCorpus.Id } }; - return parallelCorpusConfig; + return new ParallelCorpusConfig(parallelCorpusConfig); } public async Task AddParallelTextCorpusToEngineAsync( string engineId, - TranslationParallelCorpusConfig parallelCorpusConfig, - bool pretranslate + ParallelCorpusConfig parallelCorpusConfig, + bool inference ) { - var parallelCorpus = await TranslationEnginesClient.AddParallelCorpusAsync(engineId, parallelCorpusConfig); - - if (pretranslate) + EngineGroup engineGroup = EngineIdToEngineGroup[engineId]; + ParallelCorpus parallelCorpus; + if (engineGroup == EngineGroup.Translation) { - TranslationBuildConfig.Pretranslate!.Add( - new PretranslateCorpusConfig { ParallelCorpusId = parallelCorpus.Id } + parallelCorpus = new ParallelCorpus( + await TranslationEnginesClient.AddParallelCorpusAsync( + engineId, + parallelCorpusConfig.ToTranslationParallelCorpusConfig() + ) ); + if (inference) + { + TranslationBuildConfig.Pretranslate!.Add( + new PretranslateCorpusConfig { ParallelCorpusId = parallelCorpus.Id } + ); + } + } + else + { + parallelCorpus = new ParallelCorpus( + await WordAlignmentEnginesClient.AddParallelCorpusAsync( + engineId, + parallelCorpusConfig.ToWordAlignmentParallelCorpusConfig() + ) + ); + if (inference) + { + WordAlignmentBuildConfig.WordAlignOn!.Add( + new TrainingCorpusConfig2 { ParallelCorpusId = parallelCorpus.Id } + ); + } } return parallelCorpus.Id; @@ 
-408,6 +625,19 @@ private static HttpClientHandler GetHttHandlerToIgnoreSslErrors() return handler; } + public static EngineGroup GetEngineGroup(string engineType) + { + return engineType switch + { + "SmtTransfer" => EngineGroup.Translation, + "Nmt" => EngineGroup.Translation, + "Echo" => EngineGroup.Translation, + "Statistical" => EngineGroup.WordAlignment, + "EchoWordAlignment" => EngineGroup.WordAlignment, + _ => throw new ArgumentOutOfRangeException(engineType, "Unknown engine type") + }; + } + public async ValueTask TearDown() { if (Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT") != "Development") diff --git a/src/Serval/test/Serval.Translation.Tests/Services/EngineServiceTests.cs b/src/Serval/test/Serval.Translation.Tests/Services/EngineServiceTests.cs index 0da83cf1..c6bee305 100644 --- a/src/Serval/test/Serval.Translation.Tests/Services/EngineServiceTests.cs +++ b/src/Serval/test/Serval.Translation.Tests/Services/EngineServiceTests.cs @@ -133,7 +133,7 @@ public async Task StartBuildAsync_TrainOnNotSpecified() new V1.CorpusFile { Location = "file1.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "text1" } }, @@ -154,7 +154,7 @@ public async Task StartBuildAsync_TrainOnNotSpecified() new V1.CorpusFile { Location = "file2.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "text1" } }, @@ -207,7 +207,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file1.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "text1" } }, @@ -229,7 +229,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file2.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "text1" } }, @@ -282,7 +282,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file1.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "text1" } }, @@ -304,7 +304,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = 
"file2.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "text1" } }, @@ -356,7 +356,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file1.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "text1" } }, @@ -377,7 +377,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file2.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "text1" } }, @@ -430,7 +430,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file1.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "text1" } }, @@ -451,7 +451,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file2.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "text1" } }, @@ -504,7 +504,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file1.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "text1" } }, @@ -525,7 +525,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file2.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "text1" } }, @@ -550,7 +550,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file3.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "text1" } }, @@ -571,7 +571,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file4.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "text1" } }, @@ -652,7 +652,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file1.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file1.zip" } }, @@ -684,7 +684,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file2.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file2.zip" } }, @@ -736,7 +736,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { 
Location = "file1.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file1.zip" } }, @@ -757,7 +757,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file2.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file2.zip" } }, @@ -833,7 +833,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file1.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "MAT" } }, @@ -849,7 +849,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file3.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "MRK" } }, @@ -872,7 +872,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file2.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "MAT" } }, @@ -888,7 +888,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file4.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "MRK" } }, @@ -965,7 +965,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file1.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "MAT" } }, @@ -988,7 +988,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file2.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "MAT" } }, @@ -1065,7 +1065,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file1.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "MAT" } }, @@ -1088,7 +1088,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file2.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "MAT" } }, @@ -1114,7 +1114,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file3.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "MRK" } }, @@ -1136,7 +1136,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { 
Location = "file4.txt", - Format = FileFormat.Text, + Format = V1.FileFormat.Text, TextId = "MRK" } }, @@ -1212,7 +1212,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file1.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file1.zip" } }, @@ -1228,7 +1228,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file3.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file3.zip" } }, @@ -1251,7 +1251,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file2.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file2.zip" } }, @@ -1267,7 +1267,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file4.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file4.zip" } }, @@ -1363,7 +1363,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file1.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file1.zip" } }, @@ -1379,7 +1379,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file3.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file3.zip" } }, @@ -1412,7 +1412,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file2.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file2.zip" } }, @@ -1428,7 +1428,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file4.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file4.zip" } }, @@ -1495,7 +1495,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file1.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file1.zip" } }, @@ -1522,7 +1522,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file3.zip", - Format = FileFormat.Paratext, + Format = 
V1.FileFormat.Paratext, TextId = "file3.zip" } }, @@ -1552,7 +1552,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file2.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file2.zip" } }, @@ -1579,7 +1579,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file4.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file4.zip" } }, @@ -1604,6 +1604,48 @@ await env.Service.StartBuildAsync( ); } + [Test] + public async Task StartBuildAsync_TextFilesScriptureRangeSpecified_ParallelCorpus() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateParallelCorpusEngineWithParatextProjectAsync()).Id; + Assert.ThrowsAsync( + () => + env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "parallel-corpus1", + SourceFilters = new List() + { + new() + { + CorpusRef = "parallel-corpus1-source1", + ScriptureRange = "MAT", + TextIds = [] + } + }, + TargetFilters = new List() + { + new() + { + CorpusRef = "parallel-corpus1-target1", + ScriptureRange = "MAT", + TextIds = [] + } + } + } + ] + } + ) + ); + } + [Test] public async Task StartBuildAsync_NoFilters_ParallelCorpus() { @@ -1640,7 +1682,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file1.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file1.zip" } }, @@ -1656,7 +1698,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file3.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file3.zip" } }, @@ -1675,7 +1717,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file2.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file2.zip" } }, @@ -1691,7 +1733,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file4.zip", - Format = 
FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file4.zip" } }, @@ -1734,7 +1776,7 @@ public async Task StartBuildAsync_TrainOnNotSpecified_ParallelCorpus() new V1.CorpusFile { Location = "file1.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file1.zip" } }, @@ -1750,7 +1792,7 @@ public async Task StartBuildAsync_TrainOnNotSpecified_ParallelCorpus() new V1.CorpusFile { Location = "file3.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file3.zip" } }, @@ -1769,7 +1811,7 @@ public async Task StartBuildAsync_TrainOnNotSpecified_ParallelCorpus() new V1.CorpusFile { Location = "file2.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file2.zip" } }, @@ -1785,7 +1827,7 @@ public async Task StartBuildAsync_TrainOnNotSpecified_ParallelCorpus() new V1.CorpusFile { Location = "file4.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file4.zip" } }, @@ -1845,7 +1887,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file1.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file1.zip" } }, @@ -1872,7 +1914,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file3.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file3.zip" } }, @@ -1891,7 +1933,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file2.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file2.zip" } }, @@ -1907,7 +1949,7 @@ await env.Service.StartBuildAsync( new V1.CorpusFile { Location = "file4.zip", - Format = FileFormat.Paratext, + Format = V1.FileFormat.Paratext, TextId = "file4.zip" } }, @@ -2287,12 +2329,12 @@ public async Task CreateParallelCorpusEngineWithTextFilesAsync() SourceLanguage = "es", TargetLanguage = "en", Type = "Smt", - ParallelCorpora = new Models.ParallelCorpus[] - { + ParallelCorpora = 
+ [ new() { Id = "parallel-corpus1", - SourceCorpora = new List() + SourceCorpora = new List() { new() { @@ -2327,7 +2369,7 @@ public async Task CreateParallelCorpusEngineWithTextFilesAsync() ] } }, - TargetCorpora = new List() + TargetCorpora = new List() { new() { @@ -2363,7 +2405,7 @@ public async Task CreateParallelCorpusEngineWithTextFilesAsync() } } } - } + ] }; await Engines.InsertAsync(engine); return engine; @@ -2378,12 +2420,12 @@ public async Task CreateMultipleParallelCorpusEngineWithTextFilesAsync() SourceLanguage = "es", TargetLanguage = "en", Type = "Smt", - ParallelCorpora = new Models.ParallelCorpus[] - { + ParallelCorpora = + [ new() { Id = "parallel-corpus1", - SourceCorpora = new List() + SourceCorpora = new List() { new() { @@ -2402,7 +2444,7 @@ public async Task CreateMultipleParallelCorpusEngineWithTextFilesAsync() ] } }, - TargetCorpora = new List() + TargetCorpora = new List() { new() { @@ -2425,7 +2467,7 @@ public async Task CreateMultipleParallelCorpusEngineWithTextFilesAsync() new() { Id = "parallel-corpus2", - SourceCorpora = new List() + SourceCorpora = new List() { new() { @@ -2444,7 +2486,7 @@ public async Task CreateMultipleParallelCorpusEngineWithTextFilesAsync() ] } }, - TargetCorpora = new List() + TargetCorpora = new List() { new() { @@ -2464,7 +2506,7 @@ public async Task CreateMultipleParallelCorpusEngineWithTextFilesAsync() } } } - } + ] }; await Engines.InsertAsync(engine); return engine; @@ -2479,12 +2521,12 @@ public async Task CreateParallelCorpusEngineWithParatextProjectAsync() SourceLanguage = "es", TargetLanguage = "en", Type = "Smt", - ParallelCorpora = new Models.ParallelCorpus[] - { + ParallelCorpora = + [ new() { Id = "parallel-corpus1", - SourceCorpora = new List() + SourceCorpora = new List() { new() { @@ -2519,7 +2561,7 @@ public async Task CreateParallelCorpusEngineWithParatextProjectAsync() ] } }, - TargetCorpora = new List() + TargetCorpora = new List() { new() { @@ -2555,7 +2597,7 @@ public async Task 
CreateParallelCorpusEngineWithParatextProjectAsync() } } } - } + ] }; await Engines.InsertAsync(engine); return engine; diff --git a/src/Serval/test/Serval.Translation.Tests/Services/PlatformServiceTests.cs b/src/Serval/test/Serval.Translation.Tests/Services/PlatformServiceTests.cs index 10f3ca14..d716dfe6 100644 --- a/src/Serval/test/Serval.Translation.Tests/Services/PlatformServiceTests.cs +++ b/src/Serval/test/Serval.Translation.Tests/Services/PlatformServiceTests.cs @@ -37,7 +37,7 @@ await env.PlatformService.BuildRestarting( Assert.That(env.Engines.Get("e0").IsBuilding, Is.False); Assert.That(env.Pretranslations.Count, Is.EqualTo(0)); - await env.PlatformService.InsertPretranslations(new MockAsyncStreamReader("e0"), env.ServerCallContext); + await env.PlatformService.InsertInferences(new MockAsyncStreamReader("e0"), env.ServerCallContext); Assert.That(env.Pretranslations.Count, Is.EqualTo(1)); await env.PlatformService.BuildFaulted(new BuildFaultedRequest() { BuildId = "b0" }, env.ServerCallContext); @@ -49,12 +49,12 @@ await env.PlatformService.BuildRestarting( new BuildRestartingRequest() { BuildId = "b0" }, env.ServerCallContext ); - await env.PlatformService.InsertPretranslations(new MockAsyncStreamReader("e0"), env.ServerCallContext); + await env.PlatformService.InsertInferences(new MockAsyncStreamReader("e0"), env.ServerCallContext); Assert.That(env.Pretranslations.Count, Is.EqualTo(1)); await env.PlatformService.BuildCompleted(new BuildCompletedRequest() { BuildId = "b0" }, env.ServerCallContext); Assert.That(env.Pretranslations.Count, Is.EqualTo(1)); await env.PlatformService.BuildStarted(new BuildStartedRequest() { BuildId = "b0" }, env.ServerCallContext); - await env.PlatformService.InsertPretranslations(new MockAsyncStreamReader("e0"), env.ServerCallContext); + await env.PlatformService.InsertInferences(new MockAsyncStreamReader("e0"), env.ServerCallContext); await env.PlatformService.BuildCompleted(new BuildCompletedRequest() { BuildId = "b0" }, 
env.ServerCallContext); Assert.That(env.Pretranslations.Count, Is.EqualTo(1)); } @@ -106,8 +106,8 @@ await env.Engines.InsertAsync( } ); Assert.That(env.Engines.Get("e0").CorpusSize, Is.EqualTo(0)); - await env.PlatformService.IncrementTranslationEngineCorpusSize( - new IncrementTranslationEngineCorpusSizeRequest() { EngineId = "e0", Count = 1 }, + await env.PlatformService.IncrementTrainEngineCorpusSize( + new IncrementTrainEngineCorpusSizeRequest() { EngineId = "e0", Count = 1 }, env.ServerCallContext ); Assert.That(env.Engines.Get("e0").CorpusSize, Is.EqualTo(1)); @@ -155,12 +155,12 @@ public TestEnvironment() public TranslationPlatformServiceV1 PlatformService { get; } } - private class MockAsyncStreamReader(string engineId) : IAsyncStreamReader + private class MockAsyncStreamReader(string engineId) : IAsyncStreamReader { private bool _endOfStream = false; public string EngineId { get; } = engineId; - public InsertPretranslationsRequest Current => new() { EngineId = EngineId }; + public InsertInferencesRequest Current => new() { EngineId = EngineId }; public Task MoveNext(CancellationToken cancellationToken) { diff --git a/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs b/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs index 5aca4ed6..b2e5f0f3 100644 --- a/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs +++ b/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs @@ -270,7 +270,7 @@ private class TestEnvironment : IDisposable { public TestEnvironment() { - CorpusFile file1 = + Shared.Models.CorpusFile file1 = new() { Id = "file1", @@ -278,7 +278,7 @@ public TestEnvironment() Format = Shared.Contracts.FileFormat.Paratext, TextId = "project1" }; - CorpusFile file2 = + Shared.Models.CorpusFile file2 = new() { Id = "file2", @@ -321,7 +321,7 @@ public TestEnvironment() new() { Id = "parallel_corpus1", - SourceCorpora = new List() + SourceCorpora = 
new List() { new() { @@ -330,7 +330,7 @@ public TestEnvironment() Files = [file1], } }, - TargetCorpora = new List() + TargetCorpora = new List() { new() { diff --git a/src/Serval/test/Serval.Translation.Tests/Usings.cs b/src/Serval/test/Serval.Translation.Tests/Usings.cs index ef8a3ff7..9116c0c9 100644 --- a/src/Serval/test/Serval.Translation.Tests/Usings.cs +++ b/src/Serval/test/Serval.Translation.Tests/Usings.cs @@ -7,6 +7,7 @@ global using NSubstitute; global using NUnit.Framework; global using Serval.Shared.Configuration; +global using Serval.Shared.Contracts; global using Serval.Shared.Services; global using Serval.Shared.Utils; global using Serval.Translation.Contracts; diff --git a/src/Serval/test/Serval.WordAlignment.Tests/Serval.WordAlignment.Tests.csproj b/src/Serval/test/Serval.WordAlignment.Tests/Serval.WordAlignment.Tests.csproj new file mode 100644 index 00000000..589dadf3 --- /dev/null +++ b/src/Serval/test/Serval.WordAlignment.Tests/Serval.WordAlignment.Tests.csproj @@ -0,0 +1,39 @@ + + + + net8.0 + enable + enable + false + Serval.WordAlignment + true + true + true + $(NoWarn);CS1591;CS1573 + + + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + + + diff --git a/src/Serval/test/Serval.WordAlignment.Tests/Services/BuildServiceTests.cs b/src/Serval/test/Serval.WordAlignment.Tests/Services/BuildServiceTests.cs new file mode 100644 index 00000000..1f61c2d9 --- /dev/null +++ b/src/Serval/test/Serval.WordAlignment.Tests/Services/BuildServiceTests.cs @@ -0,0 +1,62 @@ +namespace Serval.WordAlignment.Services; + +[TestFixture] +public class BuildServiceTests +{ + const string BUILD1_ID = "b00000000000000000000001"; + + [Test] + public async Task GetActiveNewerRevisionAsync_Insert() + { + var builds = new MemoryRepository(); + var service = new 
BuildService(builds); + Task> task = service.GetActiveNewerRevisionAsync("engine1", 1); + var build = new Build + { + Id = BUILD1_ID, + EngineRef = "engine1", + PercentCompleted = 0.1 + }; + await builds.InsertAsync(build); + EntityChange change = await task; + Assert.That(change.Type, Is.EqualTo(EntityChangeType.Insert)); + Assert.That(change.Entity!.Revision, Is.EqualTo(1)); + Assert.That(change.Entity.PercentCompleted, Is.EqualTo(0.1)); + } + + [Test] + public async Task GetNewerRevisionAsync_Update() + { + var builds = new MemoryRepository(); + var service = new BuildService(builds); + var build = new Build { Id = BUILD1_ID, EngineRef = "engine1" }; + await builds.InsertAsync(build); + Task> task = service.GetNewerRevisionAsync(build.Id, 2); + await builds.UpdateAsync(build, u => u.Set(b => b.PercentCompleted, 0.1)); + EntityChange change = await task; + Assert.That(change.Type, Is.EqualTo(EntityChangeType.Update)); + Assert.That(change.Entity!.Revision, Is.EqualTo(2)); + Assert.That(change.Entity.PercentCompleted, Is.EqualTo(0.1)); + } + + [Test] + public async Task GetNewerRevisionAsync_Delete() + { + var builds = new MemoryRepository(); + var service = new BuildService(builds); + var build = new Build { Id = BUILD1_ID, EngineRef = "engine1" }; + await builds.InsertAsync(build); + Task> task = service.GetNewerRevisionAsync(build.Id, 2); + await builds.DeleteAsync(build); + EntityChange change = await task; + Assert.That(change.Type, Is.EqualTo(EntityChangeType.Delete)); + } + + [Test] + public async Task GetNewerRevisionAsync_DoesNotExist() + { + var service = new BuildService(new MemoryRepository()); + EntityChange change = await service.GetNewerRevisionAsync("build1", 2); + Assert.That(change.Type, Is.EqualTo(EntityChangeType.Delete)); + } +} diff --git a/src/Serval/test/Serval.WordAlignment.Tests/Services/EngineServiceTests.cs b/src/Serval/test/Serval.WordAlignment.Tests/Services/EngineServiceTests.cs new file mode 100644 index 00000000..c5dd57ce --- 
/dev/null +++ b/src/Serval/test/Serval.WordAlignment.Tests/Services/EngineServiceTests.cs @@ -0,0 +1,1690 @@ +using Google.Protobuf.WellKnownTypes; +using Serval.WordAlignment.V1; + +namespace Serval.WordAlignment.Services; + +[TestFixture] +public class EngineServiceTests +{ + const string BUILD1_ID = "b00000000000000000000001"; + + [Test] + public void GetWordAlignmentAsync_EngineDoesNotExist() + { + var env = new TestEnvironment(); + Assert.ThrowsAsync( + () => env.Service.GetWordAlignmentAsync("engine1", "esto es una prueba.", "this is a test.") + ); + } + + [Test] + public async Task GetWordAlignmentAsync_EngineExists() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateEngineWithTextFilesAsync()).Id; + Models.WordAlignmentResult? result = await env.Service.GetWordAlignmentAsync( + engineId, + "esto es una prueba.", + "this is a test." + ); + Assert.That(result, Is.Not.Null); + Assert.That(result!.Alignment, Is.EqualTo(CreateNAlignedWordPair(5))); + } + + [Test] + public async Task CreateAsync() + { + var env = new TestEnvironment(); + Engine engine = + new() + { + Id = "engine1", + Owner = "owner1", + SourceLanguage = "es", + TargetLanguage = "en", + Type = "Statistical", + ParallelCorpora = [] + }; + await env.Service.CreateAsync(engine); + + engine = (await env.Engines.GetAsync("engine1"))!; + Assert.That(engine.SourceLanguage, Is.EqualTo("es")); + Assert.That(engine.TargetLanguage, Is.EqualTo("en")); + } + + [Test] + public async Task DeleteAsync_EngineExists() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateEngineWithTextFilesAsync()).Id; + await env.Service.DeleteAsync("engine1"); + Engine? 
engine = await env.Engines.GetAsync(engineId); + Assert.That(engine, Is.Null); + } + + [Test] + public async Task DeleteAsync_ProjectDoesNotExist() + { + var env = new TestEnvironment(); + await env.CreateEngineWithTextFilesAsync(); + Assert.ThrowsAsync(() => env.Service.DeleteAsync("engine3")); + } + + [Test] + public async Task StartBuildAsync_TrainOnNotSpecified() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateEngineWithTextFilesAsync()).Id; + await env.Service.StartBuildAsync(new Build { Id = BUILD1_ID, EngineRef = engineId }); + _ = env.WordAlignmentServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Statistical", + Corpora = + { + new V1.ParallelCorpus + { + Id = "corpus1", + SourceCorpora = + { + new List + { + new() + { + Id = "corpus1-source1", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.txt", + Format = V1.FileFormat.Text, + TextId = "text1" + } + }, + WordAlignOnAll = true, + TrainOnAll = true + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Id = "corpus1-target1", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.txt", + Format = V1.FileFormat.Text, + TextId = "text1" + } + }, + WordAlignOnAll = true, + TrainOnAll = true + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_TextIdsEmpty() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateEngineWithTextFilesAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "corpus1", + SourceFilters = new List() + { + new() { CorpusRef = "corpus1-source1", TextIds = [] } + }, + TargetFilters = new List() + { + new() { CorpusRef = "corpus1-target1", TextIds = [] } + } + } + ] + } + ); + _ = env.WordAlignmentServiceClient.Received() + .StartBuildAsync( + new 
StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Statistical", + Corpora = + { + new V1.ParallelCorpus + { + Id = "corpus1", + SourceCorpora = + { + new List + { + new() + { + Id = "corpus1-source1", + Language = "es", + TrainOnTextIds = { }, + Files = + { + new V1.CorpusFile + { + Location = "file1.txt", + Format = V1.FileFormat.Text, + TextId = "text1" + } + }, + WordAlignOnAll = true, + TrainOnAll = false + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Id = "corpus1-target1", + Language = "en", + TrainOnTextIds = { }, + Files = + { + new V1.CorpusFile + { + Location = "file2.txt", + Format = V1.FileFormat.Text, + TextId = "text1" + } + }, + WordAlignOnAll = true, + TrainOnAll = false + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_TextIdsPopulated() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateEngineWithTextFilesAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "corpus1", + SourceFilters = new List() + { + new() { CorpusRef = "corpus1-source1", TextIds = ["text1"] } + }, + TargetFilters = new List() + { + new() { CorpusRef = "corpus1-target1", TextIds = ["text1"] } + } + } + ] + } + ); + _ = env.WordAlignmentServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Statistical", + Corpora = + { + new V1.ParallelCorpus + { + Id = "corpus1", + SourceCorpora = + { + new List + { + new() + { + Id = "corpus1-source1", + Language = "es", + TrainOnTextIds = { "text1" }, + Files = + { + new V1.CorpusFile + { + Location = "file1.txt", + Format = V1.FileFormat.Text, + TextId = "text1" + } + }, + WordAlignOnAll = true, + TrainOnAll = false + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Id = "corpus1-target1", + Language = "en", + TrainOnTextIds = { 
"text1" }, + Files = + { + new V1.CorpusFile + { + Location = "file2.txt", + Format = V1.FileFormat.Text, + TextId = "text1" + } + }, + WordAlignOnAll = true, + TrainOnAll = false + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_TextIdsNotSpecified() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateEngineWithTextFilesAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "corpus1", + SourceFilters = new List() { new() { CorpusRef = "corpus1-source1" } }, + TargetFilters = new List() { new() { CorpusRef = "corpus1-target1" } } + } + ] + } + ); + _ = env.WordAlignmentServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Statistical", + Corpora = + { + new V1.ParallelCorpus + { + Id = "corpus1", + SourceCorpora = + { + new List + { + new() + { + Id = "corpus1-source1", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.txt", + Format = V1.FileFormat.Text, + TextId = "text1" + } + }, + WordAlignOnAll = true, + TrainOnAll = true + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Id = "corpus1-target1", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.txt", + Format = V1.FileFormat.Text, + TextId = "text1" + } + }, + WordAlignOnAll = true, + TrainOnAll = true + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_OneOfMultipleCorpora() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateMultipleCorporaEngineWithTextFilesAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = [new TrainingCorpus { ParallelCorpusRef = "corpus1" }], + WordAlignOn = [new TrainingCorpus { ParallelCorpusRef = "corpus1" }] + } + ); + _ = 
env.WordAlignmentServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Statistical", + Corpora = + { + new V1.ParallelCorpus + { + Id = "corpus1", + SourceCorpora = + { + new List + { + new() + { + Id = "corpus1-source1", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.txt", + Format = V1.FileFormat.Text, + TextId = "MAT" + } + }, + WordAlignOnAll = true, + TrainOnAll = true + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Id = "corpus1-target1", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.txt", + Format = V1.FileFormat.Text, + TextId = "MAT" + } + }, + WordAlignOnAll = true, + TrainOnAll = true + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_TrainOnOneWordAlignTheOther() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateMultipleCorporaEngineWithTextFilesAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = [new TrainingCorpus { ParallelCorpusRef = "corpus1" }], + WordAlignOn = [new TrainingCorpus { ParallelCorpusRef = "corpus2" }] + } + ); + _ = env.WordAlignmentServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Statistical", + Corpora = + { + new V1.ParallelCorpus + { + Id = "corpus1", + SourceCorpora = + { + new List + { + new() + { + Id = "corpus1-source1", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.txt", + Format = V1.FileFormat.Text, + TextId = "MAT" + } + }, + WordAlignOnAll = false, + TrainOnAll = true + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Id = "corpus1-target1", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.txt", + Format = V1.FileFormat.Text, + TextId = "MAT" + } + }, + WordAlignOnAll = false, + TrainOnAll 
= true + } + } + } + }, + new V1.ParallelCorpus + { + Id = "corpus2", + SourceCorpora = + { + new List + { + new() + { + Id = "corpus2-source1", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file3.txt", + Format = V1.FileFormat.Text, + TextId = "MRK" + } + }, + WordAlignOnAll = true, + TrainOnAll = false + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Id = "corpus2-target1", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file4.txt", + Format = V1.FileFormat.Text, + TextId = "MRK" + } + }, + WordAlignOnAll = true, + TrainOnAll = false + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_TextFilesScriptureRangeSpecified() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateEngineWithTextFilesAsync()).Id; + Assert.ThrowsAsync( + () => + env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "corpus1", + SourceFilters = new List() + { + new() + { + CorpusRef = "corpus1-source1", + ScriptureRange = "MAT", + TextIds = [] + } + }, + TargetFilters = new List() + { + new() + { + CorpusRef = "corpus1-target1", + ScriptureRange = "MAT", + TextIds = [] + } + } + } + ] + } + ) + ); + } + + [Test] + public async Task StartBuildAsync_ScriptureRangeSpecified() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateEngineWithParatextProjectAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "corpus1", + SourceFilters = new List() + { + new() { CorpusRef = "corpus1-source1", ScriptureRange = "MAT 1;MRK" } + }, + TargetFilters = new List() + { + new() { CorpusRef = "corpus1-target1", ScriptureRange = "MAT;MRK 1" } + } + } + ] + } + ); + _ = env.WordAlignmentServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId 
= BUILD1_ID, + EngineId = engineId, + EngineType = "Statistical", + Corpora = + { + new V1.ParallelCorpus + { + Id = "corpus1", + SourceCorpora = + { + new List + { + new() + { + Id = "corpus1-source1", + Language = "es", + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 1 } } + }, + { + "MRK", + new ScriptureChapters { Chapters = { } } + } + }, + Files = + { + new V1.CorpusFile + { + Location = "file1.zip", + Format = V1.FileFormat.Paratext, + TextId = "file1.zip" + } + }, + WordAlignOnAll = true, + TrainOnAll = false + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Id = "corpus1-target1", + Language = "en", + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { } } + }, + { + "MRK", + new ScriptureChapters { Chapters = { 1 } } + } + }, + Files = + { + new V1.CorpusFile + { + Location = "file2.zip", + Format = V1.FileFormat.Paratext, + TextId = "file2.zip" + } + }, + WordAlignOnAll = true, + TrainOnAll = false + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_ScriptureRangeEmptyString() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateEngineWithParatextProjectAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "corpus1", + SourceFilters = new List() + { + new() { CorpusRef = "corpus1-source1", ScriptureRange = "" } + }, + TargetFilters = new List() + { + new() { CorpusRef = "corpus1-target1", ScriptureRange = "" } + } + } + ] + } + ); + _ = env.WordAlignmentServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Statistical", + Corpora = + { + new V1.ParallelCorpus + { + Id = "corpus1", + SourceCorpora = + { + new List + { + new() + { + Id = "corpus1-source1", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.zip", + Format = 
V1.FileFormat.Paratext, + TextId = "file1.zip" + } + }, + WordAlignOnAll = true, + TrainOnAll = false + } + } + }, + TargetCorpora = + { + new List + { + new() + { + Id = "corpus1-target1", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.zip", + Format = V1.FileFormat.Paratext, + TextId = "file2.zip" + } + }, + WordAlignOnAll = true, + TrainOnAll = false + } + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_MixedSourceAndTarget() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateEngineWithMultipleParatextProjectAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "corpus1", + SourceFilters = new List() + { + new() { CorpusRef = "corpus1-source1", ScriptureRange = "MAT 1-2;MRK 1-2" }, + new() { CorpusRef = "corpus1-source2", ScriptureRange = "MAT 3;MRK 1" } + }, + TargetFilters = new List() + { + new() { CorpusRef = "corpus1-target1", ScriptureRange = "MAT 2-3;MRK 2" }, + new() { CorpusRef = "corpus1-target2", ScriptureRange = "MAT 1;MRK 1-2" } + } + } + ] + } + ); + _ = env.WordAlignmentServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Statistical", + Corpora = + { + new V1.ParallelCorpus + { + Id = "corpus1", + SourceCorpora = + { + new V1.MonolingualCorpus() + { + Id = "corpus1-source1", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file1.zip", + Format = V1.FileFormat.Paratext, + TextId = "file1.zip" + } + }, + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 1, 2 } } + }, + { + "MRK", + new ScriptureChapters { Chapters = { 1, 2 } } + } + }, + WordAlignOnAll = true, + TrainOnAll = false + }, + new V1.MonolingualCorpus() + { + Id = "corpus1-source2", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file3.zip", + 
Format = V1.FileFormat.Paratext, + TextId = "file3.zip" + } + }, + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 3 } } + }, + { + "MRK", + new ScriptureChapters { Chapters = { 1 } } + } + }, + WordAlignOnAll = true, + TrainOnAll = false + } + }, + TargetCorpora = + { + new V1.MonolingualCorpus() + { + Id = "corpus1-target1", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.zip", + Format = V1.FileFormat.Paratext, + TextId = "file2.zip" + } + }, + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 2, 3 } } + }, + { + "MRK", + new ScriptureChapters { Chapters = { 2 } } + } + }, + WordAlignOnAll = true, + TrainOnAll = false + }, + new V1.MonolingualCorpus() + { + Id = "corpus1-target2", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file4.zip", + Format = V1.FileFormat.Paratext, + TextId = "file4.zip" + } + }, + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 1 } } + }, + { + "MRK", + new ScriptureChapters { Chapters = { 1, 2 } } + } + }, + WordAlignOnAll = true, + TrainOnAll = false + } + } + } + } + } + ); + } + + [Test] + public async Task StartBuildAsync_NoTargetFilter() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateEngineWithMultipleParatextProjectAsync()).Id; + await env.Service.StartBuildAsync( + new Build + { + Id = BUILD1_ID, + EngineRef = engineId, + TrainOn = + [ + new TrainingCorpus + { + ParallelCorpusRef = "corpus1", + SourceFilters = new List() + { + new() { CorpusRef = "corpus1-source1", ScriptureRange = "MAT 1;MRK" } + }, + } + ] + } + ); + _ = env.WordAlignmentServiceClient.Received() + .StartBuildAsync( + new StartBuildRequest + { + BuildId = BUILD1_ID, + EngineId = engineId, + EngineType = "Statistical", + Corpora = + { + new V1.ParallelCorpus + { + Id = "corpus1", + SourceCorpora = + { + new V1.MonolingualCorpus() + { + Id = "corpus1-source1", + Language = "es", + Files = + { + new 
V1.CorpusFile + { + Location = "file1.zip", + Format = V1.FileFormat.Paratext, + TextId = "file1.zip" + } + }, + TrainOnChapters = + { + { + "MAT", + new ScriptureChapters { Chapters = { 1 } } + }, + { + "MRK", + new ScriptureChapters { } + } + }, + WordAlignOnAll = true, + TrainOnAll = false + }, + new V1.MonolingualCorpus() + { + Id = "corpus1-source2", + Language = "es", + Files = + { + new V1.CorpusFile + { + Location = "file3.zip", + Format = V1.FileFormat.Paratext, + TextId = "file3.zip" + } + }, + WordAlignOnAll = true, + TrainOnAll = false + } + }, + TargetCorpora = + { + new V1.MonolingualCorpus() + { + Id = "corpus1-target1", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file2.zip", + Format = V1.FileFormat.Paratext, + TextId = "file2.zip" + } + }, + WordAlignOnAll = true, + TrainOnAll = true + }, + new V1.MonolingualCorpus() + { + Id = "corpus1-target2", + Language = "en", + Files = + { + new V1.CorpusFile + { + Location = "file4.zip", + Format = V1.FileFormat.Paratext, + TextId = "file4.zip" + } + }, + WordAlignOnAll = true, + TrainOnAll = true + } + } + } + } + } + ); + } + + [Test] + public async Task CancelBuildAsync_EngineExistsNotBuilding() + { + var env = new TestEnvironment(); + string engineId = (await env.CreateEngineWithTextFilesAsync()).Id; + await env.Service.CancelBuildAsync(engineId); + } + + [Test] + public async Task UpdateCorpusAsync() + { + var env = new TestEnvironment(); + Engine engine = await env.CreateEngineWithTextFilesAsync(); + string corpusId = engine.ParallelCorpora[0].Id; + + Shared.Models.ParallelCorpus? 
corpus = await env.Service.UpdateParallelCorpusAsync( + engine.Id, + corpusId, + sourceCorpora: new List + { + new() + { + Id = "corpus1-source1", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file1", + Filename = "file1.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "text1" + } + ] + }, + new() + { + Id = "corpus1-source2", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file3", + Filename = "file3.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "text2" + } + ] + } + }, + null + ); + + Assert.That(corpus, Is.Not.Null); + Assert.That(corpus.SourceCorpora, Has.Count.EqualTo(2)); + Assert.That(corpus.SourceCorpora[0].Files[0].Id, Is.EqualTo("file1")); + Assert.That(corpus.SourceCorpora[1].Files[0].Id, Is.EqualTo("file3")); + Assert.That(corpus.TargetCorpora, Has.Count.EqualTo(1)); + } + + private class TestEnvironment + { + public TestEnvironment() + { + Engines = new MemoryRepository(); + WordAlignmentServiceClient = Substitute.For(); + var wordAlignmentResult = new V1.WordAlignmentResult + { + SourceTokens = { "esto es una prueba .".Split() }, + TargetTokens = { "this is a test .".Split() }, + Confidences = { 1.0, 1.0, 1.0, 1.0, 1.0 }, + Alignment = + { + new V1.AlignedWordPair { SourceIndex = 0, TargetIndex = 0 }, + new V1.AlignedWordPair { SourceIndex = 1, TargetIndex = 1 }, + new V1.AlignedWordPair { SourceIndex = 2, TargetIndex = 2 }, + new V1.AlignedWordPair { SourceIndex = 3, TargetIndex = 3 }, + new V1.AlignedWordPair { SourceIndex = 4, TargetIndex = 4 } + }, + }; + var wordAlignmentResponse = new GetWordAlignmentResponse { Result = wordAlignmentResult }; + WordAlignmentServiceClient + .GetWordAlignmentAsync(Arg.Any()) + .Returns(CreateAsyncUnaryCall(wordAlignmentResponse)); + WordAlignmentServiceClient + .CancelBuildAsync(Arg.Any()) + .Returns(CreateAsyncUnaryCall(new Empty())); + WordAlignmentServiceClient.CreateAsync(Arg.Any()).Returns(CreateAsyncUnaryCall(new Empty())); + 
WordAlignmentServiceClient.DeleteAsync(Arg.Any()).Returns(CreateAsyncUnaryCall(new Empty())); + WordAlignmentServiceClient + .StartBuildAsync(Arg.Any()) + .Returns(CreateAsyncUnaryCall(new Empty())); + GrpcClientFactory grpcClientFactory = Substitute.For(); + grpcClientFactory + .CreateClient("Statistical") + .Returns(WordAlignmentServiceClient); + IOptionsMonitor dataFileOptions = Substitute.For>(); + dataFileOptions.CurrentValue.Returns(new DataFileOptions()); + var scriptureDataFileService = Substitute.For(); + scriptureDataFileService + .GetParatextProjectSettings(Arg.Any()) + .Returns( + new ParatextProjectSettings( + name: "Tst", + fullName: "Test", + encoding: Encoding.UTF8, + versification: ScrVers.English, + stylesheet: new UsfmStylesheet("usfm.sty"), + fileNamePrefix: "TST", + fileNameForm: "MAT", + fileNameSuffix: ".USFM", + biblicalTermsListType: "BiblicalTerms", + biblicalTermsProjectName: "", + biblicalTermsFileName: "BiblicalTerms.xml", + languageCode: "en" + ) + ); + + Service = new EngineService( + Engines, + new MemoryRepository(), + new MemoryRepository(), + grpcClientFactory, + dataFileOptions, + new MemoryDataAccessContext(), + new LoggerFactory(), + scriptureDataFileService + ); + } + + public EngineService Service { get; } + public IRepository Engines { get; } + public WordAlignmentEngineApi.WordAlignmentEngineApiClient WordAlignmentServiceClient { get; } + + public async Task CreateEngineWithTextFilesAsync() + { + var engine = new Engine + { + Id = "engine1", + Owner = "owner1", + SourceLanguage = "es", + TargetLanguage = "en", + Type = "Statistical", + ParallelCorpora = + [ + new() + { + Id = "corpus1", + SourceCorpora = new List() + { + new() + { + Id = "corpus1-source1", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file1", + Filename = "file1.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "text1" + } + ] + } + }, + TargetCorpora = new List() + { + new() + { + Id = "corpus1-target1", + Name = "", + 
Language = "en", + Files = + [ + new() + { + Id = "file2", + Filename = "file2.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "text1" + } + ] + } + } + } + ] + }; + await Engines.InsertAsync(engine); + return engine; + } + + public async Task CreateEngineWithMulitipleTextFilesAsync() + { + var engine = new Engine + { + Id = "engine1", + Owner = "owner1", + SourceLanguage = "es", + TargetLanguage = "en", + Type = "Statistical", + ParallelCorpora = + [ + new() + { + Id = "corpus1", + SourceCorpora = new List() + { + new() + { + Id = "corpus1-source1", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file1", + Filename = "file1.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MAT" + } + ] + }, + new() + { + Id = "corpus1-source2", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file3", + Filename = "file3.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MRK" + } + ] + } + }, + TargetCorpora = new List() + { + new() + { + Id = "corpus1-target1", + Name = "", + Language = "en", + Files = + [ + new() + { + Id = "file2", + Filename = "file2.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MAT" + } + ] + }, + new() + { + Id = "corpus1-target2", + Name = "", + Language = "en", + Files = + [ + new() + { + Id = "file4", + Filename = "file4.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MRK" + } + ] + } + } + } + ] + }; + await Engines.InsertAsync(engine); + return engine; + } + + public async Task CreateMultipleCorporaEngineWithTextFilesAsync() + { + var engine = new Engine + { + Id = "engine1", + Owner = "owner1", + SourceLanguage = "es", + TargetLanguage = "en", + Type = "Statistical", + ParallelCorpora = + [ + new() + { + Id = "corpus1", + SourceCorpora = new List() + { + new() + { + Id = "corpus1-source1", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file1", + Filename = "file1.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MAT" 
+ } + ] + } + }, + TargetCorpora = new List() + { + new() + { + Id = "corpus1-target1", + Name = "", + Language = "en", + Files = + [ + new() + { + Id = "file2", + Filename = "file2.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MAT" + } + ] + } + } + }, + new() + { + Id = "corpus2", + SourceCorpora = new List() + { + new() + { + Id = "corpus2-source1", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file3", + Filename = "file3.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MRK" + } + ] + } + }, + TargetCorpora = new List() + { + new() + { + Id = "corpus2-target1", + Name = "", + Language = "en", + Files = + [ + new() + { + Id = "file4", + Filename = "file4.txt", + Format = Shared.Contracts.FileFormat.Text, + TextId = "MRK" + } + ] + } + } + } + ] + }; + await Engines.InsertAsync(engine); + return engine; + } + + public async Task CreateEngineWithParatextProjectAsync() + { + var engine = new Engine + { + Id = "engine1", + Owner = "owner1", + SourceLanguage = "es", + TargetLanguage = "en", + Type = "Statistical", + ParallelCorpora = + [ + new() + { + Id = "corpus1", + SourceCorpora = new List() + { + new() + { + Id = "corpus1-source1", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file1", + Filename = "file1.zip", + Format = Shared.Contracts.FileFormat.Paratext, + TextId = "file1.zip" + } + ] + } + }, + TargetCorpora = new List() + { + new() + { + Id = "corpus1-target1", + Name = "", + Language = "en", + Files = + [ + new() + { + Id = "file2", + Filename = "file2.zip", + Format = Shared.Contracts.FileFormat.Paratext, + TextId = "file2.zip" + } + ] + } + } + } + ] + }; + await Engines.InsertAsync(engine); + return engine; + } + + public async Task CreateEngineWithMultipleParatextProjectAsync() + { + var engine = new Engine + { + Id = "engine1", + Owner = "owner1", + SourceLanguage = "es", + TargetLanguage = "en", + Type = "Statistical", + ParallelCorpora = + [ + new() + { + Id = "corpus1", + 
SourceCorpora = new List() + { + new() + { + Id = "corpus1-source1", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file1", + Filename = "file1.zip", + Format = Shared.Contracts.FileFormat.Paratext, + TextId = "file1.zip" + } + ] + }, + new() + { + Id = "corpus1-source2", + Name = "", + Language = "es", + Files = + [ + new() + { + Id = "file3", + Filename = "file3.zip", + Format = Shared.Contracts.FileFormat.Paratext, + TextId = "file3.zip" + } + ] + } + }, + TargetCorpora = new List() + { + new() + { + Id = "corpus1-target1", + Name = "", + Language = "en", + Files = + [ + new() + { + Id = "file2", + Filename = "file2.zip", + Format = Shared.Contracts.FileFormat.Paratext, + TextId = "file2.zip" + } + ] + }, + new() + { + Id = "corpus1-target2", + Name = "", + Language = "en", + Files = + [ + new() + { + Id = "file4", + Filename = "file4.zip", + Format = Shared.Contracts.FileFormat.Paratext, + TextId = "file4.zip" + } + ] + } + } + } + ] + }; + await Engines.InsertAsync(engine); + return engine; + } + + private static AsyncUnaryCall CreateAsyncUnaryCall(TResponse response) + { + return new AsyncUnaryCall( + Task.FromResult(response), + Task.FromResult(new Metadata()), + () => Status.DefaultSuccess, + () => new Metadata(), + () => { } + ); + } + } + + private static IReadOnlyList CreateNAlignedWordPair(int numberOfAlignedWords) + { + var alignedWordPairs = new List(); + for (int i = 0; i < numberOfAlignedWords; i++) + { + alignedWordPairs.Add(new Shared.Models.AlignedWordPair { SourceIndex = i, TargetIndex = i }); + } + return alignedWordPairs; + } +} diff --git a/src/Serval/test/Serval.WordAlignment.Tests/Services/PlatformServiceTests.cs b/src/Serval/test/Serval.WordAlignment.Tests/Services/PlatformServiceTests.cs new file mode 100644 index 00000000..ba3b144f --- /dev/null +++ b/src/Serval/test/Serval.WordAlignment.Tests/Services/PlatformServiceTests.cs @@ -0,0 +1,172 @@ +using Serval.WordAlignment.V1; + +namespace Serval.WordAlignment.Services; 
+ +[TestFixture] +public class PlatformServiceTests +{ + [Test] + public async Task TestBuildStateTransitionsAsync() + { + var env = new TestEnvironment(); + await env.Engines.InsertAsync( + new Engine() + { + Id = "e0", + Owner = "owner1", + Type = "nmt", + SourceLanguage = "en", + TargetLanguage = "es", + ParallelCorpora = [] + } + ); + await env.Builds.InsertAsync(new Build() { Id = "b0", EngineRef = "e0" }); + await env.PlatformService.BuildStarted(new BuildStartedRequest() { BuildId = "b0" }, env.ServerCallContext); + Assert.That(env.Builds.Get("b0").State, Is.EqualTo(Shared.Contracts.JobState.Active)); + Assert.That(env.Engines.Get("e0").IsBuilding, Is.True); + + await env.PlatformService.BuildCanceled(new BuildCanceledRequest() { BuildId = "b0" }, env.ServerCallContext); + Assert.That(env.Builds.Get("b0").State, Is.EqualTo(Shared.Contracts.JobState.Canceled)); + Assert.That(env.Engines.Get("e0").IsBuilding, Is.False); + + await env.PlatformService.BuildRestarting( + new BuildRestartingRequest() { BuildId = "b0" }, + env.ServerCallContext + ); + Assert.That(env.Builds.Get("b0").State, Is.EqualTo(Shared.Contracts.JobState.Pending)); + Assert.That(env.Engines.Get("e0").IsBuilding, Is.False); + + Assert.That(env.WordAlignments.Count, Is.EqualTo(0)); + await env.PlatformService.InsertInferences(new MockAsyncStreamReader("e0"), env.ServerCallContext); + Assert.That(env.WordAlignments.Count, Is.EqualTo(1)); + + await env.PlatformService.BuildFaulted(new BuildFaultedRequest() { BuildId = "b0" }, env.ServerCallContext); + Assert.That(env.WordAlignments.Count, Is.EqualTo(0)); + Assert.That(env.Builds.Get("b0").State, Is.EqualTo(Shared.Contracts.JobState.Faulted)); + Assert.That(env.Engines.Get("e0").IsBuilding, Is.False); + + await env.PlatformService.BuildRestarting( + new BuildRestartingRequest() { BuildId = "b0" }, + env.ServerCallContext + ); + await env.PlatformService.InsertInferences(new MockAsyncStreamReader("e0"), env.ServerCallContext); + 
Assert.That(env.WordAlignments.Count, Is.EqualTo(1)); + await env.PlatformService.BuildCompleted(new BuildCompletedRequest() { BuildId = "b0" }, env.ServerCallContext); + Assert.That(env.WordAlignments.Count, Is.EqualTo(1)); + await env.PlatformService.BuildStarted(new BuildStartedRequest() { BuildId = "b0" }, env.ServerCallContext); + await env.PlatformService.InsertInferences(new MockAsyncStreamReader("e0"), env.ServerCallContext); + await env.PlatformService.BuildCompleted(new BuildCompletedRequest() { BuildId = "b0" }, env.ServerCallContext); + Assert.That(env.WordAlignments.Count, Is.EqualTo(1)); + } + + [Test] + public async Task UpdateBuildStatusAsync() + { + var env = new TestEnvironment(); + await env.Engines.InsertAsync( + new Engine() + { + Id = "e0", + Owner = "owner1", + Type = "nmt", + SourceLanguage = "en", + TargetLanguage = "es", + ParallelCorpora = [] + } + ); + await env.Builds.InsertAsync(new Build() { Id = "b0", EngineRef = "e0" }); + Assert.That(env.Builds.Get("b0").QueueDepth, Is.Null); + Assert.That(env.Builds.Get("b0").PercentCompleted, Is.Null); + await env.PlatformService.UpdateBuildStatus( + new UpdateBuildStatusRequest() + { + BuildId = "b0", + QueueDepth = 1, + PercentCompleted = 0.5 + }, + env.ServerCallContext + ); + Assert.That(env.Builds.Get("b0").QueueDepth, Is.EqualTo(1)); + Assert.That(env.Builds.Get("b0").PercentCompleted, Is.EqualTo(0.5)); + } + + [Test] + public async Task IncrementCorpusSizeAsync() + { + var env = new TestEnvironment(); + await env.Engines.InsertAsync( + new Engine() + { + Id = "e0", + Owner = "owner1", + Type = "nmt", + SourceLanguage = "en", + TargetLanguage = "es", + ParallelCorpora = [] + } + ); + Assert.That(env.Engines.Get("e0").CorpusSize, Is.EqualTo(0)); + await env.PlatformService.IncrementTrainEngineCorpusSize( + new IncrementTrainEngineCorpusSizeRequest() { EngineId = "e0", Count = 1 }, + env.ServerCallContext + ); + Assert.That(env.Engines.Get("e0").CorpusSize, Is.EqualTo(1)); + } + + private 
class TestEnvironment + { + public TestEnvironment() + { + Builds = new MemoryRepository(); + Engines = new MemoryRepository(); + WordAlignments = new MemoryRepository(); + DataAccessContext = Substitute.For(); + PublishEndpoint = Substitute.For(); + ServerCallContext = Substitute.For(); + + DataAccessContext + .WithTransactionAsync(Arg.Any>(), Arg.Any()) + .Returns(x => + { + return ((Func)x[0])((CancellationToken)x[1]); + }); + DataAccessContext + .WithTransactionAsync(Arg.Any>>(), Arg.Any()) + .Returns(x => + { + return ((Func)x[0])((CancellationToken)x[1]); + }); + + PlatformService = new WordAlignmentPlatformServiceV1( + Builds, + Engines, + WordAlignments, + DataAccessContext, + PublishEndpoint + ); + } + + public MemoryRepository Builds { get; } + public MemoryRepository Engines { get; } + public MemoryRepository WordAlignments { get; } + public IDataAccessContext DataAccessContext { get; } + public IPublishEndpoint PublishEndpoint { get; } + public ServerCallContext ServerCallContext { get; } + public WordAlignmentPlatformServiceV1 PlatformService { get; } + } + + private class MockAsyncStreamReader(string engineId) : IAsyncStreamReader + { + private bool _endOfStream = false; + + public string EngineId { get; } = engineId; + public InsertInferencesRequest Current => new() { EngineId = EngineId }; + + public Task MoveNext(CancellationToken cancellationToken) + { + var ret = Task.FromResult(!_endOfStream); + _endOfStream = true; + return ret; + } + } +} diff --git a/src/Serval/test/Serval.WordAlignment.Tests/Usings.cs b/src/Serval/test/Serval.WordAlignment.Tests/Usings.cs new file mode 100644 index 00000000..b5d83bc1 --- /dev/null +++ b/src/Serval/test/Serval.WordAlignment.Tests/Usings.cs @@ -0,0 +1,16 @@ +global using System.Text; +global using Grpc.Core; +global using Grpc.Net.ClientFactory; +global using MassTransit; +global using Microsoft.Extensions.Logging; +global using Microsoft.Extensions.Options; +global using NSubstitute; +global using 
NUnit.Framework; +global using Serval.Shared.Configuration; +global using Serval.Shared.Contracts; +global using Serval.Shared.Services; +global using Serval.Shared.Utils; +global using Serval.WordAlignment.Models; +global using SIL.DataAccess; +global using SIL.Machine.Corpora; +global using SIL.Scripture; diff --git a/src/ServiceToolkit/src/SIL.ServiceToolkit/Models/MonolingualCorpus.cs b/src/ServiceToolkit/src/SIL.ServiceToolkit/Models/MonolingualCorpus.cs index c0323727..0bb1ef84 100644 --- a/src/ServiceToolkit/src/SIL.ServiceToolkit/Models/MonolingualCorpus.cs +++ b/src/ServiceToolkit/src/SIL.ServiceToolkit/Models/MonolingualCorpus.cs @@ -7,6 +7,6 @@ public record MonolingualCorpus public required IReadOnlyList Files { get; set; } public HashSet? TrainOnTextIds { get; set; } public Dictionary>? TrainOnChapters { get; set; } - public HashSet? PretranslateTextIds { get; set; } - public Dictionary>? PretranslateChapters { get; set; } + public HashSet? InferenceTextIds { get; set; } + public Dictionary>? 
InferenceChapters { get; set; } } diff --git a/src/ServiceToolkit/src/SIL.ServiceToolkit/Services/ParallelCorpusPreprocessingService.cs b/src/ServiceToolkit/src/SIL.ServiceToolkit/Services/ParallelCorpusPreprocessingService.cs index a5fb70ac..b0cb42b6 100644 --- a/src/ServiceToolkit/src/SIL.ServiceToolkit/Services/ParallelCorpusPreprocessingService.cs +++ b/src/ServiceToolkit/src/SIL.ServiceToolkit/Services/ParallelCorpusPreprocessingService.cs @@ -142,15 +142,15 @@ ITextCorpus tc private static ITextCorpus FilterPretranslateCorpora(MonolingualCorpus corpus, ITextCorpus textCorpus) { textCorpus = textCorpus.Transform(CleanSegment); - if (corpus.PretranslateTextIds is not null) + if (corpus.InferenceTextIds is not null) { - return textCorpus.FilterTexts(corpus.PretranslateTextIds); + return textCorpus.FilterTexts(corpus.InferenceTextIds); } - if (corpus.PretranslateChapters is not null) + if (corpus.InferenceChapters is not null) { return textCorpus - .FilterTexts(corpus.PretranslateChapters.Keys) - .Where(row => row.Ref is not ScriptureRef sr || IsInChapters(sr, corpus.PretranslateChapters)); + .FilterTexts(corpus.InferenceChapters.Keys) + .Where(row => row.Ref is not ScriptureRef sr || IsInChapters(sr, corpus.InferenceChapters)); } return textCorpus; }