Add tokenizer to trainer #1301
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI workflow: builds the native sentencepiece4c library and the .NET projects,
# then packs the NuGet packages (pushing them only for "v*" tag refs).
name: CI Build

# Triggered by pushes to any branch and by pushes of tags starting with "v".
on:
  push:
    branches:
      - "**"
    tags:
      - "v*"

# Least privilege for GITHUB_TOKEN: the jobs only check out the repository.
# Publishing uses dedicated secrets, and artifact upload/download does not
# depend on these scopes.
permissions:
  contents: read
jobs:
  # Builds the native sentencepiece4c library on each OS in the matrix, uploads
  # it as an artifact, then restores, format-checks, builds and tests the .NET
  # solution and uploads coverage.
  build:
    name: Build on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      # Let the other OS finish even if one matrix entry fails.
      fail-fast: false
      matrix:
        # NOTE(review): GitHub has been retiring the ubuntu-20.04 hosted image;
        # confirm it is still available. Moving to a newer image presumably
        # raises the glibc baseline of the shipped .so - verify before bumping.
        os: [ubuntu-20.04, windows-latest]
        include:
          # Per-OS library file name, artifact name and CMake generator args.
          # The artifact names are referenced literally by create_package.
          - os: ubuntu-20.04
            sentencepiece4c-dll: libsentencepiece4c.so
            sentencepiece4c-artifact: sp4c-linux-x64
            cmake-args: "-G Ninja -DCMAKE_BUILD_TYPE=Release"
          - os: windows-latest
            sentencepiece4c-dll: sentencepiece4c.dll
            sentencepiece4c-artifact: sp4c-win-x64
            cmake-args: "-A x64"
    steps:
      - uses: actions/checkout@v4

      # NOTE(review): "@latest" is a moving ref; consider pinning to a release
      # tag or commit SHA for reproducible builds.
      - uses: lukka/get-cmake@latest

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: |
            8.0.x

      - name: Restore dotnet tools
        run: dotnet tool restore

      # Linux-only native dependencies.
      - name: Install libgoogle-perftools-dev
        if: matrix.os == 'ubuntu-20.04'
        run: |
          sudo apt-get update
          sudo apt-get install -y libunwind-dev
          sudo apt-get install -y libgoogle-perftools-dev

      # Configure and build only the sentencepiece4c target in Release mode.
      - name: sentencepiece4c build
        run: |
          cmake -S ${{ github.workspace }}/src/sentencepiece4c -B ${{ github.workspace }}/src/sentencepiece4c/build ${{ matrix.cmake-args }}
          cmake --build ${{ github.workspace }}/src/sentencepiece4c/build --config Release --target sentencepiece4c

      - name: Upload sentence4c artifact
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.sentencepiece4c-artifact }}
          path: ${{ github.workspace }}/src/sentencepiece4c/bin/${{ matrix.sentencepiece4c-dll }}
          # create_package downloads this artifact; fail here rather than
          # there if the library was not produced at the expected path.
          if-no-files-found: error

      - name: Restore dependencies
        run: dotnet restore

      - name: Check formatting
        run: dotnet csharpier --check .

      - name: Build
        run: dotnet build --no-restore -c Release

      - name: Test
        run: dotnet test --verbosity normal --collect:"Xplat Code Coverage"

      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v4
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

  # Collects both native libraries produced by the build job, packs every
  # project into artifacts/, uploads the packages and, for "v*" tag refs,
  # pushes them to nuget.org.
  create_package:
    name: Create NuGet package
    needs: build
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          dotnet-version: |
            8.0.x

      # Both native libraries are placed in the same bin directory.
      - uses: actions/download-artifact@v4
        with:
          name: sp4c-linux-x64
          path: ${{ github.workspace }}/src/sentencepiece4c/bin

      - uses: actions/download-artifact@v4
        with:
          name: sp4c-win-x64
          path: ${{ github.workspace }}/src/sentencepiece4c/bin

      - name: Pack
        run: |
          dotnet pack src\SIL.Machine\SIL.Machine.csproj -c Release -o artifacts
          dotnet pack src\SIL.Machine.Translation.Thot\SIL.Machine.Translation.Thot.csproj -c Release -o artifacts
          dotnet pack src\SIL.Machine.Morphology.HermitCrab\SIL.Machine.Morphology.HermitCrab.csproj -c Release -o artifacts
          dotnet pack src\SIL.Machine.Tool\SIL.Machine.Tool.csproj -c Release -o artifacts
          dotnet pack src\SIL.Machine.Morphology.HermitCrab.Tool\SIL.Machine.Morphology.HermitCrab.Tool.csproj -c Release -o artifacts
          dotnet pack src\SIL.Machine.Plugin\SIL.Machine.Plugin.csproj -c Release -o artifacts
          dotnet pack src\SIL.Machine.Tokenization.SentencePiece\SIL.Machine.Tokenization.SentencePiece.csproj -c Release -o artifacts
          dotnet pack src\SIL.Machine.Translation.TensorFlow\SIL.Machine.Translation.TensorFlow.csproj -c Release -o artifacts

      - name: Upload package
        uses: actions/upload-artifact@v4
        with:
          name: nuget-package
          path: artifacts/*.nupkg
          # An empty package set means Pack silently produced nothing.
          if-no-files-found: error

      # Publish only for refs that are "v*" tags.
      - name: Push
        if: startsWith(github.ref, 'refs/tags/v')
        shell: pwsh
        env:
          # Pass the key through the environment instead of interpolating it
          # into the script text.
          NUGET_API_KEY: ${{ secrets.NUGET_API_KEY }}
        run: dotnet nuget push artifacts\*.nupkg -n -k $env:NUGET_API_KEY -s https://api.nuget.org/v3/index.json