Skip to content

Commit

Permalink
Deduplicate kbts (#551)
Browse files: browse the repository at this point in the history
* Deduplicate kbts

* Switch to tuple
  • Loading branch information
Enkidu93 authored Dec 3, 2024
1 parent 28f3c44 commit aa7f31f
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ public async Task RunAsync_EnableKeyTerms()
Assert.That(src1Count, Is.EqualTo(14));
Assert.That(src2Count, Is.EqualTo(0));
Assert.That(trgCount, Is.EqualTo(1));
Assert.That(termCount, Is.EqualTo(166));
Assert.That(termCount, Is.EqualTo(144));
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,11 @@ public async Task PreprocessAsync(
IParallelTextCorpus parallelKeyTermsCorpus = sourceTermCorpora
.ChooseRandom(Seed)
.AlignRows(targetTermCorpora.ChooseFirst());
foreach (ParallelTextRow row in parallelKeyTermsCorpus)
foreach (
ParallelTextRow row in parallelKeyTermsCorpus.DistinctBy(row =>
(row.SourceText, row.TargetText)
)
)
{
await train(new Row(row.TextId, row.Refs, row.SourceText, row.TargetText, 1));
}
Expand Down

0 comments on commit aa7f31f

Please sign in to comment.