Skip to content

Commit

Permalink
test: added unittest with start_of_line=True and False
Browse files Browse the repository at this point in the history
  • Loading branch information
tomeras91 committed Jan 2, 2024
1 parent 13e0760 commit 7b7bef2
Showing 1 changed file with 19 additions and 0 deletions.
19 changes: 19 additions & 0 deletions tests/test_jurassic_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,22 @@ def test_tokenizer__convert_tokens_to_ids(
actual_ids = tokenizer.convert_tokens_to_ids(tokens)

assert actual_ids == expected_ids


@pytest.mark.parametrize(
    argnames=("tokens", "start_of_line", "expected_text"),
    argvalues=[
        pytest.param(
            [30671],
            True,
            "hello",
            id="when_start_of_line__should_return_no_leading_whitespace",
        ),
        pytest.param(
            [30671],
            False,
            " hello",
            id="when_not_start_of_line__should_return_leading_whitespace",
        ),
    ],
)
def test_tokenizer__decode_with_start_of_line(
    tokens: List[int],
    start_of_line: bool,
    expected_text: str,
    tokenizer: JurassicTokenizer,
):
    """Verify ``tokenizer.decode`` output for both values of ``start_of_line``.

    The same token id list is decoded twice: with ``start_of_line=True`` the
    expected text is ``"hello"``; with ``start_of_line=False`` the expected
    text is ``" hello"`` (one leading space).

    ``tokenizer`` is supplied from outside this function (presumably a pytest
    fixture defined elsewhere in the test suite).
    """
    decoded = tokenizer.decode(tokens, start_of_line=start_of_line)

    assert decoded == expected_text

0 comments on commit 7b7bef2

Please sign in to comment.