Skip to content

Commit

Permalink
test: Add test for tokenizer call method
Browse files Browse the repository at this point in the history
  • Loading branch information
Paulooh007 committed Nov 2, 2023
1 parent d0f7c2f commit 3bc920a
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions laser_encoders/test_laser_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,19 @@ def test_tokenize(tokenizer, input_text: str):
assert tokenizer.tokenize(input_text) == expected_output


def test_tokenizer_call_method(tokenizer, input_text: str):
    """Check that calling the tokenizer object directly tokenizes its input.

    Two call shapes are exercised: a single string, whose result is compared
    against a single tokenized string, and a list of strings, whose result is
    compared against a list of tokenized strings in the same order.

    Args:
        tokenizer: The callable under test (presumably supplied by a pytest
            fixture defined elsewhere in this file -- confirm).
        input_text: Not used by this test body; kept so the signature is
            unchanged.
    """
    first_sentence = "This is a test sentence."
    second_sentence = "This is another test sentence."
    first_tokens = "▁this ▁is ▁a ▁test ▁sent ence ."
    second_tokens = "▁this ▁is ▁another ▁test ▁sent ence ."

    # Single-string call: the result is compared against one string.
    assert tokenizer(first_sentence) == first_tokens

    # List call: the result is compared against a list, one entry per input.
    assert tokenizer([first_sentence, second_sentence]) == [
        first_tokens,
        second_tokens,
    ]


def test_normalization(tokenizer):
test_data = "Hello!!! How are you??? I'm doing great."
expected_output = "▁hel lo !!! ▁how ▁are ▁you ??? ▁i ' m ▁do ing ▁great ."
Expand Down

0 comments on commit 3bc920a

Please sign in to comment.