From 3bc920af9df07688313d937046356ce4699ae233 Mon Sep 17 00:00:00 2001
From: paul
Date: Thu, 2 Nov 2023 18:36:20 +0100
Subject: [PATCH] test: Add test for tokenizer call method

---
 laser_encoders/test_laser_tokenizer.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/laser_encoders/test_laser_tokenizer.py b/laser_encoders/test_laser_tokenizer.py
index 1155f8d2..1350c108 100644
--- a/laser_encoders/test_laser_tokenizer.py
+++ b/laser_encoders/test_laser_tokenizer.py
@@ -65,6 +65,25 @@ def test_tokenize(tokenizer, input_text: str):
     assert tokenizer.tokenize(input_text) == expected_output
 
 
+def test_tokenizer_call_method(tokenizer, input_text: str):
+    """Check that calling the tokenizer directly handles a string and a list.
+
+    ``input_text`` is not used by this test.
+    """
+    # A single string is expected to come back as one tokenized string.
+    single_string = "This is a test sentence."
+    expected_output = "▁this ▁is ▁a ▁test ▁sent ence ."
+    assert tokenizer(single_string) == expected_output
+
+    # A list of strings is expected to come back as a list, one entry per input.
+    list_of_strings = ["This is a test sentence.", "This is another test sentence."]
+    expected_output = [
+        "▁this ▁is ▁a ▁test ▁sent ence .",
+        "▁this ▁is ▁another ▁test ▁sent ence .",
+    ]
+    assert tokenizer(list_of_strings) == expected_output
+
+
 def test_normalization(tokenizer):
     test_data = "Hello!!! How are you??? I'm doing great."
     expected_output = "▁hel lo !!! ▁how ▁are ▁you ??? ▁i ' m ▁do ing ▁great ."