Skip to content

Commit

Permalink
Explicitly test for tokens that are empty strings.
Browse files (browse the repository at this point in the history)
  • Loading branch information
LTLA committed Nov 15, 2024
1 parent 482f6b9 commit 20dbb6a
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
4 changes: 4 additions & 0 deletions include/gesel/validate_database.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ inline void tokenize(uint64_t index, const std::string& text, std::unordered_map
inline void check_tokens(const std::vector<std::string>& tokens, const std::string& path) {
for (size_t t = 0, end = tokens.size(); t < end; ++t) {
const auto& token = tokens[t];
if (token.empty()) {
throw std::runtime_error("token should not be an empty string in '" + path + "' " + append_line_number(t));
}

for (auto x : token) {
if (invalid_token_character(x)) {
throw std::runtime_error("tokens should only contain lower-case alphabetical characters, digits or a dash in '" + path + "' " + append_line_number(t));
Expand Down
3 changes: 2 additions & 1 deletion tests/src/validate_database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
TEST(Tokenization, Generator) {
std::unordered_map<std::string, std::vector<uint64_t> > tokens_to_sets;
gesel::internal::tokenize(1, "aaron is awesome", tokens_to_sets);
gesel::internal::tokenize(2, "Aaron and Aaron", tokens_to_sets);
gesel::internal::tokenize(2, "Aaron and Aaron", tokens_to_sets); // throwing in some empty space to check that it doesn't get picked up.
gesel::internal::tokenize(5, "12345.4567890 is aaron", tokens_to_sets);

EXPECT_EQ(tokens_to_sets.size(), 6);
Expand All @@ -37,6 +37,7 @@ TEST(Tokenization, Checker) {
expect_error([&]() { gesel::internal::check_tokens(std::vector<std::string>{ "bravo", "alpha", "charlie" }, "foobar.tsv"); }, "sorted");
expect_error([&]() { gesel::internal::check_tokens(std::vector<std::string>{ "Alpha", "charlie" }, "foobar.tsv"); }, "alphabetical");
expect_error([&]() { gesel::internal::check_tokens(std::vector<std::string>{ "alpha bravo", "charlie" }, "foobar.tsv"); }, "alphabetical");
expect_error([&]() { gesel::internal::check_tokens(std::vector<std::string>{ "" }, "foobar.tsv"); }, "empty");
}

class TestValidateDatabase : public ::testing::Test {
Expand Down

0 comments on commit 20dbb6a

Please sign in to comment.