From 016fc1757526b916b0b70f1492af65b1e0dcda94 Mon Sep 17 00:00:00 2001 From: yanyiwu Date: Sun, 8 Dec 2024 17:26:28 +0800 Subject: [PATCH] Improve error logging for UTF-8 decoding failures across cppjieba components. Updated error messages in DictTrie, PosTagger, PreFilter, and SegmentBase to provide clearer context on the specific input causing the failure. This change enhances the debugging experience when handling UTF-8 encoded strings. --- include/cppjieba/DictTrie.hpp | 2 +- include/cppjieba/PosTagger.hpp | 2 +- include/cppjieba/PreFilter.hpp | 2 +- include/cppjieba/SegmentBase.hpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/cppjieba/DictTrie.hpp b/include/cppjieba/DictTrie.hpp index 3478db4..97b4643 100644 --- a/include/cppjieba/DictTrie.hpp +++ b/include/cppjieba/DictTrie.hpp @@ -198,7 +198,7 @@ class DictTrie { double weight, const string& tag) { if (!DecodeUTF8RunesInString(word, node_info.word)) { - XLOG(ERROR) << "Decode " << word << " failed."; + XLOG(ERROR) << "UTF-8 decode failed for dict word: " << word; return false; } node_info.weight = weight; diff --git a/include/cppjieba/PosTagger.hpp b/include/cppjieba/PosTagger.hpp index a6810b2..1586330 100644 --- a/include/cppjieba/PosTagger.hpp +++ b/include/cppjieba/PosTagger.hpp @@ -35,7 +35,7 @@ class PosTagger { const DictTrie * dict = segment.GetDictTrie(); assert(dict != NULL); if (!DecodeUTF8RunesInString(str, runes)) { - XLOG(ERROR) << "Decode failed."; + XLOG(ERROR) << "UTF-8 decode failed for word: " << str; return POS_X; } tmp = dict->Find(runes.begin(), runes.end()); diff --git a/include/cppjieba/PreFilter.hpp b/include/cppjieba/PreFilter.hpp index e73b9ab..deb750b 100644 --- a/include/cppjieba/PreFilter.hpp +++ b/include/cppjieba/PreFilter.hpp @@ -18,7 +18,7 @@ class PreFilter { const string& sentence) : symbols_(symbols) { if (!DecodeUTF8RunesInString(sentence, sentence_)) { - XLOG(ERROR) << "decode failed. "; + XLOG(ERROR) << "UTF-8 decode failed for input sentence"; } cursor_ = sentence_.begin(); } diff --git a/include/cppjieba/SegmentBase.hpp b/include/cppjieba/SegmentBase.hpp index 2885b83..130b212 100644 --- a/include/cppjieba/SegmentBase.hpp +++ b/include/cppjieba/SegmentBase.hpp @@ -26,7 +26,7 @@ class SegmentBase { symbols_.clear(); RuneStrArray runes; if (!DecodeUTF8RunesInString(s, runes)) { - XLOG(ERROR) << "decode " << s << " failed"; + XLOG(ERROR) << "UTF-8 decode failed for separators: " << s; return false; } for (size_t i = 0; i < runes.size(); i++) {