From 732812cdfb3f7d2331f4a90ff9e78eedbbe4bbd8 Mon Sep 17 00:00:00 2001
From: yanyiwu
Date: Sat, 21 Sep 2024 23:51:32 +0800
Subject: [PATCH] class Jieba: support default dictpath

---
 .github/workflows/cmake.yml |  4 +--
 include/cppjieba/Jieba.hpp  | 71 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 65 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index 018b4490..833882c7 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -48,5 +48,5 @@ jobs:
       working-directory: ${{github.workspace}}/build
       # Execute tests defined by the CMake configuration.
       # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
-      run: ctest -C ${{env.BUILD_TYPE}}
-
+      run: ctest -C ${{env.BUILD_TYPE}} --verbose
+      
\ No newline at end of file
diff --git a/include/cppjieba/Jieba.hpp b/include/cppjieba/Jieba.hpp
index 8475404c..01fea361 100644
--- a/include/cppjieba/Jieba.hpp
+++ b/include/cppjieba/Jieba.hpp
@@ -8,19 +8,25 @@ namespace cppjieba {
 
 class Jieba {
  public:
-  Jieba(const string& dict_path,
-        const string& model_path,
-        const string& user_dict_path,
-        const string& idfPath,
-        const string& stopWordPath)
-    : dict_trie_(dict_path, user_dict_path),
-      model_(model_path),
+  // Every path argument is optional. An empty string makes getPath() pick
+  // the file with the given default name from the "dict" directory it
+  // derives from this header's __FILE__, so the defaults refer to the
+  // source tree as it was laid out at compile time.
+  Jieba(const string& dict_path = "",
+        const string& model_path = "",
+        const string& user_dict_path = "",
+        const string& idf_path = "",
+        const string& stop_word_path = "")
+    : dict_trie_(getPath(dict_path, "jieba.dict.utf8"), getPath(user_dict_path, "user.dict.utf8")),
+      model_(getPath(model_path, "hmm_model.utf8")),
       mp_seg_(&dict_trie_),
       hmm_seg_(&model_),
       mix_seg_(&dict_trie_, &model_),
       full_seg_(&dict_trie_),
       query_seg_(&dict_trie_, &model_),
-      extractor(&dict_trie_, &model_, idfPath, stopWordPath) {
+      extractor(&dict_trie_, &model_,
+                getPath(idf_path, "idf.utf8"),
+                getPath(stop_word_path, "stop_words.utf8")) {
   }
   ~Jieba() {
   }
@@ -115,6 +121,55 @@ class Jieba {
   }
 
  private:
+  // Joins `dir` and `filename` with one path separator. An empty `dir`
+  // returns `filename` unchanged; a `dir` that already ends in '/' or '\\'
+  // gets no extra separator.
+  static string pathJoin(const string& dir, const string& filename) {
+    if (dir.empty()) {
+      return filename;
+    }
+
+    char last_char = dir[dir.length() - 1];
+    if (last_char == '/' || last_char == '\\') {
+      return dir + filename;
+    } else {
+      #ifdef _WIN32
+      return dir + '\\' + filename;
+      #else
+      return dir + '/' + filename;
+      #endif
+    }
+  }
+
+  // Returns `path` without its last component. A separator at index 0 is
+  // kept so the filesystem root stays "/"; a path with no '/' or '\\' at all
+  // has no directory part and yields "".
+  static string parentDirectory(const string& path) {
+    size_t pos = path.find_last_of("/\\");
+    if (pos == string::npos) {
+      return "";
+    }
+    return path.substr(0, pos == 0 ? 1 : pos);
+  }
+
+  // Directory part of this header's __FILE__ as the compiler spelled it.
+  static string getCurrentDirectory() {
+    return parentDirectory(__FILE__);
+  }
+
+  // Returns `path` unchanged when it is non-empty; otherwise returns
+  // <two levels above this header's directory>/dict/<default_file>.
+  static string getPath(const string& path, const string& default_file) {
+    if (path.empty()) {
+      // parentDirectory() runs out to "" instead of repeating its input, so
+      // a relative __FILE__ like "include/cppjieba/Jieba.hpp" resolves to
+      // "dict/<default_file>" and not "include/dict/<default_file>".
+      string grandparent_dir = parentDirectory(parentDirectory(getCurrentDirectory()));
+      return pathJoin(pathJoin(grandparent_dir, "dict"), default_file);
+    }
+    return path;
+  }
+
   DictTrie dict_trie_;
   HMMModel model_;
 