From dc6a8ba47910d9e64a3112419ec9698364ef48e3 Mon Sep 17 00:00:00 2001
From: Jiahao Li
Date: Sun, 29 Oct 2023 21:42:06 +0800
Subject: [PATCH] Support ChatGLM3 (#158)

---
 .gitignore                 |   1 +
 README.md                  |  30 ++++++++--
 chatglm.cpp                | 116 ++++++++++++++++++++++++++++++-------
 chatglm.h                  |  98 ++++++++++++++++++++-----------
 chatglm_cpp/__init__.py    |   2 +-
 chatglm_cpp/convert.py     |  10 +++-
 chatglm_pybind.cpp         |   9 ++-
 chatglm_test.cpp           |  92 +++++++++++++++++++++++++++++
 examples/cli_chat.py       |   4 +-
 main.cpp                   |   4 +-
 tests/data/glm3_model.data | Bin 0 -> 30548 bytes
 tests/test_chatglm_cpp.py  |  19 ++++++
 tests/test_convert.py      |  74 ++++++++++++++++++++++-
 13 files changed, 392 insertions(+), 67 deletions(-)
 create mode 100644 tests/data/glm3_model.data

diff --git a/.gitignore b/.gitignore
index f3961fd..47826ac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,7 @@ __pycache__/
 *.egg-info/
 dist/
 *.so
+.hypothesis/
 
 # cpp
 build/

diff --git a/README.md b/README.md
index 8c0b4d3..e7bb168 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
 ![Python](https://img.shields.io/pypi/pyversions/chatglm-cpp)
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue)](LICENSE)
 
-C++ implementation of [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) and [ChatGLM2-6B](https://github.com/THUDM/ChatGLM2-6B) and more LLMs for real-time chatting on your MacBook.
+C++ implementation of [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B), [ChatGLM2-6B](https://github.com/THUDM/ChatGLM2-6B), [ChatGLM3-6B](https://github.com/THUDM/ChatGLM3) and more LLMs for real-time chatting on your MacBook.
 
 ![demo](docs/demo.gif)
 
@@ -21,7 +21,7 @@ Highlights:
 Support Matrix:
 * Hardware: x86/arm CPU, NVIDIA GPU, Apple Silicon GPU
 * Platforms: Linux, MacOS, Windows
-* Models: [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B), [ChatGLM2-6B](https://github.com/THUDM/ChatGLM2-6B), [CodeGeeX2](https://github.com/THUDM/CodeGeeX2), [Baichuan-13B](https://github.com/baichuan-inc/Baichuan-13B), [Baichuan-7B](https://github.com/baichuan-inc/Baichuan-7B), [Baichuan-13B](https://github.com/baichuan-inc/Baichuan-13B), [Baichuan2](https://github.com/baichuan-inc/Baichuan2), [InternLM](https://github.com/InternLM/InternLM)
+* Models: [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B), [ChatGLM2-6B](https://github.com/THUDM/ChatGLM2-6B), [ChatGLM3-6B](https://github.com/THUDM/ChatGLM3), [CodeGeeX2](https://github.com/THUDM/CodeGeeX2), [Baichuan-7B](https://github.com/baichuan-inc/Baichuan-7B), [Baichuan-13B](https://github.com/baichuan-inc/Baichuan-13B), [Baichuan2](https://github.com/baichuan-inc/Baichuan2), [InternLM](https://github.com/InternLM/InternLM)
 
 ## Getting Started
 
@@ -45,7 +45,7 @@ python3 -m pip install -U pip
 python3 -m pip install torch tabulate tqdm transformers accelerate sentencepiece
 ```
 
-Use `convert.py` to transform ChatGLM-6B or ChatGLM2-6B into quantized GGML format. For example, to convert the fp16 original model to q4_0 (quantized int4) GGML model, run:
+Use `convert.py` to transform ChatGLM-6B into quantized GGML format. For example, to convert the fp16 original model to q4_0 (quantized int4) GGML model, run:
 ```sh
 python3 chatglm_cpp/convert.py -i THUDM/chatglm-6b -t q4_0 -o chatglm-ggml.bin
 ```
 
@@ -53,6 +53,7 @@ python3 chatglm_cpp/convert.py -i THUDM/chatglm-6b -t q4_0 -o chatglm-ggml.bin
 The original model (`-i <model_name_or_path>`) can be a HuggingFace model name or a local path to your pre-downloaded model.
 Currently supported models are:
 * ChatGLM-6B: `THUDM/chatglm-6b`, `THUDM/chatglm-6b-int8`, `THUDM/chatglm-6b-int4`
 * ChatGLM2-6B: `THUDM/chatglm2-6b`, `THUDM/chatglm2-6b-int4`
+* ChatGLM3-6B: `THUDM/chatglm3-6b`
 * CodeGeeX2: `THUDM/codegeex2-6b`, `THUDM/codegeex2-6b-int4`
 * Baichuan & Baichuan2: `baichuan-inc/Baichuan-13B-Chat`, `baichuan-inc/Baichuan2-7B-Chat`, `baichuan-inc/Baichuan2-13B-Chat`
 
@@ -101,6 +102,16 @@ python3 chatglm_cpp/convert.py -i THUDM/chatglm2-6b -t q4_0 -o chatglm2-ggml.bin
 ```
 
+
+ChatGLM3-6B
+
+```sh
+python3 chatglm_cpp/convert.py -i THUDM/chatglm3-6b -t q4_0 -o chatglm3-ggml.bin
+./build/bin/main -m chatglm3-ggml.bin -p 你好 --top_p 0.8 --temp 0.8
+# 你好👋!我是人工智能助手 ChatGLM3-6B,很高兴见到你,欢迎问我任何问题。
+```
+
+
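Conversion note: ChatGLM2 and ChatGLM3 checkpoints both report the HuggingFace `model_type: "chatglm"` and the same multi-query-attention layout, so `convert.py` (see its hunk later in this patch) cannot tell them apart by architecture alone and keys off `config.seq_length` instead. A minimal sketch of that dispatch, where the claim that released ChatGLM3-6B configs carry a `seq_length` other than 32768 is an assumption about the upstream checkpoints:

```python
def pick_converter(config):
    # Original ChatGLM-6B does not expose multi_query_attention.
    if not hasattr(config, "multi_query_attention"):
        return "ChatGLMConverter"
    # ChatGLM2-6B ships seq_length == 32768; anything else is treated as ChatGLM3.
    return "ChatGLM2Converter" if config.seq_length == 32768 else "ChatGLM3Converter"
```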
 CodeGeeX2
 
@@ -272,6 +283,15 @@ python3 web_demo.py -m ../chatglm2-ggml.bin --temp 0.8 --top_p 0.8  # web demo
 ```
 
+
+ChatGLM3-6B
+
+```sh
+python3 cli_chat.py -m ../chatglm3-ggml.bin -p 你好 --temp 0.8 --top_p 0.8  # CLI demo
+python3 web_demo.py -m ../chatglm3-ggml.bin --temp 0.8 --top_p 0.8  # web demo
+```
+
+
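The converted `chatglm3-ggml.bin` can also be driven directly from the Python bindings instead of the demo scripts; a minimal sketch, with method names matching what `tests/test_chatglm_cpp.py` exercises later in this patch:

```python
import chatglm_cpp

pipeline = chatglm_cpp.Pipeline("../chatglm3-ggml.bin")

# Greedy decoding gives a reproducible reply.
print(pipeline.chat(["你好"], do_sample=False))

# stream_chat yields the reply piece by piece as tokens are generated.
for piece in pipeline.stream_chat(["你好"], do_sample=False):
    print(piece, sep="", end="", flush=True)
```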
 CodeGeeX2
 
@@ -473,7 +493,7 @@ ChatGLM-6B:
 | file size                      | 3.3G  | 3.7G  | 4.0G  | 4.4G  | 6.2G  | 12G   |
 | mem usage                      | 4.0G  | 4.4G  | 4.7G  | 5.1G  | 6.9G  | 13G   |
 
-ChatGLM2-6B / CodeGeeX2:
+ChatGLM2-6B / ChatGLM3-6B / CodeGeeX2:
 
 |                                | Q4_0  | Q4_1  | Q5_0  | Q5_1  | Q8_0  | F16   |
 |--------------------------------|-------|-------|-------|-------|-------|-------|
 
@@ -548,4 +568,4 @@ This will print timing for each graph operation when running the model.
 ## Acknowledgements
 
 * This project is greatly inspired by [@ggerganov](https://github.com/ggerganov)'s [llama.cpp](https://github.com/ggerganov/llama.cpp) and is based on his NN library [ggml](https://github.com/ggerganov/ggml).
-* Thank [@THUDM](https://github.com/THUDM) for the amazing [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) and [ChatGLM2-6B](https://github.com/THUDM/ChatGLM2-6B) and for releasing the model sources and checkpoints.
+* Thank [@THUDM](https://github.com/THUDM) for the amazing [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B), [ChatGLM2-6B](https://github.com/THUDM/ChatGLM2-6B) and [ChatGLM3-6B](https://github.com/THUDM/ChatGLM3) and for releasing the model sources and checkpoints.

diff --git a/chatglm.cpp b/chatglm.cpp
index a0a3c9c..0fdcdd2 100644
--- a/chatglm.cpp
+++ b/chatglm.cpp
@@ -422,6 +422,8 @@ std::string to_string(ModelType model_type) {
         return "ChatGLM";
     case MODEL_TYPE_CHATGLM2:
         return "ChatGLM2";
+    case MODEL_TYPE_CHATGLM3:
+        return "ChatGLM3";
     case MODEL_TYPE_BAICHUAN7B:
         return "Baichuan7B";
     case MODEL_TYPE_BAICHUAN13B:
@@ -433,9 +435,8 @@ std::string to_string(ModelType model_type) {
     }
 }
 
-BaseModelForCausalLM::BaseModelForCausalLM(ModelType model_type, ModelConfig config, size_t mem_size,
-                                           size_t scratch_size, size_t num_weights)
-    : model_type_(model_type), config(config) {
+BaseModelForCausalLM::BaseModelForCausalLM(ModelConfig config, size_t mem_size, size_t scratch_size, size_t num_weights)
+    : config(config) {
     ctx_.dtype = config.dtype;
     const size_t ctx_w_size = num_weights * ggml_tensor_overhead();
     const size_t ctx_kv_size = 2 * config.num_hidden_layers *
@@ -821,7 +822,7 @@ ggml_tensor *GLMBlock::forward(ModelContext *ctx, ggml_tensor *hidden_states, gg
 }
 
 ChatGLMForCausalLM::ChatGLMForCausalLM(const ModelConfig &config)
-    : BasicModelForCausalLM(MODEL_TYPE_CHATGLM, config, MEM_SIZE, SCRATCH_SIZE, num_weights(config.num_hidden_layers)) {
+    : BasicModelForCausalLM(config, MEM_SIZE, SCRATCH_SIZE, num_weights(config.num_hidden_layers)) {
     state_dict_ = state_dict();
 }
 
@@ -933,8 +934,7 @@ bool ChatGLM2Tokenizer::is_special_id(int id) const {
 }
 
 ChatGLM2ForCausalLM::ChatGLM2ForCausalLM(const ModelConfig &config)
-    : BasicModelForCausalLM(MODEL_TYPE_CHATGLM2, config, MEM_SIZE, SCRATCH_SIZE,
-                            num_weights(config.num_hidden_layers)) {
+    : BasicModelForCausalLM(config, MEM_SIZE, SCRATCH_SIZE, num_weights(config.num_hidden_layers)) {
     state_dict_ = state_dict();
 }
 
@@ -998,6 +998,79 @@ StateDict ChatGLM2ForCausalLM::state_dict() const {
     return sd;
 }
 
+// ===== ChatGLM3-6B =====
+
+ChatGLM3Tokenizer::ChatGLM3Tokenizer(std::string_view serialized_model_proto) {
+    const auto status = sp.LoadFromSerializedProto(serialized_model_proto);
+    CHATGLM_CHECK(status.ok()) << status.ToString();
+
+    int special_id = sp.GetPieceSize();
+    mask_token_id = special_id++;
+    gmask_token_id = special_id++;
+    smask_token_id = special_id++;
+    sop_token_id = special_id++;
+    eop_token_id = special_id++;
+    system_token_id = special_id++;
+    user_token_id = special_id++;
+    assistant_token_id = special_id++;
+    observation_token_id = special_id++;
+}
+
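The constructor above assigns the nine special tokens consecutive IDs immediately after the sentencepiece vocabulary. A Python sketch of the resulting layout; the base size 64789 is an assumption about the `THUDM/chatglm3-6b` tokenizer, chosen because it reproduces the IDs the pipeline test later in this patch expects (e.g. `[gMASK]` = 64790, `sop` = 64792):

```python
piece_size = 64789  # assumed sp.GetPieceSize() for chatglm3-6b
names = ["mask", "gmask", "smask", "sop", "eop",
         "system", "user", "assistant", "observation"]
special_ids = {name: piece_size + i for i, name in enumerate(names)}
assert special_ids["gmask"] == 64790 and special_ids["sop"] == 64792
assert special_ids["user"] == 64795 and special_ids["assistant"] == 64796
```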
+std::vector<int> ChatGLM3Tokenizer::encode(const std::string &text, int max_length) const {
+    std::vector<int> ids;
+    sp.Encode(text, &ids);
+    ids.insert(ids.begin(), {gmask_token_id, sop_token_id}); // special prefix
+    truncate(ids, max_length);
+    return ids;
+}
+
+std::string ChatGLM3Tokenizer::decode(const std::vector<int> &ids) const {
+    // filter out special tokens
+    std::vector<int> normal_ids(ids);
+    normal_ids.erase(std::remove_if(normal_ids.begin(), normal_ids.end(), [this](int id) { return is_special_id(id); }),
+                     normal_ids.end());
+
+    std::string text;
+    sp.Decode(normal_ids, &text);
+    text = replace_punctuations(text);
+    return text;
+}
+
+std::vector<int> ChatGLM3Tokenizer::encode_history(const std::vector<std::string> &history, int max_length) const {
+    // TODO: need a new api for system / tools / metadata prompt
+    std::vector<int> newline_ids;
+    sp.Encode("\n", &newline_ids);
+    std::vector<int> input_ids{gmask_token_id, sop_token_id};
+    for (size_t i = 0; i < history.size(); i++) {
+        // TODO: support all roles
+        input_ids.emplace_back((i % 2 == 0) ? user_token_id : assistant_token_id);
+        // TODO: support metadata
+        input_ids.insert(input_ids.end(), newline_ids.begin(), newline_ids.end());
+        std::vector<int> content_ids;
+        sp.Encode(history[i], &content_ids);
+        input_ids.insert(input_ids.end(), content_ids.begin(), content_ids.end());
+    }
+    input_ids.emplace_back(assistant_token_id);
+    // NOTE: push '\n' into input_ids to avoid model generating it, saving 2 tokens
+    input_ids.insert(input_ids.end(), newline_ids.begin(), newline_ids.end());
+    truncate(input_ids, max_length);
+    return input_ids;
+}
+
+bool ChatGLM3Tokenizer::is_special_id(int id) const {
+    return id == mask_token_id || id == gmask_token_id || id == smask_token_id || id == sop_token_id ||
+           id == eop_token_id || id == system_token_id || id == user_token_id || id == assistant_token_id ||
+           id == observation_token_id;
+}
+
+void ChatGLM3Tokenizer::truncate(std::vector<int> &ids, int max_length) {
+    if ((int)ids.size() > max_length) {
+        // sliding window: drop the least recent history while keeping the two special prefix tokens
+        int num_drop = (int)ids.size() - max_length;
+        ids.erase(ids.begin() + 2, ids.begin() + 2 + num_drop);
+    }
+}
+
 // ===== Baichuan =====
 
 BaichuanTokenizer::BaichuanTokenizer(std::string_view serialized_model_proto) {
@@ -1055,8 +1128,7 @@ void BaichuanTokenizer::truncate(std::vector<int> &ids, int max_length) {
 // ===== Baichuan-7B =====
 
 Baichuan7BForCausalLM::Baichuan7BForCausalLM(const ModelConfig &config)
-    : BasicModelForCausalLM(MODEL_TYPE_BAICHUAN7B, config, MEM_SIZE, SCRATCH_SIZE,
-                            num_weights(config.num_hidden_layers)) {
+    : BasicModelForCausalLM(config, MEM_SIZE, SCRATCH_SIZE, num_weights(config.num_hidden_layers)) {
     state_dict_ = state_dict();
 }
 
@@ -1097,8 +1169,7 @@ StateDict Baichuan7BForCausalLM::state_dict() const {
 // ===== Baichuan-13B =====
 
 Baichuan13BForCausalLM::Baichuan13BForCausalLM(const ModelConfig &config)
-    : BasicModelForCausalLM(MODEL_TYPE_BAICHUAN13B, config, MEM_SIZE, SCRATCH_SIZE,
-                            num_weights(config.num_hidden_layers)) {
+    : BasicModelForCausalLM(config, MEM_SIZE, SCRATCH_SIZE, num_weights(config.num_hidden_layers)) {
     state_dict_ = state_dict();
 }
 
@@ -1192,8 +1263,7 @@ std::string InternLMTokenizer::build_prompt(const std::vector<std::string> &hist
 template <typename InternLMModel>
 InternLMForCausalLM<InternLMModel>::InternLMForCausalLM(const ModelConfig &config)
-    : BasicModelForCausalLM<InternLMModel>(MODEL_TYPE_INTERNLM, config, MEM_SIZE, SCRATCH_SIZE,
-                                           num_weights(config.num_hidden_layers)) {
+    : BasicModelForCausalLM<InternLMModel>(config, MEM_SIZE, SCRATCH_SIZE, num_weights(config.num_hidden_layers)) {
     this->state_dict_ = state_dict();
 }
 
@@ -1258,7 +1328,7 @@ Pipeline::Pipeline(const std::string &path) {
         CHATGLM_CHECK(version == 1) << "only support version 1 for now but got " << version;
 
         // load config
-        ModelConfig config(loader.read_basic<ConfigRecordV1>());
+        ModelConfig config(model_type, loader.read_basic<ConfigRecordV1>());
 
         // load tokenizer
         int proto_size = loader.read_basic<int>();
@@ -1269,26 +1339,32 @@ Pipeline::Pipeline(const std::string &path) {
 
         // load model
         model = std::make_unique<ChatGLMForCausalLM>(config);
         model->load(loader);
-    } else if (model_type == MODEL_TYPE_CHATGLM2) {
+    } else if (model_type == MODEL_TYPE_CHATGLM2 || model_type == MODEL_TYPE_CHATGLM3) {
         CHATGLM_CHECK(version == 1) << "only support version 1 for now but got " << version;
 
         // load config
-        ModelConfig config(loader.read_basic<ConfigRecordV2>());
+        ModelConfig config(model_type, loader.read_basic<ConfigRecordV2>());
 
         // load tokenizer
         int proto_size = loader.read_basic<int>();
         std::string_view serialized_model_proto((char *)mapped_file->data + loader.tell(), proto_size);
         loader.seek(proto_size, SEEK_CUR);
-        tokenizer = std::make_unique<ChatGLM2Tokenizer>(serialized_model_proto);
+
+        if (model_type == MODEL_TYPE_CHATGLM2) {
+            tokenizer = std::make_unique<ChatGLM2Tokenizer>(serialized_model_proto);
+            model = std::make_unique<ChatGLM2ForCausalLM>(config);
+        } else {
+            tokenizer = std::make_unique<ChatGLM3Tokenizer>(serialized_model_proto);
+            model = std::make_unique<ChatGLM3ForCausalLM>(config);
+        }
 
         // load model
-        model = std::make_unique<ChatGLM2ForCausalLM>(config);
         model->load(loader);
     } else if (model_type == MODEL_TYPE_BAICHUAN7B) {
         CHATGLM_CHECK(version == 1) << "only support version 1 for now but got " << version;
 
         // load config
-        ModelConfig config(loader.read_basic<ConfigRecordV1>());
+        ModelConfig config(model_type, loader.read_basic<ConfigRecordV1>());
         config.norm_eps = 1e-6;
 
         // load tokenizer
@@ -1304,7 +1380,7 @@ Pipeline::Pipeline(const std::string &path) {
         CHATGLM_CHECK(version == 1) << "only support version 1 for now but got " << version;
 
         // load config
-        ModelConfig config(loader.read_basic<ConfigRecordV1>());
+        ModelConfig config(model_type, loader.read_basic<ConfigRecordV1>());
         config.norm_eps = 1e-6;
 
         // load tokenizer
@@ -1320,7 +1396,7 @@ Pipeline::Pipeline(const std::string &path) {
         CHATGLM_CHECK(version == 1) << "only support version 1 for now but got " << version;
 
         // load config
-        ModelConfig config(loader.read_basic<ConfigRecordV1>());
+        ModelConfig config(model_type, loader.read_basic<ConfigRecordV1>());
         config.norm_eps = 1e-6;
 
         // load tokenizer

diff --git a/chatglm.h b/chatglm.h
index 59ba9df..cf3562b 100644
--- a/chatglm.h
+++ b/chatglm.h
@@ -46,6 +46,17 @@ ggml_tensor *tensor_to_device(ggml_tensor *tensor);
 
 ggml_tensor *tensor_to_cpu(ggml_tensor *tensor);
 
+enum ModelType {
+    MODEL_TYPE_CHATGLM = 1,
+    MODEL_TYPE_CHATGLM2 = 2,
+    MODEL_TYPE_CHATGLM3 = 3,
+    MODEL_TYPE_BAICHUAN7B = 1024,
+    MODEL_TYPE_BAICHUAN13B = 1025,
+    MODEL_TYPE_INTERNLM = 1280,
+};
+
+std::string to_string(ModelType model_type);
+
 // For compatibility
 struct ConfigRecordV1 {
     // common attributes
@@ -74,25 +85,28 @@ class ModelConfig {
   public:
     ModelConfig() = default;
 
-    ModelConfig(ggml_type dtype, int vocab_size, int hidden_size, int num_attention_heads, int num_kv_heads,
-                int num_hidden_layers, int intermediate_size, float norm_eps, int max_length, int bos_token_id,
-                int eos_token_id, int pad_token_id, int sep_token_id)
-        : dtype(dtype), vocab_size(vocab_size), hidden_size(hidden_size), num_attention_heads(num_attention_heads),
-          num_kv_heads(num_kv_heads), num_hidden_layers(num_hidden_layers), intermediate_size(intermediate_size),
-          norm_eps(norm_eps), max_length(max_length), bos_token_id(bos_token_id), eos_token_id(eos_token_id),
-          pad_token_id(pad_token_id), sep_token_id(sep_token_id) {}
-
-    ModelConfig(const ConfigRecordV1 &rec)
-        : ModelConfig(rec.dtype, rec.vocab_size, rec.hidden_size, rec.num_attention_heads, rec.num_attention_heads,
+    ModelConfig(ModelType model_type, ggml_type dtype, int vocab_size, int hidden_size, int num_attention_heads,
+                int num_kv_heads, int num_hidden_layers, int intermediate_size, float norm_eps, int max_length,
+                int bos_token_id, int eos_token_id, int pad_token_id, int sep_token_id)
+        : model_type(model_type), dtype(dtype), vocab_size(vocab_size), hidden_size(hidden_size),
+          num_attention_heads(num_attention_heads), num_kv_heads(num_kv_heads), num_hidden_layers(num_hidden_layers),
+          intermediate_size(intermediate_size), norm_eps(norm_eps), max_length(max_length), bos_token_id(bos_token_id),
+          eos_token_id(eos_token_id), pad_token_id(pad_token_id), sep_token_id(sep_token_id) {}
+
+    ModelConfig(ModelType model_type, const ConfigRecordV1 &rec)
+        : ModelConfig(model_type, rec.dtype, rec.vocab_size, rec.hidden_size, rec.num_attention_heads,
+                      rec.num_attention_heads, rec.num_hidden_layers, rec.intermediate_size, 1e-5, rec.max_length,
+                      rec.bos_token_id, rec.eos_token_id, rec.pad_token_id, rec.sep_token_id) {}
+
+    ModelConfig(ModelType model_type, const ConfigRecordV2 &rec)
+        : ModelConfig(model_type, rec.dtype, rec.vocab_size, rec.hidden_size, rec.num_attention_heads, rec.num_kv_heads,
                       rec.num_hidden_layers, rec.intermediate_size, 1e-5, rec.max_length, rec.bos_token_id,
                       rec.eos_token_id, rec.pad_token_id, rec.sep_token_id) {}
 
-    ModelConfig(const ConfigRecordV2 &rec)
-        : ModelConfig(rec.dtype, rec.vocab_size, rec.hidden_size, rec.num_attention_heads, rec.num_kv_heads,
-                      rec.num_hidden_layers, rec.intermediate_size, 1e-5, rec.max_length, rec.bos_token_id,
-                      rec.eos_token_id, rec.pad_token_id, rec.sep_token_id) {}
+    std::string model_type_name() const { return to_string(model_type); }
 
   public:
+    ModelType model_type;
     ggml_type dtype;
     int vocab_size;
     int hidden_size;
@@ -734,19 +748,9 @@ struct GenerationConfig {
           top_p(top_p), temperature(temperature), repetition_penalty(repetition_penalty), num_threads(num_threads) {}
 };
 
-enum ModelType {
-    MODEL_TYPE_CHATGLM = 1,
-    MODEL_TYPE_CHATGLM2 = 2,
-    MODEL_TYPE_BAICHUAN7B = 1024,
-    MODEL_TYPE_BAICHUAN13B = 1025,
-    MODEL_TYPE_INTERNLM = 1280,
-};
-
 int get_num_physical_cores();
 int get_default_num_threads();
 
-std::string to_string(ModelType model_type);
-
 struct TokenIdScore {
     int id;
     float score;
@@ -764,16 +768,12 @@ struct TokenIdScore {
 
 class BaseModelForCausalLM {
   public:
-    BaseModelForCausalLM(ModelType model_type, ModelConfig config, size_t mem_size, size_t scratch_size,
-                         size_t num_weights);
+    BaseModelForCausalLM(ModelConfig config, size_t mem_size, size_t scratch_size, size_t num_weights);
     virtual ~BaseModelForCausalLM() = default;
 
     virtual void load(ModelLoader &loader) = 0;
     virtual ggml_tensor *forward(ModelContext *ctx, ggml_tensor *input_ids, int n_past, int n_ctx) const = 0;
 
-    ModelType type() const { return model_type_; }
-    std::string type_name() const { return to_string(model_type_); }
-
     std::vector<int> generate(const std::vector<int> &input_ids, const GenerationConfig &gen_config,
                               BaseStreamer *streamer = nullptr);
 
@@ -791,7 +791,6 @@ class BaseModelForCausalLM {
     static void sampling_softmax_inplace(TokenIdScore *first, TokenIdScore *last);
 
   protected:
-    ModelType model_type_;
     ModelContext ctx_;
 
   public:
@@ -803,9 +802,8 @@ using StateDict = std::vector<std::pair<std::string, ggml_tensor *>>;
 template <typename Model>
 class BasicModelForCausalLM : public BaseModelForCausalLM {
   protected:
-    BasicModelForCausalLM(ModelType model_type, const ModelConfig &config, size_t mem_size, size_t scratch_size,
-                          size_t num_weights)
-        : BaseModelForCausalLM(model_type, config, mem_size, scratch_size, num_weights), transformer(&ctx_, config),
+    BasicModelForCausalLM(const ModelConfig &config, size_t mem_size, size_t scratch_size, size_t num_weights)
+        : BaseModelForCausalLM(config, mem_size, scratch_size, num_weights), transformer(&ctx_, config),
           lm_head(&ctx_, config.hidden_size, config.vocab_size, false) {
         CHATGLM_CHECK(ggml_used_mem(ctx_.ctx_w.get()) == ggml_get_mem_size(ctx_.ctx_w.get()))
             << "corrupted model weights";
@@ -983,6 +981,40 @@ class ChatGLM2ForCausalLM : public BasicModelForCausalLM<ChatGLM2Model> {
     static constexpr size_t SCRATCH_SIZE = 1280 * MB; // 2k context
 };
 
+// ===== ChatGLM3-6B =====
+
+class ChatGLM3Tokenizer : public BaseTokenizer {
+  public:
+    ChatGLM3Tokenizer(std::string_view serialized_model_proto);
+
+    std::vector<int> encode(const std::string &text, int max_length) const override;
+
+    std::string decode(const std::vector<int> &ids) const override;
+
+    std::vector<int> encode_history(const std::vector<std::string> &history, int max_length) const override;
+
+    bool is_special_id(int id) const;
+
+  protected:
+    static void truncate(std::vector<int> &ids, int max_length);
+
+  public:
+    sentencepiece::SentencePieceProcessor sp;
+    int mask_token_id;
+    int gmask_token_id;
+    int smask_token_id;
+    int sop_token_id;
+    int eop_token_id;
+    int system_token_id;
+    int user_token_id;
+    int assistant_token_id;
+    int observation_token_id;
+};
+
+using ChatGLM3Model = ChatGLM2Model;
+
+using ChatGLM3ForCausalLM = ChatGLM2ForCausalLM;
+
 // ===== Baichuan =====
 
 class BaichuanTokenizer : public BaseTokenizer {

diff --git a/chatglm_cpp/__init__.py b/chatglm_cpp/__init__.py
index f0f7696..a1dc183 100644
--- a/chatglm_cpp/__init__.py
+++ b/chatglm_cpp/__init__.py
@@ -5,7 +5,7 @@
 
 import chatglm_cpp._C as _C
 
-__version__ = "0.2.9"
+__version__ = "0.2.10"
 
 
 class Pipeline(_C.Pipeline):

diff --git a/chatglm_cpp/convert.py b/chatglm_cpp/convert.py
index d06d3e9..dd5bea2 100644
--- a/chatglm_cpp/convert.py
+++ b/chatglm_cpp/convert.py
@@ -41,6 +41,7 @@ class GGMLType(Enum):
 class ModelType(Enum):
     CHATGLM = 1
     CHATGLM2 = 2
+    CHATGLM3 = 3
     BAICHUAN7B = 1024
     BAICHUAN13B = 1025
     INTERNLM = 1280
@@ -324,6 +325,10 @@ def dump_model(f, model, ggml_type):
     dump_state_dict(f, weight_names, model.state_dict(), model.config.quantization_bit, ggml_type)
 
 
+class ChatGLM3Converter(ChatGLM2Converter):
+    MODEL_TYPE = ModelType.CHATGLM3
+
+
 class BaichuanConverter(BaseConverter):
     @staticmethod
     def dump_config(f, config, ggml_type):
@@ -481,7 +486,10 @@ def convert(f: BinaryIO, model_name_or_path: str, lora_model_name_or_path: Optio
 
     if model.config.model_type == "chatglm":
         if hasattr(model.config, "multi_query_attention"):
-            ChatGLM2Converter.convert(f, model, tokenizer, ggml_type)
+            if model.config.seq_length == 32768:
+                ChatGLM2Converter.convert(f, model, tokenizer, ggml_type)
+            else:
+                ChatGLM3Converter.convert(f, model, tokenizer, ggml_type)
         else:
             ChatGLMConverter.convert(f, model, tokenizer, ggml_type)
     elif model.config.model_type == "baichuan":

diff --git a/chatglm_pybind.cpp b/chatglm_pybind.cpp
index 8d56ccd..2fcd7c7 100644
--- a/chatglm_pybind.cpp
+++ b/chatglm_pybind.cpp
@@ -36,6 +36,7 @@ PYBIND11_MODULE(_C, m) {
     m.doc() = "ChatGLM.cpp python binding";
 
     py::class_<ModelConfig>(m, "ModelConfig")
+        .def_readonly("model_type", &ModelConfig::model_type)
         .def_readonly("dtype", &ModelConfig::dtype)
         .def_readonly("vocab_size", &ModelConfig::vocab_size)
         .def_readonly("hidden_size", &ModelConfig::hidden_size)
@@ -48,7 +49,8 @@ PYBIND11_MODULE(_C, m) {
         .def_readonly("bos_token_id", &ModelConfig::bos_token_id)
         .def_readonly("eos_token_id", &ModelConfig::eos_token_id)
         .def_readonly("pad_token_id", &ModelConfig::pad_token_id)
-        .def_readonly("sep_token_id", &ModelConfig::sep_token_id);
+        .def_readonly("sep_token_id", &ModelConfig::sep_token_id)
+        .def_property_readonly("model_type_name", &ModelConfig::model_type_name);
 
     py::class_<BaseTokenizer>(m, "BaseTokenizer")
         .def("encode", &BaseTokenizer::encode)
@@ -56,7 +58,6 @@ PYBIND11_MODULE(_C, m) {
         .def("encode_history", &BaseTokenizer::encode_history);
 
     py::class_<BaseModelForCausalLM>(m, "BaseModelForCausalLM")
-        .def_property_readonly("type_name", &BaseModelForCausalLM::type_name)
         .def("generate_next_token", &BaseModelForCausalLM::generate_next_token)
         .def_readonly("config", &BaseModelForCausalLM::config);
 
@@ -85,6 +86,10 @@ PYBIND11_MODULE(_C, m) {
 
     py::class_<ChatGLM2ForCausalLM, BaseModelForCausalLM>(m, "ChatGLM2ForCausalLM");
 
+    // ===== ChatGLM3 =====
+
+    py::class_<ChatGLM3Tokenizer, BaseTokenizer>(m, "ChatGLM3Tokenizer");
+
     // ===== Baichuan7B/13B =====
 
     py::class_<BaichuanTokenizer, BaseTokenizer>(m, "BaichuanTokenizer");

diff --git a/chatglm_test.cpp b/chatglm_test.cpp
index cc54139..6df3028 100644
--- a/chatglm_test.cpp
+++ b/chatglm_test.cpp
@@ -751,6 +751,40 @@ TEST_F(ChatGLMTest, GLM2Model) {
 //     }
 // }
 
+TEST_F(ChatGLMTest, GLM3Model) {
+    fs::path data_path = fs::path(__FILE__).parent_path() / "tests/data/glm3_model.data";
+
+    ModelConfig config;
+    config.vocab_size = 5;
+    config.hidden_size = 32;
+    config.num_attention_heads = 8;
+    config.num_kv_heads = 2;
+    config.num_hidden_layers = 1;
+    config.intermediate_size = 48;
+    config.norm_eps = 1e-5;
+    config.max_length = 8;
+
+    constexpr int seq_len = 3;
+
+    ChatGLM3Model model(&ctx, config);
+
+    tensor_to_device(model.layers[0].attention.k_cache);
+    tensor_to_device(model.layers[0].attention.v_cache);
+
+    std::vector<ggml_tensor *> all_weights{model.word_embeddings.weight,
+                                           model.layers[0].input_layernorm.weight,
+                                           model.layers[0].attention.query_key_value.weight,
+                                           model.layers[0].attention.query_key_value.bias,
+                                           model.layers[0].attention.dense.weight,
+                                           model.layers[0].post_attention_layernorm.weight,
+                                           model.layers[0].mlp.gate_proj.weight,
+                                           model.layers[0].mlp.up_proj.weight,
+                                           model.layers[0].mlp.down_proj.weight,
+                                           model.final_layernorm.weight};
+
+    test_model(model, config, data_path, seq_len, all_weights);
+}
+
 TEST_F(ChatGLMTest, Baichuan7BModel) {
     fs::path data_path = fs::path(__FILE__).parent_path() / "tests/data/baichuan7b_model.data";
 
@@ -1082,6 +1116,64 @@ TEST(Pipeline, ChatGLM2) {
     }
 }
 
+TEST(Pipeline, ChatGLM3) {
+    fs::path model_path = fs::path(__FILE__).parent_path() / "chatglm3-ggml.bin";
+    if (!fs::exists(model_path)) {
+        GTEST_SKIP() << "Skipping ChatGLM3 e2e test (ggml model not found)";
+    }
+    Pipeline pipeline(model_path.string());
+    EXPECT_TRUE(dynamic_cast<ChatGLM3ForCausalLM *>(pipeline.model.get()));
+
+    // tokenizer
+    {
+        std::vector<TokenizerTestCase> cases{{"你好", {64790, 64792, 36474, 54591}}};
+        check_tokenizer(pipeline.tokenizer.get(), cases);
+
+        {
+            std::vector<std::string> history{"你好"};
+            std::vector<int> input_ids = pipeline.tokenizer->encode_history(history, 2048);
+            std::vector<int> target_ids{64790, 64792, 64795, 30910, 13, 36474, 54591, 64796, 30910, 13};
+            EXPECT_EQ(input_ids, target_ids);
+        }
+        {
+            std::vector<std::string> history{"你好",
+                                             "你好👋!我是人工智能助手 ChatGLM3-6B,很高兴见到你,欢迎问我任何问题。",
+                                             "晚上睡不着应该怎么办"};
+            std::vector<int> input_ids = pipeline.tokenizer->encode_history(history, 2048);
+            std::vector<int> target_ids{64790, 64792, 64795, 30910, 13,    36474, 54591, 64796, 30910, 13,
+                                        36474, 54591, 243,   162,   148,   142,   31404, 33030, 34797, 42481,
+                                        22011, 10461, 30944, 30966, 30941, 30978, 30949, 31123, 48895, 35214,
+                                        54622, 31123, 32616, 39905, 31901, 31639, 31155, 64795, 30910, 13,
+                                        30910, 32820, 54266, 31876, 35153, 64796, 30910, 13};
+            EXPECT_EQ(input_ids, target_ids);
+        }
+    }
+
+    // memory test
+    {
+        GenerationConfig gen_config;
+        gen_config.max_length = 2048;
+        gen_config.max_context_length = gen_config.max_length - 1;
+        gen_config.do_sample = false;
+
+        std::ostringstream oss;
+        for (int i = 0; i < gen_config.max_context_length; i++) {
+            oss << "你好";
+        }
+        std::vector<std::string> history{oss.str()};
+        pipeline.chat(history, gen_config);
+    }
+
+    // chat
+    {
+        GenerationConfig gen_config;
+        gen_config.do_sample = false;
+        std::vector<std::string> history{"你好"};
+        std::string output = pipeline.chat(history, gen_config);
+        EXPECT_EQ(output, "你好👋!我是人工智能助手 ChatGLM3-6B,很高兴见到你,欢迎问我任何问题。");
+    }
+}
+
 TEST(Pipeline, CodeGeeX2) {
     fs::path model_path = fs::path(__FILE__).parent_path() / "codegeex2-ggml.bin";
     if (!fs::exists(model_path)) {

diff --git a/examples/cli_chat.py b/examples/cli_chat.py
index 0fcf8ee..c5cda13 100644
--- a/examples/cli_chat.py
+++ b/examples/cli_chat.py
@@ -70,7 +70,7 @@ def main():
     history = []
     while True:
         try:
-            prompt = input(f"{'Prompt':{len(pipeline.model.type_name)}} > ")
+            prompt = input(f"{'Prompt':{len(pipeline.model.config.model_type_name)}} > ")
         except EOFError:
             break
         if not prompt:
@@ -81,7 +81,7 @@ def main():
             history = []
             continue
         history.append(prompt)
-        print(f"{pipeline.model.type_name} > ", sep="", end="")
+        print(f"{pipeline.model.config.model_type_name} > ", sep="", end="")
         output = ""
         for piece in pipeline.chat(history, **generation_kwargs):
             print(piece, sep="", end="", flush=True)

diff --git a/main.cpp b/main.cpp
index 2e9a1d9..cf305a7 100644
--- a/main.cpp
+++ b/main.cpp
@@ -139,7 +139,7 @@ static void chat(Args &args) {
     chatglm::Pipeline pipeline(args.model_path);
     int64_t end_load_us = ggml_time_us();
 
-    std::string model_name = pipeline.model->type_name();
+    std::string model_name = pipeline.model->config.model_type_name();
 
     auto text_streamer = std::make_shared<chatglm::TextStreamer>(std::cout, pipeline.tokenizer.get());
     auto perf_streamer = std::make_shared<chatglm::PerfStreamer>();
@@ -174,7 +174,7 @@ static void chat(Args &args) {
               << "temperature = " << args.temp << " | "
               << "num_threads = " << args.num_threads << " |\n";
 
-    std::cout << "loaded " << pipeline.model->type_name() << " model from " << args.model_path
+    std::cout << "loaded " << pipeline.model->config.model_type_name() << " model from " << args.model_path
               << " within: " << (end_load_us - start_load_us) / 1000.f << " ms\n";
 
     std::cout << std::endl;

diff --git a/tests/data/glm3_model.data b/tests/data/glm3_model.data
new file mode 100644
index 0000000000000000000000000000000000000000..d5dc2c65eee8580cceb7580c430b7c9c6d1796d0
GIT binary patch
literal 30548
[base85-encoded binary payload omitted: machine-generated GIT binary patch data for the new 30548-byte test fixture tests/data/glm3_model.data]

literal 0
HcmV?d00001

diff --git a/tests/test_chatglm_cpp.py b/tests/test_chatglm_cpp.py
index 8f63f1e..29b2829 100644
--- a/tests/test_chatglm_cpp.py
+++ b/tests/test_chatglm_cpp.py
@@ -7,6 +7,7 @@
 CHATGLM_MODEL_PATH = PROJECT_ROOT / "chatglm-ggml.bin"
 CHATGLM2_MODEL_PATH = PROJECT_ROOT / "chatglm2-ggml.bin"
+CHATGLM3_MODEL_PATH = PROJECT_ROOT / "chatglm3-ggml.bin"
 CODEGEEX2_MODEL_PATH = PROJECT_ROOT / "codegeex2-ggml.bin"
 BAICHUAN13B_MODEL_PATH = PROJECT_ROOT / "baichuan-13b-chat-ggml.bin"
 BAICHUAN2_7B_MODEL_PATH = PROJECT_ROOT / "baichuan2-7b-chat-ggml.bin"
@@ -55,6 +56,24 @@ def test_chatglm2_pipeline():
     assert stream_output == target
 
 
+@pytest.mark.skipif(not CHATGLM3_MODEL_PATH.exists(), reason="model file not found")
+def test_chatglm3_pipeline():
+    history = ["你好"]
+    target = "你好👋!我是人工智能助手 ChatGLM3-6B,很高兴见到你,欢迎问我任何问题。"
+
+    pipeline = chatglm_cpp.Pipeline(CHATGLM3_MODEL_PATH)
+    output = pipeline.chat(history, do_sample=False)
+    assert output == target
+
+    stream_output = pipeline.stream_chat(history, do_sample=False)
+    stream_output = "".join(stream_output)
+    assert stream_output == target
+
+    stream_output = pipeline.chat(history, do_sample=False, stream=True)
+    stream_output = "".join(stream_output)
+    assert stream_output == target
+
+
 @pytest.mark.skipif(not CODEGEEX2_MODEL_PATH.exists(), reason="model file not found")
 def test_codegeex2_pipeline():
     prompt = "# language: Python\n# write a bubble sort function\n"

diff --git a/tests/test_convert.py b/tests/test_convert.py
index ff4ff1d..816b163 100644
--- a/tests/test_convert.py
+++ b/tests/test_convert.py
@@ -384,6 +384,77 @@ def make_data_glm2_model():
     y3.numpy().tofile(f)
 
 
+def make_data_glm3_model():
+    CHATGLM3_MODEL_PATH = Path("./chatglm3-6b").expanduser()
+
+    sys.path.append(str(CHATGLM3_MODEL_PATH))
+    from modeling_chatglm import ChatGLMModel
+    from transformers import AutoConfig
+
+    config = AutoConfig.from_pretrained(CHATGLM3_MODEL_PATH, trust_remote_code=True)
+    config.hidden_size = 32
+    config.num_attention_heads = 8
+    config.num_layers = 1
+    config.padded_vocab_size = 5
+    config.multi_query_group_num = 2
+    config.ffn_hidden_size = 48
+    config.kv_channels = config.hidden_size // config.num_attention_heads
+    config.torch_dtype = torch.float32
+
+    m = ChatGLMModel(config).float().eval()
+    for param in m.parameters():
+        param.data.uniform_(-0.5, 0.5)
+
+    seq_len = 3
+
+    # self attention
+    x1 = torch.arange(seq_len, dtype=torch.int64)[None, :]
+    position_ids = torch.arange(seq_len, dtype=torch.int64)[None, :]
+    attn_mask = torch.ones(1, seq_len, dtype=torch.int64)
+    with torch.no_grad():
+        out = m(x1, position_ids=position_ids, attention_mask=attn_mask, use_cache=True)
+    y1 = out.last_hidden_state
+    kv_cache = out.past_key_values
+
+    # cross attention
+    x2 = torch.tensor([[seq_len]], dtype=torch.int64)
+    position_ids = torch.tensor([[seq_len]], dtype=torch.int64)
+    attn_mask = torch.ones(1, seq_len + 1, dtype=torch.int64)
+    with torch.no_grad():
+        out = m(x2, position_ids=position_ids, attention_mask=attn_mask, past_key_values=kv_cache, use_cache=True)
+    y2 = out.last_hidden_state
+    kv_cache = out.past_key_values
+
+    # cross attention
+    x3 = torch.tensor([[seq_len + 1]], dtype=torch.int64)
+    position_ids = torch.tensor([[seq_len + 1]], dtype=torch.int64)
+    attn_mask = torch.ones(1, seq_len + 2, dtype=torch.int64)
+    with torch.no_grad():
+        out = m(x3, position_ids=position_ids, attention_mask=attn_mask, past_key_values=kv_cache, use_cache=True)
+    y3 = out.last_hidden_state
+    kv_cache = out.past_key_values
+
+    print(m)
+
+    with open(HERE / "data/glm3_model.data", "wb") as f:
+        m.embedding.word_embeddings.weight.data.numpy().tofile(f)
+        m.encoder.layers[0].input_layernorm.weight.data.numpy().tofile(f)
+        m.encoder.layers[0].self_attention.query_key_value.weight.data.numpy().tofile(f)
+        m.encoder.layers[0].self_attention.query_key_value.bias.data.numpy().tofile(f)
+        m.encoder.layers[0].self_attention.dense.weight.data.numpy().tofile(f)
+        m.encoder.layers[0].post_attention_layernorm.weight.data.numpy().tofile(f)
+        m.encoder.layers[0].mlp.dense_h_to_4h.weight.data.numpy().tofile(f)
+        m.encoder.layers[0].mlp.dense_4h_to_h.weight.data.numpy().tofile(f)
+        m.encoder.final_layernorm.weight.data.numpy().tofile(f)
+
+        x1.int().numpy().tofile(f)
+        y1.numpy().tofile(f)
+        x2.int().numpy().tofile(f)
+        y2.numpy().tofile(f)
+        x3.int().numpy().tofile(f)
+        y3.numpy().tofile(f)
+
+
 def _make_data_baichuan_model(model_path, out_name):
     sys.path.append(str(model_path))
     from modeling_baichuan import BaichuanModel
 
@@ -549,9 +620,10 @@ def main():
     # make_data_rms_norm()
     # make_data_glm_model()
    # make_data_glm2_model()
+    make_data_glm3_model()
     # make_data_baichuan7b_model()
     # make_data_baichuan13b_model()
-    make_internlm_model()
+    # make_internlm_model()
 
 
 if __name__ == "__main__":
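A closing note on context management: `ChatGLM3Tokenizer::truncate` (in the chatglm.cpp hunk above) implements a sliding window that always preserves the two-token `[gMASK] sop` prefix and drops the oldest conversation tokens after it. The same logic as a short Python sketch:

```python
def truncate(ids: list[int], max_length: int) -> None:
    # Keep the two special prefix tokens; drop the oldest tokens after them.
    if len(ids) > max_length:
        num_drop = len(ids) - max_length
        del ids[2 : 2 + num_drop]

ids = [64790, 64792, 101, 102, 103, 104]
truncate(ids, max_length=4)
assert ids == [64790, 64792, 103, 104]
```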