Separate folder for ggml models & Fix dockerfile (#296)
li-plus authored Apr 29, 2024
1 parent d0f45ba commit 5f584ce
Showing 16 changed files with 119 additions and 116 deletions.
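With this change, converted GGML weights live in a dedicated models/ directory instead of the repository root: the convert script writes there by default, every C++ and Python entry point looks there, the new models/.gitignore keeps converted *.bin files out of version control, and the Docker build context excludes models/ instead of all *.bin files. A minimal sketch of the updated conversion step, assuming the THUDM/chatglm-6b Hugging Face id (illustrative; only the flags and the q4_0 dtype appear elsewhere in this commit):

python3 chatglm_cpp/convert.py -i THUDM/chatglm-6b -t q4_0
# with no -o given, the output now lands at models/chatglm-ggml.bin, the new --save_path default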
4 changes: 2 additions & 2 deletions .dockerignore
@@ -1,11 +1,11 @@
-.git/
+**/.git/
.github/
.hypothesis/
.pytest_cache/
build/
chatglm_cpp.egg-info/
dist/
.dockerignore
-*.bin
+models/
Dockerfile
**/__pycache__/
1 change: 1 addition & 0 deletions Dockerfile
@@ -47,6 +47,7 @@ RUN \
rm -rf /var/lib/apt/lists/*

COPY --from=build /chatglm.cpp/build/bin/main /chatglm.cpp/build/bin/main
+COPY --from=build /chatglm.cpp/build/lib/*.so /chatglm.cpp/build/lib/
COPY --from=build /chatglm.cpp/dist/ /chatglm.cpp/dist/

ADD examples examples
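The Dockerfile fix adds a COPY for the shared objects under build/lib/, presumably so the main binary in the runtime stage can resolve the libraries it links against. A hedged sketch of building the image and running it against weights mounted from the new models/ directory; the image tag, the mount point, and the prompt are assumptions rather than commands taken from this commit:

docker build . -t chatglm.cpp
docker run -it --rm -v $PWD/models:/chatglm.cpp/models chatglm.cpp \
    ./build/bin/main -m models/chatglm-ggml.bin -p "你好"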
125 changes: 63 additions & 62 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion chatglm_cpp/convert.py
@@ -562,7 +562,7 @@ def main():
help="Lora model name or path used in PeftModel.from_pretrained",
)
parser.add_argument(
-    "-o", "--save_path", default="chatglm-ggml.bin", type=Path, help="Path to save the generated GGML model"
+    "-o", "--save_path", default="models/chatglm-ggml.bin", type=Path, help="Path to save the generated GGML model"
)
parser.add_argument(
"-t",
2 changes: 1 addition & 1 deletion chatglm_cpp/langchain_api.py
@@ -11,7 +11,7 @@


class Settings(BaseSettings):
-model: str = "chatglm-ggml.bin"
+model: str = "models/chatglm-ggml.bin"


class ChatRequest(BaseModel):
2 changes: 1 addition & 1 deletion chatglm_cpp/openai_api.py
@@ -16,7 +16,7 @@


class Settings(BaseSettings):
-model: str = "chatglm3-ggml.bin"
+model: str = "models/chatglm3-ggml.bin"
num_threads: int = 0


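Both FastAPI servers (langchain_api and openai_api) now default to GGML files under models/. Since their Settings classes extend pydantic's BaseSettings, the path can still be overridden from the environment; a hedged sketch assuming the conventional MODEL variable mapping and a uvicorn launch, neither of which is shown in this diff:

MODEL=models/chatglm3-ggml.bin uvicorn chatglm_cpp.openai_api:app --host 127.0.0.1 --port 8000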
42 changes: 21 additions & 21 deletions chatglm_test.cpp
@@ -995,7 +995,7 @@ static void check_chat_format(const Pipeline &pipeline) {
}

TEST(Pipeline, ChatGLM) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "chatglm-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/chatglm-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping ChatGLM e2e test (ggml model not found)";
}
@@ -1057,7 +1057,7 @@ TEST(Pipeline, ChatGLM) {
}

TEST(Pipeline, ChatGLM2) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "chatglm2-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/chatglm2-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping ChatGLM2 e2e test (ggml model not found)";
}
@@ -1127,7 +1127,7 @@ static inline std::string read_text(const fs::path &path) {
}

TEST(Pipeline, ChatGLM3) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "chatglm3-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/chatglm3-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping ChatGLM3 e2e test (ggml model not found)";
}
@@ -1296,7 +1296,7 @@ primes_up_to_100
}

TEST(Pipeline, CodeGeeX2) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "codegeex2-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/codegeex2-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping CodeGeeX2 e2e test (ggml model not found)";
}
@@ -1320,12 +1320,12 @@ TEST(Pipeline, CodeGeeX2) {
std::string prompt = "# language: Python\n# write a bubble sort function\n";
std::string target = R"(
-def bubble_sort(list):
-    for i in range(len(list) - 1):
-        for j in range(len(list) - 1):
-            if list[j] > list[j + 1]:
-                list[j], list[j + 1] = list[j + 1], list[j]
-    return list
+def bubble_sort(lst):
+    for i in range(len(lst) - 1):
+        for j in range(len(lst) - 1 - i):
+            if lst[j] > lst[j + 1]:
+                lst[j], lst[j + 1] = lst[j + 1], lst[j]
+    return lst
print(bubble_sort([5, 4, 3, 2, 1])))";
@@ -1336,7 +1336,7 @@ print(bubble_sort([5, 4, 3, 2, 1])))";
}

TEST(Pipeline, Baichuan13B) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "baichuan-13b-chat-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/baichuan-13b-chat-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping Baichuan13B e2e test (ggml model not found)";
}
@@ -1391,7 +1391,7 @@ TEST(Pipeline, Baichuan13B) {
}

TEST(Pipeline, Baichuan2_7B) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "baichuan2-7b-chat-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/baichuan2-7b-chat-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping Baichuan2-7B e2e test (ggml model not found)";
}
@@ -1446,7 +1446,7 @@ TEST(Pipeline, Baichuan2_7B) {
}

TEST(Pipeline, Baichuan2_13B) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "baichuan2-13b-chat-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/baichuan2-13b-chat-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping Baichuan2-13B e2e test (ggml model not found)";
}
@@ -1489,7 +1489,7 @@ TEST(Pipeline, Baichuan2_13B) {
}

TEST(Pipeline, InternLM) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "internlm-chat-7b-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/internlm-chat-7b-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping InternLM e2e test (ggml model not found)";
}
@@ -1539,7 +1539,7 @@ TEST(Pipeline, InternLM) {
gen_config.do_sample = false;
std::vector<ChatMessage> messages{{ChatMessage::ROLE_USER, "你好"}};
ChatMessage output = pipeline.chat(messages, gen_config);
-EXPECT_EQ(output.content, "你好,有什么我可以帮助你的吗?");
+EXPECT_EQ(output.content, "你好,有什么我可以帮助你的吗?");
}
}

@@ -1578,32 +1578,32 @@ static void run_benchmark(const fs::path &model_path) {
}

TEST(Benchmark, ChatGLM) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "chatglm-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/chatglm-ggml.bin";
run_benchmark(model_path);
}

TEST(Benchmark, ChatGLM2) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "chatglm2-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/chatglm2-ggml.bin";
run_benchmark(model_path);
}

TEST(Benchmark, Baichuan2_7B) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "baichuan2-7b-chat-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/baichuan2-7b-chat-ggml.bin";
run_benchmark(model_path);
}

TEST(Benchmark, Baichuan2_13B) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "baichuan2-13b-chat-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/baichuan2-13b-chat-ggml.bin";
run_benchmark(model_path);
}

TEST(Benchmark, InternLM7B) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "internlm-chat-7b-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/internlm-chat-7b-ggml.bin";
run_benchmark(model_path);
}

TEST(Benchmark, InternLM20B) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "internlm-chat-20b-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/internlm-chat-20b-ggml.bin";
run_benchmark(model_path);
}

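Every C++ end-to-end test and benchmark now resolves its GGML file under models/ next to chatglm_test.cpp, and each test still calls GTEST_SKIP when the file is missing, so an empty models/ directory simply skips the suite. A rough sketch of exercising one pipeline after converting its model into models/; the build/bin/chatglm_test location is an assumption about the build layout:

./build/bin/chatglm_test --gtest_filter='Pipeline.ChatGLM'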
2 changes: 1 addition & 1 deletion examples/chatglm3_demo.py
@@ -17,7 +17,7 @@
from PIL import Image

IPYKERNEL = "chatglm3-demo"
-MODEL_PATH = Path(__file__).resolve().parent.parent / "chatglm3-ggml.bin"
+MODEL_PATH = Path(__file__).resolve().parent.parent / "models/chatglm3-ggml.bin"

CHAT_SYSTEM_PROMPT = "You are ChatGLM3, a large language model trained by Zhipu.AI. Follow the user's instructions carefully. Respond using markdown."

2 changes: 1 addition & 1 deletion examples/cli_demo.py
@@ -4,7 +4,7 @@

import chatglm_cpp

-DEFAULT_MODEL_PATH = Path(__file__).resolve().parent.parent / "chatglm-ggml.bin"
+DEFAULT_MODEL_PATH = Path(__file__).resolve().parent.parent / "models/chatglm-ggml.bin"

BANNER = """
________ __ ________ __ ___
2 changes: 1 addition & 1 deletion examples/web_demo.py
@@ -6,7 +6,7 @@
import chatglm_cpp
import gradio as gr

-DEFAULT_MODEL_PATH = Path(__file__).resolve().parent.parent / "chatglm-ggml.bin"
+DEFAULT_MODEL_PATH = Path(__file__).resolve().parent.parent / "models/chatglm-ggml.bin"

parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model", default=DEFAULT_MODEL_PATH, type=Path, help="model path")
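The example demos share the same default, models/chatglm-ggml.bin relative to the repository root. A short sketch of pointing web_demo.py at another converted model through the -m flag visible in this diff; the chatglm2 file name is illustrative:

python3 examples/web_demo.py -m models/chatglm2-ggml.bin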
4 changes: 2 additions & 2 deletions main.cpp
@@ -22,7 +22,7 @@ static inline InferenceMode to_inference_mode(const std::string &s) {
}

struct Args {
-std::string model_path = "chatglm-ggml.bin";
+std::string model_path = "models/chatglm-ggml.bin";
InferenceMode mode = INFERENCE_MODE_CHAT;
bool sync = false;
std::string prompt = "你好";
@@ -44,7 +44,7 @@ static void usage(const std::string &prog) {
options:
-h, --help show this help message and exit
-  -m, --model PATH        model path (default: chatglm-ggml.bin)
+  -m, --model PATH        model path (default: models/chatglm-ggml.bin)
--mode inference mode chosen from {chat, generate} (default: chat)
--sync synchronized generation without streaming
-p, --prompt PROMPT prompt to start generation with (default: 你好)
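main.cpp keeps its default and its help text in step with the new layout. A minimal usage sketch: with no -m the binary loads models/chatglm-ggml.bin, and -m points it elsewhere; the chatglm3 file name is illustrative:

./build/bin/main -p "你好"
./build/bin/main -m models/chatglm3-ggml.bin -p "你好"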
1 change: 1 addition & 0 deletions models/.gitignore
@@ -0,0 +1 @@
+*.bin
6 changes: 3 additions & 3 deletions tests/perf.sh
@@ -3,13 +3,13 @@
export CUDA_VISIBLE_DEVICES=0

# InternLM-7B
-hf_model=internlm/internlm-chat-7b-v1_1
-ggml_model=internlm-chat-7b-ggml.bin
+hf_model=internlm/internlm-chat-7b
+ggml_model=models/internlm-chat-7b-ggml.bin
benchmark=Benchmark.InternLM7B

# InternLM-20B
# hf_model=internlm/internlm-chat-20b
-# ggml_model=internlm-chat-20b-ggml.bin
+# ggml_model=models/internlm-chat-20b-ggml.bin
# benchmark=Benchmark.InternLM20B

for dtype in q4_0 q4_1 q5_0 q5_1 q8_0 f16; do
2 changes: 1 addition & 1 deletion tests/perplexity.cpp
@@ -5,7 +5,7 @@
#include <iostream>

struct Args {
-std::string model_path = "chatglm-ggml.bin";
+std::string model_path = "models/chatglm-ggml.bin";
std::string corpus_path = "data/wikitext-2-raw/wiki.test.raw";
int max_length = 1024;
int stride = 512;
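The perplexity tool's default model path moves into models/ as well. A speculative sketch of invoking it; the build/bin/perplexity location and the -f corpus flag are assumptions, only the struct defaults for model_path and corpus_path appear in this diff:

./build/bin/perplexity -m models/chatglm-ggml.bin -f data/wikitext-2-raw/wiki.test.raw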
6 changes: 3 additions & 3 deletions tests/ppl.sh
@@ -4,15 +4,15 @@ export CUDA_VISIBLE_DEVICES=0

# ChatGLM3-6B-Base
hf_model=THUDM/chatglm3-6b-base
-ggml_model=chatglm3-base-ggml.bin
+ggml_model=models/chatglm3-base-ggml.bin

# Baichuan2-7B-Base
# hf_model=baichuan-inc/Baichuan2-7B-Base
-# ggml_model=baichuan2-7b-base-ggml.bin
+# ggml_model=models/baichuan2-7b-base-ggml.bin

# InternLM
# hf_model=internlm/internlm-7b
-# ggml_model=internlm-7b-base-ggml.bin
+# ggml_model=models/internlm-7b-base-ggml.bin

for dtype in f16; do
python3 chatglm_cpp/convert.py -i $hf_model -o $ggml_model -t $dtype
32 changes: 16 additions & 16 deletions tests/test_chatglm_cpp.py
@@ -5,15 +5,15 @@

PROJECT_ROOT = Path(__file__).resolve().parent.parent

-CHATGLM_MODEL_PATH = PROJECT_ROOT / "chatglm-ggml.bin"
-CHATGLM2_MODEL_PATH = PROJECT_ROOT / "chatglm2-ggml.bin"
-CHATGLM3_MODEL_PATH = PROJECT_ROOT / "chatglm3-ggml.bin"
-CODEGEEX2_MODEL_PATH = PROJECT_ROOT / "codegeex2-ggml.bin"
-BAICHUAN13B_MODEL_PATH = PROJECT_ROOT / "baichuan-13b-chat-ggml.bin"
-BAICHUAN2_7B_MODEL_PATH = PROJECT_ROOT / "baichuan2-7b-chat-ggml.bin"
-BAICHUAN2_13B_MODEL_PATH = PROJECT_ROOT / "baichuan2-13b-chat-ggml.bin"
-INTERNLM7B_MODEL_PATH = PROJECT_ROOT / "internlm-chat-7b-ggml.bin"
-INTERNLM20B_MODEL_PATH = PROJECT_ROOT / "internlm-chat-20b-ggml.bin"
+CHATGLM_MODEL_PATH = PROJECT_ROOT / "models/chatglm-ggml.bin"
+CHATGLM2_MODEL_PATH = PROJECT_ROOT / "models/chatglm2-ggml.bin"
+CHATGLM3_MODEL_PATH = PROJECT_ROOT / "models/chatglm3-ggml.bin"
+CODEGEEX2_MODEL_PATH = PROJECT_ROOT / "models/codegeex2-ggml.bin"
+BAICHUAN13B_MODEL_PATH = PROJECT_ROOT / "models/baichuan-13b-chat-ggml.bin"
+BAICHUAN2_7B_MODEL_PATH = PROJECT_ROOT / "models/baichuan2-7b-chat-ggml.bin"
+BAICHUAN2_13B_MODEL_PATH = PROJECT_ROOT / "models/baichuan2-13b-chat-ggml.bin"
+INTERNLM7B_MODEL_PATH = PROJECT_ROOT / "models/internlm-chat-7b-ggml.bin"
+INTERNLM20B_MODEL_PATH = PROJECT_ROOT / "models/internlm-chat-20b-ggml.bin"


def test_chatglm_version():
@@ -80,12 +80,12 @@ def test_codegeex2_pipeline():
prompt = "# language: Python\n# write a bubble sort function\n"
target = """
-def bubble_sort(list):
-    for i in range(len(list) - 1):
-        for j in range(len(list) - 1):
-            if list[j] > list[j + 1]:
-                list[j], list[j + 1] = list[j + 1], list[j]
-    return list
+def bubble_sort(lst):
+    for i in range(len(lst) - 1):
+        for j in range(len(lst) - 1 - i):
+            if lst[j] > lst[j + 1]:
+                lst[j], lst[j + 1] = lst[j + 1], lst[j]
+    return lst
print(bubble_sort([5, 4, 3, 2, 1]))"""
@@ -131,7 +131,7 @@ def test_baichuan2_13b_pipeline():

@pytest.mark.skipif(not INTERNLM7B_MODEL_PATH.exists(), reason="model file not found")
def test_internlm7b_pipeline():
-check_pipeline(model_path=INTERNLM7B_MODEL_PATH, prompt="你好", target="你好,有什么我可以帮助你的吗?")
+check_pipeline(model_path=INTERNLM7B_MODEL_PATH, prompt="你好", target="你好,有什么我可以帮助你的吗?")


@pytest.mark.skipif(not INTERNLM20B_MODEL_PATH.exists(), reason="model file not found")
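The Python test suite mirrors the C++ one: every model constant now points into models/, and each test is skipped when its file is absent. A minimal sketch of running just these tests; the pytest invocation reflects common practice rather than anything added by this commit:

python3 -m pytest tests/test_chatglm_cpp.py -v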
