Separate folder for ggml models & Fix dockerfile (#296)
li-plus authored Apr 29, 2024
1 parent d0f45ba commit 5f584ce
Showing 16 changed files with 119 additions and 116 deletions.
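With this change, converted GGML weights live in a dedicated models/ directory instead of the repository root: the convert script writes there by default, every C++ and Python entry point looks there, the new models/.gitignore keeps converted *.bin files out of version control, and the Docker build context excludes models/ instead of all *.bin files. A minimal sketch of the updated conversion step, assuming the THUDM/chatglm-6b Hugging Face id (illustrative; only the flags and the q4_0 dtype appear elsewhere in this commit):

python3 chatglm_cpp/convert.py -i THUDM/chatglm-6b -t q4_0
# with no -o given, the output now lands at models/chatglm-ggml.bin, the new --save_path default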
4 changes: 2 additions & 2 deletions .dockerignore
@@ -1,11 +1,11 @@
-.git/
+**/.git/
.github/
.hypothesis/
.pytest_cache/
build/
chatglm_cpp.egg-info/
dist/
.dockerignore
-*.bin
+models/
Dockerfile
**/__pycache__/
1 change: 1 addition & 0 deletions Dockerfile
@@ -47,6 +47,7 @@ RUN \
rm -rf /var/lib/apt/lists/*

COPY --from=build /chatglm.cpp/build/bin/main /chatglm.cpp/build/bin/main
+COPY --from=build /chatglm.cpp/build/lib/*.so /chatglm.cpp/build/lib/
COPY --from=build /chatglm.cpp/dist/ /chatglm.cpp/dist/

ADD examples examples
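The Dockerfile fix adds a COPY for the shared objects under build/lib/, presumably so the main binary in the runtime stage can resolve the libraries it links against. A hedged sketch of building the image and running it against weights mounted from the new models/ directory; the image tag, the mount point, and the prompt are assumptions rather than commands taken from this commit:

docker build . -t chatglm.cpp
docker run -it --rm -v $PWD/models:/chatglm.cpp/models chatglm.cpp \
    ./build/bin/main -m models/chatglm-ggml.bin -p "你好"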
125 changes: 63 additions & 62 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion chatglm_cpp/convert.py
@@ -562,7 +562,7 @@ def main():
help="Lora model name or path used in PeftModel.from_pretrained",
)
parser.add_argument(
-    "-o", "--save_path", default="chatglm-ggml.bin", type=Path, help="Path to save the generated GGML model"
+    "-o", "--save_path", default="models/chatglm-ggml.bin", type=Path, help="Path to save the generated GGML model"
)
parser.add_argument(
"-t",
2 changes: 1 addition & 1 deletion chatglm_cpp/langchain_api.py
@@ -11,7 +11,7 @@


class Settings(BaseSettings):
-model: str = "chatglm-ggml.bin"
+model: str = "models/chatglm-ggml.bin"


class ChatRequest(BaseModel):
2 changes: 1 addition & 1 deletion chatglm_cpp/openai_api.py
@@ -16,7 +16,7 @@


class Settings(BaseSettings):
-model: str = "chatglm3-ggml.bin"
+model: str = "models/chatglm3-ggml.bin"
num_threads: int = 0


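Both FastAPI servers (langchain_api and openai_api) now default to GGML files under models/. Since their Settings classes extend pydantic's BaseSettings, the path can still be overridden from the environment; a hedged sketch assuming the conventional MODEL variable mapping and a uvicorn launch, neither of which is shown in this diff:

MODEL=models/chatglm3-ggml.bin uvicorn chatglm_cpp.openai_api:app --host 127.0.0.1 --port 8000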
42 changes: 21 additions & 21 deletions chatglm_test.cpp
@@ -995,7 +995,7 @@ static void check_chat_format(const Pipeline &pipeline) {
}

TEST(Pipeline, ChatGLM) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "chatglm-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/chatglm-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping ChatGLM e2e test (ggml model not found)";
}
@@ -1057,7 +1057,7 @@ TEST(Pipeline, ChatGLM) {
}

TEST(Pipeline, ChatGLM2) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "chatglm2-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/chatglm2-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping ChatGLM2 e2e test (ggml model not found)";
}
@@ -1127,7 +1127,7 @@ static inline std::string read_text(const fs::path &path) {
}

TEST(Pipeline, ChatGLM3) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "chatglm3-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/chatglm3-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping ChatGLM3 e2e test (ggml model not found)";
}
@@ -1296,7 +1296,7 @@ primes_up_to_100
}

TEST(Pipeline, CodeGeeX2) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "codegeex2-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/codegeex2-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping CodeGeeX2 e2e test (ggml model not found)";
}
@@ -1320,12 +1320,12 @@ TEST(Pipeline, CodeGeeX2) {
std::string prompt = "# language: Python\n# write a bubble sort function\n";
std::string target = R"(
-def bubble_sort(list):
-    for i in range(len(list) - 1):
-        for j in range(len(list) - 1):
-            if list[j] > list[j + 1]:
-                list[j], list[j + 1] = list[j + 1], list[j]
-    return list
+def bubble_sort(lst):
+    for i in range(len(lst) - 1):
+        for j in range(len(lst) - 1 - i):
+            if lst[j] > lst[j + 1]:
+                lst[j], lst[j + 1] = lst[j + 1], lst[j]
+    return lst
print(bubble_sort([5, 4, 3, 2, 1])))";
@@ -1336,7 +1336,7 @@ print(bubble_sort([5, 4, 3, 2, 1])))";
}

TEST(Pipeline, Baichuan13B) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "baichuan-13b-chat-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/baichuan-13b-chat-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping Baichuan13B e2e test (ggml model not found)";
}
@@ -1391,7 +1391,7 @@ TEST(Pipeline, Baichuan13B) {
}

TEST(Pipeline, Baichuan2_7B) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "baichuan2-7b-chat-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/baichuan2-7b-chat-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping Baichuan2-7B e2e test (ggml model not found)";
}
@@ -1446,7 +1446,7 @@ TEST(Pipeline, Baichuan2_7B) {
}

TEST(Pipeline, Baichuan2_13B) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "baichuan2-13b-chat-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/baichuan2-13b-chat-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping Baichuan2-13B e2e test (ggml model not found)";
}
@@ -1489,7 +1489,7 @@ TEST(Pipeline, Baichuan2_13B) {
}

TEST(Pipeline, InternLM) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "internlm-chat-7b-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/internlm-chat-7b-ggml.bin";
if (!fs::exists(model_path)) {
GTEST_SKIP() << "Skipping InternLM e2e test (ggml model not found)";
}
@@ -1539,7 +1539,7 @@ TEST(Pipeline, InternLM) {
gen_config.do_sample = false;
std::vector<ChatMessage> messages{{ChatMessage::ROLE_USER, "你好"}};
ChatMessage output = pipeline.chat(messages, gen_config);
-EXPECT_EQ(output.content, "你好,有什么我可以帮助你的吗?");
+EXPECT_EQ(output.content, "你好,有什么我可以帮助你的吗?");
}
}

@@ -1578,32 +1578,32 @@ static void run_benchmark(const fs::path &model_path) {
}

TEST(Benchmark, ChatGLM) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "chatglm-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/chatglm-ggml.bin";
run_benchmark(model_path);
}

TEST(Benchmark, ChatGLM2) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "chatglm2-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/chatglm2-ggml.bin";
run_benchmark(model_path);
}

TEST(Benchmark, Baichuan2_7B) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "baichuan2-7b-chat-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/baichuan2-7b-chat-ggml.bin";
run_benchmark(model_path);
}

TEST(Benchmark, Baichuan2_13B) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "baichuan2-13b-chat-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/baichuan2-13b-chat-ggml.bin";
run_benchmark(model_path);
}

TEST(Benchmark, InternLM7B) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "internlm-chat-7b-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/internlm-chat-7b-ggml.bin";
run_benchmark(model_path);
}

TEST(Benchmark, InternLM20B) {
-fs::path model_path = fs::path(__FILE__).parent_path() / "internlm-chat-20b-ggml.bin";
+fs::path model_path = fs::path(__FILE__).parent_path() / "models/internlm-chat-20b-ggml.bin";
run_benchmark(model_path);
}

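Every C++ end-to-end test and benchmark now resolves its GGML file under models/ next to chatglm_test.cpp, and each test still calls GTEST_SKIP when the file is missing, so an empty models/ directory simply skips the suite. A rough sketch of exercising one pipeline after converting its model into models/; the build/bin/chatglm_test location is an assumption about the build layout:

./build/bin/chatglm_test --gtest_filter='Pipeline.ChatGLM'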
2 changes: 1 addition & 1 deletion examples/chatglm3_demo.py
@@ -17,7 +17,7 @@
from PIL import Image

IPYKERNEL = "chatglm3-demo"
-MODEL_PATH = Path(__file__).resolve().parent.parent / "chatglm3-ggml.bin"
+MODEL_PATH = Path(__file__).resolve().parent.parent / "models/chatglm3-ggml.bin"

CHAT_SYSTEM_PROMPT = "You are ChatGLM3, a large language model trained by Zhipu.AI. Follow the user's instructions carefully. Respond using markdown."

2 changes: 1 addition & 1 deletion examples/cli_demo.py
@@ -4,7 +4,7 @@

import chatglm_cpp

-DEFAULT_MODEL_PATH = Path(__file__).resolve().parent.parent / "chatglm-ggml.bin"
+DEFAULT_MODEL_PATH = Path(__file__).resolve().parent.parent / "models/chatglm-ggml.bin"

BANNER = """
________ __ ________ __ ___
2 changes: 1 addition & 1 deletion examples/web_demo.py
@@ -6,7 +6,7 @@
import chatglm_cpp
import gradio as gr

-DEFAULT_MODEL_PATH = Path(__file__).resolve().parent.parent / "chatglm-ggml.bin"
+DEFAULT_MODEL_PATH = Path(__file__).resolve().parent.parent / "models/chatglm-ggml.bin"

parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model", default=DEFAULT_MODEL_PATH, type=Path, help="model path")
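The example demos share the same default, models/chatglm-ggml.bin relative to the repository root. A short sketch of pointing web_demo.py at another converted model through the -m flag visible in this diff; the chatglm2 file name is illustrative:

python3 examples/web_demo.py -m models/chatglm2-ggml.bin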
4 changes: 2 additions & 2 deletions main.cpp
@@ -22,7 +22,7 @@ static inline InferenceMode to_inference_mode(const std::string &s) {
}

struct Args {
-std::string model_path = "chatglm-ggml.bin";
+std::string model_path = "models/chatglm-ggml.bin";
InferenceMode mode = INFERENCE_MODE_CHAT;
bool sync = false;
std::string prompt = "你好";
@@ -44,7 +44,7 @@ static void usage(const std::string &prog) {
options:
-h, --help show this help message and exit
-  -m, --model PATH        model path (default: chatglm-ggml.bin)
+  -m, --model PATH        model path (default: models/chatglm-ggml.bin)
--mode inference mode chosen from {chat, generate} (default: chat)
--sync synchronized generation without streaming
-p, --prompt PROMPT prompt to start generation with (default: 你好)
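main.cpp keeps its default and its help text in step with the new layout. A minimal usage sketch: with no -m the binary loads models/chatglm-ggml.bin, and -m points it elsewhere; the chatglm3 file name is illustrative:

./build/bin/main -p "你好"
./build/bin/main -m models/chatglm3-ggml.bin -p "你好"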
1 change: 1 addition & 0 deletions models/.gitignore
@@ -0,0 +1 @@
+*.bin
6 changes: 3 additions & 3 deletions tests/perf.sh
@@ -3,13 +3,13 @@
export CUDA_VISIBLE_DEVICES=0

# InternLM-7B
-hf_model=internlm/internlm-chat-7b-v1_1
-ggml_model=internlm-chat-7b-ggml.bin
+hf_model=internlm/internlm-chat-7b
+ggml_model=models/internlm-chat-7b-ggml.bin
benchmark=Benchmark.InternLM7B

# InternLM-20B
# hf_model=internlm/internlm-chat-20b
-# ggml_model=internlm-chat-20b-ggml.bin
+# ggml_model=models/internlm-chat-20b-ggml.bin
# benchmark=Benchmark.InternLM20B

for dtype in q4_0 q4_1 q5_0 q5_1 q8_0 f16; do
2 changes: 1 addition & 1 deletion tests/perplexity.cpp
@@ -5,7 +5,7 @@
#include <iostream>

struct Args {
-std::string model_path = "chatglm-ggml.bin";
+std::string model_path = "models/chatglm-ggml.bin";
std::string corpus_path = "data/wikitext-2-raw/wiki.test.raw";
int max_length = 1024;
int stride = 512;
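The perplexity tool's default model path moves into models/ as well. A speculative sketch of invoking it; the build/bin/perplexity location and the -f corpus flag are assumptions, only the struct defaults for model_path and corpus_path appear in this diff:

./build/bin/perplexity -m models/chatglm-ggml.bin -f data/wikitext-2-raw/wiki.test.raw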
6 changes: 3 additions & 3 deletions tests/ppl.sh
@@ -4,15 +4,15 @@ export CUDA_VISIBLE_DEVICES=0

# ChatGLM3-6B-Base
hf_model=THUDM/chatglm3-6b-base
-ggml_model=chatglm3-base-ggml.bin
+ggml_model=models/chatglm3-base-ggml.bin

# Baichuan2-7B-Base
# hf_model=baichuan-inc/Baichuan2-7B-Base
-# ggml_model=baichuan2-7b-base-ggml.bin
+# ggml_model=models/baichuan2-7b-base-ggml.bin

# InternLM
# hf_model=internlm/internlm-7b
-# ggml_model=internlm-7b-base-ggml.bin
+# ggml_model=models/internlm-7b-base-ggml.bin

for dtype in f16; do
python3 chatglm_cpp/convert.py -i $hf_model -o $ggml_model -t $dtype
32 changes: 16 additions & 16 deletions tests/test_chatglm_cpp.py
@@ -5,15 +5,15 @@

PROJECT_ROOT = Path(__file__).resolve().parent.parent

-CHATGLM_MODEL_PATH = PROJECT_ROOT / "chatglm-ggml.bin"
-CHATGLM2_MODEL_PATH = PROJECT_ROOT / "chatglm2-ggml.bin"
-CHATGLM3_MODEL_PATH = PROJECT_ROOT / "chatglm3-ggml.bin"
-CODEGEEX2_MODEL_PATH = PROJECT_ROOT / "codegeex2-ggml.bin"
-BAICHUAN13B_MODEL_PATH = PROJECT_ROOT / "baichuan-13b-chat-ggml.bin"
-BAICHUAN2_7B_MODEL_PATH = PROJECT_ROOT / "baichuan2-7b-chat-ggml.bin"
-BAICHUAN2_13B_MODEL_PATH = PROJECT_ROOT / "baichuan2-13b-chat-ggml.bin"
-INTERNLM7B_MODEL_PATH = PROJECT_ROOT / "internlm-chat-7b-ggml.bin"
-INTERNLM20B_MODEL_PATH = PROJECT_ROOT / "internlm-chat-20b-ggml.bin"
+CHATGLM_MODEL_PATH = PROJECT_ROOT / "models/chatglm-ggml.bin"
+CHATGLM2_MODEL_PATH = PROJECT_ROOT / "models/chatglm2-ggml.bin"
+CHATGLM3_MODEL_PATH = PROJECT_ROOT / "models/chatglm3-ggml.bin"
+CODEGEEX2_MODEL_PATH = PROJECT_ROOT / "models/codegeex2-ggml.bin"
+BAICHUAN13B_MODEL_PATH = PROJECT_ROOT / "models/baichuan-13b-chat-ggml.bin"
+BAICHUAN2_7B_MODEL_PATH = PROJECT_ROOT / "models/baichuan2-7b-chat-ggml.bin"
+BAICHUAN2_13B_MODEL_PATH = PROJECT_ROOT / "models/baichuan2-13b-chat-ggml.bin"
+INTERNLM7B_MODEL_PATH = PROJECT_ROOT / "models/internlm-chat-7b-ggml.bin"
+INTERNLM20B_MODEL_PATH = PROJECT_ROOT / "models/internlm-chat-20b-ggml.bin"


def test_chatglm_version():
@@ -80,12 +80,12 @@ def test_codegeex2_pipeline():
prompt = "# language: Python\n# write a bubble sort function\n"
target = """
-def bubble_sort(list):
-    for i in range(len(list) - 1):
-        for j in range(len(list) - 1):
-            if list[j] > list[j + 1]:
-                list[j], list[j + 1] = list[j + 1], list[j]
-    return list
+def bubble_sort(lst):
+    for i in range(len(lst) - 1):
+        for j in range(len(lst) - 1 - i):
+            if lst[j] > lst[j + 1]:
+                lst[j], lst[j + 1] = lst[j + 1], lst[j]
+    return lst
print(bubble_sort([5, 4, 3, 2, 1]))"""
@@ -131,7 +131,7 @@ def test_baichuan2_13b_pipeline():

@pytest.mark.skipif(not INTERNLM7B_MODEL_PATH.exists(), reason="model file not found")
def test_internlm7b_pipeline():
-check_pipeline(model_path=INTERNLM7B_MODEL_PATH, prompt="你好", target="你好,有什么我可以帮助你的吗?")
+check_pipeline(model_path=INTERNLM7B_MODEL_PATH, prompt="你好", target="你好,有什么我可以帮助你的吗?")


@pytest.mark.skipif(not INTERNLM20B_MODEL_PATH.exists(), reason="model file not found")
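The Python test suite mirrors the C++ one: every model constant now points into models/, and each test is skipped when its file is absent. A minimal sketch of running just these tests; the pytest invocation reflects common practice rather than anything added by this commit:

python3 -m pytest tests/test_chatglm_cpp.py -v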
