Skip to content

Commit

Permalink
Refine the config and the CI test of the LLM KV cache. (#1915)
Browse files Browse the repository at this point in the history
A follow-up to #1913

Signed-off-by: Ye Cao <[email protected]>
  • Loading branch information
dashanji authored Jun 20, 2024
1 parent b6e0bfa commit 4548925
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 33 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@ jobs:
rm -rf default.etcd
rm -rf /dev/shm/etcd*
mkdir -p /tmp/vineyard/llm_cache
python3 test/runner.py $RUNNER_ARGS --with-llm-python
- name: Run contrib/thirdparty tests
Expand Down
12 changes: 6 additions & 6 deletions modules/llm-cache/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ import vineyard

from vineyard.llm import KVCache
from vineyard.llm import KVTensor
from vineyard.llm.config import FileCacheConfig
from vineyard.llm.config import VineyardCacheConfig
from vineyard.llm.cache import FileCacheConfig
from vineyard.llm.cache import VineyardCacheConfig

vineyard_cache_config = VineyardCacheConfig(
socket="/tmp/vineyard_test.sock"
Expand Down Expand Up @@ -205,7 +205,7 @@ for _ in range(len(tokens)):
]
)

matched = cache.query(tokens, kv_tensors_to_query)
matched = cache.query(None, tokens, kv_tensors_to_query)
kv_tensors_from_cache = kv_tensors_to_query[:matched]
assert matched == len(tokens)

Expand Down Expand Up @@ -243,8 +243,8 @@ import vineyard

from vineyard.llm import KVCache
from vineyard.llm import KVTensor
from vineyard.llm.config import FileCacheConfig
from vineyard.llm.config import VineyardCacheConfig
from vineyard.llm.cache import FileCacheConfig
from vineyard.llm.cache import VineyardCacheConfig

file_cache_config = FileCacheConfig(
chunk_size=2,
Expand Down Expand Up @@ -299,7 +299,7 @@ for _ in range(len(tokens)):
for _ in range(cache.layer)
]
)
matched = cache.query(tokens, kv_tensors)
matched = cache.query(None, tokens, kv_tensors)
assert matched == len(tokens)

assert len(kv_tensors) == len(kv_tensors_from_cache)
Expand Down
5 changes: 3 additions & 2 deletions modules/llm-cache/tests/k8s-test/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np

from vineyard.llm import KVCache, KVTensor
from vineyard.llm.config import FileCacheConfig
from vineyard.llm.cache import FileCacheConfig

def start_server(port=8888):
serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
Expand Down Expand Up @@ -79,11 +79,12 @@ def reserve_kv_tensors(kv_tensors, num_tokens, kv_tensor):
tokens = tokens.decode('utf-8')
tokens = tokens.replace('\n', '').split(' ')
tokens = [int(token) for token in tokens]
tokens = tokens[:len(tokens) - len(tokens) % batch_size]

kv_cache_list = reserve_kv_tensors(kv_cache_list, len(tokens), kv_tensor)

query_start_time = time.time()
matched = cache.query(tokens, kv_cache_list)
matched = cache.query(None, tokens, kv_cache_list)
query_end_time = time.time()
if matched > 0:
total_query_time += query_end_time - query_start_time
Expand Down
20 changes: 10 additions & 10 deletions modules/llm-cache/tests/kv_cache_local_file_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ void print_current_tokens(const std::vector<int>& prefix, int next_token) {
}

void print_kv_state(const std::vector<std::pair<LLMKV, LLMKV>>& kv_state) {
LOG(INFO) << "kv_state: ";
VLOG(100) << "kv_state: ";
for (size_t i = 0; i < kv_state.size(); ++i) {
uint8_t* key_state_data =
reinterpret_cast<uint8_t*>(kv_state[i].first.data);
Expand All @@ -72,10 +72,10 @@ void print_kv_state(const std::vector<std::pair<LLMKV, LLMKV>>& kv_state) {
key_state_str += std::to_string(key_state_data[j]) + " ";
value_state_str += std::to_string(value_state_data[j]) + " ";
}
LOG(INFO) << "layer " << i << ":";
LOG(INFO) << "key_state: " << key_state_str;
LOG(INFO) << "value_state: " << value_state_str;
LOG(INFO) << "---------------------";
VLOG(100) << "layer " << i << ":";
VLOG(100) << "key_state: " << key_state_str;
VLOG(100) << "value_state: " << value_state_str;
VLOG(100) << "---------------------";
}
}

Expand Down Expand Up @@ -107,26 +107,26 @@ void check_kv_state(const std::vector<std::pair<LLMKV, LLMKV>>& kv_state,
int& token) {
VINEYARD_ASSERT(kv_state.size() == (size_t) layer);
for (size_t index = 0; index < kv_state.size(); ++index) {
LOG(INFO) << "kv_state length: " << kv_state[index].first.length
VLOG(100) << "kv_state length: " << kv_state[index].first.length
<< "tensorNBytes: " << tensorNBytes << "layer: " << layer;
VINEYARD_ASSERT(kv_state[index].first.length == (size_t) tensorNBytes);
VINEYARD_ASSERT(kv_state[index].second.length == (size_t) tensorNBytes);
for (int i = 0; i < tensorNBytes; ++i) {
if ((reinterpret_cast<uint8_t*>(kv_state[index].first.data))[i] !=
(static_cast<uint8_t>(token)) + i + index) {
LOG(INFO) << "token:" << token << " tensorNBytes" << tensorNBytes
VLOG(100) << "token:" << token << " tensorNBytes" << tensorNBytes
<< " layer:" << index;
LOG(INFO) << "key_state[" << i << "]: "
VLOG(100) << "key_state[" << i << "]: "
<< (reinterpret_cast<uint8_t*>(kv_state[index].first.data))[i]
<< ". But is should be "
<< (static_cast<uint8_t>(token)) + i + index;
throw std::runtime_error("key_state error!");
}
if (reinterpret_cast<uint8_t*>(kv_state[index].second.data)[i] !=
(static_cast<uint8_t>(token)) + i + index) {
LOG(INFO) << "token:" << token << " tensorNBytes" << tensorNBytes
VLOG(100) << "token:" << token << " tensorNBytes" << tensorNBytes
<< " layer:" << index;
LOG(INFO) << "value_state[" << i << "]: "
VLOG(100) << "value_state[" << i << "]: "
<< (reinterpret_cast<uint8_t*>(
kv_state[index].second.data))[i]
<< ". But is should be "
Expand Down
18 changes: 9 additions & 9 deletions modules/llm-cache/tests/kv_cache_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ void print_current_tokens(const std::vector<int>& prefix, int next_token) {
}

void print_kv_state(const std::vector<std::pair<LLMKV, LLMKV>>& kv_state) {
LOG(INFO) << "kv_state: ";
VLOG(100) << "kv_state: ";
for (size_t i = 0; i < kv_state.size(); ++i) {
uint8_t* key_state_data =
reinterpret_cast<uint8_t*>(kv_state[i].first.data);
Expand All @@ -77,10 +77,10 @@ void print_kv_state(const std::vector<std::pair<LLMKV, LLMKV>>& kv_state) {
key_state_str += std::to_string(key_state_data[j]) + " ";
value_state_str += std::to_string(value_state_data[j]) + " ";
}
LOG(INFO) << "layer " << i << ":";
LOG(INFO) << "key_state: " << key_state_str;
LOG(INFO) << "value_state: " << value_state_str;
LOG(INFO) << "---------------------";
VLOG(100) << "layer " << i << ":";
VLOG(100) << "key_state: " << key_state_str;
VLOG(100) << "value_state: " << value_state_str;
VLOG(100) << "---------------------";
}
}

Expand Down Expand Up @@ -116,19 +116,19 @@ void check_kv_state(const std::vector<std::pair<LLMKV, LLMKV>>& kv_state,
for (int i = 0; i < tensorNBytes; ++i) {
if ((reinterpret_cast<uint8_t*>(kv_state[index].first.data))[i] !=
(static_cast<uint8_t>(token)) + i + index) {
LOG(INFO) << "token:" << token << " tensorNBytes" << tensorNBytes
VLOG(100) << "token:" << token << " tensorNBytes" << tensorNBytes
<< " layer:" << index;
LOG(INFO) << "key_state[" << i << "]: "
VLOG(100) << "key_state[" << i << "]: "
<< (reinterpret_cast<uint8_t*>(kv_state[index].first.data))[i]
<< ". But is should be "
<< (static_cast<uint8_t>(token)) + i + index;
throw std::runtime_error("key_state error!");
}
if (reinterpret_cast<uint8_t*>(kv_state[index].second.data)[i] !=
(static_cast<uint8_t>(token)) + i + index) {
LOG(INFO) << "token:" << token << " tensorNBytes" << tensorNBytes
VLOG(100) << "token:" << token << " tensorNBytes" << tensorNBytes
<< " layer:" << index;
LOG(INFO) << "value_state[" << i << "]: "
VLOG(100) << "value_state[" << i << "]: "
<< (reinterpret_cast<uint8_t*>(
kv_state[index].second.data))[i]
<< ". But is should be "
Expand Down
3 changes: 1 addition & 2 deletions python/vineyard/llm/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def __init__(
"""
import vineyard

self.socket = socket
self.block_size = block_size
self.sync_interval = sync_interval
self.llm_cache_sync_lock = llm_cache_sync_lock
Expand All @@ -91,7 +90,7 @@ def __init__(
def __repr__(self):
return (
f'VineyardCacheConfig('
f'socket={self.socket}, '
f'ipc_client={self.ipc_client}, '
f'block_size={self.block_size}, '
f'sync_interval={self.sync_interval}, '
f'llm_cache_sync_lock={self.llm_cache_sync_lock}, '
Expand Down
8 changes: 4 additions & 4 deletions python/vineyard/llm/tests/test_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@

from vineyard.llm import KVCache
from vineyard.llm import KVTensor
from vineyard.llm.config import FileCacheConfig
from vineyard.llm.config import VineyardCacheConfig
from vineyard.llm.cache import FileCacheConfig
from vineyard.llm.cache import VineyardCacheConfig


def test_kv_cache_update_and_query_on_blob(vineyard_ipc_sockets):
Expand Down Expand Up @@ -75,7 +75,7 @@ def test_kv_cache_update_and_query_on_blob(vineyard_ipc_sockets):
]
)

matched = cache.query(tokens, kv_tensors_to_query)
matched = cache.query(None, tokens, kv_tensors_to_query)
kv_tensors_from_cache = kv_tensors_to_query[:matched]
assert matched == len(tokens)

Expand Down Expand Up @@ -151,7 +151,7 @@ def test_kv_cache_update_and_query_on_fs():
for _ in range(cache.layer)
]
)
matched = cache.query(tokens, kv_tensors)
matched = cache.query(None, tokens, kv_tensors)
assert matched == len(tokens)

assert len(kv_tensors) == len(kv_tensors_from_cache)
Expand Down

0 comments on commit 4548925

Please sign in to comment.