diff --git a/model_servers/llamacpp_python/src/requirements.txt b/model_servers/llamacpp_python/src/requirements.txt
index 8ec01572..9a19eec2 100644
--- a/model_servers/llamacpp_python/src/requirements.txt
+++ b/model_servers/llamacpp_python/src/requirements.txt
@@ -1,2 +1,3 @@
 llama-cpp-python[server]==0.2.78
+transformers==4.41.2
 pip==24.0
diff --git a/model_servers/llamacpp_python/src/run.sh b/model_servers/llamacpp_python/src/run.sh
index 851ef429..f34fef4b 100644
--- a/model_servers/llamacpp_python/src/run.sh
+++ b/model_servers/llamacpp_python/src/run.sh
@@ -4,6 +4,10 @@ if [ ${CONFIG_PATH} ] || [[ ${MODEL_PATH} && ${CONFIG_PATH} ]]; then
     exit 0
 fi
 
+if [ "${HF_PRETRAINED_MODEL}" == "None" ]; then
+    HF_PRETRAINED_MODEL=""
+fi
+
 if [ ${MODEL_PATH} ]; then
     python -m llama_cpp.server \
         --model ${MODEL_PATH} \
@@ -12,6 +16,8 @@ if [ ${MODEL_PATH} ]; then
         --n_gpu_layers ${GPU_LAYERS:=0} \
         --clip_model_path ${CLIP_MODEL_PATH:=None} \
         --chat_format ${CHAT_FORMAT:=llama-2} \
+        ${PRETRAINED_MODEL_PATH:=} \
+        ${HF_PRETRAINED_MODEL:+--hf_pretrained_model_name_or_path ${HF_PRETRAINED_MODEL}} \
         --interrupt_requests ${INTERRUPT_REQUESTS:=False}
     exit 0
 fi