-
Notifications
You must be signed in to change notification settings - Fork 0
/
start_llm_service.sh
31 lines (24 loc) · 943 Bytes
/
start_llm_service.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!/bin/bash
# Start the local Gemma-2-2b LLM service in Docker with GPU support.
# Requires: docker with the NVIDIA runtime, and LLAMA_TOKEN set in the
# environment or in a local .env file.
set -euo pipefail

# Load environment variables from a .env file if present.
# `set -a` + source exports everything the file defines and, unlike
# `export $(cat .env | xargs)`, handles quoted values and values that
# contain spaces or '#'.
if [ -f .env ]; then
  set -a
  # shellcheck disable=SC1091
  source .env
  set +a
fi

# Fail fast with a clear message instead of passing an empty token
# into the container.
: "${LLAMA_TOKEN:?LLAMA_TOKEN must be set (env var or .env file)}"

# Local paths for the model weights and the offload directory.
MODEL_DIR="$(pwd)/model/models--google--gemma-2-2b"
OFFLOAD_DIR="$(pwd)/offload"

# Confirm which model directory will be mounted.
echo "Model directory: $MODEL_DIR"

# Optional: enter the container interactively for debugging:
# docker run --gpus all -it --entrypoint /bin/bash \
#   -v "$MODEL_DIR":/app/model/models--google--gemma-2-2b \
#   -v "$OFFLOAD_DIR":/app/offload \
#   -e LLAMA_TOKEN="$LLAMA_TOKEN" \
#   localgemma

# Run the container with GPU access, publishing port 6000 and mounting
# the model and offload volumes.
docker run --gpus all -p 6000:6000 --name localgemma \
  -v "$MODEL_DIR":/app/model/models--google--gemma-2-2b \
  -v "$OFFLOAD_DIR":/app/offload \
  -e LLAMA_TOKEN="$LLAMA_TOKEN" \
  localgemma

# How to stop the container once it is running.
echo "To stop the container, use: docker stop localgemma"