forked from hamelsmu/llama-inference
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
OpenAI compatible servers benchmark based from the anyscale and exlla…
…ma benchmarks.
- Loading branch information
Showing
5 changed files
with
127 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# syntax=docker/dockerfile:1 | ||
|
||
# From the parent directory (main directory of this repo) run: | ||
# | ||
# docker build --build-arg USERID=$(id -u) -t localhost/openai-bench openai | ||
# | ||
# docker run --rm -it --name openai-bench -e OPENAI_API_KEY='secret' \ | ||
# -v"$PWD":/home/user/llama-inference localhost/openai-bench \ | ||
# --baseurl https://api.openai.com:443/v1 --model gpt-3.5-turbo \ | ||
# --filename gpt-3.5-turbo.bench.csv --note "gpt-3.5-turbo" | ||
# | ||
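# The image is not self-contained: only requirements.txt is copied in, so the repo must be bind-mounted at /home/user/llama-inference (the -v option above) for bench.py to be found. | ||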
|
||
# Look for newer tags here https://hub.docker.com/_/python: | ||
# Base image: official Python 3.12 on Debian bookworm.
FROM docker.io/python:3.12-bookworm
# UID for the in-container user; the build command in the header passes the
# caller's `id -u` (presumably so files written to the bind mount keep the
# host user's ownership -- confirm).
ARG USERID=1000
# Only the dependency list is copied into the image; the benchmark sources
# themselves are not.
COPY requirements.txt /tmp
RUN pip3 install --no-cache-dir -r /tmp/requirements.txt
# Create a password-less, non-root user with the requested UID and switch to it.
RUN adduser --disabled-password --gecos '' -u $USERID user
USER user
# This path lies inside the directory the header's `docker run -v` mounts,
# which is where bench.py is expected to be found.
WORKDIR /home/user/llama-inference/openai/
# Arguments given after the image name on `docker run` replace CMD and are
# appended to ENTRYPOINT; with no arguments the CMD defaults below are used.
ENTRYPOINT ["python3", "bench.py"]
CMD [ \
"--baseurl", "https://api.openai.com:443/v1", \
"--model", "gpt-3.5-turbo", \
"--filename", "/dev/stdout", \
"--note", "gpt-3.5-turbo" \
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# OpenAI Chat API compatible benchmark | ||
|
||
Benchmark for servers compatible with the [OpenAI Chat Completions API](https://platform.openai.com/docs/guides/text-generation/chat-completions-api). | ||
|
||
You can follow the instructions in the [Dockerfile](Dockerfile) to build a container to run the benchmark. | ||
|
||
Or alternatively on your own machine: | ||
```sh | ||
cd openai | ||
pip3 install -r requirements.txt | ||
export OPENAI_API_KEY='secret' #obtain from https://platform.openai.com/api-keys | ||
python3 client.py #to test a single run | ||
python3 bench.py --help #get usage help | ||
python3 bench.py --baseurl https://api.openai.com:443/v1 --model gpt-3.5-turbo \ | ||
--filename gpt-3.5-turbo.bench.csv --note "gpt-3.5-turbo" | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from client import chat,args | ||
import sys | ||
sys.path.append('../common/') | ||
from questions import questions | ||
import pandas as pd | ||
|
||
if __name__ == '__main__':
    # Every question is sent to the server, but the result of the very first
    # request is discarded so it acts as a warmup and does not skew the data.
    responses = []
    for position, question in enumerate(questions, start=1):
        outcome = chat(question)
        if position < 2:
            continue
        responses.append(outcome)

    # One CSV row per measured request, written to the path given by --filename.
    pd.DataFrame(responses).to_csv(args.filename, index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from openai import OpenAI | ||
import argparse | ||
import time | ||
|
||
# TODO: should use a parent parser without --filename here.
# NOTE: the command line is parsed at import time, so a script that imports
# this module (bench.py does `from client import chat,args`) shares these
# options and the resulting `args` namespace.
parser = argparse.ArgumentParser(
    description='Run LLM inference requests and save to a CSV.')
parser.add_argument('--filename', type=str, default='/dev/stdout',
                    help='Path to the output CSV file (stdout by default).')
parser.add_argument('--note', type=str,
                    help='Note to add to the rows of the file (--model by default).')
parser.add_argument('--model', type=str, default='gpt-3.5-turbo',
                    help='Model to use (gpt-3.5-turbo by default).')
parser.add_argument('--baseurl', type=str, default='https://api.openai.com:443/v1',
                    help='Endpoint base URL (https://api.openai.com:443/v1 by default).')
args = parser.parse_args()
# argparse always defines the attribute (as None when the flag is omitted), so
# a membership test such as `'note' not in args` is never true and the
# documented fallback would never apply. Compare against None instead.
if args.note is None:
    args.note = args.model

# Shared API client for the selected endpoint. No api_key is passed here; the
# documented usage exports OPENAI_API_KEY in the environment instead.
client = OpenAI(base_url=args.baseurl)
|
||
def chat(prompt:str):
    """Send ``prompt`` as a single chat-completion request and time it.

    The request goes through the module-level ``client`` using the model
    named in ``args.model``, with a fixed system message and a 200-token
    completion limit.

    Returns a dict with the keys ``tok_count`` (completion tokens as reported
    in the response's usage data), ``time`` (elapsed seconds measured with
    ``time.perf_counter``), ``question`` (the prompt), ``answer`` (text of
    the first choice) and ``note`` (``args.note``).
    """
    started_at = time.perf_counter()
    conversation = [
        {"role": "system", "content": "You are a very verbose and helpful assistant"},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model=args.model,
        max_tokens=200,
        messages=conversation,
    )
    elapsed = time.perf_counter() - started_at

    # Key order is kept stable because it becomes the CSV column order.
    record = {}
    record['tok_count'] = completion.usage.completion_tokens
    record['time'] = elapsed
    record['question'] = prompt
    record['answer'] = completion.choices[0].message.content
    record['note'] = args.note
    return record
|
||
if __name__ == '__main__':
    # Smoke test: issue one request and print its throughput and the answer.
    prompt = "San Francisco is a city in"
    result = chat(prompt)
    elapsed = result['time']
    tokens = result['tok_count']
    tokPerSec = tokens / elapsed
    report = [
        f"User: {prompt}",
        f"Chatbot in {elapsed}s with {tokens} tokens ({tokPerSec} t/s):",
        f"{result['answer']}",
    ]
    print("\n".join(report))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
annotated-types==0.6.0 | ||
anyio==4.3.0 | ||
certifi==2024.2.2 | ||
distro==1.9.0 | ||
h11==0.14.0 | ||
httpcore==1.0.5 | ||
httpx==0.27.0 | ||
idna==3.6 | ||
numpy==1.26.4 | ||
openai==1.16.2 | ||
pandas==2.2.1 | ||
pydantic==2.6.4 | ||
pydantic_core==2.16.3 | ||
python-dateutil==2.9.0.post0 | ||
pytz==2024.1 | ||
setuptools==69.1.1 | ||
six==1.16.0 | ||
sniffio==1.3.1 | ||
tqdm==4.66.2 | ||
typing_extensions==4.11.0 | ||
tzdata==2024.1 | ||
wheel==0.43.0 |