From 10ee48e09a4f8d0cb869060d076103af0fcc9ca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Baldo?= Date: Mon, 8 Apr 2024 18:01:59 -0300 Subject: [PATCH] OpenAI compatible servers benchmark based from the anyscale and exllama benchmarks. --- openai/Dockerfile | 28 ++++++++++++++++++++++++++ openai/README.md | 16 +++++++++++++++ openai/bench.py | 17 ++++++++++++++++ openai/client.py | 44 +++++++++++++++++++++++++++++++++++++++++ openai/requirements.txt | 22 +++++++++++++++++++++ 5 files changed, 127 insertions(+) create mode 100644 openai/Dockerfile create mode 100644 openai/README.md create mode 100644 openai/bench.py create mode 100644 openai/client.py create mode 100644 openai/requirements.txt diff --git a/openai/Dockerfile b/openai/Dockerfile new file mode 100644 index 0000000..d4f9f5c --- /dev/null +++ b/openai/Dockerfile @@ -0,0 +1,28 @@ +# syntax=docker/dockerfile:1 + +# From the parent directory (main directory of this repo) run: +# +# docker build --build-arg USERID=$(id -u) -t localhost/openai-bench openai +# +# docker run --rm -it --name openai-bench -e OPENAI_API_KEY='secret' \ +# -v"$PWD":/home/user/llama-inference localhost/openai-bench \ +# --baseurl https://api.openai.com:443/v1 --model gpt-3.5-turbo \ +# --filename gpt-3.5-turbo.bench.csv --note "gpt-3.5-turbo" +# +# The container isn't self contained but designed to be used with the repo. 
+ +# Look for newer tags here https://hub.docker.com/_/python: +FROM docker.io/python:3.12-bookworm +ARG USERID=1000 +COPY requirements.txt /tmp +RUN pip3 install --no-cache-dir -r /tmp/requirements.txt +RUN adduser --disabled-password --gecos '' -u $USERID user +USER user +WORKDIR /home/user/llama-inference/openai/ +ENTRYPOINT ["python3", "bench.py"] +CMD [ \ + "--baseurl", "https://api.openai.com:443/v1", \ + "--model", "gpt-3.5-turbo", \ + "--filename", "/dev/stdout", \ + "--note", "gpt-3.5-turbo" \ +] diff --git a/openai/README.md b/openai/README.md new file mode 100644 index 0000000..14797ba --- /dev/null +++ b/openai/README.md @@ -0,0 +1,16 @@ +# OpenAI Chat API compatible benchmark + +Benchmark for servers compatible with the [OpenAI Chat Completions API](https://platform.openai.com/docs/guides/text-generation/chat-completions-api). + +You can follow the instructions in the [Dockerfile](Dockerfile) to build a container to run the benchmark. + +Or alternatively on your own machine: +```sh +cd openai +pip3 install -r requirements.txt +export OPENAI_API_KEY='secret' #obtain from https://platform.openai.com/api-keys +python3 client.py #to test a single run +python3 bench.py --help #get usage help +python3 bench.py --baseurl https://api.openai.com:443/v1 --model gpt-3.5-turbo \ + --filename gpt-3.5-turbo.bench.csv --note "gpt-3.5-turbo" +``` diff --git a/openai/bench.py b/openai/bench.py new file mode 100644 index 0000000..18a92a7 --- /dev/null +++ b/openai/bench.py @@ -0,0 +1,17 @@ +from client import chat,args +import sys +sys.path.append('../common/') +from questions import questions +import pandas as pd + +if __name__ == '__main__': + counter = 1 + responses = [] + for q in questions: + response = chat(q) + if counter >= 2: # allow for a warmup + responses.append(response) + counter +=1 + + df = pd.DataFrame(responses) + df.to_csv(args.filename, index=False) diff --git a/openai/client.py b/openai/client.py new file mode 100644 index 0000000..f1b243e --- /dev/null +++ 
b/openai/client.py @@ -0,0 +1,44 @@ +from openai import OpenAI +import argparse +import time + +#TODO: should use a parent parser without --filename here. +parser = argparse.ArgumentParser(description='Run LLM inference requests and save to a CSV.') +parser.add_argument('--filename', type=str, default='/dev/stdout', + help='Path to the output CSV file (stdout by default).') +parser.add_argument('--note', type=str, + help='Note to add to the rows of the file (--model by default).') +parser.add_argument('--model', type=str, default='gpt-3.5-turbo', + help='Model to use (gpt-3.5-turbo by default).') +parser.add_argument('--baseurl', type=str, default='https://api.openai.com:443/v1', + help='Endpoint base URL (https://api.openai.com:443/v1 by default).') +args = parser.parse_args() +if (args.note is None): + args.note = args.model + +client = OpenAI(base_url=args.baseurl) + +def chat(prompt:str): + start = time.perf_counter() + result = client.chat.completions.create( + model=args.model, + max_tokens=200, + messages=[ + {"role": "system", "content": "You are a very verbose and helpful assistant"}, + {"role": "user", "content": prompt} + ] + ) + request_time = time.perf_counter() - start + return {'tok_count': result.usage.completion_tokens, + 'time': request_time, + 'question': prompt, + 'answer': result.choices[0].message.content, + 'note': args.note} + +if __name__ == '__main__': + prompt = "San Francisco is a city in" + result = chat(prompt) + tokPerSec = result['tok_count']/result['time'] + print(f"User: {prompt}\n" + f"Chatbot in {result['time']}s with {result['tok_count']} tokens ({tokPerSec} t/s):\n" + f"{result['answer']}") diff --git a/openai/requirements.txt b/openai/requirements.txt new file mode 100644 index 0000000..dd36072 --- /dev/null +++ b/openai/requirements.txt @@ -0,0 +1,22 @@ +annotated-types==0.6.0 +anyio==4.3.0 +certifi==2024.2.2 +distro==1.9.0 +h11==0.14.0 +httpcore==1.0.5 +httpx==0.27.0 +idna==3.6 +numpy==1.26.4 +openai==1.16.2 +pandas==2.2.1 
+pydantic==2.6.4 +pydantic_core==2.16.3 +python-dateutil==2.9.0.post0 +pytz==2024.1 +setuptools==69.1.1 +six==1.16.0 +sniffio==1.3.1 +tqdm==4.66.2 +typing_extensions==4.11.0 +tzdata==2024.1 +wheel==0.43.0