diff --git a/openai/bench.py b/openai/bench.py
index 18a92a7..2bfa7c3 100644
--- a/openai/bench.py
+++ b/openai/bench.py
@@ -1,17 +1,19 @@
-from client import chat,args
 import sys
-sys.path.append('../common/')
-from questions import questions
 import pandas as pd
-if __name__ == '__main__':
+from client import args, chat
+
+sys.path.append("../common/")
+from questions import questions
+
+if __name__ == "__main__":
     counter = 1
     responses = []
     for q in questions:
         response = chat(q)
-        if counter >= 2: # allow for a warmup
+        if counter >= 2:  # allow for a warmup
             responses.append(response)
-        counter +=1
+        counter += 1
     df = pd.DataFrame(responses)
     df.to_csv(args.filename, index=False)
diff --git a/openai/client.py b/openai/client.py
index f1b243e..9934b70 100644
--- a/openai/client.py
+++ b/openai/client.py
@@ -1,44 +1,69 @@
-from openai import OpenAI
 import argparse
 import time
-#TODO: should use a parent parser without --filename here.
-parser = argparse.ArgumentParser(description='Run LLM inference requests and save to a CSV.')
-parser.add_argument('--filename', type=str, default='/dev/stdout',
-                    help='Path to the output CSV file (stdout by default).')
-parser.add_argument('--note', type=str,
-                    help='Note to add to the rows of the file (--model by default).')
-parser.add_argument('--model', type=str, default='gpt-3.5-turbo',
-                    help='Model to use (gpt-3.5-turbo by default).')
-parser.add_argument('--baseurl', type=str, default='https://api.openai.com:443/v1',
-                    help='Endpoint base URL (https://api.openai.com:443/v1 by default).')
+from openai import OpenAI
+
+# TODO: should use a parent parser without --filename here.
+parser = argparse.ArgumentParser(
+    description="Run LLM inference requests and save to a CSV."
+)
+parser.add_argument(
+    "--filename",
+    type=str,
+    default="/dev/stdout",
+    help="Path to the output CSV file (stdout by default).",
+)
+parser.add_argument(
+    "--note", type=str, help="Note to add to the rows of the file (--model by default)."
+)
+parser.add_argument(
+    "--model",
+    type=str,
+    default="gpt-3.5-turbo",
+    help="Model to use (gpt-3.5-turbo by default).",
+)
+parser.add_argument(
+    "--baseurl",
+    type=str,
+    default="https://api.openai.com:443/v1",
+    help="Endpoint base URL (https://api.openai.com:443/v1 by default).",
+)
 args = parser.parse_args()
-if ('note' not in args):
+if args.note is None:
     args.note = args.model
 client = OpenAI(base_url=args.baseurl)
-def chat(prompt:str):
+
+def chat(prompt: str):
     start = time.perf_counter()
     result = client.chat.completions.create(
         model=args.model,
         max_tokens=200,
         messages=[
-            {"role": "system", "content": "You are a very verbose and helpful assistant"},
-            {"role": "user", "content": prompt}
-        ]
+            {
+                "role": "system",
+                "content": "You are a very verbose and helpful assistant",
+            },
+            {"role": "user", "content": prompt},
+        ],
     )
     request_time = time.perf_counter() - start
-    return {'tok_count': result.usage.completion_tokens,
-            'time': request_time,
-            'question': prompt,
-            'answer': result.choices[0].message.content,
-            'note': args.note}
+    return {
+        "tok_count": result.usage.completion_tokens,
+        "time": request_time,
+        "question": prompt,
+        "answer": result.choices[0].message.content,
+        "note": args.note,
+    }
+
-if __name__ == '__main__':
+if __name__ == "__main__":
     prompt = "San Francisco is a city in"
     result = chat(prompt)
-    tokPerSec = result['tok_count']/result['time']
-    print(f"User: {prompt}\n"
-          f"Chatbot in {result['time']}s with {result['tok_count']} tokens ({tokPerSec} t/s):\n"
-          f"{result['answer']}")
+    tokPerSec = result["tok_count"] / result["time"]
+    print(
+        f"User: {prompt}\n"
+        f"Chatbot in {result['time']}s with {result['tok_count']} tokens ({tokPerSec} t/s):\n"
+        f"{result['answer']}"
+    )