diff --git a/exllama/bench.py b/exllama/bench.py index 752d0ab..a037e8a 100644 --- a/exllama/bench.py +++ b/exllama/bench.py @@ -1,19 +1,30 @@ +import argparse +import os +import sys +import time + from openai import OpenAI -import os, sys, time, argparse -if not os.getenv('OPENAI_API_KEY'): +if not os.getenv("OPENAI_API_KEY"): raise ValueError("Must set environment variable OPENAI_API_KEY") import pandas as pd -sys.path.append('../common/') + +sys.path.append("../common/") from questions import questions -client = OpenAI(base_url='http://0.0.0.0:5001/v1') +client = OpenAI(base_url="http://0.0.0.0:5001/v1") # Parse the command-line arguments # Define the argument parser -parser = argparse.ArgumentParser(description='Run LLM inference requests and save to a csv.') -parser.add_argument('--filename', type=str, required=True, help='Path to the output CSV file.') -parser.add_argument('--note', type=str, required=True, help='Note to add to the rows of the file.') +parser = argparse.ArgumentParser( + description="Run LLM inference requests and save to a csv." +) +parser.add_argument( + "--filename", type=str, required=True, help="Path to the output CSV file." +) +parser.add_argument( + "--note", type=str, required=True, help="Note to add to the rows of the file." +) args = parser.parse_args() @@ -23,21 +34,25 @@ def generate_text_and_save_results(filename): for q in questions: start = time.perf_counter() - result =client.completions.create(model='TheBloke_Llama-2-7B-GPTQ', - prompt=q, - max_tokens=200, - temperature=0) + result = client.completions.create( + model="TheBloke_Llama-2-7B-GPTQ", prompt=q, max_tokens=200, temperature=0 + ) request_time = time.perf_counter() - start - if counter >= 2: # allow for a warmup - responses.append({'tok_count': result.usage.completion_tokens, - 'time': request_time, - 'question': q, - 'answer': result.choices[0].text, - 'note': args.note}) - counter +=1 + if counter >= 2: # allow for a warmup + responses.append( + { + "tok_count": result.usage.completion_tokens, + "time": request_time, + "question": q, + "answer": result.choices[0].text, + "note": args.note, + } + ) + counter += 1 df = pd.DataFrame(responses) df.to_csv(filename, index=False) -if __name__ == '__main__': + +if __name__ == "__main__": generate_text_and_save_results(args.filename)