Skip to content

Commit

Permalink
Merge branch 'update-exllama' into ibaldoall
Browse files (browse the repository at this point in the history)
  • Loading branch information
ibaldonl committed Jun 6, 2024
2 parents b0977f1 + 5620c5c commit cbc71c3
Showing 1 changed file with 34 additions and 19 deletions.
53 changes: 34 additions & 19 deletions exllama/bench.py
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,30 @@
import argparse
import os
import sys
import time

from openai import OpenAI

import os, sys, time, argparse
if not os.getenv('OPENAI_API_KEY'):
if not os.getenv("OPENAI_API_KEY"):
raise ValueError("Must set environment variable OPENAI_API_KEY")
import pandas as pd
sys.path.append('../common/')

sys.path.append("../common/")
from questions import questions

client = OpenAI(base_url='http://0.0.0.0:5001/v1')
client = OpenAI(base_url="http://0.0.0.0:5001/v1")

# Parse the command-line arguments
# Define the argument parser
parser = argparse.ArgumentParser(description='Run LLM inference requests and save to a csv.')
parser.add_argument('--filename', type=str, required=True, help='Path to the output CSV file.')
parser.add_argument('--note', type=str, required=True, help='Note to add to the rows of the file.')
parser = argparse.ArgumentParser(
description="Run LLM inference requests and save to a csv."
)
parser.add_argument(
"--filename", type=str, required=True, help="Path to the output CSV file."
)
parser.add_argument(
"--note", type=str, required=True, help="Note to add to the rows of the file."
)
args = parser.parse_args()


Expand All
@@ -23,21 +34,25 @@ def generate_text_and_save_results(filename):

for q in questions:
start = time.perf_counter()
result =client.completions.create(model='TheBloke_Llama-2-7B-GPTQ',
prompt=q,
max_tokens=200,
temperature=0)
result = client.completions.create(
model="TheBloke_Llama-2-7B-GPTQ", prompt=q, max_tokens=200, temperature=0
)
request_time = time.perf_counter() - start
if counter >= 2: # allow for a warmup
responses.append({'tok_count': result.usage.completion_tokens,
'time': request_time,
'question': q,
'answer': result.choices[0].text,
'note': args.note})
counter +=1
if counter >= 2: # allow for a warmup
responses.append(
{
"tok_count": result.usage.completion_tokens,
"time": request_time,
"question": q,
"answer": result.choices[0].text,
"note": args.note,
}
)
counter += 1

df = pd.DataFrame(responses)
df.to_csv(filename, index=False)

if __name__ == '__main__':

if __name__ == "__main__":
generate_text_and_save_results(args.filename)

0 comments on commit cbc71c3

Please sign in to comment.