diff --git a/exllama/bench.py b/exllama/bench.py index f2f3000..752d0ab 100644 --- a/exllama/bench.py +++ b/exllama/bench.py @@ -1,4 +1,5 @@ -import openai +from openai import OpenAI + import os, sys, time, argparse if not os.getenv('OPENAI_API_KEY'): raise ValueError("Must set environment variable OPENAI_API_KEY") @@ -6,8 +7,8 @@ sys.path.append('../common/') from questions import questions -url = 'http://0.0.0.0:5001/v1' -openai.api_base = url +client = OpenAI(base_url='http://0.0.0.0:5001/v1') + # Parse the command-line arguments # Define the argument parser parser = argparse.ArgumentParser(description='Run LLM inference requests and save to a csv.') @@ -22,7 +23,7 @@ def generate_text_and_save_results(filename): for q in questions: start = time.perf_counter() - result =openai.Completion.create(model='TheBloke_Llama-2-7B-GPTQ', + result =client.completions.create(model='TheBloke_Llama-2-7B-GPTQ', prompt=q, max_tokens=200, temperature=0) diff --git a/exllama/environment.yml b/exllama/environment.yml index 78cb9c3..55a0e72 100644 --- a/exllama/environment.yml +++ b/exllama/environment.yml @@ -30,7 +30,7 @@ dependencies: - aiohttp==3.8.5 - aiosignal==1.3.1 - altair==5.0.1 - - anyio==3.7.1 + - anyio==4.3.0 - appdirs==1.4.4 - asttokens==2.2.1 - async-timeout==4.0.2 @@ -40,7 +40,7 @@ dependencies: - bitsandbytes==0.40.2 - blinker==1.6.2 - cachetools==5.3.1 - - certifi==2023.7.22 + - certifi==2024.2.2 - charset-normalizer==3.2.0 - click==8.1.6 - cmake==3.27.0 @@ -73,10 +73,10 @@ dependencies: - gradio-client==0.2.5 - grpcio==1.56.2 - h11==0.14.0 - - httpcore==0.17.3 - - httpx==0.24.1 + - httpcore==1.0.5 + - httpx==0.27.0 - huggingface-hub==0.16.4 - - idna==3.4 + - idna==3.6 - ipdb==0.13.13 - ipython==8.14.0 - itsdangerous==2.1.2 @@ -102,7 +102,7 @@ dependencies: - multiprocess==0.70.14 - networkx==3.1 - nltk==3.8.1 - - numpy==1.25.1 + - numpy==1.26.4 - nvidia-cublas-cu11==11.10.3.66 - nvidia-cuda-cupti-cu11==11.7.101 - nvidia-cuda-nvrtc-cu11==11.7.99 @@ -115,10 +115,10 @@ dependencies: - nvidia-nccl-cu11==2.14.3 - nvidia-nvtx-cu11==11.7.91 - oauthlib==3.2.2 - - openai==0.27.8 + - openai==1.16.2 - orjson==3.9.2 - packaging==23.1 - - pandas==2.0.3 + - pandas==2.2.1 - parso==0.8.3 - pathtools==0.1.2 - peft==0.4.0.dev0 @@ -134,13 +134,13 @@ dependencies: - pyarrow==12.0.1 - pyasn1==0.5.0 - pyasn1-modules==0.3.0 - - pydantic==1.10.11 + - pydantic==2.6.4 - pydub==0.25.1 - pygments==2.15.1 - pyparsing==3.0.9 - - python-dateutil==2.8.2 + - python-dateutil==2.9.0.post0 - python-multipart==0.0.6 - - pytz==2023.3 + - pytz==2024.1 - pyyaml==6.0.1 - referencing==0.30.0 - regex==2023.6.3 @@ -159,7 +159,7 @@ dependencies: - setproctitle==1.3.2 - six==1.16.0 - smmap==5.0.0 - - sniffio==1.3.0 + - sniffio==1.3.1 - stack-data==0.6.2 - starlette==0.27.0 - sympy==1.12 @@ -172,12 +172,12 @@ dependencies: - toolz==0.12.0 - torch==2.0.1 - torchvision==0.15.2 - - tqdm==4.65.0 + - tqdm==4.66.2 - traitlets==5.9.0 - transformers==4.31.0 - triton==2.0.0 - - typing-extensions==4.7.1 - - tzdata==2023.3 + - typing-extensions==4.10.0 + - tzdata==2024.1 - uc-micro-py==1.0.2 - urllib3==1.26.16 - uvicorn==0.23.1