diff --git a/powerinfer/Dockerfile b/powerinfer/Dockerfile
index 5d68019..a1de8fe 100644
--- a/powerinfer/Dockerfile
+++ b/powerinfer/Dockerfile
@@ -43,10 +43,10 @@ FROM nvidia/cuda:12.3.1-devel-rockylinux9
 ARG USERID=1000
 RUN yum install -y python3-pip cmake libcudnn8 git && yum clean all && rm -rf /var/cache/yum/*
 RUN git clone https://github.com/SJTU-IPADS/PowerInfer
-WORKDIR PowerInfer
+WORKDIR /PowerInfer
 RUN pip install --no-cache-dir -r requirements.txt
 RUN cmake -S . -B build -DLLAMA_CUBLAS=ON
-RUN cmake --build build --config Release -j $(nproc)
+RUN cmake --build build --config Release -j "$(nproc)"
 RUN pip install --no-cache-dir pandas #for the benchmark.
 RUN adduser -u $USERID user
 USER user
diff --git a/powerinfer/README.md b/powerinfer/README.md
index a72ad06..13bae21 100644
--- a/powerinfer/README.md
+++ b/powerinfer/README.md
@@ -1,4 +1,6 @@
-[PowerInfer](https://github.com/SJTU-IPADS/PowerInfer)
+# PowerInfer benchmark
+
+Benchmark for [PowerInfer](https://github.com/SJTU-IPADS/PowerInfer).
 Note that the model loses some inference quality in exchange for speed as shown in https://huggingface.co/SparseLLM/ReluLLaMA-7B.
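
For reference, a minimal sketch of how the resulting image could be built and exercised. The `powerinfer-bench` tag, the mounted model path and file name, and the `build/bin/main` binary location are assumptions for illustration, not established by this patch; running with `--gpus all` assumes the NVIDIA Container Toolkit is installed on the host.

# Build the benchmark image, using powerinfer/ as the build context (tag name is arbitrary).
docker build -t powerinfer-bench ./powerinfer

# Run with GPU access, mounting a local model directory read-only.
# The binary path follows the llama.cpp-style layout PowerInfer builds into /PowerInfer/build;
# verify it (and the model file name) against the actual build output and downloaded weights.
docker run --rm --gpus all -v "$PWD/models:/models:ro" powerinfer-bench \
  /PowerInfer/build/bin/main -m /models/model.gguf -p "Hello"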