forked from dusty-nv/jetson-containers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile.v2
36 lines (25 loc) · 1014 Bytes
/
Dockerfile.v2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#
# Dockerfile for exllama_v2 (see config.py for package configuration)
#
# Clones exllamav2 from GitHub, relaxes its torch requirement, builds a wheel,
# installs it, and checks that the installed package can be imported.
#
# Base image to build on. No default is declared, so it has to be passed in
# as a build argument.
ARG BASE_IMAGE
FROM ${BASE_IMAGE}
# Not referenced explicitly in this file; declaring it makes a value passed via
# --build-arg visible in the environment of the RUN steps below.
# NOTE(review): presumably consumed by the PyTorch CUDA extension build - confirm.
ARG TORCH_CUDA_ARCH_LIST
# GitHub "owner/name" of the repository to build, and the branch to check out.
ARG EXLLAMA2_REPO=turboderp/exllamav2
ARG EXLLAMA2_BRANCH=master
# The sources are cloned to /opt/exllamav2 and the built wheel is copied to /opt.
WORKDIR /opt
# Download the GitHub API description of the branch head into /tmp.
# The file is not read by any later instruction in this Dockerfile.
# NOTE(review): presumably fetched so that this layer changes whenever the
# branch head moves, invalidating the cached clone below - confirm.
ADD https://api.github.com/repos/${EXLLAMA2_REPO}/git/refs/heads/${EXLLAMA2_BRANCH} /tmp/exllama2_version.json

# Shallow-clone the selected branch into ./exllamav2 (relative to the current
# working directory).
RUN git clone \
        --branch=${EXLLAMA2_BRANCH} \
        --depth=1 \
        https://github.com/${EXLLAMA2_REPO} \
        exllamav2
# Rewrite the torch entries in place:
#   requirements.txt: everything from "torch" to the end of the line -> "torch"
#   setup.py:         everything from "torch" to the last '"' on the line -> 'torch"'
# NOTE(review): presumably this strips the version constraint so that pip keeps
# the torch build already present in the base image - confirm.
# The patched files are printed afterwards so the result shows up in the build log.
RUN sed -i 's|torch.*|torch|g' exllamav2/requirements.txt && \
    sed -i 's|torch.*"|torch"|g' exllamav2/setup.py && \
    cat exllamav2/requirements.txt exllamav2/setup.py
# build the wheel
# WORKDIR is used instead of `RUN cd ... &&` so the working directory is
# explicit in the Dockerfile rather than hidden inside the shell command.
WORKDIR /opt/exllamav2
RUN python3 setup.py --verbose bdist_wheel && \
    cp dist/exllamav2*.whl /opt

# Install the wheel that was just copied to /opt. The working directory is put
# back to /opt first so that it is the same as before the build step.
WORKDIR /opt
RUN pip3 install --no-cache-dir --verbose /opt/exllamav2*.whl
# this will build cuda_ext.py to ~/.cache/torch_extensions/
# The script is run from the source checkout with --help only.
# WORKDIR replaces the former `RUN cd /opt/exllamav2 && ...`.
WORKDIR /opt/exllamav2
RUN python3 test_inference.py --help

# Leave / as the working directory for the remaining steps and the final image.
WORKDIR /
# Sanity check: pip must know the package and Python must be able to import it.
RUN pip3 show exllamav2 \
 && python3 -c 'import exllamav2'