From 0f690fcf72300c302aae05de7e13a22226469c8f Mon Sep 17 00:00:00 2001 From: Keming Date: Tue, 2 Apr 2024 18:52:00 +0800 Subject: [PATCH] feat: add related dockerfiles (#2) Signed-off-by: Keming --- docker/README.md | 7 +++ docker/encoder/Dockerfile | 54 +++++++++++++++++++++ docker/encoder/main.py | 34 +++++++++++++ docker/encoder/requirements.txt | 4 ++ docker/highlight/Dockerfile | 55 +++++++++++++++++++++ docker/highlight/main.py | 80 +++++++++++++++++++++++++++++++ docker/highlight/requirements.txt | 5 ++ docker/sparse/Dockerfile | 55 +++++++++++++++++++++ docker/sparse/main.py | 57 ++++++++++++++++++++++ docker/sparse/requirements.txt | 5 ++ 10 files changed, 356 insertions(+) create mode 100644 docker/README.md create mode 100644 docker/encoder/Dockerfile create mode 100644 docker/encoder/main.py create mode 100644 docker/encoder/requirements.txt create mode 100644 docker/highlight/Dockerfile create mode 100644 docker/highlight/main.py create mode 100644 docker/highlight/requirements.txt create mode 100644 docker/sparse/Dockerfile create mode 100644 docker/sparse/main.py create mode 100644 docker/sparse/requirements.txt diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000..39fe1f3 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,7 @@ +# Usage + +There are some pre-built images available on Docker Hub: + +- [encoder](./encoder/Dockerfile): `kemingy/cross-encoder` +- [highlight](./highlight/Dockerfile): `kemingy/colbert-highlight` +- [sparse](./sparse/Dockerfile): `kemingy/spladepp` diff --git a/docker/encoder/Dockerfile b/docker/encoder/Dockerfile new file mode 100644 index 0000000..ba2830b --- /dev/null +++ b/docker/encoder/Dockerfile @@ -0,0 +1,54 @@ +FROM ubuntu:22.04 + +ARG CONDA_VERSION=py310_23.3.1-0 + +ENV DEBIAN_FRONTEND=noninteractive LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 + +RUN apt update && \ + apt install -y --no-install-recommends \ + wget \ + ca-certificates && \ + rm -rf /var/lib/apt/lists/* + +RUN set -x && \ + 
UNAME_M="$(uname -m)" && \ + if [ "${UNAME_M}" = "x86_64" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh"; \ + SHA256SUM="aef279d6baea7f67940f16aad17ebe5f6aac97487c7c03466ff01f4819e5a651"; \ + elif [ "${UNAME_M}" = "s390x" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-s390x.sh"; \ + SHA256SUM="ed4f51afc967e921ff5721151f567a4c43c4288ac93ec2393c6238b8c4891de8"; \ + elif [ "${UNAME_M}" = "aarch64" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-aarch64.sh"; \ + SHA256SUM="6950c7b1f4f65ce9b87ee1a2d684837771ae7b2e6044e0da9e915d1dee6c924c"; \ + elif [ "${UNAME_M}" = "ppc64le" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-ppc64le.sh"; \ + SHA256SUM="b3de538cd542bc4f5a2f2d2a79386288d6e04f0e1459755f3cefe64763e51d16"; \ + fi && \ + wget "${MINICONDA_URL}" -O miniconda.sh -q && \ + echo "${SHA256SUM} miniconda.sh" > shasum && \ + if [ "${CONDA_VERSION}" != "latest" ]; then sha256sum --check --status shasum; fi && \ + mkdir -p /opt && \ + bash miniconda.sh -b -p /opt/conda && \ + rm miniconda.sh shasum && \ + ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ + echo ". 
/opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ + echo "conda activate base" >> ~/.bashrc && \ + find /opt/conda/ -follow -type f -name '*.a' -delete && \ + find /opt/conda/ -follow -type f -name '*.js.map' -delete && \ + /opt/conda/bin/conda clean -afy + +ENV PYTHON_PREFIX=/opt/conda/bin + +RUN update-alternatives --install /usr/bin/python python ${PYTHON_PREFIX}/python 1 && \ + update-alternatives --install /usr/bin/python3 python3 ${PYTHON_PREFIX}/python3 1 && \ + update-alternatives --install /usr/bin/pip pip ${PYTHON_PREFIX}/pip 1 && \ + update-alternatives --install /usr/bin/pip3 pip3 ${PYTHON_PREFIX}/pip3 1 + +RUN pip install torch --index-url https://download.pytorch.org/whl/cpu +RUN pip install mosec sentence_transformers msgspec +RUN mkdir -p /workspace +WORKDIR /workspace +COPY main.py /workspace/main.py + +ENTRYPOINT [ "python", "main.py" ] diff --git a/docker/encoder/main.py b/docker/encoder/main.py new file mode 100644 index 0000000..e9407a9 --- /dev/null +++ b/docker/encoder/main.py @@ -0,0 +1,46 @@
+from os import environ
+
+from mosec import Server, Worker
+from mosec.mixin import TypedMsgPackMixin
+from msgspec import Struct
+from sentence_transformers import CrossEncoder
+
+DEFAULT_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
+# Environment values are strings; mosec needs an integer worker count.
+WORKER_NUM = int(environ.get("WORKER_NUM", "1"))
+
+
+class Request(Struct, kw_only=True):
+    """Rerank request: one query plus the documents to score against it."""
+
+    query: str
+    docs: list[str]
+
+
+class Response(Struct, kw_only=True):
+    """Scores returned by the cross-encoder for the submitted documents."""
+
+    scores: list[float]
+
+
+class Encoder(TypedMsgPackMixin, Worker):
+    """Mosec worker that scores (query, document) pairs with a CrossEncoder."""
+
+    def __init__(self):
+        # MODEL_NAME overrides the default checkpoint.
+        self.model_name = environ.get("MODEL_NAME", DEFAULT_MODEL)
+        self.model = CrossEncoder(self.model_name)
+
+    def forward(self, req: Request) -> Response:
+        """Score ``req.query`` against every entry of ``req.docs``."""
+        if not req.docs:
+            # Nothing to score; skip the model call for an empty batch.
+            return Response(scores=[])
+        pairs = [[req.query, doc] for doc in req.docs]
+        return Response(scores=self.model.predict(pairs).tolist())
+
+
+if __name__ == "__main__":
+    server = Server()
+    server.append_worker(Encoder, num=WORKER_NUM)
+    server.run()
diff --git a/docker/encoder/requirements.txt b/docker/encoder/requirements.txt new file mode
100644 index 0000000..ca716f8 --- /dev/null +++ b/docker/encoder/requirements.txt @@ -0,0 +1,4 @@ +torch +sentence_transformers +mosec +msgspec diff --git a/docker/highlight/Dockerfile b/docker/highlight/Dockerfile new file mode 100644 index 0000000..70bf6a9 --- /dev/null +++ b/docker/highlight/Dockerfile @@ -0,0 +1,55 @@ +FROM ubuntu:22.04 + +ARG CONDA_VERSION=py310_23.3.1-0 + +ENV DEBIAN_FRONTEND=noninteractive LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 + +RUN apt update && \ + apt install -y --no-install-recommends \ + wget \ + ca-certificates && \ + rm -rf /var/lib/apt/lists/* + +RUN set -x && \ + UNAME_M="$(uname -m)" && \ + if [ "${UNAME_M}" = "x86_64" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh"; \ + SHA256SUM="aef279d6baea7f67940f16aad17ebe5f6aac97487c7c03466ff01f4819e5a651"; \ + elif [ "${UNAME_M}" = "s390x" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-s390x.sh"; \ + SHA256SUM="ed4f51afc967e921ff5721151f567a4c43c4288ac93ec2393c6238b8c4891de8"; \ + elif [ "${UNAME_M}" = "aarch64" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-aarch64.sh"; \ + SHA256SUM="6950c7b1f4f65ce9b87ee1a2d684837771ae7b2e6044e0da9e915d1dee6c924c"; \ + elif [ "${UNAME_M}" = "ppc64le" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-ppc64le.sh"; \ + SHA256SUM="b3de538cd542bc4f5a2f2d2a79386288d6e04f0e1459755f3cefe64763e51d16"; \ + fi && \ + wget "${MINICONDA_URL}" -O miniconda.sh -q && \ + echo "${SHA256SUM} miniconda.sh" > shasum && \ + if [ "${CONDA_VERSION}" != "latest" ]; then sha256sum --check --status shasum; fi && \ + mkdir -p /opt && \ + bash miniconda.sh -b -p /opt/conda && \ + rm miniconda.sh shasum && \ + ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ + echo ". 
/opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ + echo "conda activate base" >> ~/.bashrc && \ + find /opt/conda/ -follow -type f -name '*.a' -delete && \ + find /opt/conda/ -follow -type f -name '*.js.map' -delete && \ + /opt/conda/bin/conda clean -afy + +ENV PYTHON_PREFIX=/opt/conda/bin + +RUN update-alternatives --install /usr/bin/python python ${PYTHON_PREFIX}/python 1 && \ + update-alternatives --install /usr/bin/python3 python3 ${PYTHON_PREFIX}/python3 1 && \ + update-alternatives --install /usr/bin/pip pip ${PYTHON_PREFIX}/pip 1 && \ + update-alternatives --install /usr/bin/pip3 pip3 ${PYTHON_PREFIX}/pip3 1 + +RUN mkdir -p /workspace +WORKDIR /workspace +COPY requirements.txt /workspace/requirements.txt +RUN pip install -r requirements.txt +RUN wget https://huggingface.co/vespa-engine/col-minilm/resolve/main/onnx/model_quantized.onnx +COPY main.py /workspace/main.py + +ENTRYPOINT [ "python", "main.py" ] diff --git a/docker/highlight/main.py b/docker/highlight/main.py new file mode 100644 index 0000000..e55ea1c --- /dev/null +++ b/docker/highlight/main.py @@ -0,0 +1,107 @@
+from __future__ import annotations
+
+import msgspec
+import numpy as np
+import onnxruntime as ort
+from mosec import Server, Worker
+from transformers import AutoTokenizer
+
+MODEL_NAME = "vespa-engine/col-minilm"
+
+
+class Token(msgspec.Struct, kw_only=True):
+    """A kept (non-special, non-padding) token and its model output vector."""
+
+    text: str
+    id: int
+    vector: np.ndarray
+
+
+class HighlightToken(msgspec.Struct, kw_only=True):
+    """A document token and its best dot-product score against the query."""
+
+    text: str
+    score: float
+
+
+class Highlight(Worker):
+    """Mosec worker that scores each document token against the query tokens."""
+
+    def __init__(self):
+        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        self.session = ort.InferenceSession("model_quantized.onnx")
+        # Snapshot the special ids once so the per-token filter is a set lookup.
+        self.special_ids = frozenset(self.tokenizer.all_special_ids)
+
+    def forward(self, queries: list[str]) -> list[list[HighlightToken]]:
+        """
+        Args:
+            queries: 1st is the query, the rest are documents
+        Returns:
+            the max similarity for each token in the documents
+        """
+        if len(queries) < 2:
+            # No documents were sent, so there is nothing to highlight.
+            return []
+
+        tokens = self.tokenizer(queries, padding=True, return_tensors="np")
+        outputs = self.session.run(
+            ["contextual"],
+            {
+                "input_ids": tokens["input_ids"],
+                "attention_mask": tokens["attention_mask"],
+            },
+        )[0]
+
+        token_vectors = []
+        for ids, masks, vectors in zip(
+            tokens["input_ids"], tokens["attention_mask"], outputs
+        ):
+            kept = []
+            for token_id, mask, vector in zip(ids, masks, vectors):
+                # Drop padding positions and the tokenizer's special tokens.
+                if mask == 0 or int(token_id) in self.special_ids:
+                    continue
+                kept.append(
+                    Token(
+                        text=self.tokenizer.decode(token_id),
+                        id=token_id,
+                        vector=vector,
+                    )
+                )
+            token_vectors.append(kept)
+
+        query_tokens = token_vectors[0]
+        similarities = []
+        for doc_tokens in token_vectors[1:]:
+            similarities.append(
+                [
+                    HighlightToken(
+                        text=token.text,
+                        # default=0.0 keeps max() from raising when the query
+                        # has no tokens left after filtering.
+                        score=float(
+                            max(
+                                (
+                                    token.vector @ query_token.vector
+                                    for query_token in query_tokens
+                                ),
+                                default=0.0,
+                            )
+                        ),
+                    )
+                    for token in doc_tokens
+                ]
+            )
+
+        return similarities
+
+    def serialize(self, obj):
+        """Encode the worker output as JSON bytes with msgspec."""
+        return msgspec.json.encode(obj)
+
+
+if __name__ == "__main__":
+    server = Server()
+    server.append_worker(Highlight, num=1)
+    server.run()
diff --git a/docker/highlight/requirements.txt b/docker/highlight/requirements.txt new file mode 100644 index 0000000..4736abf --- /dev/null +++ b/docker/highlight/requirements.txt @@ -0,0 +1,5 @@ +onnxruntime>=1.17.1 +mosec>=0.8.4 +transformers>=4.38.1 +msgspec>=0.18.6 +numpy diff --git a/docker/sparse/Dockerfile b/docker/sparse/Dockerfile new file mode 100644 index 0000000..0eadda6 --- /dev/null +++ b/docker/sparse/Dockerfile @@ -0,0 +1,55 @@ +FROM ubuntu:22.04 + +ARG CONDA_VERSION=py310_23.3.1-0 + +ENV DEBIAN_FRONTEND=noninteractive LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 + +RUN apt update && \ + apt install -y --no-install-recommends \ + wget \ + ca-certificates && \ + rm -rf /var/lib/apt/lists/* + +RUN set -x && \ + UNAME_M="$(uname -m)" && \ + if [ "${UNAME_M}" = "x86_64" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh"; \ + SHA256SUM="aef279d6baea7f67940f16aad17ebe5f6aac97487c7c03466ff01f4819e5a651"; \ + elif [ "${UNAME_M}" = "s390x" ]; then \ +
MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-s390x.sh"; \ + SHA256SUM="ed4f51afc967e921ff5721151f567a4c43c4288ac93ec2393c6238b8c4891de8"; \ + elif [ "${UNAME_M}" = "aarch64" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-aarch64.sh"; \ + SHA256SUM="6950c7b1f4f65ce9b87ee1a2d684837771ae7b2e6044e0da9e915d1dee6c924c"; \ + elif [ "${UNAME_M}" = "ppc64le" ]; then \ + MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-ppc64le.sh"; \ + SHA256SUM="b3de538cd542bc4f5a2f2d2a79386288d6e04f0e1459755f3cefe64763e51d16"; \ + fi && \ + wget "${MINICONDA_URL}" -O miniconda.sh -q && \ + echo "${SHA256SUM} miniconda.sh" > shasum && \ + if [ "${CONDA_VERSION}" != "latest" ]; then sha256sum --check --status shasum; fi && \ + mkdir -p /opt && \ + bash miniconda.sh -b -p /opt/conda && \ + rm miniconda.sh shasum && \ + ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ + echo ". 
/opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ + echo "conda activate base" >> ~/.bashrc && \ + find /opt/conda/ -follow -type f -name '*.a' -delete && \ + find /opt/conda/ -follow -type f -name '*.js.map' -delete && \ + /opt/conda/bin/conda clean -afy + +ENV PYTHON_PREFIX=/opt/conda/bin + +RUN update-alternatives --install /usr/bin/python python ${PYTHON_PREFIX}/python 1 && \ + update-alternatives --install /usr/bin/python3 python3 ${PYTHON_PREFIX}/python3 1 && \ + update-alternatives --install /usr/bin/pip pip ${PYTHON_PREFIX}/pip 1 && \ + update-alternatives --install /usr/bin/pip3 pip3 ${PYTHON_PREFIX}/pip3 1 + +RUN mkdir -p /workspace +WORKDIR /workspace +COPY requirements.txt /workspace/requirements.txt +RUN pip install -r requirements.txt +RUN wget https://huggingface.co/prithivida/Splade_PP_en_v1/resolve/main/model.onnx +COPY main.py /workspace/main.py + +ENTRYPOINT [ "python", "main.py" ] diff --git a/docker/sparse/main.py b/docker/sparse/main.py new file mode 100644 index 0000000..7592c90 --- /dev/null +++ b/docker/sparse/main.py @@ -0,0 +1,64 @@
+from __future__ import annotations
+
+import msgspec
+import numpy as np
+import onnxruntime as ort
+from mosec import Server, Worker
+from transformers import AutoTokenizer
+
+MODEL_NAME = "prithivida/Splade_PP_en_v1"
+
+
+class SparseEmbedding(msgspec.Struct, kw_only=True, frozen=True):
+    """Sparse vector stored as parallel index/value lists plus its dimension."""
+
+    dim: int
+    indices: list[int]
+    values: list[float]
+
+
+class SpladePP(Worker):
+    """Mosec worker that turns each input text into a sparse embedding."""
+
+    def __init__(self):
+        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        self.session = ort.InferenceSession("model.onnx")
+        self.dim = self.tokenizer.vocab_size
+
+    def forward(self, queries: list[str]) -> list[SparseEmbedding]:
+        """Return one sparse embedding per entry of ``queries``."""
+        encoded = self.tokenizer(queries, padding=True, return_tensors="np")
+        attention_mask = encoded["attention_mask"]
+        feeds = {
+            "input_ids": encoded["input_ids"],
+            "input_mask": attention_mask,
+            "segment_ids": encoded["token_type_ids"],
+        }
+        logits = self.session.run(None, feeds)[0]
+
+        # log(1 + relu(x)), weighted by the attention mask, then max over axis 1.
+        activations = np.log(1 + np.maximum(logits, 0))
+        masked = activations * np.expand_dims(attention_mask, axis=-1)
+        pooled = np.max(masked, axis=1)
+
+        embeddings = []
+        for weights in pooled:
+            nonzero = weights.nonzero()[0]
+            embeddings.append(
+                SparseEmbedding(
+                    dim=self.dim,
+                    indices=nonzero.tolist(),
+                    values=weights[nonzero].tolist(),
+                )
+            )
+        return embeddings
+
+    def serialize(self, obj):
+        """Encode the worker output as JSON bytes with msgspec."""
+        return msgspec.json.encode(obj)
+
+
+if __name__ == "__main__":
+    server = Server()
+    server.append_worker(SpladePP, num=1)
+    server.run()
diff --git a/docker/sparse/requirements.txt b/docker/sparse/requirements.txt new file mode 100644 index 0000000..4736abf --- /dev/null +++ b/docker/sparse/requirements.txt @@ -0,0 +1,5 @@ +onnxruntime>=1.17.1 +mosec>=0.8.4 +transformers>=4.38.1 +msgspec>=0.18.6 +numpy