From 0f690fcf72300c302aae05de7e13a22226469c8f Mon Sep 17 00:00:00 2001
From: Keming <kemingy94@gmail.com>
Date: Tue, 2 Apr 2024 18:52:00 +0800
Subject: [PATCH] feat: add related dockerfiles (#2)

Signed-off-by: Keming <kemingy94@gmail.com>
---
 docker/README.md                  |  7 +++
 docker/encoder/Dockerfile         | 54 +++++++++++++++++++++
 docker/encoder/main.py            | 34 +++++++++++++
 docker/encoder/requirements.txt   |  4 ++
 docker/highlight/Dockerfile       | 55 +++++++++++++++++++++
 docker/highlight/main.py          | 80 +++++++++++++++++++++++++++++++
 docker/highlight/requirements.txt |  5 ++
 docker/sparse/Dockerfile          | 55 +++++++++++++++++++++
 docker/sparse/main.py             | 57 ++++++++++++++++++++++
 docker/sparse/requirements.txt    |  5 ++
 10 files changed, 356 insertions(+)
 create mode 100644 docker/README.md
 create mode 100644 docker/encoder/Dockerfile
 create mode 100644 docker/encoder/main.py
 create mode 100644 docker/encoder/requirements.txt
 create mode 100644 docker/highlight/Dockerfile
 create mode 100644 docker/highlight/main.py
 create mode 100644 docker/highlight/requirements.txt
 create mode 100644 docker/sparse/Dockerfile
 create mode 100644 docker/sparse/main.py
 create mode 100644 docker/sparse/requirements.txt

diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 0000000..39fe1f3
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,7 @@
+# Usage
+
+There are some pre-built images available on Docker Hub:
+
+- [encoder](./encoder/Dockerfile): `kemingy/cross-encoder`
+- [highlight](./highlight/Dockerfile): `kemingy/colbert-highlight`
+- [sparse](./sparse/Dockerfile): `kemingy/spladepp`
diff --git a/docker/encoder/Dockerfile b/docker/encoder/Dockerfile
new file mode 100644
index 0000000..ba2830b
--- /dev/null
+++ b/docker/encoder/Dockerfile
@@ -0,0 +1,67 @@
+# Image for the mosec cross-encoder scoring service defined in main.py.
+FROM ubuntu:22.04
+
+# Miniconda release to install; the SHA256 sums below belong to this version.
+ARG CONDA_VERSION=py310_23.3.1-0
+
+# C.UTF-8 ships with the base image, whereas en_US.UTF-8 would first need the
+# `locales` package and a locale-gen run.
+ENV DEBIAN_FRONTEND=noninteractive LANG=C.UTF-8 LC_ALL=C.UTF-8
+
+# wget and CA certificates are required to download the installer over HTTPS.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        wget \
+        ca-certificates && \
+    rm -rf /var/lib/apt/lists/*
+
+# Pick the installer for the build architecture, verify its checksum (skipped
+# when CONDA_VERSION=latest), install into /opt/conda, then delete static
+# libraries, source maps and conda caches to keep the layer small.
+RUN set -x && \
+    UNAME_M="$(uname -m)" && \
+    if [ "${UNAME_M}" = "x86_64" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh"; \
+        SHA256SUM="aef279d6baea7f67940f16aad17ebe5f6aac97487c7c03466ff01f4819e5a651"; \
+    elif [ "${UNAME_M}" = "s390x" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-s390x.sh"; \
+        SHA256SUM="ed4f51afc967e921ff5721151f567a4c43c4288ac93ec2393c6238b8c4891de8"; \
+    elif [ "${UNAME_M}" = "aarch64" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-aarch64.sh"; \
+        SHA256SUM="6950c7b1f4f65ce9b87ee1a2d684837771ae7b2e6044e0da9e915d1dee6c924c"; \
+    elif [ "${UNAME_M}" = "ppc64le" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-ppc64le.sh"; \
+        SHA256SUM="b3de538cd542bc4f5a2f2d2a79386288d6e04f0e1459755f3cefe64763e51d16"; \
+    else \
+        echo "unsupported architecture: ${UNAME_M}" >&2; exit 1; \
+    fi && \
+    wget "${MINICONDA_URL}" -O miniconda.sh -q && \
+    echo "${SHA256SUM} miniconda.sh" > shasum && \
+    if [ "${CONDA_VERSION}" != "latest" ]; then sha256sum --check --status shasum; fi && \
+    mkdir -p /opt && \
+    bash miniconda.sh -b -p /opt/conda && \
+    rm miniconda.sh shasum && \
+    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
+    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
+    echo "conda activate base" >> ~/.bashrc && \
+    find /opt/conda/ -follow -type f -name '*.a' -delete && \
+    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
+    /opt/conda/bin/conda clean -afy
+
+# Expose the conda interpreter and pip under the usual /usr/bin names.
+ENV PYTHON_PREFIX=/opt/conda/bin
+
+RUN update-alternatives --install /usr/bin/python python ${PYTHON_PREFIX}/python 1 && \
+    update-alternatives --install /usr/bin/python3 python3 ${PYTHON_PREFIX}/python3 1 && \
+    update-alternatives --install /usr/bin/pip pip ${PYTHON_PREFIX}/pip 1 && \
+    update-alternatives --install /usr/bin/pip3 pip3 ${PYTHON_PREFIX}/pip3 1
+
+# Install torch from the CPU wheel index first so that the following
+# `pip install` reuses it instead of fetching torch from the default index.
+RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
+RUN pip install --no-cache-dir mosec sentence_transformers msgspec
+RUN mkdir -p /workspace
+WORKDIR /workspace
+COPY main.py /workspace/main.py
+
+ENTRYPOINT [ "python", "main.py" ]
diff --git a/docker/encoder/main.py b/docker/encoder/main.py
new file mode 100644
index 0000000..e9407a9
--- /dev/null
+++ b/docker/encoder/main.py
@@ -0,0 +1,46 @@
+"""Mosec service that scores query/document pairs with a cross-encoder."""
+
+from os import environ
+
+from mosec import Server, Worker
+from mosec.mixin import TypedMsgPackMixin
+from msgspec import Struct
+from sentence_transformers import CrossEncoder
+
+DEFAULT_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
+# Environment variables are strings, but the worker count must be an integer.
+WORKER_NUM = int(environ.get("WORKER_NUM", 1))
+
+
+class Request(Struct, kw_only=True):
+    """A query and the candidate documents to score against it."""
+
+    query: str
+    docs: list[str]
+
+
+class Response(Struct, kw_only=True):
+    """Model scores for the documents of a request."""
+
+    scores: list[float]
+
+
+class Encoder(TypedMsgPackMixin, Worker):
+    """Worker that scores (query, document) pairs with a ``CrossEncoder``."""
+
+    def __init__(self):
+        # MODEL_NAME overrides the default checkpoint.
+        self.model_name = environ.get("MODEL_NAME", DEFAULT_MODEL)
+        self.model = CrossEncoder(self.model_name)
+
+    def forward(self, req: Request) -> Response:
+        """Pair the query with every document and return the predicted scores."""
+        pairs = [[req.query, doc] for doc in req.docs]
+        scores = self.model.predict(pairs)
+        return Response(scores=scores.tolist())
+
+
+if __name__ == "__main__":
+    server = Server()
+    server.append_worker(Encoder, num=WORKER_NUM)
+    server.run()
diff --git a/docker/encoder/requirements.txt b/docker/encoder/requirements.txt
new file mode 100644
index 0000000..ca716f8
--- /dev/null
+++ b/docker/encoder/requirements.txt
@@ -0,0 +1,4 @@
+torch
+sentence_transformers
+mosec
+msgspec
diff --git a/docker/highlight/Dockerfile b/docker/highlight/Dockerfile
new file mode 100644
index 0000000..70bf6a9
--- /dev/null
+++ b/docker/highlight/Dockerfile
@@ -0,0 +1,69 @@
+# Image for the mosec ColBERT highlight service defined in main.py.
+FROM ubuntu:22.04
+
+# Miniconda release to install; the SHA256 sums below belong to this version.
+ARG CONDA_VERSION=py310_23.3.1-0
+
+# C.UTF-8 ships with the base image, whereas en_US.UTF-8 would first need the
+# `locales` package and a locale-gen run.
+ENV DEBIAN_FRONTEND=noninteractive LANG=C.UTF-8 LC_ALL=C.UTF-8
+
+# wget and CA certificates are required to download the installer over HTTPS.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        wget \
+        ca-certificates && \
+    rm -rf /var/lib/apt/lists/*
+
+# Pick the installer for the build architecture, verify its checksum (skipped
+# when CONDA_VERSION=latest), install into /opt/conda, then delete static
+# libraries, source maps and conda caches to keep the layer small.
+RUN set -x && \
+    UNAME_M="$(uname -m)" && \
+    if [ "${UNAME_M}" = "x86_64" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh"; \
+        SHA256SUM="aef279d6baea7f67940f16aad17ebe5f6aac97487c7c03466ff01f4819e5a651"; \
+    elif [ "${UNAME_M}" = "s390x" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-s390x.sh"; \
+        SHA256SUM="ed4f51afc967e921ff5721151f567a4c43c4288ac93ec2393c6238b8c4891de8"; \
+    elif [ "${UNAME_M}" = "aarch64" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-aarch64.sh"; \
+        SHA256SUM="6950c7b1f4f65ce9b87ee1a2d684837771ae7b2e6044e0da9e915d1dee6c924c"; \
+    elif [ "${UNAME_M}" = "ppc64le" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-ppc64le.sh"; \
+        SHA256SUM="b3de538cd542bc4f5a2f2d2a79386288d6e04f0e1459755f3cefe64763e51d16"; \
+    else \
+        echo "unsupported architecture: ${UNAME_M}" >&2; exit 1; \
+    fi && \
+    wget "${MINICONDA_URL}" -O miniconda.sh -q && \
+    echo "${SHA256SUM} miniconda.sh" > shasum && \
+    if [ "${CONDA_VERSION}" != "latest" ]; then sha256sum --check --status shasum; fi && \
+    mkdir -p /opt && \
+    bash miniconda.sh -b -p /opt/conda && \
+    rm miniconda.sh shasum && \
+    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
+    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
+    echo "conda activate base" >> ~/.bashrc && \
+    find /opt/conda/ -follow -type f -name '*.a' -delete && \
+    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
+    /opt/conda/bin/conda clean -afy
+
+# Expose the conda interpreter and pip under the usual /usr/bin names.
+ENV PYTHON_PREFIX=/opt/conda/bin
+
+RUN update-alternatives --install /usr/bin/python python ${PYTHON_PREFIX}/python 1 && \
+    update-alternatives --install /usr/bin/python3 python3 ${PYTHON_PREFIX}/python3 1 && \
+    update-alternatives --install /usr/bin/pip pip ${PYTHON_PREFIX}/pip 1 && \
+    update-alternatives --install /usr/bin/pip3 pip3 ${PYTHON_PREFIX}/pip3 1
+
+RUN mkdir -p /workspace
+WORKDIR /workspace
+# Copy and install the requirements before main.py so that code changes do
+# not invalidate the cached dependency layer.
+COPY requirements.txt /workspace/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Bake in the quantized ONNX model that main.py loads from the working directory.
+RUN wget https://huggingface.co/vespa-engine/col-minilm/resolve/main/onnx/model_quantized.onnx
+COPY main.py /workspace/main.py
+
+ENTRYPOINT [ "python", "main.py" ]
diff --git a/docker/highlight/main.py b/docker/highlight/main.py
new file mode 100644
index 0000000..e55ea1c
--- /dev/null
+++ b/docker/highlight/main.py
@@ -0,0 +1,100 @@
+"""Mosec service that scores document tokens against a query with ColBERT."""
+
+from __future__ import annotations
+
+import msgspec
+import numpy as np
+import onnxruntime as ort
+from mosec import Server, ValidationError, Worker
+from transformers import AutoTokenizer
+
+MODEL_NAME = "vespa-engine/col-minilm"
+
+
+class Token(msgspec.Struct, kw_only=True):
+    """A non-special, non-padding token and its contextual vector."""
+
+    text: str
+    id: int
+    vector: np.ndarray
+
+
+class HighlightToken(msgspec.Struct, kw_only=True):
+    """A document token with its maximum similarity to the query tokens."""
+
+    text: str
+    score: float
+
+
+class Highlight(Worker):
+    """Worker that computes per-token max-similarity scores for documents."""
+
+    def __init__(self):
+        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        self.session = ort.InferenceSession("model_quantized.onnx")
+        # Cache the special ids once as a set: the membership test below runs
+        # for every token of every request.
+        self.special_ids = frozenset(self.tokenizer.all_special_ids)
+
+    def _content_tokens(self, ids, masks, vectors) -> list[Token]:
+        """Collect the tokens of one text, skipping special and padding ones."""
+        return [
+            Token(text=self.tokenizer.decode(token_id), id=token_id, vector=vector)
+            for token_id, mask, vector in zip(ids.tolist(), masks, vectors)
+            if mask != 0 and token_id not in self.special_ids
+        ]
+
+    def forward(self, queries: list[str]) -> list[list[HighlightToken]]:
+        """Score every document token against the query.
+
+        Args:
+            queries: 1st is the query, the rest are documents
+        Returns:
+            the max similarity for each token in the documents
+        Raises:
+            ValidationError: if the input is empty or the query has no
+                tokens left after removing special tokens.
+        """
+        if not queries:
+            raise ValidationError("expected a query followed by documents")
+        tokens = self.tokenizer(queries, padding=True, return_tensors="np")
+        outputs = self.session.run(
+            ["contextual"],
+            {
+                "input_ids": tokens["input_ids"],
+                "attention_mask": tokens["attention_mask"],
+            },
+        )[0]
+        query_tokens, *documents = (
+            self._content_tokens(ids, masks, vectors)
+            for ids, masks, vectors in zip(
+                tokens["input_ids"], tokens["attention_mask"], outputs
+            )
+        )
+        if not query_tokens:
+            # `max` over an empty query would raise an opaque ValueError.
+            raise ValidationError("the query contains no scorable tokens")
+
+        # One matrix-vector product yields the similarity of a document token
+        # to all query tokens at once.
+        query_matrix = np.stack([token.vector for token in query_tokens])
+        return [
+            [
+                HighlightToken(
+                    text=token.text,
+                    score=float(np.max(query_matrix @ token.vector)),
+                )
+                for token in document
+            ]
+            for document in documents
+        ]
+
+    def serialize(self, obj):
+        """Encode the result as JSON; msgspec handles the Struct instances."""
+        return msgspec.json.encode(obj)
+
+
+if __name__ == "__main__":
+    server = Server()
+    server.append_worker(Highlight, num=1)
+    server.run()
diff --git a/docker/highlight/requirements.txt b/docker/highlight/requirements.txt
new file mode 100644
index 0000000..4736abf
--- /dev/null
+++ b/docker/highlight/requirements.txt
@@ -0,0 +1,5 @@
+onnxruntime>=1.17.1
+mosec>=0.8.4
+transformers>=4.38.1
+msgspec>=0.18.6
+numpy
diff --git a/docker/sparse/Dockerfile b/docker/sparse/Dockerfile
new file mode 100644
index 0000000..0eadda6
--- /dev/null
+++ b/docker/sparse/Dockerfile
@@ -0,0 +1,69 @@
+# Image for the mosec SPLADE++ sparse-embedding service defined in main.py.
+FROM ubuntu:22.04
+
+# Miniconda release to install; the SHA256 sums below belong to this version.
+ARG CONDA_VERSION=py310_23.3.1-0
+
+# C.UTF-8 ships with the base image, whereas en_US.UTF-8 would first need the
+# `locales` package and a locale-gen run.
+ENV DEBIAN_FRONTEND=noninteractive LANG=C.UTF-8 LC_ALL=C.UTF-8
+
+# wget and CA certificates are required to download the installer over HTTPS.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        wget \
+        ca-certificates && \
+    rm -rf /var/lib/apt/lists/*
+
+# Pick the installer for the build architecture, verify its checksum (skipped
+# when CONDA_VERSION=latest), install into /opt/conda, then delete static
+# libraries, source maps and conda caches to keep the layer small.
+RUN set -x && \
+    UNAME_M="$(uname -m)" && \
+    if [ "${UNAME_M}" = "x86_64" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh"; \
+        SHA256SUM="aef279d6baea7f67940f16aad17ebe5f6aac97487c7c03466ff01f4819e5a651"; \
+    elif [ "${UNAME_M}" = "s390x" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-s390x.sh"; \
+        SHA256SUM="ed4f51afc967e921ff5721151f567a4c43c4288ac93ec2393c6238b8c4891de8"; \
+    elif [ "${UNAME_M}" = "aarch64" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-aarch64.sh"; \
+        SHA256SUM="6950c7b1f4f65ce9b87ee1a2d684837771ae7b2e6044e0da9e915d1dee6c924c"; \
+    elif [ "${UNAME_M}" = "ppc64le" ]; then \
+        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-ppc64le.sh"; \
+        SHA256SUM="b3de538cd542bc4f5a2f2d2a79386288d6e04f0e1459755f3cefe64763e51d16"; \
+    else \
+        echo "unsupported architecture: ${UNAME_M}" >&2; exit 1; \
+    fi && \
+    wget "${MINICONDA_URL}" -O miniconda.sh -q && \
+    echo "${SHA256SUM} miniconda.sh" > shasum && \
+    if [ "${CONDA_VERSION}" != "latest" ]; then sha256sum --check --status shasum; fi && \
+    mkdir -p /opt && \
+    bash miniconda.sh -b -p /opt/conda && \
+    rm miniconda.sh shasum && \
+    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
+    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
+    echo "conda activate base" >> ~/.bashrc && \
+    find /opt/conda/ -follow -type f -name '*.a' -delete && \
+    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
+    /opt/conda/bin/conda clean -afy
+
+# Expose the conda interpreter and pip under the usual /usr/bin names.
+ENV PYTHON_PREFIX=/opt/conda/bin
+
+RUN update-alternatives --install /usr/bin/python python ${PYTHON_PREFIX}/python 1 && \
+    update-alternatives --install /usr/bin/python3 python3 ${PYTHON_PREFIX}/python3 1 && \
+    update-alternatives --install /usr/bin/pip pip ${PYTHON_PREFIX}/pip 1 && \
+    update-alternatives --install /usr/bin/pip3 pip3 ${PYTHON_PREFIX}/pip3 1
+
+RUN mkdir -p /workspace
+WORKDIR /workspace
+# Copy and install the requirements before main.py so that code changes do
+# not invalidate the cached dependency layer.
+COPY requirements.txt /workspace/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Bake in the ONNX model that main.py loads from the working directory.
+RUN wget https://huggingface.co/prithivida/Splade_PP_en_v1/resolve/main/model.onnx
+COPY main.py /workspace/main.py
+
+ENTRYPOINT [ "python", "main.py" ]
diff --git a/docker/sparse/main.py b/docker/sparse/main.py
new file mode 100644
index 0000000..7592c90
--- /dev/null
+++ b/docker/sparse/main.py
@@ -0,0 +1,66 @@
+"""Mosec service that produces SPLADE++ sparse embeddings with ONNX Runtime."""
+
+from __future__ import annotations
+
+import msgspec
+import numpy as np
+import onnxruntime as ort
+from mosec import Server, Worker
+from transformers import AutoTokenizer
+
+MODEL_NAME = "prithivida/Splade_PP_en_v1"
+
+
+class SparseEmbedding(msgspec.Struct, kw_only=True, frozen=True):
+    """Sparse vector given as parallel index/value lists plus its full size."""
+
+    dim: int
+    indices: list[int]
+    values: list[float]
+
+
+class SpladePP(Worker):
+    """Worker that turns each input text into a ``SparseEmbedding``."""
+
+    def __init__(self):
+        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        self.session = ort.InferenceSession("model.onnx")
+        # Every embedding reports the tokenizer vocabulary size as its dimension.
+        self.dim = self.tokenizer.vocab_size
+
+    def _to_sparse(self, row: np.ndarray) -> SparseEmbedding:
+        """Keep only the non-zero entries of one pooled score row."""
+        kept = np.flatnonzero(row)
+        return SparseEmbedding(
+            dim=self.dim, indices=kept.tolist(), values=row[kept].tolist()
+        )
+
+    def forward(self, queries: list[str]) -> list[SparseEmbedding]:
+        """Embed every text in ``queries``.
+
+        Applies ``log(1 + relu(x))`` to the first model output, zeroes positions
+        whose attention mask is 0 and max-pools over axis 1 before sparsifying.
+        """
+        encoded = self.tokenizer(queries, padding=True, return_tensors="np")
+        attention_mask = encoded["attention_mask"]
+        feeds = {
+            "input_ids": encoded["input_ids"],
+            "input_mask": attention_mask,
+            "segment_ids": encoded["token_type_ids"],
+        }
+        logits = self.session.run(None, feeds)[0]
+
+        activations = np.log(1 + np.maximum(logits, 0))
+        masked = activations * np.expand_dims(attention_mask, axis=-1)
+        pooled = np.max(masked, axis=1)
+        return [self._to_sparse(row) for row in pooled]
+
+    def serialize(self, obj):
+        """Encode the embeddings as JSON with msgspec."""
+        return msgspec.json.encode(obj)
+
+
+if __name__ == "__main__":
+    server = Server()
+    server.append_worker(SpladePP, num=1)
+    server.run()
diff --git a/docker/sparse/requirements.txt b/docker/sparse/requirements.txt
new file mode 100644
index 0000000..4736abf
--- /dev/null
+++ b/docker/sparse/requirements.txt
@@ -0,0 +1,5 @@
+onnxruntime>=1.17.1
+mosec>=0.8.4
+transformers>=4.38.1
+msgspec>=0.18.6
+numpy