Dtype changes #481

Open: wants to merge 3 commits into base branch main

Changes from all commits
50 changes: 50 additions & 0 deletions .github/workflows/security.yml
@@ -0,0 +1,50 @@
name: Wasp (Semgrep) - SAST Check

on:
pull_request_target:
branches:
- main
Contributor comment on lines +4 to +6:
logic: pull_request_target is a security risk as it runs with repository secrets on external PR code. Consider using pull_request instead if repository secrets are not needed, or add explicit ref validation
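The "explicit ref validation" the comment suggests can be sketched as a guard step. This is only an illustration, not part of the PR: `HEAD_REPO` and `BASE_REPO` are hypothetical stand-ins for the `github.event.pull_request.head.repo.full_name` and `github.repository` values a real workflow step would receive.

```shell
# Hypothetical guard for a pull_request_target workflow: skip privileged
# steps when the PR head comes from a fork instead of the base repository.
# HEAD_REPO/BASE_REPO are assumptions standing in for workflow context values.
HEAD_REPO="${HEAD_REPO:-someuser/fork}"
BASE_REPO="${BASE_REPO:-org/repo}"
if [ "$HEAD_REPO" != "$BASE_REPO" ]; then
  echo "fork PR detected: skipping steps that need repository secrets"
else
  echo "same-repo PR: safe to continue"
fi
```

With the defaults above (a fork name that differs from the base repository), the guard takes the first branch and prints the "fork PR detected" line.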


schedule:
- cron: '0 */24 * * *'
workflow_dispatch:

jobs:
wasp-scan:
name: Wasp scan
runs-on:
group: security-lrg
steps:
- name: Setting permission
run: sudo chown runner:runner -R .*
Contributor comment:
logic: The wildcard pattern .* in chown is overly broad and could affect hidden files. Use a more specific path like '.'
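The hazard behind this comment is shell glob expansion: in the shell, `.*` matches the `.` and `..` directory entries as well as dotfiles, so a recursive `chown .*` can climb into the parent directory. A minimal demonstration (the `/tmp/globdemo` path is just for illustration):

```shell
# '.*' expands to '.' and '..' in addition to any dotfiles, so a
# recursive chown on '.*' would also touch the parent directory.
mkdir -p /tmp/globdemo/.hidden
cd /tmp/globdemo
echo .*    # typically expands to: . .. .hidden
# Safer: operate on the current directory only, as the comment suggests.
echo .     # expands to: .
```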


- name: Repository checkout
uses: actions/checkout@v4

- name: Running Wasp scan
uses: freshactions/wasp@latest
Contributor comment:
style: Using @latest tag for actions can be dangerous. Pin to a specific version for security and stability

env:
WASP_LOG_LEVEL: DEBUG
WASP_SAVE_JSON: true
WASP_SAVE_HTML: true
WASP_SAVE_CSV: true
WASP_FRESHRELEASE_PR_PROJECT_KEY: ${{ vars.SECURITY_APPSEC_FRESHRELEASE_PROJECT_KEY }}
WASP_DRY_RUN: ${{ vars.SECURITY_APPSEC_WASP_DRY_RUN }}

WASP_FRESHRELEASE_URL: ${{ vars.SECURITY_APPSEC_FRESHRELEASE_URL }}
WASP_FRESHRELEASE_PR_ISSUE_TYPE: ${{ vars.SECURITY_APPSEC_FRESHRELEASE_PR_ISSUE_TYPE }}

WASP_TOKEN: ${{ secrets.SECURITY_APPSEC_WASP_TOKEN }}
WASP_FRESHRELEASE_TOKEN: ${{ secrets.SECURITY_APPSEC_FRESHRELEASE_TOKEN }}
WASP_SLACK_TOKEN: ${{ secrets.SECURITY_APPSEC_SLACK_TOKEN }}
GITHUB_TOKEN: ${{ secrets.SECURITY_APPSEC_GH_TOKEN }}

- uses: actions/upload-artifact@v4
if: always()
with:
name: Wasp scan report archive
retention-days: ${{ vars.SECURITY_APPSEC_WASP_RESULT_RETENTION_DAYS }}
path: |
wasp-report.csv
wasp-report.json
wasp-report.html
4 changes: 4 additions & 0 deletions .gitignore
@@ -6,6 +6,10 @@ __pycache__/
# C extensions
*.so

# Pycharn
Contributor comment:
syntax: 'Pycharn' is misspelled, should be 'PyCharm'

.idea
models/
Contributor comment:
logic: Ignoring 'models/' directory may prevent tracking of important model files that should be version controlled. Consider being more specific about which model files to ignore


# Distribution / packaging
.Python
build/
93 changes: 22 additions & 71 deletions libs/infinity_emb/Dockerfile
@@ -1,5 +1,9 @@
ARG BASE_IMAGE=nvidia/cuda:12.1.0-base-ubuntu22.04
# Use the Python base image
FROM nvidia/cuda:12.1.1-base-ubuntu22.04 AS base
FROM $BASE_IMAGE AS base

# Define a build-time argument with a default value
ARG DTYPE=auto

ENV PYTHONUNBUFFERED=1 \
\
@@ -14,21 +18,29 @@ ENV PYTHONUNBUFFERED=1 \
# do not ask any interactive question
POETRY_NO_INTERACTION=1 \
EXTRAS="all" \
PYTHON="python3.11"
RUN apt-get update && apt-get install build-essential python3-dev $PYTHON-venv $PYTHON curl -y
PYTHON="python3.11" \
DTYPE=$DTYPE

RUN apt-get update && apt-get install build-essential python3-dev $PYTHON-venv $PYTHON curl -y

# Set the working directory for the app
WORKDIR /app

FROM base as builder
# Set the working directory for the app

# Define the version of Poetry to install (default is 1.7.1)
# Define the directory to install Poetry to (default is /opt/poetry)
ARG POETRY_VERSION=1.7.1
ARG POETRY_HOME=/opt/poetry

# Create a Python virtual environment for Poetry and install it
RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=$POETRY_HOME POETRY_VERSION=$POETRY_VERSION $PYTHON -

ENV PATH=$POETRY_HOME/bin:$PATH

# Test if Poetry is installed in the expected path
RUN echo "Poetry version:" && poetry --version

# Copy the rest of the app source code (this layer will be invalidated and rebuilt whenever the source code changes)
COPY poetry.lock poetry.toml pyproject.toml README.md /app/
# Install dependencies only
@@ -39,75 +51,14 @@ RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without li
# remove cache
RUN poetry cache clear pypi --all

FROM builder as testing
# install lint and test dependencies
RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}"
# lint
RUN poetry run ruff .
RUN poetry run black --check .
RUN poetry run mypy .
# pytest
COPY tests tests
# run end to end tests because of duration of build in github ci.
# Run tests/end_to_end on TARGETPLATFORM x86_64 otherwise run tests/end_to_end_gpu
# poetry run python -m pytest tests/end_to_end -x
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
poetry run python -m pytest tests/end_to_end -x ; \
else \
poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
fi
RUN echo "all tests passed" > "test_results.txt"

# Use a multi-stage build -> production version
FROM base AS production

# Use a multi-stage build -> production version, with download
FROM base AS tested-builder
COPY --from=builder /app /app
# force testing stage to run
COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/huggingface
ENV PATH=/app/.venv/bin:$PATH
# do nothing
RUN echo "copied all files"

COPY /models /models
COPY environment_config.sh ./environment_config.sh

# Export with tensorrt, not recommended.
# docker buildx build --target=production-tensorrt -f Dockerfile .
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS production-tensorrt
ENV PYTHONUNBUFFERED=1 \
PIP_NO_CACHE_DIR=off \
PYTHON="python3.11"
RUN apt-get update && apt-get install python3-dev python3-pip $PYTHON build-essential curl -y
COPY --from=builder /app /app
# force testing stage to run
COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/torch
ENV SENTENCE_TRANSFORMERS_HOME=/app/.cache/torch
ENV PATH=/app/.venv/bin:$PATH
RUN pip install --no-cache-dir "onnxruntime-gpu==1.17.0" "tensorrt==8.6.*"
ENV LD_LIBRARY_PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/$(PYTHON)/site-packages/tensorrt_libs:${LD_LIBRARY_PATH}
ENV PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt/bin:${PATH}
ENTRYPOINT ["infinity_emb"]


# Use a multi-stage build -> production version, with download
# docker buildx build --target=production-with-download \
# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
FROM tested-builder AS production-with-download
# collect model name and engine from build args
ARG MODEL_NAME
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
ARG ENGINE
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
ARG EXTRA_PACKAGES
RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi
# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# flash attention fa2
FROM tested-builder AS production-with-fa2
RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
FROM tested-builder AS production
ENTRYPOINT ["infinity_emb"]
ENTRYPOINT ["/bin/bash" , "-c", "source ./environment_config.sh $DTYPE"]
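The `|| [ $? -eq 3 ]` idiom in the production-with-download stage above deserves a note: it treats exit status 3 ("model is downloaded", per the TODO comment) as success so the `RUN` instruction does not fail the build. A standalone sketch, with `fake_preload` as a hypothetical stand-in for the `infinity_emb` preload command:

```shell
# Sketch of the '|| [ $? -eq 3 ]' idiom: any exit status other than 0 or 3
# still fails, but status 3 is accepted as "model already downloaded".
fake_preload() { return 3; }   # hypothetical stand-in for the preload step
fake_preload || [ $? -eq 3 ]
echo "preload accepted with status $?"   # prints: preload accepted with status 0
```

Because `[ 3 -eq 3 ]` succeeds, the whole command list exits 0, which is what lets the Docker build continue.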
5 changes: 5 additions & 0 deletions libs/infinity_emb/environment_config.sh
@@ -0,0 +1,5 @@
#!/bin/bash
#export OTEL_TRACES_SAMPLER=parentbased_always_off
export OTEL_RESOURCE_ATTRIBUTES=service.name=${SHERLOCK_SERVICE_NAME},host.name=${POD_NAME},host.ip=${POD_IP}
export OTEL_EXPORTER_OTLP_ENDPOINT=http://${HOST_IP}:5680
infinity_emb v2 --model-id /models --dtype ${DTYPE}
Contributor comment:
style: The hardcoded /models path assumes a specific directory structure. Consider making this configurable via environment variable
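A sketch of the configurability the comment asks for. `MODEL_DIR` is a hypothetical variable, not part of the PR; it falls back to the currently hardcoded `/models` path via the shell's `${var:-default}` expansion:

```shell
#!/bin/bash
# Hypothetical variant of environment_config.sh: the model path is read
# from MODEL_DIR when set and falls back to the hardcoded /models default.
MODEL_DIR="${MODEL_DIR:-/models}"
echo "would run: infinity_emb v2 --model-id ${MODEL_DIR} --dtype ${DTYPE:-auto}"
```

Callers that need a different layout could then set `MODEL_DIR` in the deployment environment without rebuilding the image.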

11 changes: 8 additions & 3 deletions libs/infinity_emb/infinity_emb/infinity_server.py
@@ -25,7 +25,11 @@
RerankInput,
ReRankResult,
)
from infinity_emb.log_handler import UVICORN_LOG_LEVELS, logger
from infinity_emb.log_handler import (
UVICORN_LOG_LEVELS,
logger,
StructuredLoggingMiddleware,
)
from infinity_emb.primitives import (
Device,
Dtype,
@@ -129,6 +133,7 @@ async def validate_token(

instrumentator = Instrumentator().instrument(app)
app.add_exception_handler(errors.OpenAIException, errors.openai_exception_handler)
app.add_middleware(StructuredLoggingMiddleware)
Contributor comment:
logic: StructuredLoggingMiddleware should be added before other middleware like CORSMiddleware to ensure all requests are properly logged


@app.get("/health", operation_id="health", response_class=responses.ORJSONResponse)
async def _health() -> dict[str, float]:
@@ -220,13 +225,13 @@ async def _embeddings(data: OpenAIEmbeddingInput):
if isinstance(data.input, str):
data.input = [data.input]

logger.debug("[📝] Received request with %s inputs ", len(data.input))
logger.info("[📝] Received request with %s inputs ", len(data.input))
start = time.perf_counter()

embedding, usage = await engine.embed(sentences=data.input)

duration = (time.perf_counter() - start) * 1000
logger.debug("[✅] Done in %s ms", duration)
logger.info("[✅] Done in %s ms", duration)

return OpenAIEmbeddingResult.to_embeddings_response(
embeddings=embedding,