From ff7f1b027eb553b14c6e1cfb259c15bb499b1a6c Mon Sep 17 00:00:00 2001
From: daavoo
Date: Thu, 5 Dec 2024 10:54:33 +0100
Subject: [PATCH 1/7] Enable build and push of Dockerfile.

---
Review notes (text between the `---` marker and the diff is not part of the
commit message):
demo/run.sh stores the PID of the background streamlit process in APP_PID,
the same variable that the EXIT/TERM trap handler tests, so the handler can
send SIGTERM to the app and wait for it.
NOTE(review): COMMAND_PATH is never assigned in demo/run.sh, so the handler's
log messages print an empty name; confirm whether it should be set.

 .github/workflows/package.yaml | 55 ++++++++++++++++++++++++++++++++++
 Dockerfile                     | 26 ++++++++++++++++
 demo/download_models.py        |  9 ++++++
 demo/run.sh                    | 26 ++++++++++++++++
 4 files changed, 116 insertions(+)
 create mode 100644 .github/workflows/package.yaml
 create mode 100644 Dockerfile
 create mode 100644 demo/download_models.py
 create mode 100644 demo/run.sh

diff --git a/.github/workflows/package.yaml b/.github/workflows/package.yaml
new file mode 100644
index 0000000..36011c5
--- /dev/null
+++ b/.github/workflows/package.yaml
@@ -0,0 +1,55 @@
+name: Create and publish a Docker image
+
+on:
+  push:
+    branches:
+      - 'main'
+    tags:
+      - 'v*'
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: read
+      packages: write
+      attestations: write
+      id-token: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+
+      - name: Build and push Docker image
+        id: push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+
+      - name: Generate artifact attestation
+        uses: actions/attest-build-provenance@v1
+        with:
+          subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}
+          subject-digest: ${{ steps.push.outputs.digest }}
+          push-to-registry: true
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..180f8fd
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,26 @@
+FROM python:3.10-slim
+
+RUN groupadd --gid 1000 appuser \
+    && useradd --uid 1000 --gid 1000 -ms /bin/bash appuser
+
+RUN pip3 install --no-cache-dir --upgrade \
+    pip \
+    virtualenv
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    software-properties-common \
+    git
+
+USER appuser
+ENV VIRTUAL_ENV=/home/appuser/venv
+
+COPY . /home/appuser/document-to-podcast
+WORKDIR /home/appuser/document-to-podcast
+
+RUN virtualenv ${VIRTUAL_ENV}
+RUN . ${VIRTUAL_ENV}/bin/activate && pip install -e /home/appuser/document-to-podcast
+RUN . ${VIRTUAL_ENV}/bin/activate && python demo/download_models.py
+
+EXPOSE 8501
+ENTRYPOINT ["./demo/run.sh"]
diff --git a/demo/download_models.py b/demo/download_models.py
new file mode 100644
index 0000000..9c0ec51
--- /dev/null
+++ b/demo/download_models.py
@@ -0,0 +1,9 @@
+from document_to_podcast.inference.model_loaders import (
+    load_llama_cpp_model,
+    load_parler_tts_model_and_tokenizer,
+)
+
+load_llama_cpp_model(
+    model_id="allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf"
+)
+load_parler_tts_model_and_tokenizer("parler-tts/parler-tts-mini-v1")
diff --git a/demo/run.sh b/demo/run.sh
new file mode 100644
index 0000000..90eda18
--- /dev/null
+++ b/demo/run.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+APP_PID=
+stopRunningProcess() {
+    # Based on https://linuxconfig.org/how-to-propagate-a-signal-to-child-processes-from-a-bash-script
+    if test ! "${APP_PID}" = '' && ps -p ${APP_PID} > /dev/null ; then
+        > /proc/1/fd/1 echo "Stopping ${COMMAND_PATH} which is running with process ID ${APP_PID}"
+
+        kill -TERM ${APP_PID}
+        > /proc/1/fd/1 echo "Waiting for ${COMMAND_PATH} to process SIGTERM signal"
+
+        wait ${APP_PID}
+        > /proc/1/fd/1 echo "All processes have stopped running"
+    else
+        > /proc/1/fd/1 echo "${COMMAND_PATH} was not started when the signal was sent or it has already been stopped"
+    fi
+}
+
+trap stopRunningProcess EXIT TERM
+
+source ${VIRTUAL_ENV}/bin/activate
+
+streamlit run ${HOME}/document-to-podcast/demo/app.py &
+APP_PID=${!}
+
+wait ${APP_PID}

From 8a70ad0d7f7b879d89fb9c4db1ab4307cd23ae73 Mon Sep 17 00:00:00 2001
From: David de la Iglesia Castro
Date: Thu, 5 Dec 2024 10:45:57 +0000
Subject: [PATCH 2/7] Install CPU torch. Make run.sh executable

---
 Dockerfile  | 1 +
 demo/run.sh | 0
 2 files changed, 1 insertion(+)
 mode change 100644 => 100755 demo/run.sh

diff --git a/Dockerfile b/Dockerfile
index 180f8fd..251cab9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -19,6 +19,7 @@ COPY . /home/appuser/document-to-podcast
 WORKDIR /home/appuser/document-to-podcast
 
 RUN virtualenv ${VIRTUAL_ENV}
+RUN . ${VIRTUAL_ENV}/bin/activate && pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu
 RUN . ${VIRTUAL_ENV}/bin/activate && pip install -e /home/appuser/document-to-podcast
 RUN . ${VIRTUAL_ENV}/bin/activate && python demo/download_models.py
 
diff --git a/demo/run.sh b/demo/run.sh
old mode 100644
new mode 100755

From d8f0f4f610c8d4092ceff483954f108a0d076feb Mon Sep 17 00:00:00 2001
From: daavoo
Date: Tue, 10 Dec 2024 12:59:51 +0100
Subject: [PATCH 3/7] Add comments

---
 demo/download_models.py | 4 ++++
 demo/run.sh             | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/demo/download_models.py b/demo/download_models.py
index 9c0ec51..59e7267 100644
--- a/demo/download_models.py
+++ b/demo/download_models.py
@@ -1,3 +1,7 @@
+"""
+Used when building the Dockerfile to download the models that are used in the hosted demo
+"""
+
 from document_to_podcast.inference.model_loaders import (
     load_llama_cpp_model,
     load_parler_tts_model_and_tokenizer,
diff --git a/demo/run.sh b/demo/run.sh
index 90eda18..bada048 100755
--- a/demo/run.sh
+++ b/demo/run.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+# Adapted from https://docs.streamlit.io/deploy/tutorials/kubernetes
+
 APP_PID=
 stopRunningProcess() {
     # Based on https://linuxconfig.org/how-to-propagate-a-signal-to-child-processes-from-a-bash-script

From 2c995638ceb8e8ce11302cc41e46a31b1777b254 Mon Sep 17 00:00:00 2001
From: daavoo
Date: Tue, 10 Dec 2024 13:08:10 +0100
Subject: [PATCH 4/7] Don't install on editable mode

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 251cab9..0a757e9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -20,7 +20,7 @@ WORKDIR /home/appuser/document-to-podcast
 
 RUN virtualenv ${VIRTUAL_ENV}
 RUN . ${VIRTUAL_ENV}/bin/activate && pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu
-RUN . ${VIRTUAL_ENV}/bin/activate && pip install -e /home/appuser/document-to-podcast
+RUN . ${VIRTUAL_ENV}/bin/activate && pip install /home/appuser/document-to-podcast
 RUN . ${VIRTUAL_ENV}/bin/activate && python demo/download_models.py
 
 EXPOSE 8501

From 04a3f5afbca5ac9c474e99addedbd99986cbfcfa Mon Sep 17 00:00:00 2001
From: David de la Iglesia Castro
Date: Wed, 11 Dec 2024 09:46:19 +0000
Subject: [PATCH 5/7] Set appuser after installation

---
 Dockerfile | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 0a757e9..5e3a356 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,5 @@
 FROM python:3.10-slim
 
-RUN groupadd --gid 1000 appuser \
-    && useradd --uid 1000 --gid 1000 -ms /bin/bash appuser
-
 RUN pip3 install --no-cache-dir --upgrade \
     pip \
     virtualenv
@@ -12,8 +9,7 @@ RUN apt-get update && apt-get install -y \
     software-properties-common \
     git
 
-USER appuser
-ENV VIRTUAL_ENV=/home/appuser/venv
+ ENV VIRTUAL_ENV=/home/venv
 
 COPY . /home/appuser/document-to-podcast
 WORKDIR /home/appuser/document-to-podcast
@@ -23,5 +19,10 @@ RUN . ${VIRTUAL_ENV}/bin/activate && pip install torch torchaudio --index-url ht
 RUN . ${VIRTUAL_ENV}/bin/activate && pip install /home/appuser/document-to-podcast
 RUN . ${VIRTUAL_ENV}/bin/activate && python demo/download_models.py
 
+RUN groupadd --gid 1000 appuser \
+    && useradd --uid 1000 --gid 1000 -ms /bin/bash appuser
+
+USER appuser
+
 EXPOSE 8501
 ENTRYPOINT ["./demo/run.sh"]

From 52de68287e2df1e982dd73b3fbabe15085e561fe Mon Sep 17 00:00:00 2001
From: daavoo
Date: Thu, 12 Dec 2024 10:52:14 +0100
Subject: [PATCH 6/7] Drop venv

---
 Dockerfile  | 13 ++++---------
 demo/run.sh |  2 --
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 5e3a356..5ed0d18 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,23 +1,18 @@
 FROM python:3.10-slim
 
-RUN pip3 install --no-cache-dir --upgrade \
-    pip \
-    virtualenv
-
+RUN pip3 install --no-cache-dir --upgrade pip
 RUN apt-get update && apt-get install -y \
     build-essential \
     software-properties-common \
     git
 
- ENV VIRTUAL_ENV=/home/venv
 
 COPY . /home/appuser/document-to-podcast
 WORKDIR /home/appuser/document-to-podcast
 
-RUN virtualenv ${VIRTUAL_ENV}
-RUN . ${VIRTUAL_ENV}/bin/activate && pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu
-RUN . ${VIRTUAL_ENV}/bin/activate && pip install /home/appuser/document-to-podcast
-RUN . ${VIRTUAL_ENV}/bin/activate && python demo/download_models.py
+RUN pip3 install torch torchaudio --index-url https://download.pytorch.org/whl/cpu
+RUN pip3 install /home/appuser/document-to-podcast
+RUN python3 demo/download_models.py
 
 RUN groupadd --gid 1000 appuser \
     && useradd --uid 1000 --gid 1000 -ms /bin/bash appuser
diff --git a/demo/run.sh b/demo/run.sh
index bada048..bad3e42 100755
--- a/demo/run.sh
+++ b/demo/run.sh
@@ -20,8 +20,6 @@ stopRunningProcess() {
 
 trap stopRunningProcess EXIT TERM
 
-source ${VIRTUAL_ENV}/bin/activate
-
 streamlit run ${HOME}/document-to-podcast/demo/app.py &
 APP_PID=${!}
 

From 6cdb9337f949eb70a75fb6333d10b869501512fa Mon Sep 17 00:00:00 2001
From: daavoo
Date: Thu, 12 Dec 2024 10:57:21 +0100
Subject: [PATCH 7/7] Updated to push to DockerHub

---
NOTE(review): the "Extract metadata" step (untouched by this patch) still sets
`images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}`, while this patch removes
both env entries, so the image name looks like it will resolve to "/".
Confirm the DockerHub repository name and update `images:` accordingly.
The `attestations` and `id-token` permissions also appear unused once the
attestation step is dropped; verify before keeping them.

 .github/workflows/package.yaml | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/package.yaml b/.github/workflows/package.yaml
index 36011c5..fa61c96 100644
--- a/.github/workflows/package.yaml
+++ b/.github/workflows/package.yaml
@@ -6,10 +6,7 @@ on:
       - 'main'
     tags:
       - 'v*'
-
-env:
-  REGISTRY: ghcr.io
-  IMAGE_NAME: ${{ github.repository }}
+  workflow_dispatch:
 
 jobs:
   build-and-push-image:
@@ -25,12 +22,11 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v4
 
-      - name: Log in to the Container registry
+      - name: Log in to DockerHub
         uses: docker/login-action@v3
         with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
 
       - name: Extract metadata (tags, labels) for Docker
         id: meta
@@ -46,10 +42,3 @@ jobs:
           push: true
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
-
-      - name: Generate artifact attestation
-        uses: actions/attest-build-provenance@v1
-        with:
-          subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}
-          subject-digest: ${{ steps.push.outputs.digest }}
-          push-to-registry: true