From e8978ff53e4d261ce6f33229fff6033bcb14feed Mon Sep 17 00:00:00 2001
From: Adam Miller
Date: Fri, 27 Dec 2024 16:10:59 -0800
Subject: [PATCH] feat: adds docker development environment

---
 Makefile                          |  8 ++++
 dev/Dockerfile-brozzler-worker    | 31 +++++++++++++++
 dev/Dockerfile-warcprox           | 26 ++++++++++++
 dev/README.md                     | 66 +++++++++++++++++++++++++++++++
 dev/brozzler-worker-entrypoint.sh | 19 +++++++++
 dev/docker-compose.yml            | 51 ++++++++++++++++++++++++
 dev/run-brozzler-worker.sh        |  8 ++++
 7 files changed, 209 insertions(+)
 create mode 100644 dev/Dockerfile-brozzler-worker
 create mode 100644 dev/Dockerfile-warcprox
 create mode 100644 dev/README.md
 create mode 100755 dev/brozzler-worker-entrypoint.sh
 create mode 100644 dev/docker-compose.yml
 create mode 100755 dev/run-brozzler-worker.sh

diff --git a/Makefile b/Makefile
index f99dcc97..45c4fccd 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,11 @@
+venv: venv/touchfile  # `make venv` creates or refreshes the local virtualenv
+
+venv/touchfile: setup.py  # stamp file: the recipe re-runs whenever setup.py is newer
+	test -d venv || python3 -m venv venv
+	venv/bin/pip install --upgrade pip
+	venv/bin/pip install -Ue '.[yt_dlp]'
+	touch venv/touchfile
+
 .PHONY: format
 format:
 	venv/bin/black -t py35 -t py36 -t py37 -t py38 -t py39 -t py310 -t py311 -t py312 .
diff --git a/dev/Dockerfile-brozzler-worker b/dev/Dockerfile-brozzler-worker
new file mode 100644
index 00000000..f4f2f5a8
--- /dev/null
+++ b/dev/Dockerfile-brozzler-worker
@@ -0,0 +1,31 @@
+FROM ubuntu:24.04
+
+ARG RETHINKDB_SERVERS_URL
+
+WORKDIR /app
+# System packages: Python toolchain, build headers, ffmpeg, fonts, and libnspr4/libnss3/xdg-utils.
+RUN apt-get update && \
+    apt-get install -y python3 python3-pip python3-venv wget && \
+    apt-get install -y libjpeg-turbo8-dev zlib1g-dev gcc python3-dev python3-dbg ffmpeg xfonts-base fonts-arphic-bkai00mp fonts-arphic-bsmi00lp fonts-arphic-gbsn00lp fonts-arphic-gkai00mp fonts-arphic-ukai fonts-farsiweb fonts-nafees fonts-sil-abyssinica fonts-sil-ezra fonts-sil-padauk fonts-unfonts-extra fonts-unfonts-core fonts-indic fonts-thai-tlwg fonts-lklug-sinhala fonts-liberation libnspr4 libnss3 xdg-utils
+# Install the downloaded .deb through apt rather than `dpkg -i` so its dependencies are resolved.
+RUN wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb && \
+    apt-get install -y ./google-chrome-stable_current_amd64.deb
+
+ENV SERVICE_NAME="brozzler-worker"
+# Unprivileged account named after the service, plus a log directory it owns.
+RUN useradd -ms /bin/bash $SERVICE_NAME && \
+    mkdir -p /var/log/$SERVICE_NAME && \
+    chown $SERVICE_NAME:$SERVICE_NAME /var/log/$SERVICE_NAME
+
+WORKDIR /brozzler
+RUN python3 -m venv /opt/brozzler-worker-venv
+# Put the venv first on PATH so the `pip` calls below resolve to it.
+ENV PATH="/opt/brozzler-worker-venv/bin:$PATH"
+
+RUN pip install --upgrade pip && \
+    pip install setuptools
+
+COPY ./dev/brozzler-worker-entrypoint.sh /entrypoint.sh
+COPY ./dev/run-brozzler-worker.sh /run-brozzler-worker.sh
+
+ENTRYPOINT ["bash", "/entrypoint.sh"]
diff --git a/dev/Dockerfile-warcprox b/dev/Dockerfile-warcprox
new file mode 100644
index 00000000..1bc4b338
--- /dev/null
+++ b/dev/Dockerfile-warcprox
@@ -0,0 +1,26 @@
+FROM ubuntu:24.04
+
+RUN apt-get update && \
+    apt-get install -y python3 python3-pip python3-venv && \
+    python3 -m venv /opt/venv
+
+# Put the venv first on PATH so the `pip` and `warcprox` commands below use it.
+ENV PATH="/opt/venv/bin:$PATH"
+# The requirement is quoted: an unquoted `>=` is a shell redirect and drops the version constraint.
+RUN pip install --upgrade pip && \
+    pip install setuptools && \
+    pip install "warcprox>=2.4.31"
+
+EXPOSE 8888
+
+VOLUME /warcs /logs
+WORKDIR /app
+COPY . /app
+# Shell form so the ${RETHINKDB_*} variables expand at start-up; `exec` makes warcprox replace the shell.
+ENTRYPOINT exec warcprox --address 0.0.0.0 --port 8888 --dir /warcs --crawl-log-dir /logs \
+    --rethinkdb-services-url ${RETHINKDB_SERVICES_URL} \
+    --rethinkdb-stats-url=${RETHINKDB_STATS_URL} \
+    --prefix="brozzler" \
+    --rollover-idle-time=86400 \
+    --base32 --gzip --verbose
+
diff --git a/dev/README.md b/dev/README.md
new file mode 100644
index 00000000..7a7c2bfe
--- /dev/null
+++ b/dev/README.md
@@ -0,0 +1,66 @@
+# Brozzler Docker Development
+
+## Development
+### Native local development
+#### Prerequisites
+
+- Python 3.8
+- docker, docker-compose
+
+##### `Python`
+**Install `pyenv`**
+
+`pyenv` is a version manager for Python, enabling the user to install any
+version of Python, managed separately from the system's managed Python.
+
+- Linux: https://github.com/pyenv/pyenv/?tab=readme-ov-file#basic-github-checkout
+- Mac OS: https://github.com/pyenv/pyenv/?tab=readme-ov-file#homebrew-in-macos
+  - Note: strongly consider installing from `HEAD`, i.e.: `brew install pyenv --head`
+- Lastly, [configure your shell environment to use
+  `pyenv`](https://github.com/pyenv/pyenv/?tab=readme-ov-file#set-up-your-shell-environment-for-pyenv)
+
+**Note**: `pyenv-virtualenv` should not be used for brozzler development. The brozzler
+`Makefile` expects to manage its own virtualenv.
+
+**Install Python**
+```sh
+# Linux:
+pyenv install 3.12.3
+
+# Apple Silicon
+arch -x86_64 pyenv install 3.8.10
+```
+##### Mac OS considerations
+
+[Docker for Desktop Mac](https://docs.docker.com/desktop/install/mac-install/)
+or [Colima](https://github.com/abiosoft/colima).
+- Recommended to enable [Use Rosetta for x86_64/amd64 emulation on Apple Silicon](https://www.docker.com/blog/docker-desktop-4-25/)
+
+#### `Brozzler` development environment
+```shell
+cd path/to/project
+
+# ensure virtual environment exists:
+make venv
+source venv/bin/activate
+
+# start service dependencies
+docker-compose --file dev/docker-compose.yml up -d
+
+# Queue a new job
+brozzler-new-job --rethinkdb-servers localhost --rethinkdb-db brozzler_dev path/to/your_job.yml
+```
+Notes:
+- See [job-conf.rst](../job-conf.rst) for creating your brozzler job configuration
+- Contents of logs and warcprox volumes can be viewed in docker desktop
+
+
+##### Attach to brozzler worker container
+```shell
+# deactivate brozzler-worker
+docker-compose --file dev/docker-compose.yml down brozzler-worker
+
+# Run and attach to brozzler worker
+docker-compose --file dev/docker-compose.yml run brozzler-worker
+```
+
diff --git a/dev/brozzler-worker-entrypoint.sh b/dev/brozzler-worker-entrypoint.sh
new file mode 100755
index 00000000..28b8593a
--- /dev/null
+++ b/dev/brozzler-worker-entrypoint.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+set -e
+
+venv="/opt/brozzler-worker-venv"
+# If a checkout is mounted at /brozzler, install it in editable mode into the venv.
+if [ -f "/brozzler/setup.py" ]; then
+    echo "#### Installing /brozzler in $venv"
+    "$venv/bin/pip" install --disable-pip-version-check -e "/brozzler[yt_dlp]" --quiet
+    "$venv/bin/pip" install --disable-pip-version-check rethinkdb==2.4.9 doublethink==0.4.9
+fi
+
+echo "Running brozzler-worker"
+# Run the worker as the brozzler-worker user rather than as root.
+su brozzler-worker /run-brozzler-worker.sh
+# Only reached if the worker exits with status 0 (set -e aborts otherwise): open a shell for manual re-runs.
+echo "Run worker like: /run-brozzler-worker.sh"
+su brozzler-worker
+
+/bin/bash
diff --git a/dev/docker-compose.yml b/dev/docker-compose.yml
new file mode 100644
index 00000000..0c6f6518
--- /dev/null
+++ b/dev/docker-compose.yml
@@ -0,0 +1,51 @@
+version: "3.5"
+
+services:
+  rethinkdb:
+    image: rethinkdb:latest
+    command: rethinkdb --bind all
+    ports:
+      - "8080:8080" # WebUI
+      - "28015:28015" # client driver port, used by the rethinkdb:// URLs below
+    expose:
+      - "8080"
+      - "28015"
+    volumes:
+      - rethinkdb_volume:/data
+  warcprox:
+    image: warcprox
+    build:
+      context: . # relative to this compose file, i.e. the dev/ directory
+      dockerfile: Dockerfile-warcprox
+    environment:
+      - RETHINKDB_SERVICES_URL=rethinkdb://rethinkdb:28015/brozzler_dev/services
+      - RETHINKDB_STATS_URL=rethinkdb://rethinkdb:28015/brozzler_dev/stats
+    ports:
+      - "8888:8888"
+    expose:
+      - "8888" # same port that is published above
+    volumes:
+      - warcprox_warcs:/warcs
+      - warcprox_logs:/logs
+    depends_on:
+      - rethinkdb
+  brozzler-worker:
+    image: brozzler-worker
+    platform: linux/amd64
+    build:
+      context: ../ # repository root, so the Dockerfile can COPY ./dev/*.sh
+      dockerfile: ./dev/Dockerfile-brozzler-worker
+    tty: true
+    environment:
+      - RETHINKDB_SERVERS_URL=rethinkdb:28015
+    volumes:
+      - ../:/brozzler # source checkout; the entrypoint installs it when setup.py is present
+      - brozzler_venv:/opt/brozzler-worker-venv
+    depends_on:
+      - warcprox
+      - rethinkdb
+volumes:
+  rethinkdb_volume: {}
+  warcprox_warcs: {}
+  warcprox_logs: {}
+  brozzler_venv: {}
diff --git a/dev/run-brozzler-worker.sh b/dev/run-brozzler-worker.sh
new file mode 100755
index 00000000..83f49e06
--- /dev/null
+++ b/dev/run-brozzler-worker.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+source /opt/brozzler-worker-venv/bin/activate
+# Host and database are hard-coded to the rethinkdb service and brozzler_dev database used in dev/docker-compose.yml.
+brozzler-worker --verbose \
+    --rethinkdb-servers=rethinkdb \
+    --rethinkdb-db=brozzler_dev \
+    --max-browsers=1 \
+    --warcprox-auto