Skip to content

Commit

Permalink
feat: adds docker development environment
Browse files Browse the repository at this point in the history
  • Loading branch information
adam-miller committed Dec 28, 2024
1 parent eb922f5 commit e8978ff
Show file tree
Hide file tree
Showing 7 changed files with 209 additions and 0 deletions.
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# Create (or refresh) the local development virtualenv in ./venv.
#
# venv/touchfile is a stamp file: it is re-made whenever setup.py is newer
# than it, which re-runs the editable install below.
venv: venv/touchfile

venv/touchfile: setup.py
	test -d venv || python3 -m venv venv
	venv/bin/pip install --upgrade pip
	# Quote the requirement: unquoted, `.[yt_dlp]` is a shell glob pattern and
	# would be expanded if a matching dotfile (e.g. `.d`) exists in this directory.
	venv/bin/pip install -Ue '.[yt_dlp]'
	touch venv/touchfile

.PHONY: format
format:
venv/bin/black -t py35 -t py36 -t py37 -t py38 -t py39 -t py310 -t py311 -t py312 .
Expand Down
31 changes: 31 additions & 0 deletions dev/Dockerfile-brozzler-worker
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Development image for the brozzler worker.
#
# The image provides the OS packages, Google Chrome and an empty virtualenv.
# brozzler itself is installed at container start by /entrypoint.sh from the
# source tree mounted at /brozzler (see dev/docker-compose.yml).

# Pin the release instead of tracking `latest`, so rebuilds stay reproducible.
FROM ubuntu:24.04

# Not referenced by any later instruction in this file.
ARG RETHINKDB_SERVERS_URL

WORKDIR /app

# Python toolchain, build dependencies, ffmpeg, fonts and the libraries Chrome
# needs. DEBIAN_FRONTEND is set inline so it does not leak into the runtime
# environment; the apt lists are removed in the same layer that created them.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        ffmpeg \
        fonts-arphic-bkai00mp \
        fonts-arphic-bsmi00lp \
        fonts-arphic-gbsn00lp \
        fonts-arphic-gkai00mp \
        fonts-arphic-ukai \
        fonts-farsiweb \
        fonts-indic \
        fonts-liberation \
        fonts-lklug-sinhala \
        fonts-nafees \
        fonts-sil-abyssinica \
        fonts-sil-ezra \
        fonts-sil-padauk \
        fonts-thai-tlwg \
        fonts-unfonts-core \
        fonts-unfonts-extra \
        gcc \
        libjpeg-turbo8-dev \
        libnspr4 \
        libnss3 \
        python3 \
        python3-dbg \
        python3-dev \
        python3-pip \
        python3-venv \
        wget \
        xdg-utils \
        xfonts-base \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install Chrome through apt-get rather than `dpkg -i`: apt resolves the
# package's declared dependencies, while dpkg fails if any are missing.
# The downloaded .deb is deleted in the same layer so it never ships in the image.
RUN wget -q -O /tmp/google-chrome.deb \
        https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb && \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y /tmp/google-chrome.deb && \
    rm -rf /tmp/google-chrome.deb /var/lib/apt/lists/*

ENV SERVICE_NAME="brozzler-worker"

# Unprivileged account the entrypoint switches to (via `su`) to run the worker,
# plus a log directory owned by it.
RUN useradd -ms /bin/bash "$SERVICE_NAME" && \
    mkdir -p "/var/log/$SERVICE_NAME" && \
    chown "$SERVICE_NAME:$SERVICE_NAME" "/var/log/$SERVICE_NAME"

WORKDIR /brozzler
RUN python3 -m venv /opt/brozzler-worker-venv
# Enable venv
ENV PATH="/opt/brozzler-worker-venv/bin:$PATH"

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir setuptools

COPY ./dev/brozzler-worker-entrypoint.sh /entrypoint.sh
COPY ./dev/run-brozzler-worker.sh /run-brozzler-worker.sh

# No USER instruction: the entrypoint runs pip as root and then drops to
# $SERVICE_NAME itself.
ENTRYPOINT ["bash", "/entrypoint.sh"]
26 changes: 26 additions & 0 deletions dev/Dockerfile-warcprox
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Development image running warcprox as the archiving proxy for brozzler.

# Pin the release instead of tracking `latest`, so rebuilds stay reproducible.
FROM ubuntu:24.04

RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        python3 \
        python3-pip \
        python3-venv \
    && rm -rf /var/lib/apt/lists/* \
    && python3 -m venv /opt/venv

# Enable venv
ENV PATH="/opt/venv/bin:$PATH"

# The requirement must be quoted: unquoted, the shell parses `>=2.4.31` as a
# redirection, so pip would see a bare `warcprox` and its output would be
# written to a file named `=2.4.31`.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir setuptools && \
    pip install --no-cache-dir 'warcprox>=2.4.31'

EXPOSE 8888

VOLUME /warcs /logs
WORKDIR /app
COPY . /app

# Exec form with an explicit shell: the shell is still needed to expand the
# RETHINKDB_* variables supplied at run time, and `exec` replaces that shell
# so warcprox runs as PID 1 and receives the signals sent by `docker stop`.
ENTRYPOINT ["/bin/sh", "-c", "exec warcprox --address 0.0.0.0 --port 8888 --dir /warcs --crawl-log-dir /logs \
    --rethinkdb-services-url \"${RETHINKDB_SERVICES_URL}\" \
    --rethinkdb-stats-url=\"${RETHINKDB_STATS_URL}\" \
    --prefix=brozzler \
    --rollover-idle-time=86400 \
    --base32 --gzip --verbose"]

66 changes: 66 additions & 0 deletions dev/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Brozzler Docker Development

## Development
### Native local development
#### Prerequisites

- Python 3.8
- docker, docker-compose

##### `Python`
**Install `pyenv`**

`pyenv` is a version manager for Python, enabling the user to install any
version of Python, managed separately from the system's managed Python.

- Linux: https://github.com/pyenv/pyenv/?tab=readme-ov-file#basic-github-checkout
- Mac OS: https://github.com/pyenv/pyenv/?tab=readme-ov-file#homebrew-in-macos
- Note: strongly consider installing from `HEAD`, i.e.: `brew install pyenv --head`
- Lastly, [configure your shell environment to use
`pyenv`](https://github.com/pyenv/pyenv/?tab=readme-ov-file#set-up-your-shell-environment-for-pyenv)

**Note**: `pyenv-virtualenv` should not be used for brozzler development. The brozzler
`Makefile` expects to manage its own virtualenv.

**Install Python**
```sh
# Linux:
pyenv install 3.12.3

# Apple Silicon
arch -x86_64 pyenv install 3.8.10
```
##### Mac OS considerations

Install either [Docker Desktop for Mac](https://docs.docker.com/desktop/install/mac-install/)
or [Colima](https://github.com/abiosoft/colima).
- Recommended to enable [Use Rosetta for x86_64/amd64 emulation on Apple Silicon](https://www.docker.com/blog/docker-desktop-4-25/)

#### `Brozzler` development environment
```shell
cd path/to/project

# ensure virtual environment exists:
make venv
source venv/bin/activate

# start service dependencies
docker-compose --file dev/docker-compose.yml up -d

# Queue a new job
brozzler-new-job --rethinkdb-servers localhost --rethinkdb-db brozzler_dev path/to/your_job.yml
```
Notes:
- See [job-conf.rst](../job-conf.rst) for creating your brozzler job configuration
- Contents of logs and warcprox volumes can be viewed in docker desktop


##### Attach to brozzler worker container
```shell
# stop and remove the running brozzler-worker container
docker-compose --file dev/docker-compose.yml down brozzler-worker

# Run and attach to brozzler worker
docker-compose --file dev/docker-compose.yml run brozzler-worker
```

19 changes: 19 additions & 0 deletions dev/brozzler-worker-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
# Entrypoint for the brozzler-worker development container.
#
# 1. If a source tree is present at /brozzler, install it (editable, with the
#    yt_dlp extra) into the worker virtualenv, along with pinned rethinkdb and
#    doublethink releases.
# 2. Run the worker as the unprivileged brozzler-worker user.
# 3. When the worker exits successfully, open a shell as brozzler-worker and
#    afterwards a shell as the current user.
#
# `set -e` is active, so any failing step -- including a non-zero exit of the
# worker itself -- ends the script at that point.
set -e

venv="/opt/brozzler-worker-venv"

if [ -f "/brozzler/setup.py" ]; then
    echo "#### Installing /brozzler in $venv"
    # Quote the requirement: unquoted, `[yt_dlp]` is a glob character class
    # that the shell may expand against paths such as /brozzlery.
    "$venv/bin/pip" install --disable-pip-version-check -e '/brozzler[yt_dlp]' --quiet
    "$venv/bin/pip" install --disable-pip-version-check rethinkdb==2.4.9 doublethink==0.4.9
fi

echo "Running brozzler-worker"

su brozzler-worker /run-brozzler-worker.sh

echo "Run worker like: /run-brozzler-worker.sh"
su brozzler-worker

/bin/bash
51 changes: 51 additions & 0 deletions dev/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Development stack for brozzler: RethinkDB, warcprox and one brozzler worker.
version: "3.5"

services:
  rethinkdb:
    # Pinned to the 2.4 series rather than `latest`; the worker entrypoint
    # installs the 2.4.9 Python driver.
    image: rethinkdb:2.4
    command: rethinkdb --bind all
    ports:
      - "8080:8080" # WebUI
      - "28015:28015"
    expose:
      - "8080"
      - "28015"
    volumes:
      - rethinkdb_volume:/data

  warcprox:
    image: warcprox
    build:
      context: .
      dockerfile: Dockerfile-warcprox
    environment:
      # Expanded by the ENTRYPOINT in Dockerfile-warcprox.
      - RETHINKDB_SERVICES_URL=rethinkdb://rethinkdb:28015/brozzler_dev/services
      - RETHINKDB_STATS_URL=rethinkdb://rethinkdb:28015/brozzler_dev/stats
    ports:
      - "8888:8888"
    expose:
      # Must match the port warcprox listens on (--port 8888).
      - "8888"
    volumes:
      - warcprox_warcs:/warcs
      - warcprox_logs:/logs
    depends_on:
      - rethinkdb

  brozzler-worker:
    image: brozzler-worker
    # The worker image installs the amd64 build of Google Chrome.
    platform: linux/amd64
    build:
      context: ../
      dockerfile: ./dev/Dockerfile-brozzler-worker
    tty: true
    environment:
      - RETHINKDB_SERVERS_URL=rethinkdb:28015
    volumes:
      # Source tree; installed into the venv by the entrypoint at start-up.
      - ../:/brozzler
      - brozzler_venv:/opt/brozzler-worker-venv
    depends_on:
      - warcprox
      - rethinkdb

volumes:
  rethinkdb_volume: {}
  warcprox_warcs: {}
  warcprox_logs: {}
  brozzler_venv: {}
8 changes: 8 additions & 0 deletions dev/run-brozzler-worker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
# Start brozzler-worker from the worker virtualenv, using the `rethinkdb`
# host and the `brozzler_dev` database, with at most one browser.
source /opt/brozzler-worker-venv/bin/activate

# Command-line options, one per entry, in the order they are passed.
worker_args=(
    --verbose
    --rethinkdb-servers=rethinkdb
    --rethinkdb-db=brozzler_dev
    --max-browsers=1
    --warcprox-auto
)

brozzler-worker "${worker_args[@]}"

0 comments on commit e8978ff

Please sign in to comment.