-
Notifications
You must be signed in to change notification settings - Fork 99
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: adds docker development environment
- Loading branch information
1 parent
eb922f5
commit e8978ff
Showing
7 changed files
with
209 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Development image for the brozzler worker: Ubuntu with Python, build
# headers, ffmpeg, a broad set of fonts, and Google Chrome.
#
# Pinned to a release tag rather than `latest` so a rebuild cannot silently
# move to a different Ubuntu release.
# NOTE(review): 24.04 chosen as the current LTS; confirm every package below
# is available there before bumping or changing this tag.
FROM ubuntu:24.04

# Build argument accepted by this image; no later instruction in this file
# reads it.
ARG RETHINKDB_SERVERS_URL

WORKDIR /app

# Install all OS packages in one layer and drop the apt package lists in the
# same layer so they do not end up in the image. DEBIAN_FRONTEND is set inline
# (not via ENV) so package configuration never prompts during the build and
# the setting does not leak into the runtime environment.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        ffmpeg \
        fonts-arphic-bkai00mp \
        fonts-arphic-bsmi00lp \
        fonts-arphic-gbsn00lp \
        fonts-arphic-gkai00mp \
        fonts-arphic-ukai \
        fonts-farsiweb \
        fonts-indic \
        fonts-liberation \
        fonts-lklug-sinhala \
        fonts-nafees \
        fonts-sil-abyssinica \
        fonts-sil-ezra \
        fonts-sil-padauk \
        fonts-thai-tlwg \
        fonts-unfonts-core \
        fonts-unfonts-extra \
        gcc \
        libjpeg-turbo8-dev \
        libnspr4 \
        libnss3 \
        python3 \
        python3-dbg \
        python3-dev \
        python3-pip \
        python3-venv \
        wget \
        xdg-utils \
        xfonts-base \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install Google Chrome from Google's .deb. `apt-get install <path>` is used
# instead of `dpkg -i` so that any dependency of the package that is not
# already present gets installed rather than failing the build. The package
# lists are refreshed here because the previous layer removed them, and the
# downloaded .deb is deleted in the same layer.
RUN wget -O /tmp/google-chrome-stable_current_amd64.deb \
        https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb && \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        /tmp/google-chrome-stable_current_amd64.deb && \
    rm -f /tmp/google-chrome-stable_current_amd64.deb && \
    rm -rf /var/lib/apt/lists/*
|
||
# Name used for both the unprivileged account and its log directory.
ENV SERVICE_NAME="brozzler-worker"

# Create the account (home directory, bash as login shell) and a log
# directory under /var/log that the account owns.
RUN useradd --create-home --shell /bin/bash "${SERVICE_NAME}" && \
    mkdir -p "/var/log/${SERVICE_NAME}" && \
    chown "${SERVICE_NAME}:${SERVICE_NAME}" "/var/log/${SERVICE_NAME}"
|
||
WORKDIR /brozzler

# Dedicated virtualenv for the worker.
RUN python3 -m venv /opt/brozzler-worker-venv
# Enable venv: putting its bin/ first on PATH makes `pip` and `python3`
# resolve to the virtualenv in every later instruction and at runtime.
ENV PATH="/opt/brozzler-worker-venv/bin:$PATH"

# Bootstrap packaging tools inside the venv. --no-cache-dir keeps pip's
# download cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir setuptools
|
||
# Startup scripts are copied to the filesystem root; the entrypoint script is
# run through bash in exec form.
COPY dev/run-brozzler-worker.sh /run-brozzler-worker.sh
COPY dev/brozzler-worker-entrypoint.sh /entrypoint.sh

ENTRYPOINT ["bash", "/entrypoint.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Development image for warcprox: Ubuntu with Python and a virtualenv that
# has warcprox installed.
#
# Pinned to a release tag rather than `latest` so a rebuild cannot silently
# move to a different Ubuntu release.
# NOTE(review): 24.04 chosen as the current LTS; confirm before bumping.
FROM ubuntu:24.04

# Install Python, create the virtualenv, and drop the apt package lists in
# the same layer so they do not end up in the image.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        python3 \
        python3-pip \
        python3-venv \
    && rm -rf /var/lib/apt/lists/* && \
    python3 -m venv /opt/venv

# Enable venv: putting its bin/ first on PATH makes `pip` and `warcprox`
# resolve to the virtualenv.
ENV PATH="/opt/venv/bin:$PATH"

# The requirement is quoted on purpose: unquoted, the shell parses
# `>=2.4.31` as an output redirection, so pip would install an unconstrained
# `warcprox` and write its output to a file named `=2.4.31`.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir setuptools && \
    pip install --no-cache-dir "warcprox>=2.4.31"
|
||
# Port warcprox listens on (see --port below).
EXPOSE 8888

# Output directories passed to warcprox via --dir and --crawl-log-dir.
VOLUME /warcs /logs
WORKDIR /app
COPY . /app

# Exec form with an explicit `sh -c "exec ..."`: the shell is still needed to
# expand RETHINKDB_SERVICES_URL and RETHINKDB_STATS_URL from the container
# environment at start-up, and `exec` then replaces the shell so warcprox
# itself is PID 1 and receives the stop signal sent by `docker stop`.
ENTRYPOINT ["/bin/sh", "-c", \
    "exec warcprox --address 0.0.0.0 --port 8888 --dir /warcs --crawl-log-dir /logs --rethinkdb-services-url ${RETHINKDB_SERVICES_URL} --rethinkdb-stats-url=${RETHINKDB_STATS_URL} --prefix=brozzler --rollover-idle-time=86400 --base32 --gzip --verbose"]
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# Brozzler Docker Development | ||
|
||
## Development | ||
### Native local development | ||
#### Prerequisites | ||
|
||
- Python 3.8 | ||
- docker, docker-compose | ||
|
||
##### `Python` | ||
**Install `pyenv`** | ||
|
||
`pyenv` is a version manager for Python that lets you install any version of
Python and manage it separately from the system Python.
|
||
- Linux: https://github.com/pyenv/pyenv/?tab=readme-ov-file#basic-github-checkout | ||
- Mac OS: https://github.com/pyenv/pyenv/?tab=readme-ov-file#homebrew-in-macos | ||
- Note: strongly consider installing from `HEAD`, i.e.: `brew install pyenv --head` | ||
- Lastly, [configure your shell environment to use | ||
`pyenv`](https://github.com/pyenv/pyenv/?tab=readme-ov-file#set-up-your-shell-environment-for-pyenv) | ||
|
||
**Note**: `pyenv-virtualenv` should not be used for brozzler development. The
brozzler `Makefile` expects to manage its own virtualenv.
|
||
**Install Python** | ||
```sh | ||
# Linux: | ||
pyenv install 3.12.3 | ||
|
||
# Apple Silicon | ||
arch -x86_64 pyenv install 3.8.10 | ||
``` | ||
##### Mac OS considerations | ||
|
||
Install [Docker Desktop for Mac](https://docs.docker.com/desktop/install/mac-install/)
or [Colima](https://github.com/abiosoft/colima).
- Recommended: enable [Use Rosetta for x86_64/amd64 emulation on Apple Silicon](https://www.docker.com/blog/docker-desktop-4-25/)
|
||
#### `Brozzler` development environment | ||
```shell | ||
cd path/to/project | ||
|
||
# ensure virtual environment exists: | ||
make venv | ||
source venv/bin/activate | ||
|
||
# start service dependencies | ||
docker-compose --file dev/docker-compose.yml up -d | ||
|
||
# Queue a new job | ||
brozzler-new-job --rethinkdb-servers localhost --rethinkdb-db brozzler_dev path/to/your_job.yml | ||
``` | ||
Notes: | ||
- See [job-conf.rst](../job-conf.rst) for creating your brozzler job configuration | ||
- The contents of the warcprox log and WARC volumes can be viewed in Docker Desktop
|
||
|
||
##### Attach to brozzler worker container | ||
```shell | ||
# deactivate brozzler-worker | ||
docker-compose --file dev/docker-compose.yml down brozzler-worker | ||
|
||
# Run and attach to brozzler worker | ||
docker-compose --file dev/docker-compose.yml run brozzler-worker | ||
``` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#!/bin/bash
# Container entrypoint for the brozzler worker development image.
#
# 1. If /brozzler contains a setup.py, install it (editable, with the yt_dlp
#    extra) plus pinned rethinkdb/doublethink into the worker virtualenv.
# 2. Run the worker as the brozzler-worker user.
# 3. Once the worker returns successfully, open a shell as brozzler-worker,
#    and after that a shell as the user this script runs as.
set -e

venv="/opt/brozzler-worker-venv"

if [ -f "/brozzler/setup.py" ]; then
    echo "#### Installing /brozzler in $venv"
    # The requirement is quoted so the shell never treats [yt_dlp] as a glob
    # character class that could match a path such as /brozzlery.
    "$venv/bin/pip" install --disable-pip-version-check -e "/brozzler[yt_dlp]" --quiet
    "$venv/bin/pip" install --disable-pip-version-check rethinkdb==2.4.9 doublethink==0.4.9
fi

echo "Running brozzler-worker"

# Because of `set -e`, a non-zero exit status from the worker ends the script
# (and therefore the container) here.
su brozzler-worker /run-brozzler-worker.sh

echo "Run worker like: /run-brozzler-worker.sh"
su brozzler-worker

/bin/bash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# Development stack for brozzler: a RethinkDB server, a warcprox instance,
# and a brozzler worker built from the repository checkout.
version: "3.5"

services:
  rethinkdb:
    image: rethinkdb:latest
    command: rethinkdb --bind all
    ports:
      - "8080:8080" # WebUI
      - "28015:28015" # port used in the rethinkdb:// URLs below
    expose:
      - "8080"
      - "28015"
    volumes:
      - rethinkdb_volume:/data

  warcprox:
    image: warcprox
    build:
      context: .
      dockerfile: Dockerfile-warcprox
    environment:
      # Read by the warcprox image's entrypoint.
      - RETHINKDB_SERVICES_URL=rethinkdb://rethinkdb:28015/brozzler_dev/services
      - RETHINKDB_STATS_URL=rethinkdb://rethinkdb:28015/brozzler_dev/stats
    ports:
      - "8888:8888"
    expose:
      # Must match the port warcprox listens on and that is published above.
      - "8888"
    volumes:
      - warcprox_warcs:/warcs
      - warcprox_logs:/logs
    depends_on:
      - rethinkdb

  brozzler-worker:
    image: brozzler-worker
    platform: linux/amd64
    build:
      context: ../
      dockerfile: ./dev/Dockerfile-brozzler-worker
    tty: true
    environment:
      - RETHINKDB_SERVERS_URL=rethinkdb:28015
    volumes:
      # Repository checkout, mounted where the entrypoint looks for setup.py.
      - ../:/brozzler
      # Named volume so the worker virtualenv persists between runs.
      - brozzler_venv:/opt/brozzler-worker-venv
    depends_on:
      - warcprox
      - rethinkdb

volumes:
  rethinkdb_volume: {}
  warcprox_warcs: {}
  warcprox_logs: {}
  brozzler_venv: {}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#!/bin/bash
# Start a brozzler worker from the worker virtualenv, pointed at the
# `rethinkdb` host and the `brozzler_dev` database, with a single browser and
# warcprox auto-discovery enabled.
. /opt/brozzler-worker-venv/bin/activate

worker_args=(
    --verbose
    --rethinkdb-servers=rethinkdb
    --rethinkdb-db=brozzler_dev
    --max-browsers=1
    --warcprox-auto
)

brozzler-worker "${worker_args[@]}"