From 421a16ceb7056ea9e62fab761e7a78976a19d189 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Mon, 12 Aug 2024 04:28:41 +0200 Subject: [PATCH 01/13] Update Dockerfile to Ubuntu 24.04 + integrate qlever script The old Dockerfile called `ServerMain` directly using a small selection of environment variables with outdated names. It was also outdated in other respects. The new Dockerfile installs the `qlever` script, so that it can be called from inside the container. Remaining questions / TODOs, feedback welcome: 1. Right now, the script is installed as part of the docker build via `pipx install qlever`. Is this the right way to do it? Alternatives would be to clone the GitHub repo and `pipx install -e .` from there, or include the GitHub repo as a submodule of this repository. 2. How do we handle the QLever UI? We could just call `qlever ui` from inside the container. But that would pull the Docker image for the QLever UI and run a Docker container inside of a Docker container. It's possible, but not the right way to do it. If both are needed, the container for the QLever engine and the container for the QLever UI should run side by side. 3. The `qlever setup-config` command should have options that overwrite the variables in the produced Qleverfile. In particular, there should be an option for setting `SYSTEM = native`. Otherwise it has to be stated explicitly for each command, where that is relevant (in particular: `qlever index`, `qlever start`, `qlever example-queries`).
--- Dockerfile | 44 ++++++++++++-------------- Dockerfiles/Dockerfile.Ubuntu22.04 | 51 ++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 24 deletions(-) create mode 100644 Dockerfiles/Dockerfile.Ubuntu22.04 diff --git a/Dockerfile b/Dockerfile index 46ae129e3e..d838e0b193 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:22.04 as base +FROM ubuntu:24.04 as base LABEL maintainer="Johannes Kalmbach " ENV LANG C.UTF-8 ENV LC_ALL C.UTF-8 @@ -7,45 +7,41 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y software-properties-common && add-apt-repository -y ppa:mhier/libboost-latest FROM base as builder -RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev +RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev -COPY . /app/ +COPY . /qlever/ -WORKDIR /app/ +WORKDIR /qlever/ ENV DEBIAN_FRONTEND=noninteractive -WORKDIR /app/build/ +WORKDIR /qlever/build/ RUN cmake -DCMAKE_BUILD_TYPE=Release -DLOGLEVEL=INFO -DUSE_PARALLEL=true -D_NO_TIMING_TESTS=ON -GNinja .. 
&& ninja RUN ctest --rerun-failed --output-on-failure FROM base as runtime -WORKDIR /app +WORKDIR /qlever ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev +RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev pipx bash-completion -ARG UID=1000 -RUN groupadd -r qlever && useradd --no-log-init -r -u $UID -g qlever qlever && chown qlever:qlever /app +ARG UID=2000 +RUN groupadd -r qlever && useradd --no-log-init -d /qlever -r -u $UID -g qlever qlever && chown qlever:qlever /qlever USER qlever -ENV PATH=/app/:$PATH +RUN PIPX_HOME=/qlever/.local PIPX_BIN_DIR=/qlever/.local/bin PIPX_MAN_DIR=/qlever/.local/share pipx install qlever +RUN echo "eval \"\$(register-python-argcomplete qlever)\"" >> /qlever/.bashrc +ENV QLEVER_ARGCOMPLETE_ENABLED=1 -COPY --from=builder /app/build/*Main /app/ -COPY --from=builder /app/e2e/* /app/e2e/ -ENV PATH=/app/:$PATH +COPY --from=builder /qlever/build/*Main /qlever/ +COPY --from=builder /qlever/e2e/* /qlever/e2e/ +ENV PATH=/qlever/:/qlever/.local/bin:$PATH USER qlever EXPOSE 7001 -VOLUME ["/input", "/index"] +VOLUME ["/data"] -ENV INDEX_PREFIX index -ENV MEMORY_FOR_QUERIES 70 -ENV CACHE_MAX_SIZE_GB 30 -ENV CACHE_MAX_SIZE_GB_SINGLE_ENTRY 5 -ENV CACHE_MAX_NUM_ENTRIES 1000 -# Need the shell to get the INDEX_PREFIX environment variable -ENTRYPOINT ["/bin/sh", "-c", "exec ServerMain -i \"/index/${INDEX_PREFIX}\" -j 8 -m ${MEMORY_FOR_QUERIES} -c ${CACHE_MAX_SIZE_GB} -e ${CACHE_MAX_SIZE_GB_SINGLE_ENTRY} -k ${CACHE_MAX_NUM_ENTRIES} -p 7001 \"$@\"", "--"] 
+ENTRYPOINT ["bash"] -# Build image: docker build -t qlever.master . +# Build image: docker build -t qlever . -# Build index: DB=wikidata; docker run -it --rm -v "$(pwd)":/index --entrypoint bash --name qlever.$DB-index qlever.master -c "IndexBuilderMain -f /index/$DB.nt -i /index/$DB -s /index/$DB.settings.json | tee /index/$DB.index-log.txt"; rm -f $DB/*tmp* +# Run container, interactive session: docker run -it --rm -v "$(pwd)":/data --name qlever qlever -# Run engine: DB=wikidata; PORT=7001; docker rm -f qlever.$DB; docker run -d --restart=unless-stopped -v "$(pwd)":/index -p $PORT:7001 -e INDEX_PREFIX=$DB -e MEMORY_FOR_QUERIES=30 --name qlever.$DB qlever.master; docker logs -f --tail=100 qlever.$DB +# Run container, create SPARQL endpoint for "Olympics" dataset: docker run -it --rm -v "$(pwd)":/data -p 7001:7001 --name qlever qlever -c "qlever setup-config olympics && qlever get-data && qlever index --system native && qlever start --system native --port 7001 && qlever example-queries --port 7001" diff --git a/Dockerfiles/Dockerfile.Ubuntu22.04 b/Dockerfiles/Dockerfile.Ubuntu22.04 new file mode 100644 index 0000000000..46ae129e3e --- /dev/null +++ b/Dockerfiles/Dockerfile.Ubuntu22.04 @@ -0,0 +1,51 @@ +FROM ubuntu:22.04 as base +LABEL maintainer="Johannes Kalmbach " +ENV LANG C.UTF-8 +ENV LC_ALL C.UTF-8 +ENV LC_CTYPE C.UTF-8 +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y software-properties-common && add-apt-repository -y ppa:mhier/libboost-latest + +FROM base as builder +RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev + +COPY . /app/ + +WORKDIR /app/ +ENV DEBIAN_FRONTEND=noninteractive + +WORKDIR /app/build/ +RUN cmake -DCMAKE_BUILD_TYPE=Release -DLOGLEVEL=INFO -DUSE_PARALLEL=true -D_NO_TIMING_TESTS=ON -GNinja .. 
&& ninja +RUN ctest --rerun-failed --output-on-failure + +FROM base as runtime +WORKDIR /app +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev + +ARG UID=1000 +RUN groupadd -r qlever && useradd --no-log-init -r -u $UID -g qlever qlever && chown qlever:qlever /app +USER qlever +ENV PATH=/app/:$PATH + +COPY --from=builder /app/build/*Main /app/ +COPY --from=builder /app/e2e/* /app/e2e/ +ENV PATH=/app/:$PATH + +USER qlever +EXPOSE 7001 +VOLUME ["/input", "/index"] + +ENV INDEX_PREFIX index +ENV MEMORY_FOR_QUERIES 70 +ENV CACHE_MAX_SIZE_GB 30 +ENV CACHE_MAX_SIZE_GB_SINGLE_ENTRY 5 +ENV CACHE_MAX_NUM_ENTRIES 1000 +# Need the shell to get the INDEX_PREFIX environment variable +ENTRYPOINT ["/bin/sh", "-c", "exec ServerMain -i \"/index/${INDEX_PREFIX}\" -j 8 -m ${MEMORY_FOR_QUERIES} -c ${CACHE_MAX_SIZE_GB} -e ${CACHE_MAX_SIZE_GB_SINGLE_ENTRY} -k ${CACHE_MAX_NUM_ENTRIES} -p 7001 \"$@\"", "--"] + +# Build image: docker build -t qlever.master . 
+ +# Build index: DB=wikidata; docker run -it --rm -v "$(pwd)":/index --entrypoint bash --name qlever.$DB-index qlever.master -c "IndexBuilderMain -f /index/$DB.nt -i /index/$DB -s /index/$DB.settings.json | tee /index/$DB.index-log.txt"; rm -f $DB/*tmp* + +# Run engine: DB=wikidata; PORT=7001; docker rm -f qlever.$DB; docker run -d --restart=unless-stopped -v "$(pwd)":/index -p $PORT:7001 -e INDEX_PREFIX=$DB -e MEMORY_FOR_QUERIES=30 --name qlever.$DB qlever.master; docker logs -f --tail=100 qlever.$DB From 471d2d184363b001c0962535b89634b73672f0ad Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Mon, 12 Aug 2024 16:56:15 +0200 Subject: [PATCH 02/13] Replace `as` by `AS` (three times) --- Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index d838e0b193..b94ad923be 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:24.04 as base +FROM ubuntu:24.04 AS base LABEL maintainer="Johannes Kalmbach " ENV LANG C.UTF-8 ENV LC_ALL C.UTF-8 @@ -6,7 +6,7 @@ ENV LC_CTYPE C.UTF-8 ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y software-properties-common && add-apt-repository -y ppa:mhier/libboost-latest -FROM base as builder +FROM base AS builder RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev COPY . /qlever/ @@ -18,7 +18,7 @@ WORKDIR /qlever/build/ RUN cmake -DCMAKE_BUILD_TYPE=Release -DLOGLEVEL=INFO -DUSE_PARALLEL=true -D_NO_TIMING_TESTS=ON -GNinja .. 
&& ninja RUN ctest --rerun-failed --output-on-failure -FROM base as runtime +FROM base AS runtime WORKDIR /qlever ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev pipx bash-completion From fabb137582ad86c58f872f640abc4bda7b93cb8d Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Thu, 15 Aug 2024 22:06:30 +0200 Subject: [PATCH 03/13] Use ENV=... instead of ENV ... everywhere --- Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index b94ad923be..7b8af2ae5e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ FROM ubuntu:24.04 AS base LABEL maintainer="Johannes Kalmbach " -ENV LANG C.UTF-8 -ENV LC_ALL C.UTF-8 -ENV LC_CTYPE C.UTF-8 +ENV LANG=C.UTF-8 +ENV LC_ALL=C.UTF-8 +ENV LC_CTYPE=C.UTF-8 ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y software-properties-common && add-apt-repository -y ppa:mhier/libboost-latest From 8b5e0c18e2e8717eea16bce2425be83d8456fe24 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Sun, 17 Nov 2024 16:28:59 +0100 Subject: [PATCH 04/13] Now works both in interacive and batch mode It was surprisingly hard to get it to work well in both settings, with reasonable user rights and all. 
--- .dockerignore | 17 +++++------------ Dockerfile | 34 +++++++++++++++++++++++----------- docker-entrypoint.sh | 31 +++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 23 deletions(-) create mode 100644 docker-entrypoint.sh diff --git a/.dockerignore b/.dockerignore index 138565d2e7..bc19307271 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,16 +1,9 @@ * -!.clang-format -!CMakeLists.txt -!CompilationInfo.cmake -!.git -!LICENSE -!README.md -!e2e -!evaluation -!index -!misc !src !test -!third_party -!wikidata_settings.json +!e2e !benchmark +!.git +!CMakeLists.txt +!CompilationInfo.cmake +!docker-entrypoint.sh diff --git a/Dockerfile b/Dockerfile index eb222ce299..8b8689e72a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,14 @@ FROM base AS builder ARG TARGETPLATFORM RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev -COPY . 
/qlever/ +COPY src /qlever/src/ +COPY test /qlever/test/ +COPY e2e /qlever/e2e/ +COPY benchmark /qlever/benchmark/ +COPY .git /qlever/.git/ +COPY CMakeLists.txt /qlever/ +COPY CompilationInfo.cmake /qlever/ +#COPY src test e2e CMakeLists.txt CompilationInfo.cmake .git /qlever/ WORKDIR /qlever/ ENV DEBIAN_FRONTEND=noninteractive @@ -30,24 +37,29 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev pipx bash-completion ARG UID=2000 -RUN groupadd -r qlever && useradd --no-log-init -d /qlever -r -u $UID -g qlever qlever && chown qlever:qlever /qlever +ARG GID=2000 +RUN groupadd -r -g $GID qlever +RUN useradd --no-log-init -d /qlever -r -u $UID -g qlever qlever +RUN chown qlever:qlever /qlever +RUN apt-get install -y vim sudo && echo "qlever ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers USER qlever -RUN PIPX_HOME=/qlever/.local PIPX_BIN_DIR=/qlever/.local/bin PIPX_MAN_DIR=/qlever/.local/share pipx install qlever +ENV PIPX_HOME=/qlever/.local +ENV PIPX_BIN_DIR=/qlever/.local/bin +ENV PIPX_MAN_DIR=/qlever/.local/share +ENV PATH=/qlever:/qlever/.local/bin:$PATH +RUN pipx install qlever RUN echo "eval \"\$(register-python-argcomplete qlever)\"" >> /qlever/.bashrc +RUN echo "export PATH=$PATH" >> /qlever/.bashrc ENV QLEVER_ARGCOMPLETE_ENABLED=1 +ENV QLEVER_IS_RUNNING_IN_CONTAINER=1 COPY --from=builder /qlever/build/*Main /qlever/ COPY --from=builder /qlever/e2e/* /qlever/e2e/ -ENV PATH=/qlever/:/qlever/.local/bin:$PATH +COPY docker-entrypoint.sh /qlever/ +RUN sudo chmod +x /qlever/docker-entrypoint.sh USER qlever EXPOSE 7001 VOLUME ["/data"] -ENTRYPOINT ["bash"] - -# Build image: docker build -t qlever . 
- -# Run container, interactive session: docker run -it --rm -v "$(pwd)":/data --name qlever qlever - -# Run container, create SPARQL endpoint for "Olympics" dataset: docker run -it --rm -v "$(pwd)":/data -p 7001:7001 --name qlever qlever -c "qlever setup-config olympics && qlever get-data && qlever index --system native && qlever start --system native --port 7001 && qlever example-queries --port 7001" +ENTRYPOINT ["/qlever/docker-entrypoint.sh"] diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh new file mode 100644 index 0000000000..b2d14e9926 --- /dev/null +++ b/docker-entrypoint.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +# Entrypoint script for the QLever docker image. + +HELP_MESSAGE=' +The recommended way to run a container with this image is as follows: + +In interactive mode: + +docker run -it --rm -e UID=$(id -u) -e GID=$(id -g) -v $(pwd):/data -w /data qlever + +In batch mode (example): + +docker run -it --rm -u $(id -u):$(id -g) -v $(pwd):/data -w /data --entrypoint bash qlever -c "qlever setup-config olympics && qlever get-data && qlever index && qlever start && qlever example-queries" +' + +ERROR() { + echo -e "\x1b[31m$1\x1b[0m" + echo -e "$HELP_MESSAGE" + exit 1 +} + +if [ -z "$UID" ] || [ -z "$GID" ]; then + ERROR "Environment variables UID and GID not set" +elif [ "$(pwd)" != "/data" ]; then + ERROR "The working directory must be /data, but it is $(pwd)" +else + sudo -E bash -c "usermod -u $UID qlever && groupmod -g $GID qlever && chown -R qlever:qlever /qlever && sudo -E -u qlever bash" + # Run the command as the qlever user. + # exec su-exec qlever "$@" +fi From 4ba8516c659d39c02aee885197dc0796d128db11 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Sun, 17 Nov 2024 23:13:13 +0100 Subject: [PATCH 05/13] Pretty neat and functional now TODO: Check whether all parts of the Dockerfile are still needs and move `.bashrc` for user `qlever` to own file `docker-bashrc`. 
--- Dockerfile | 11 ++++++++--- docker-entrypoint.sh | 19 +++++++++++++------ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8b8689e72a..14ba34c60c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,6 +36,7 @@ WORKDIR /qlever ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev pipx bash-completion +# Setup user `qlever`. ARG UID=2000 ARG GID=2000 RUN groupadd -r -g $GID qlever @@ -49,9 +50,13 @@ ENV PIPX_MAN_DIR=/qlever/.local/share ENV PATH=/qlever:/qlever/.local/bin:$PATH RUN pipx install qlever RUN echo "eval \"\$(register-python-argcomplete qlever)\"" >> /qlever/.bashrc -RUN echo "export PATH=$PATH" >> /qlever/.bashrc -ENV QLEVER_ARGCOMPLETE_ENABLED=1 -ENV QLEVER_IS_RUNNING_IN_CONTAINER=1 +RUN echo "export QLEVER_ARGCOMPLETE_ENABLED=1" >> /qlever/.bashrc +RUN echo "export QLEVER_IS_RUNNING_IN_CONTAINER=1" >> /qlever/.bashrc +RUN echo "PATH=$PATH" >> /qlever/.bashrc +RUN echo 'PS1="\u@docker:\W\$ "' >> /qlever/.bashrc +RUN echo 'alias ll="ls -l"' >> /qlever/.bashrc +RUN echo "cd /data" >> /qlever/.bashrc +RUN echo "source /qlever/.bashrc" >> /qlever/.bash_profile COPY --from=builder /qlever/build/*Main /qlever/ COPY --from=builder /qlever/e2e/* /qlever/e2e/ diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index b2d14e9926..bc629a50e8 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -7,14 +7,15 @@ The recommended way to run a container with this image is as follows: In interactive mode: -docker run -it --rm -e UID=$(id -u) -e GID=$(id -g) -v $(pwd):/data -w /data qlever +\x1b[34mdocker run -it --rm -e UID=$(id -u) -e GID=$(id -g) -v $(pwd):/data -w /data qlever\x1b[0m In batch mode (example): -docker run -it --rm -u $(id -u):$(id -g) -v 
$(pwd):/data -w /data --entrypoint bash qlever -c "qlever setup-config olympics && qlever get-data && qlever index && qlever start && qlever example-queries" +\x1b[34mdocker run -it --rm -e UID=$(id -u) -e GID=$(id -g) -v $(pwd):/data -w /data qlever -c "qlever setup-config olympics && qlever get-data && qlever index && qlever start && qlever example-queries"\x1b[0m ' ERROR() { + echo echo -e "\x1b[31m$1\x1b[0m" echo -e "$HELP_MESSAGE" exit 1 @@ -23,9 +24,15 @@ ERROR() { if [ -z "$UID" ] || [ -z "$GID" ]; then ERROR "Environment variables UID and GID not set" elif [ "$(pwd)" != "/data" ]; then - ERROR "The working directory must be /data, but it is $(pwd)" + ERROR "The working directory should be /data, but it is $(pwd)" else - sudo -E bash -c "usermod -u $UID qlever && groupmod -g $GID qlever && chown -R qlever:qlever /qlever && sudo -E -u qlever bash" - # Run the command as the qlever user. - # exec su-exec qlever "$@" + if [ $# -eq 0 ]; then + echo + sudo -E bash -c "usermod -u $UID -s /bin/bash qlever && groupmod -g $GID qlever && chown -R qlever:qlever /qlever && su - qlever --login" + else + if [ "$1" == "-c" ]; then + shift + fi + sudo -E bash -c "usermod -u $UID -s /bin/bash qlever && groupmod -g $GID qlever && chown -R qlever:qlever /qlever && su - qlever -s /bin/bash --login -c \"$@\"" + fi fi From c31d45abf986424186b6c7cb5e41172eee390a63 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Mon, 18 Nov 2024 09:45:49 +0100 Subject: [PATCH 06/13] Everything cleaned up now and (hopefully) still works --- Dockerfile | 41 +++++++++++++++++++++-------------------- docker-entrypoint.sh | 44 +++++++++++++++++++++++++++++++++----------- 2 files changed, 54 insertions(+), 31 deletions(-) diff --git a/Dockerfile b/Dockerfile index 14ba34c60c..1e307f3101 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,7 @@ FROM ubuntu:24.04 AS base -LABEL maintainer="Johannes Kalmbach " +LABEL maintainer="Hannah Bast " + +# Packages needed for both both building and running the 
binaries. ENV LANG=C.UTF-8 ENV LC_ALL=C.UTF-8 ENV LC_CTYPE=C.UTF-8 @@ -7,10 +9,18 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y software-properties-common wget && add-apt-repository -y ppa:mhier/libboost-latest RUN wget https://apt.kitware.com/kitware-archive.sh && chmod +x kitware-archive.sh &&./kitware-archive.sh +# Stage 1: Build the binaries. FROM base AS builder ARG TARGETPLATFORM +ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev +# Copy everything we need to build the binaries. +# +# NOTE: We are deliberately explicit here, for two reasons. First, so that we +# don't copy more than necessary without having to rely on `.dockerignore`. +# Second, so that we can copy `docker-entrypoint.sh` separately below (we don't +# to rebuild the whole container when making a small change in there). COPY src /qlever/src/ COPY test /qlever/test/ COPY e2e /qlever/e2e/ @@ -18,10 +28,6 @@ COPY benchmark /qlever/benchmark/ COPY .git /qlever/.git/ COPY CMakeLists.txt /qlever/ COPY CompilationInfo.cmake /qlever/ -#COPY src test e2e CMakeLists.txt CompilationInfo.cmake .git /qlever/ - -WORKDIR /qlever/ -ENV DEBIAN_FRONTEND=noninteractive # Don't build and run tests on ARM64, as it takes too long on GitHub actions. # TODO: re-enable these tests as soon as we can use a native ARM64 platform to compile the docker container. @@ -31,40 +37,35 @@ RUN if [ $TARGETPLATFORM = "linux/arm64" ] ; then echo "target is ARM64, don't RUN if [ $TARGETPLATFORM = "linux/arm64" ] ; then cmake --build . --target IndexBuilderMain ServerMain; else cmake --build . 
; fi RUN if [ $TARGETPLATFORM = "linux/arm64" ] ; then echo "Skipping tests for ARM64" ; else ctest --rerun-failed --output-on-failure ; fi +# Stage 2: Create the final image. FROM base AS runtime WORKDIR /qlever ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev pipx bash-completion -# Setup user `qlever`. -ARG UID=2000 -ARG GID=2000 -RUN groupadd -r -g $GID qlever -RUN useradd --no-log-init -d /qlever -r -u $UID -g qlever qlever +# Set up user `qlever` with temporary sudo rights (which will be removed again +# by the `docker-entrypoint.sh` script, see the comments in that file). +RUN groupadd -r qlever && useradd --no-log-init -d /qlever -r -g qlever qlever RUN chown qlever:qlever /qlever RUN apt-get install -y vim sudo && echo "qlever ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers USER qlever -ENV PIPX_HOME=/qlever/.local -ENV PIPX_BIN_DIR=/qlever/.local/bin -ENV PIPX_MAN_DIR=/qlever/.local/share -ENV PATH=/qlever:/qlever/.local/bin:$PATH RUN pipx install qlever +ENV PATH=/qlever:/qlever/.local/bin:$PATH RUN echo "eval \"\$(register-python-argcomplete qlever)\"" >> /qlever/.bashrc -RUN echo "export QLEVER_ARGCOMPLETE_ENABLED=1" >> /qlever/.bashrc -RUN echo "export QLEVER_IS_RUNNING_IN_CONTAINER=1" >> /qlever/.bashrc -RUN echo "PATH=$PATH" >> /qlever/.bashrc -RUN echo 'PS1="\u@docker:\W\$ "' >> /qlever/.bashrc +RUN echo "export QLEVER_ARGCOMPLETE_ENABLED=1 && export QLEVER_IS_RUNNING_IN_CONTAINER=1" >> /qlever/.bashrc +RUN echo "PATH=$PATH && PS1=\"\u@docker:\W\$ \"" >> /qlever/.bashrc RUN echo 'alias ll="ls -l"' >> /qlever/.bashrc RUN echo "cd /data" >> /qlever/.bashrc RUN echo "source /qlever/.bashrc" >> /qlever/.bash_profile +# Copy the binaries and the entrypoint script. 
COPY --from=builder /qlever/build/*Main /qlever/ -COPY --from=builder /qlever/e2e/* /qlever/e2e/ COPY docker-entrypoint.sh /qlever/ RUN sudo chmod +x /qlever/docker-entrypoint.sh -USER qlever +# TODO: Are these necessary or useful for anything? EXPOSE 7001 VOLUME ["/data"] +# Our entrypoint script does some clever things; see the comments in there. ENTRYPOINT ["/qlever/docker-entrypoint.sh"] diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index bc629a50e8..d646666ef5 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -1,7 +1,18 @@ #!/bin/bash -# Entrypoint script for the QLever docker image. - +# Entrypoint script for the QLever docker image. It sets the UID and GID of the +# user `qlever` inside the container to the UID and GID specified in the call of +# `docker run`, typically the UID and GID of the user outside the container. +# That way, we don't need to set special permissions for the mounted volume, +# and everthing looks nice inside of the container, too. +# +# NOTE: The container should be started with `-e UID=... -e GID=...` and not +# `-u ...:...` for the following reason. In order to change the UID and GID of +# the internal user `qlever`, we need `sudo` rights, which are granted to that +# user via the configuration in the Dockerfile. However, if we run the container +# with `-u ...:...`, the user changes and no longer has `sudo` rights. + +# Help message that is printed if the container is not startes as recommended. 
HELP_MESSAGE=' The recommended way to run a container with this image is as follows: @@ -9,11 +20,12 @@ In interactive mode: \x1b[34mdocker run -it --rm -e UID=$(id -u) -e GID=$(id -g) -v $(pwd):/data -w /data qlever\x1b[0m -In batch mode (example): +In batch mode (example, add `-p :` for outside access to the server): \x1b[34mdocker run -it --rm -e UID=$(id -u) -e GID=$(id -g) -v $(pwd):/data -w /data qlever -c "qlever setup-config olympics && qlever get-data && qlever index && qlever start && qlever example-queries"\x1b[0m ' +# Helper function to print an error message (in red) and the help message. ERROR() { echo echo -e "\x1b[31m$1\x1b[0m" @@ -21,18 +33,28 @@ ERROR() { exit 1 } +# Check that UID and GID are set and that the working directory is `/data`. if [ -z "$UID" ] || [ -z "$GID" ]; then ERROR "Environment variables UID and GID not set" elif [ "$(pwd)" != "/data" ]; then ERROR "The working directory should be /data, but it is $(pwd)" +fi + +# If `docker run` is run without arguments, start an interactive shell. Otherwise, +# run the sequence of commands given as arguments (the first argument may be the +# standard `-c`, but it can also be omitted). +# +# NOTE: The call `su - qlever ...` has to be inside of the `sudo` call, because once +# the UID and GID of the user `qlever` have been changed, it no longer has `sudo` +# rights. And just remaining in the shell or starting a new shell (with `bash`) does +# not work, because neither of these would have the new UID and GID. Hence also +# the slight code duplication. 
+if [ $# -eq 0 ]; then + echo + sudo -E bash -c "usermod -u $UID -s /bin/bash qlever && groupmod -g $GID qlever && chown -R qlever:qlever /qlever && su - qlever --login" else - if [ $# -eq 0 ]; then - echo - sudo -E bash -c "usermod -u $UID -s /bin/bash qlever && groupmod -g $GID qlever && chown -R qlever:qlever /qlever && su - qlever --login" - else - if [ "$1" == "-c" ]; then - shift - fi - sudo -E bash -c "usermod -u $UID -s /bin/bash qlever && groupmod -g $GID qlever && chown -R qlever:qlever /qlever && su - qlever -s /bin/bash --login -c \"$@\"" + if [ "$1" == "-c" ]; then + shift fi + sudo -E bash -c "usermod -u $UID -s /bin/bash qlever && groupmod -g $GID qlever && chown -R qlever:qlever /qlever && su - qlever -s /bin/bash --login -c \"$@\"" fi From 974ffb04bd671effa00a4271a8ec4596075ecba5 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Mon, 18 Nov 2024 09:49:29 +0100 Subject: [PATCH 07/13] Fix typo --- docker-entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index d646666ef5..d7822fff6b 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -4,7 +4,7 @@ # user `qlever` inside the container to the UID and GID specified in the call of # `docker run`, typically the UID and GID of the user outside the container. # That way, we don't need to set special permissions for the mounted volume, -# and everthing looks nice inside of the container, too. +# and everything looks nice inside of the container, too. # # NOTE: The container should be started with `-e UID=... -e GID=...` and not # `-u ...:...` for the following reason. 
In order to change the UID and GID of From e17f2b66f66f9d55a6db1009077f8ee6967aec9a Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Mon, 18 Nov 2024 10:35:34 +0100 Subject: [PATCH 08/13] Do copy the e2e files into the image They are needed for the `docker publish` action --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 1e307f3101..8b4d534be1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -60,6 +60,7 @@ RUN echo "source /qlever/.bashrc" >> /qlever/.bash_profile # Copy the binaries and the entrypoint script. COPY --from=builder /qlever/build/*Main /qlever/ +COPY --from=builder /qlever/e2e/* /qlever/e2e/ COPY docker-entrypoint.sh /qlever/ RUN sudo chmod +x /qlever/docker-entrypoint.sh From 77ce159d2583ce24488444ce1d62ee1df7c50608 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Mon, 2 Dec 2024 23:53:06 +0100 Subject: [PATCH 09/13] Remove cached list of package directories after installing packages --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8b4d534be1..ada84debf4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,13 +41,13 @@ RUN if [ $TARGETPLATFORM = "linux/arm64" ] ; then echo "Skipping tests for ARM64 FROM base AS runtime WORKDIR /qlever ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev pipx bash-completion +RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev pipx bash-completion vim sudo && rm -rf /var/lib/apt/lists/* # Set up user `qlever` with temporary sudo rights 
(which will be removed again # by the `docker-entrypoint.sh` script, see the comments in that file). RUN groupadd -r qlever && useradd --no-log-init -d /qlever -r -g qlever qlever RUN chown qlever:qlever /qlever -RUN apt-get install -y vim sudo && echo "qlever ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers +RUN echo "qlever ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers USER qlever RUN pipx install qlever ENV PATH=/qlever:/qlever/.local/bin:$PATH From 2ca7d0361e764027abc65a01b8da244222c6f71e Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Tue, 3 Dec 2024 12:22:18 +0100 Subject: [PATCH 10/13] Final amendments + fix the old Dockerfiles --- Dockerfile | 6 +-- Dockerfiles/Dockerfile.Ubuntu18.04 | 59 ------------------------------ Dockerfiles/Dockerfile.Ubuntu20.04 | 30 +++++---------- Dockerfiles/Dockerfile.Ubuntu22.04 | 26 ++++--------- docker-entrypoint.sh | 28 +++++++++----- 5 files changed, 35 insertions(+), 114 deletions(-) delete mode 100644 Dockerfiles/Dockerfile.Ubuntu18.04 diff --git a/Dockerfile b/Dockerfile index ada84debf4..cc3845520e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,7 +30,7 @@ COPY CMakeLists.txt /qlever/ COPY CompilationInfo.cmake /qlever/ # Don't build and run tests on ARM64, as it takes too long on GitHub actions. -# TODO: re-enable these tests as soon as we can use a native ARM64 platform to compile the docker container. +# TODO: re-enable these tests as soon as we can use a native ARM64 platform to compile the Docker container. WORKDIR /qlever/build/ RUN cmake -DCMAKE_BUILD_TYPE=Release -DLOGLEVEL=INFO -DUSE_PARALLEL=true -D_NO_TIMING_TESTS=ON -GNinja .. RUN if [ $TARGETPLATFORM = "linux/arm64" ] ; then echo "target is ARM64, don't build tests to avoid timeout"; fi @@ -64,9 +64,5 @@ COPY --from=builder /qlever/e2e/* /qlever/e2e/ COPY docker-entrypoint.sh /qlever/ RUN sudo chmod +x /qlever/docker-entrypoint.sh -# TODO: Are these necessary or useful for anything? 
-EXPOSE 7001 -VOLUME ["/data"] - # Our entrypoint script does some clever things; see the comments in there. ENTRYPOINT ["/qlever/docker-entrypoint.sh"] diff --git a/Dockerfiles/Dockerfile.Ubuntu18.04 b/Dockerfiles/Dockerfile.Ubuntu18.04 deleted file mode 100644 index 1bf75191c5..0000000000 --- a/Dockerfiles/Dockerfile.Ubuntu18.04 +++ /dev/null @@ -1,59 +0,0 @@ -FROM ubuntu:18.04 as base -LABEL maintainer="Johannes Kalmbach " -ENV LANG C.UTF-8 -ENV LC_ALL C.UTF-8 -ENV LC_CTYPE C.UTF-8 -ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update -RUN apt install -y software-properties-common -RUN add-apt-repository -y ppa:mhier/libboost-latest -RUN add-apt-repository -y ppa:ubuntu-toolchain-r/test -RUN apt-get update - -FROM base as builder -# Install the kitware repository for a recent cmake, the version on Ubuntu 18.04 is too old -RUN apt install -y gpg wget -RUN wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null -RUN echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ bionic main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null -RUN apt-get update && build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git gcc-11 g++-11 libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.81-dev libboost-url1.81-dev - -COPY . /app/ - -WORKDIR /app/ -ENV DEBIAN_FRONTEND=noninteractive - -WORKDIR /app/build/ -RUN cmake -DCMAKE_BUILD_TYPE=Release -DJEMALLOC_MANUALLY_INSTALLED=True -DCMAKE_CXX_COMPILER="g++-11" -DLOGLEVEL=INFO -DUSE_PARALLEL=true -GNinja .. 
&& ninja -RUN make test - -FROM base as runtime -WORKDIR /app -ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime lbzip2 libjemalloc-dev libzstd-dev libssl-dev - -ARG UID=1000 -RUN groupadd -r qlever && useradd --no-log-init -r -u $UID -g qlever qlever && chown qlever:qlever /app -USER qlever -ENV PATH=/app/:$PATH - -COPY --from=builder /app/build/*Main /app/ -COPY --from=builder /app/e2e/* /app/e2e/ -ENV PATH=/app/:$PATH - -USER qlever -EXPOSE 7001 -VOLUME ["/input", "/index"] - -ENV INDEX_PREFIX index -ENV MEMORY_FOR_QUERIES 70 -ENV CACHE_MAX_SIZE_GB 30 -ENV CACHE_MAX_SIZE_GB_SINGLE_ENTRY 5 -ENV CACHE_MAX_NUM_ENTRIES 1000 -# Need the shell to get the INDEX_PREFIX environment variable -ENTRYPOINT ["/bin/sh", "-c", "exec ServerMain -i \"/index/${INDEX_PREFIX}\" -j 8 -m ${MEMORY_FOR_QUERIES} -c ${CACHE_MAX_SIZE_GB} -e ${CACHE_MAX_SIZE_GB_SINGLE_ENTRY} -k ${CACHE_MAX_NUM_ENTRIES} -p 7001 \"$@\"", "--"] - -# Build image: docker build -t qlever.master . - -# Build index: DB=wikidata; docker run -it --rm -v "$(pwd)":/index --entrypoint bash --name qlever.$DB-index qlever.master -c "IndexBuilderMain -f /index/$DB.nt -i /index/$DB -s /index/$DB.settings.json | tee /index/$DB.index-log.txt"; rm -f $DB/*tmp* - -# Run engine: DB=wikidata; PORT=7001; docker rm -f qlever.$DB; docker run -d --restart=unless-stopped -v "$(pwd)":/index -p $PORT:7001 -e INDEX_PREFIX=$DB -e MEMORY_FOR_QUERIES=30 --name qlever.$DB qlever.master; docker logs -f --tail=100 qlever.$DB diff --git a/Dockerfiles/Dockerfile.Ubuntu20.04 b/Dockerfiles/Dockerfile.Ubuntu20.04 index 3d9da94d24..abb5e71e95 100644 --- a/Dockerfiles/Dockerfile.Ubuntu20.04 +++ b/Dockerfiles/Dockerfile.Ubuntu20.04 @@ -1,3 +1,7 @@ +# This Dockerfile is DEPRECATED, use the latest Dockerfile from the repository. +# +# The only reason it is here is to document how to install QLever on Ubuntu 20.04. 
+ FROM ubuntu:20.04 as base LABEL maintainer="Johannes Kalmbach " ENV LANG C.UTF-8 @@ -5,13 +9,14 @@ ENV LC_ALL C.UTF-8 ENV LC_CTYPE C.UTF-8 ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update -RUN apt install -y software-properties-common +RUN apt install -y software-properties-common wget RUN add-apt-repository -y ppa:mhier/libboost-latest RUN add-apt-repository -y ppa:ubuntu-toolchain-r/test +RUN wget https://apt.kitware.com/kitware-archive.sh && chmod +x kitware-archive.sh &&./kitware-archive.sh RUN apt-get update FROM base as builder -RUN apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git gcc-11 g++-11 libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.81-dev libboost-url1.81-dev +RUN apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git gcc-11 g++-11 libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.81-dev libboost-url1.81-dev libboost-iostreams1.81-dev libboost-program-options1.81 COPY . 
/app/ @@ -25,7 +30,7 @@ RUN make test FROM base as runtime WORKDIR /app ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime lbzip2 libjemalloc-dev libzstd-dev libssl-dev +RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.81 libstdc++6 ARG UID=1000 RUN groupadd -r qlever && useradd --no-log-init -r -u $UID -g qlever qlever && chown qlever:qlever /app @@ -34,22 +39,5 @@ ENV PATH=/app/:$PATH COPY --from=builder /app/build/*Main /app/ COPY --from=builder /app/e2e/* /app/e2e/ -ENV PATH=/app/:$PATH - -USER qlever -EXPOSE 7001 -VOLUME ["/input", "/index"] - -ENV INDEX_PREFIX index -ENV MEMORY_FOR_QUERIES 70 -ENV CACHE_MAX_SIZE_GB 30 -ENV CACHE_MAX_SIZE_GB_SINGLE_ENTRY 5 -ENV CACHE_MAX_NUM_ENTRIES 1000 -# Need the shell to get the INDEX_PREFIX environment variable -ENTRYPOINT ["/bin/sh", "-c", "exec ServerMain -i \"/index/${INDEX_PREFIX}\" -j 8 -m ${MEMORY_FOR_QUERIES} -c ${CACHE_MAX_SIZE_GB} -e ${CACHE_MAX_SIZE_GB_SINGLE_ENTRY} -k ${CACHE_MAX_NUM_ENTRIES} -p 7001 \"$@\"", "--"] - -# Build image: docker build -t qlever.master . 
- -# Build index: DB=wikidata; docker run -it --rm -v "$(pwd)":/index --entrypoint bash --name qlever.$DB-index qlever.master -c "IndexBuilderMain -f /index/$DB.nt -i /index/$DB -s /index/$DB.settings.json | tee /index/$DB.index-log.txt"; rm -f $DB/*tmp* -# Run engine: DB=wikidata; PORT=7001; docker rm -f qlever.$DB; docker run -d --restart=unless-stopped -v "$(pwd)":/index -p $PORT:7001 -e INDEX_PREFIX=$DB -e MEMORY_FOR_QUERIES=30 --name qlever.$DB qlever.master; docker logs -f --tail=100 qlever.$DB +ENTRYPOINT ["bash"] diff --git a/Dockerfiles/Dockerfile.Ubuntu22.04 b/Dockerfiles/Dockerfile.Ubuntu22.04 index 46ae129e3e..27fdb33602 100644 --- a/Dockerfiles/Dockerfile.Ubuntu22.04 +++ b/Dockerfiles/Dockerfile.Ubuntu22.04 @@ -1,10 +1,15 @@ +# This Dockerfile is DEPRECATED, use the latest Dockerfile from the repository. +# +# The only reason it is here is to document how to install QLever on Ubuntu 22.04. + FROM ubuntu:22.04 as base LABEL maintainer="Johannes Kalmbach " ENV LANG C.UTF-8 ENV LC_ALL C.UTF-8 ENV LC_CTYPE C.UTF-8 ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y software-properties-common && add-apt-repository -y ppa:mhier/libboost-latest +RUN apt-get update && apt-get install -y software-properties-common wget && add-apt-repository -y ppa:mhier/libboost-latest +RUN wget https://apt.kitware.com/kitware-archive.sh && chmod +x kitware-archive.sh &&./kitware-archive.sh FROM base as builder RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.81-dev libboost-program-options1.81-dev libboost-iostreams1.81-dev libboost-url1.81-dev @@ -30,22 +35,5 @@ ENV PATH=/app/:$PATH COPY --from=builder /app/build/*Main /app/ COPY --from=builder /app/e2e/* /app/e2e/ -ENV PATH=/app/:$PATH - -USER qlever -EXPOSE 7001 -VOLUME ["/input", "/index"] - -ENV INDEX_PREFIX index -ENV MEMORY_FOR_QUERIES 70 -ENV CACHE_MAX_SIZE_GB 30 
-ENV CACHE_MAX_SIZE_GB_SINGLE_ENTRY 5 -ENV CACHE_MAX_NUM_ENTRIES 1000 -# Need the shell to get the INDEX_PREFIX environment variable -ENTRYPOINT ["/bin/sh", "-c", "exec ServerMain -i \"/index/${INDEX_PREFIX}\" -j 8 -m ${MEMORY_FOR_QUERIES} -c ${CACHE_MAX_SIZE_GB} -e ${CACHE_MAX_SIZE_GB_SINGLE_ENTRY} -k ${CACHE_MAX_NUM_ENTRIES} -p 7001 \"$@\"", "--"] - -# Build image: docker build -t qlever.master . - -# Build index: DB=wikidata; docker run -it --rm -v "$(pwd)":/index --entrypoint bash --name qlever.$DB-index qlever.master -c "IndexBuilderMain -f /index/$DB.nt -i /index/$DB -s /index/$DB.settings.json | tee /index/$DB.index-log.txt"; rm -f $DB/*tmp* -# Run engine: DB=wikidata; PORT=7001; docker rm -f qlever.$DB; docker run -d --restart=unless-stopped -v "$(pwd)":/index -p $PORT:7001 -e INDEX_PREFIX=$DB -e MEMORY_FOR_QUERIES=30 --name qlever.$DB qlever.master; docker logs -f --tail=100 qlever.$DB +ENTRYPOINT ["bash"] diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index d7822fff6b..bb33190f2e 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -14,30 +14,38 @@ # Help message that is printed if the container is not startes as recommended. 
HELP_MESSAGE=' -The recommended way to run a container with this image is as follows: +The recommended way to run a container with this image is as follows (run in a fresh directory, and adapt the ports to your needs): -In interactive mode: +In batch mode: -\x1b[34mdocker run -it --rm -e UID=$(id -u) -e GID=$(id -g) -v $(pwd):/data -w /data qlever\x1b[0m +\x1b[34mdocker run -it --rm -e UID=$(id -u) -e GID=$(id -g) -p 7019:7019 -v $(pwd):/data -w /data qlever -c "qlever setup-config olympics && qlever get-data && qlever index && qlever start && qlever example-queries"\x1b[0m -In batch mode (example, add `-p :` for outside access to the server): +In interactive mode (you can then call `qlever` inside the container): -\x1b[34mdocker run -it --rm -e UID=$(id -u) -e GID=$(id -g) -v $(pwd):/data -w /data qlever -c "qlever setup-config olympics && qlever get-data && qlever index && qlever start && qlever example-queries"\x1b[0m +\x1b[34mdocker run -it --rm -e UID=$(id -u) -e GID=$(id -g) -p 7019:7019 -v $(pwd):/data -w /data qlever\x1b[0m + +If you prefer `-u $(id -u):$(id -g)`, set the entrypoint to `bash`: + +\x1b[34mdocker run -it --rm -u $(id -u):$(id -g) -p 7019:7019 -v $(pwd):/data -w /data --entrypoint bash qlever -c "..."\x1b[0m + +Explanation: With the first two options, there will be a user `qlever` inside the container, which acts like you when reading or writing outside files. With the third option, the user inside the container is you, but without a proper user and group name (which is fine for batch mode, but ugly in interactive mode). ' -# Helper function to print an error message (in red) and the help message. -ERROR() { +# Show the `HELP_MESSAGE`. For now, don't show `$1` (see below), but start +# with a friendly welcome instead. 
+HELP() { echo - echo -e "\x1b[31m$1\x1b[0m" + # echo -e "\x1b[31m$1\x1b[0m" + echo -e "\x1b[34mWELCOME TO THE QLEVER DOCKER IMAGE\x1b[0m" echo -e "$HELP_MESSAGE" exit 1 } # Check that UID and GID are set and that the working directory is `/data`. if [ -z "$UID" ] || [ -z "$GID" ]; then - ERROR "Environment variables UID and GID not set" + HELP "Environment variables UID and GID not set" elif [ "$(pwd)" != "/data" ]; then - ERROR "The working directory should be /data, but it is $(pwd)" + HELP "The working directory should be /data, but it is $(pwd)" fi # If `docker run` is run without arguments, start an interactive shell. Otherwise, From e3ae2b7a728cfc233f5a62f734b6e83fc2850076 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Wed, 4 Dec 2024 04:46:31 +0100 Subject: [PATCH 11/13] More final tweaks + also works with Podman now! --- Dockerfile | 34 ++++++++++++++++--------- docker-entrypoint.sh | 59 +++++++++++++++++++++++++------------------- 2 files changed, 56 insertions(+), 37 deletions(-) diff --git a/Dockerfile b/Dockerfile index cc3845520e..fc0dcc6619 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,12 +7,13 @@ ENV LC_ALL=C.UTF-8 ENV LC_CTYPE=C.UTF-8 ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y software-properties-common wget && add-apt-repository -y ppa:mhier/libboost-latest -RUN wget https://apt.kitware.com/kitware-archive.sh && chmod +x kitware-archive.sh &&./kitware-archive.sh -# Stage 1: Build the binaries. +# Install the packages needed for building the binaries (this is a separate +# stage to keep the final image small). 
FROM base AS builder ARG TARGETPLATFORM ENV DEBIAN_FRONTEND=noninteractive +RUN wget https://apt.kitware.com/kitware-archive.sh && chmod +x kitware-archive.sh &&./kitware-archive.sh RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev # Copy everything we need to build the binaries. @@ -37,26 +38,37 @@ RUN if [ $TARGETPLATFORM = "linux/arm64" ] ; then echo "target is ARM64, don't RUN if [ $TARGETPLATFORM = "linux/arm64" ] ; then cmake --build . --target IndexBuilderMain ServerMain; else cmake --build . ; fi RUN if [ $TARGETPLATFORM = "linux/arm64" ] ; then echo "Skipping tests for ARM64" ; else ctest --rerun-failed --output-on-failure ; fi -# Stage 2: Create the final image. +# Install the packages needed for the final image. FROM base AS runtime WORKDIR /qlever ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y wget python3-yaml unzip curl bzip2 pkg-config libicu-dev python3-icu libgomp1 uuid-runtime make lbzip2 libjemalloc-dev libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev pipx bash-completion vim sudo && rm -rf /var/lib/apt/lists/* # Set up user `qlever` with temporary sudo rights (which will be removed again -# by the `docker-entrypoint.sh` script, see the comments in that file). +# by the `docker-entrypoint.sh` script, see there). RUN groupadd -r qlever && useradd --no-log-init -d /qlever -r -g qlever qlever RUN chown qlever:qlever /qlever RUN echo "qlever ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers + +# Set up a profile script that is sourced whenever a new login shell is +# started (by any user, hence in `/etc/profile.d/`). For some reason, podman +# wants it in `/qlever/.bashrc`, so we also copy it there. 
+ENV QLEVER_PROFILE=/etc/profile.d/qlever.sh +RUN echo "eval \"\$(register-python-argcomplete qlever)\"" >> $QLEVER_PROFILE +RUN echo "export QLEVER_ARGCOMPLETE_ENABLED=1 && export QLEVER_IS_RUNNING_IN_CONTAINER=1" >> $QLEVER_PROFILE +RUN echo "PATH=/qlever:/qlever/.local/bin:$PATH && PS1=\"\u@docker:\W\$ \"" >> $QLEVER_PROFILE +RUN echo 'alias ll="ls -l"' >> $QLEVER_PROFILE +RUN echo "if [ -d /data ]; then cd /data; fi" >> $QLEVER_PROFILE +RUN cp $QLEVER_PROFILE /qlever/.bashrc + +# Install the `qlever` command line tool. We have t set the two environment +# variables again here because in batch mode, the profile script above is not +# sourced. The `PATH` is set again to avoid a warning from `pipx`. USER qlever -RUN pipx install qlever ENV PATH=/qlever:/qlever/.local/bin:$PATH -RUN echo "eval \"\$(register-python-argcomplete qlever)\"" >> /qlever/.bashrc -RUN echo "export QLEVER_ARGCOMPLETE_ENABLED=1 && export QLEVER_IS_RUNNING_IN_CONTAINER=1" >> /qlever/.bashrc -RUN echo "PATH=$PATH && PS1=\"\u@docker:\W\$ \"" >> /qlever/.bashrc -RUN echo 'alias ll="ls -l"' >> /qlever/.bashrc -RUN echo "cd /data" >> /qlever/.bashrc -RUN echo "source /qlever/.bashrc" >> /qlever/.bash_profile +RUN pipx install qlever +ENV QLEVER_ARGCOMPLETE_ENABLED=1 +ENV QLEVER_IS_RUNNING_IN_CONTAINER=1 # Copy the binaries and the entrypoint script. COPY --from=builder /qlever/build/*Main /qlever/ diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index bb33190f2e..7e30dc510e 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -14,49 +14,56 @@ # Help message that is printed if the container is not startes as recommended. HELP_MESSAGE=' -The recommended way to run a container with this image is as follows (run in a fresh directory, and adapt the ports to your needs): +The recommended way to run a container with this image is as follows. Run in a fresh directory. Add `-p :` if you want to expose ports. 
Inside the container, the `qlever` command-line tool is available, as well as the QLever binaries (which you need not call directly, they are called by the various `qlever` commands). -In batch mode: +In batch mode (user `qlever` inside the container, with the same UID and GID as outside): -\x1b[34mdocker run -it --rm -e UID=$(id -u) -e GID=$(id -g) -p 7019:7019 -v $(pwd):/data -w /data qlever -c "qlever setup-config olympics && qlever get-data && qlever index && qlever start && qlever example-queries"\x1b[0m +\x1b[34mdocker run -it --rm -e UID=$(id -u) -e GID=$(id -g) -v $(pwd):/data -w /data qlever -c "qlever setup-config olympics && qlever get-data && qlever index"\x1b[0m -In interactive mode (you can then call `qlever` inside the container): +The same, but in interactive mode: -\x1b[34mdocker run -it --rm -e UID=$(id -u) -e GID=$(id -g) -p 7019:7019 -v $(pwd):/data -w /data qlever\x1b[0m +\x1b[34mdocker run -it --rm -e UID=$(id -u) -e GID=$(id -g) -v $(pwd):/data -w /data qlever\x1b[0m -If you prefer `-u $(id -u):$(id -g)`, set the entrypoint to `bash`: +It also works with `-u $(id -u):$(id -g)` (but then the user inside the container has no proper name): -\x1b[34mdocker run -it --rm -u $(id -u):$(id -g) -p 7019:7019 -v $(pwd):/data -w /data --entrypoint bash qlever -c "..."\x1b[0m +\x1b[34mdocker run -it --rm -u $(id -u):$(id -g) -v $(pwd):/data -w /data qlever\x1b[0m +\x1b[34mdocker run -it --rm -u $(id -u):$(id -g) -v $(pwd):/data -w /data qlever -c "..."\x1b[0m -Explanation: With the first two options, there will be a user `qlever` inside the container, which acts like you when reading or writing outside files. With the third option, the user inside the container is you, but without a proper user and group name (which is fine for batch mode, but ugly in interactive mode). 
+With podman you should use `-u $(id -u):$(id -g)` together with `--userns=keep-id`: + +\x1b[34mpodman run -it --rm -u $(id -u):$(id -g) --userns=keep-id -v $(pwd):/data -w /data qlever\x1b[0m +\x1b[34mpodman run -it --rm -u $(id -u):$(id -g) --userns=keep-id -v $(pwd):/data -w /data qlever -c "..."\x1b[0m ' -# Show the `HELP_MESSAGE`. For now, don't show `$1` (see below), but start -# with a friendly welcome instead. -HELP() { +# If the container is run without `-v ...:/data -w /data` (in particular, without +# any arguments), show the help message and exit. +if [ "$(pwd)" != "/data" ]; then echo - # echo -e "\x1b[31m$1\x1b[0m" echo -e "\x1b[34mWELCOME TO THE QLEVER DOCKER IMAGE\x1b[0m" echo -e "$HELP_MESSAGE" exit 1 -} +fi + +# If the container is run with arguments, but the first argument is not `-c`, +# prepend `-c` to the arguments (so that the user can omit the `-c`). +if [[ $# -gt 0 && "$1" != "-c" ]]; then + set -- -c "$@" +fi -# Check that UID and GID are set and that the working directory is `/data`. -if [ -z "$UID" ] || [ -z "$GID" ]; then - HELP "Environment variables UID and GID not set" -elif [ "$(pwd)" != "/data" ]; then - HELP "The working directory should be /data, but it is $(pwd)" +# If the user is not `qlever` (for example, because `-u ...` was specified), +# start a new login shell (to make sure that the profile script from the +# Dockerfile is executed). +if ! whoami > /dev/null || [ "$(whoami)" != "qlever" ]; then + exec bash --login "$@" fi -# If `docker run` is run without arguments, start an interactive shell. Otherwise, -# run the sequence of commands given as arguments (the first argument may be the -# standard `-c`, but it can also be omitted). +# With `-e UID=... -e GID=...`, change the UID and GID of the user `qlever` inside +# the container accordingly. # -# NOTE: The call `su - qlever ...` has to be inside of the `sudo` call, because once -# the UID and GID of the user `qlever` have been changed, it no longer has `sudo` -# rights.
And just remaining in the shell or starting a new shell (with `bash`) does -# not work, because neither of these would have the new UID and GID. Hence also -# the slight code duplication. +# NOTE: The call `su - qlever ...` has to be inside of the `sudo` call, because +# once the UID and GID of the user `qlever` have been changed, it no longer has +# `sudo` rights. And just remaining in the shell or starting a new shell (with +# `bash`) does not work, because neither of these would have the new UID and GID. if [ $# -eq 0 ]; then echo sudo -E bash -c "usermod -u $UID -s /bin/bash qlever && groupmod -g $GID qlever && chown -R qlever:qlever /qlever && su - qlever --login" From 562c948f66e5856f4dcc5b12f5ab72b1f7e05c47 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Wed, 4 Dec 2024 06:07:22 +0100 Subject: [PATCH 12/13] Update Dockerfile Co-authored-by: Ludovic Muller --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index fc0dcc6619..f0a198e22d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,7 +13,7 @@ RUN apt-get update && apt-get install -y software-properties-common wget && add- FROM base AS builder ARG TARGETPLATFORM ENV DEBIAN_FRONTEND=noninteractive -RUN wget https://apt.kitware.com/kitware-archive.sh && chmod +x kitware-archive.sh &&./kitware-archive.sh +RUN wget https://apt.kitware.com/kitware-archive.sh && chmod +x kitware-archive.sh && ./kitware-archive.sh RUN apt-get update && apt-get install -y build-essential cmake libicu-dev tzdata pkg-config uuid-runtime uuid-dev git libjemalloc-dev ninja-build libzstd-dev libssl-dev libboost1.83-dev libboost-program-options1.83-dev libboost-iostreams1.83-dev libboost-url1.83-dev # Copy everything we need to build the binaries. 
From 73ed78d1c95b8855886f9de90f6d178f7cd33f80 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Wed, 4 Dec 2024 06:08:14 +0100 Subject: [PATCH 13/13] Update Dockerfile Co-authored-by: Ludovic Muller --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index f0a198e22d..757cadd258 100644 --- a/Dockerfile +++ b/Dockerfile @@ -61,7 +61,7 @@ RUN echo 'alias ll="ls -l"' >> $QLEVER_PROFILE RUN echo "if [ -d /data ]; then cd /data; fi" >> $QLEVER_PROFILE RUN cp $QLEVER_PROFILE /qlever/.bashrc -# Install the `qlever` command line tool. We have t set the two environment +# Install the `qlever` command line tool. We have to set the two environment # variables again here because in batch mode, the profile script above is not # sourced. The `PATH` is set again to avoid a warning from `pipx`. USER qlever