Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Share the additional container storage #766

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 1 addition & 45 deletions training/ilab-wrapper/ilab
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,6 @@

echo-err() { echo "$@" >&2; }

verify_range() {
subuid_range="$1"
username="$2"
NUMBER_OF_MATCHING_SUBUID_RANGES=$(if [[ -z "$subuid_range" ]]; then echo 0; else wc -l <<<"$subuid_range"; fi)

if [[ "$NUMBER_OF_MATCHING_SUBUID_RANGES" == 0 ]]; then
echo-err "No /etc/subuid range found for user $username ($UID)"
exit 1
elif [[ "$NUMBER_OF_MATCHING_SUBUID_RANGES" != 1 ]]; then
# TODO: Handle multiple subuid ranges. But for now, hard fail
echo-err "Multiple /etc/subuid ranges found for user $username ($UID), this is currently unsupported:"
echo-err "$subuid_range"
exit 1
fi
}

check_insights() {
if [[ -f /etc/insights-client/machine-id ]]; then
return
Expand Down Expand Up @@ -100,35 +84,7 @@ do
fi
done

# We run the container as sudo in order to be able to access the root container
# storage, which has the ilab image pre-pulled. But for security reasons we map
# root UID 0 inside the container to the current user's UID (and all the other
# subuids to the user's /etc/subuid range) so that we're effectively running
# the container as the current user.
#
# In the future, we will run podman as the current user, once we figure a
# reasonable way for the current user to access the root's user container
# storage.
if [[ "$UID" == 0 ]]; then
# If we're already running as root, we don't need to map any UIDs
IMPERSONATE_CURRENT_USER_PODMAN_FLAGS=()
else
CURRENT_USER_NAME=$(id --user --name)
CURRENT_USER_SUBUID_RANGE=$(awk \
--field-separator ':' \
--assign current_user="$CURRENT_USER_NAME" \
--assign current_uid="$UID" \
'$1 == current_user || $1 == current_uid {print $2 ":" $3}' \
/etc/subuid)

verify_range "$CURRENT_USER_SUBUID_RANGE" "$CURRENT_USER_NAME"

IMPERSONATE_CURRENT_USER_PODMAN_FLAGS=("--uidmap" "0:$UID" "--uidmap" "1:$CURRENT_USER_SUBUID_RANGE")
fi

PRESERVE_ENV="SSL_CERT_FILE,SSL_CERT_DIR,VLLM_LOGGING_LEVEL,NCCL_DEBUG,HOME,HF_TOKEN"
PODMAN_COMMAND=("sudo" "--preserve-env=$PRESERVE_ENV" "podman" "run" "--rm" "-it"
"${IMPERSONATE_CURRENT_USER_PODMAN_FLAGS[@]}"
PODMAN_COMMAND=("podman" "run" "--rm" "-it"
"--device" "${CONTAINER_DEVICE}"
"--security-opt" "label=disable" "--net" "host"
"--shm-size" "10G"
Expand Down
11 changes: 7 additions & 4 deletions training/nvidia-bootc/Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -187,12 +187,15 @@ VOLUME /var/lib/containers
RUN --mount=type=secret,id=${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson \
if [ -f "/run/.input/instructlab-nvidia/oci-layout" ]; then \
IID=$(podman --root /usr/lib/containers/storage pull oci:/run/.input/instructlab-nvidia) && \
podman --root /usr/lib/containers/storage image tag ${IID} ${INSTRUCTLAB_IMAGE}; \
podman --root /usr/lib/containers/storage --storage-opt overlay.force_mask=shared image tag ${IID} ${INSTRUCTLAB_IMAGE}; \
elif [ -f "/run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson" ]; then \
IID=$(sudo podman --root /usr/lib/containers/storage pull --authfile /run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson ${INSTRUCTLAB_IMAGE}); \
IID=$(sudo podman --root /usr/lib/containers/storage pull --storage-opt overlay.force_mask=shared --authfile /run/secrets/${INSTRUCTLAB_IMAGE_PULL_SECRET}/.dockerconfigjson ${INSTRUCTLAB_IMAGE}); \
else \
IID=$(sudo podman --root /usr/lib/containers/storage pull ${INSTRUCTLAB_IMAGE}); \
fi
IID=$(sudo podman --root /usr/lib/containers/storage --storage-opt overlay.force_mask=shared pull ${INSTRUCTLAB_IMAGE}); \
fi && \
chmod -R a+rX /usr/lib/containers
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are you also setting force_mask=shared in the storage.conf file?

If you do so, then podman will store the original permissions in an extended attribute, that it is used by fuse-overlays

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, it is not set. Should it be in the global storage.conf or it could be in the user config?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry for the delay.

It must be in the configuration file for the user that is creating the storage (i.e. runs podman pull). Also, when using --root, it tells Podman to ignore any other setting from the storage.conf, so in this case, it must be provided on the command line as a --storage-opt overlay.force_mask=shared

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @giuseppe. I updated the user storage.conf template and the Containerfile. Would you mind reviewing again, please?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, LGTM

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that bootc does not preserve these extended arguments, you will lose them


COPY containers-storage.conf /etc/skel/.config/containers/storage.conf

RUN podman system reset --force 2>/dev/null

Expand Down
17 changes: 17 additions & 0 deletions training/nvidia-bootc/containers-storage.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[storage]
driver = "overlay"

[storage.options]
size = ""
remap-uids = ""
remap-gids = ""
ignore_chown_errors = ""
remap-user = ""
remap-group = ""
skip_mount_home = ""
mount_program = "/usr/bin/fuse-overlayfs"
mountopt = ""
additionalimagestores = [ "/usr/lib/containers/storage",]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will need to allow non-root users access (at lest read) to this path.
Probably need to sort out selinux as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The read access is set in the Containerfile. We could limit to a specific group, too.


[storage.options.overlay]
force_mask = "shared"
46 changes: 1 addition & 45 deletions training/nvidia-bootc/duplicated/ilab-wrapper/ilab
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,6 @@

echo-err() { echo "$@" >&2; }

verify_range() {
subuid_range="$1"
username="$2"
NUMBER_OF_MATCHING_SUBUID_RANGES=$(if [[ -z "$subuid_range" ]]; then echo 0; else wc -l <<<"$subuid_range"; fi)

if [[ "$NUMBER_OF_MATCHING_SUBUID_RANGES" == 0 ]]; then
echo-err "No /etc/subuid range found for user $username ($UID)"
exit 1
elif [[ "$NUMBER_OF_MATCHING_SUBUID_RANGES" != 1 ]]; then
# TODO: Handle multiple subuid ranges. But for now, hard fail
echo-err "Multiple /etc/subuid ranges found for user $username ($UID), this is currently unsupported:"
echo-err "$subuid_range"
exit 1
fi
}

check_insights() {
if [[ -f /etc/insights-client/machine-id ]]; then
return
Expand Down Expand Up @@ -100,35 +84,7 @@ do
fi
done

# We run the container as sudo in order to be able to access the root container
# storage, which has the ilab image pre-pulled. But for security reasons we map
# root UID 0 inside the container to the current user's UID (and all the other
# subuids to the user's /etc/subuid range) so that we're effectively running
# the container as the current user.
#
# In the future, we will run podman as the current user, once we figure a
# reasonable way for the current user to access the root's user container
# storage.
if [[ "$UID" == 0 ]]; then
# If we're already running as root, we don't need to map any UIDs
IMPERSONATE_CURRENT_USER_PODMAN_FLAGS=()
else
CURRENT_USER_NAME=$(id --user --name)
CURRENT_USER_SUBUID_RANGE=$(awk \
--field-separator ':' \
--assign current_user="$CURRENT_USER_NAME" \
--assign current_uid="$UID" \
'$1 == current_user || $1 == current_uid {print $2 ":" $3}' \
/etc/subuid)

verify_range "$CURRENT_USER_SUBUID_RANGE" "$CURRENT_USER_NAME"

IMPERSONATE_CURRENT_USER_PODMAN_FLAGS=("--uidmap" "0:$UID" "--uidmap" "1:$CURRENT_USER_SUBUID_RANGE")
fi

PRESERVE_ENV="VLLM_LOGGING_LEVEL,NCCL_DEBUG,HOME,HF_TOKEN"
PODMAN_COMMAND=("sudo" "--preserve-env=$PRESERVE_ENV" "podman" "run" "--rm" "-it"
"${IMPERSONATE_CURRENT_USER_PODMAN_FLAGS[@]}"
PODMAN_COMMAND=("podman" "run" "--rm" "-it"
"--device" "${CONTAINER_DEVICE}"
"--security-opt" "label=disable" "--net" "host"
"--shm-size" "10G"
Expand Down