-
Notifications
You must be signed in to change notification settings - Fork 74
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP][RFC] New Image Builder Interface #817
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
python 3.10.12 | ||
poetry 1.5.1 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# truss-utils | ||
|
||
This package is a repository of common functions that help with developing | ||
production AI/ML models with Truss. | ||
|
||
## Use | ||
|
||
In the `model.py` of your truss, you can do something like the following: | ||
|
||
``` | ||
from truss_utils.image import pil_to_b64 | ||
|
||
class Model: | ||
... | ||
|
||
def predict(self, model_input): | ||
# call Stable diffusion | ||
... | ||
|
||
return pil_to_b64(image) | ||
``` |
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
[tool.poetry] | ||
name = "truss-build" | ||
version = "0.1.0" | ||
description = "Abstraction to build Docker images for Truss" | ||
authors = ["Truss Maintainers <[email protected]>"] | ||
license = "MIT" | ||
readme = "README.md" | ||
packages = [{ include = "truss", from = "./src" }] | ||
|
||
[tool.poetry.dependencies] | ||
python = ">=3.10,<3.12" | ||
docker = "^7.0.0" | ||
|
||
|
||
[build-system] | ||
requires = ["poetry-core"] | ||
build-backend = "poetry.core.masonry.api" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
__version__ = "0.1.0" | ||
|
||
from truss.build.image import Image | ||
|
||
if __name__ == "__main__":
    # Small demo: chain a few builder calls and print the generated
    # Dockerfile text to stdout.
    img = (
        Image()
        .apt_install("python3.10-venv")
        .pip_install("numpy", "torch")
        .env({"NOPROXY": "*"})
    )
    print(img.serialize())
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
from .image import Image | ||
import tempfile | ||
from pathlib import Path | ||
|
||
|
||
def build(image: "Image") -> Path:
    """Write the Dockerfile for ``image`` to a temporary file.

    The file is created with ``delete=False`` and a ``.Dockerfile`` suffix,
    so it is still on disk after this function returns; removing it is the
    caller's responsibility.

    :param image: Object whose ``serialize()`` returns the Dockerfile text.
    :return: Path of the Dockerfile that was written.
    """
    # Serialize before creating the file so a failure in ``serialize`` does
    # not leave an empty temporary file behind.
    contents = image.serialize()
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".Dockerfile", encoding="utf-8"
    ) as temp_dockerfile:
        # Write through the handle that is already open: re-opening the same
        # path while it is held open fails on Windows.
        temp_dockerfile.write(contents)
        dockerfile_path = Path(temp_dockerfile.name)
    print(f"Dockerfile created at: {dockerfile_path}")
    return dockerfile_path
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import shlex
from abc import ABC, abstractmethod
|
||
|
||
# TODO(bola): Support secrets | ||
class Command(ABC):
    """Base class for a single Dockerfile instruction.

    Subclasses store the instruction's arguments and render them as one line
    of Dockerfile text in :meth:`serialize`.
    """

    @abstractmethod
    def serialize(self) -> str:
        """Return this instruction as a line of Dockerfile text."""


class FromCommand(Command):
    """``FROM <image>[:<tag>][ AS <name>]`` instruction."""

    def __init__(self, image, tag=None, AS=None):
        self.image = image
        self.tag = tag
        self._as = AS

    def serialize(self) -> str:
        """Render the instruction; ``tag`` and ``AS`` are emitted only if set."""
        parts = [f"FROM {self.image}"]
        if self.tag is not None:
            parts.append(f":{self.tag}")
        if self._as is not None:
            parts.append(f" AS {self._as}")
        return "".join(parts)


class RunCommand(Command):
    """``RUN [--mount=<spec> ...] <command>`` instruction."""

    def __init__(self, command, mounts=None):
        self.command = command
        self.mounts = mounts

    def serialize(self) -> str:
        """Render the instruction with one ``--mount=`` flag per mount."""
        # ``self.command`` must not itself start with "RUN"; the keyword is
        # added here.
        parts = ["RUN"]
        if self.mounts is not None:
            parts.extend(f"--mount={mount}" for mount in self.mounts)
        parts.append(self.command)
        return " ".join(parts)


class CopyCommand(Command):
    """``COPY [--from=<source>] <src> <dst>`` instruction."""

    # TODO(bola): From should be an image object.
    def __init__(self, src, dst, FROM=None):
        self.src = src
        self.dst = dst
        self._from = FROM

    def serialize(self) -> str:
        """Render the instruction; ``--from`` is emitted only if truthy."""
        parts = ["COPY"]
        if self._from:
            parts.append(f"--from={self._from}")
        parts.append(f"{self.src} {self.dst}")
        return " ".join(parts)


class EntrypointCommand(Command):
    """``ENTRYPOINT <command>`` instruction; the command is emitted verbatim."""

    def __init__(self, command):
        self.command = command

    def serialize(self) -> str:
        """Render the instruction."""
        return f"ENTRYPOINT {self.command}"


class EnvCommand(Command):
    """``ENV <name>="<value>"`` instruction."""

    def __init__(self, name, value):
        self.name = name
        self.value = value

    def serialize(self) -> str:
        """Render the instruction with the value in double quotes.

        Backslashes and double quotes in the value are backslash-escaped so
        they survive the surrounding quotes. ``$`` is intentionally left
        alone so Dockerfile variable references (e.g. ``$PATH``) keep working.
        """
        # Shell-style single quoting inside the double quotes would end up as
        # literal quote characters in the variable's value, so escape by hand.
        escaped = self.value.replace("\\", "\\\\").replace('"', '\\"')
        return f'ENV {self.name}="{escaped}"'


class ExposeCommand(Command):
    """``EXPOSE <ports>`` instruction; ``ports`` is formatted with ``str()``."""

    def __init__(self, ports):
        self.ports = ports

    def serialize(self) -> str:
        """Render the instruction."""
        # NOTE(review): a Python list would render as "[80, 443]" here, so
        # callers presumably pass a single port or a ready-made string.
        return f"EXPOSE {self.ports}"


class VolumeCommand(Command):
    """``VOLUME <volumes>`` instruction; ``volumes`` is formatted with ``str()``."""

    def __init__(self, volumes):
        self.volumes = volumes

    def serialize(self) -> str:
        """Render the instruction."""
        return f"VOLUME {self.volumes}"


class WorkdirCommand(Command):
    """``WORKDIR <path>`` instruction with the path shell-quoted."""

    def __init__(self, path):
        self.path = path

    def serialize(self) -> str:
        """Render the instruction."""
        return f"WORKDIR {shlex.quote(self.path)}"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
class InvalidError(Exception):
    """Exception type for invalid-value conditions (raise sites not shown here)."""
|
||
|
||
class NotFoundError(Exception):
    """Exception type for not-found conditions (raise sites not shown here)."""
|
||
|
||
class RemoteError(Exception):
    """Exception type for remote-side failures (raise sites not shown here)."""
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
from .commands import * | ||
from typing import Dict, List, Optional, Union | ||
from pathlib import Path | ||
from .utils import flatten_str_args, make_pip_install_args | ||
import shlex | ||
|
||
# TODO(bola): support secrets | ||
|
||
|
||
class Image:
    """Fluent builder that accumulates Dockerfile instructions.

    Each builder method appends one or more ``Command`` objects and returns
    ``self`` so calls can be chained. :meth:`serialize` renders the
    accumulated commands as Dockerfile text.
    """

    # Header lines emitted at the top of every generated Dockerfile.
    _SYNTAX_DIRECTIVE = "# syntax = docker/dockerfile:1"  # Support BuildKit
    _GENERATED_NOTICE = "# Auto-generated by truss.build, do not edit!"

    def __init__(self) -> None:
        self._commands: list[Command] = []
        # Stored but not read by any method of this class yet.
        self._base_image: Optional[str] = None

    # TODO(support static constructors for different ways of pulling base image)
    # Image.from_dockerfile, from_aws_ecr, from_gcp_artifact_registry, from_registry
    # also can add common base images: Image.truss_base()....

    def serialize(self) -> str:
        """Return the Dockerfile text: two header lines, then one line per command.

        Every line, including the last, ends with a newline.
        """
        lines = [self._SYNTAX_DIRECTIVE, self._GENERATED_NOTICE]
        lines.extend(command.serialize() for command in self._commands)
        return "\n".join(lines) + "\n"

    def apt_install(
        self,
        *packages: Union[str, List[str]],
    ) -> "Image":
        """Install a list of Debian packages using `apt-get`.

        Adds an ``apt-get update`` step followed by an ``apt-get install -y``
        step with shell-quoted package names. Does nothing when no packages
        are given.

        **Example**

        ```python
        image = Image().apt_install("git")
        ```
        """
        # flatten_str_args comes from .utils (not shown here); presumably it
        # flattens the str / list-of-str varargs into a flat list of strings.
        pkgs = flatten_str_args("apt_install", "packages", packages)
        if not pkgs:
            return self
        package_args = " ".join(shlex.quote(pkg) for pkg in pkgs)
        self._commands.extend(
            [
                RunCommand("apt-get update"),
                # RunCommand adds the "RUN" keyword itself, so pass only the
                # shell command.
                RunCommand(f"apt-get install -y {package_args}"),
            ]
        )
        return self

    def run_commands(
        self,
        *commands: Union[str, List[str]],
    ) -> "Image":
        """Extend an image with a list of shell commands to run.

        One ``RUN`` step is added per command; nothing is added when no
        commands are given.
        """
        cmds = flatten_str_args("run_commands", "commands", commands)
        if not cmds:
            return self
        self._commands.extend(RunCommand(cmd) for cmd in cmds)
        return self

    def workdir(self, path: str) -> "Image":
        """Sets the working directory for subsequent image build steps.

        :param path: A path to set the workdir.

        **Example**

        ```python
        image = (
            Image()
            .run_commands("git clone https://xyz app")
            .workdir("/app")
            .run_commands("yarn install")
        )
        ```
        """
        self._commands.append(WorkdirCommand(path))
        return self

    def env(self, vars: Dict[str, str]) -> "Image":
        """
        Sets the environmental variables of the image.

        :param vars: A dictionary where keys are the names of the environment variables and values are the values to be set for those variables.

        **Example**

        ```python
        image = (
            Image()
            .env({"NOPROXY": "*"})
            .pip_install("numpy", "torch")
        )
        ```
        """
        # One ENV step per entry, in the dictionary's iteration order.
        self._commands.extend(EnvCommand(k, v) for k, v in vars.items())
        return self

    def pip_install(
        self,
        *packages: Union[str, List[str]],
        find_links: Optional[str] = None,
        index_url: Optional[str] = None,
        extra_index_url: Optional[str] = None,
        pre: bool = False,
    ) -> "Image":
        """
        Install a list of Python packages using pip.

        Adds a single ``python3 -m pip install`` step. Package names are
        sorted and shell-quoted; nothing is added when no packages are given.

        :param packages: A list of Python packages, e.g., ["numpy", "matplotlib>=3.5.0"]
        :param find_links: Passes -f (--find-links) to pip install
        :param index_url: Passes -i (--index-url) to pip install
        :param extra_index_url: Passes --extra-index-url to pip install
        :param pre: Passes --pre (allow pre-releases) to pip install

        **Example**

        ```python
        image = Image().pip_install("click", "httpx~=0.23.3")
        ```
        """
        pkgs = flatten_str_args("pip_install", "packages", packages)
        if not pkgs:
            return self
        # make_pip_install_args comes from .utils (not shown here); its result
        # is appended to the command line when truthy.
        extra_args = make_pip_install_args(find_links, index_url, extra_index_url, pre)
        package_args = " ".join(shlex.quote(pkg) for pkg in sorted(pkgs))
        cmd = f"python3 -m pip install {package_args}"
        if extra_args:
            cmd += f" {extra_args}"
        self._commands.append(RunCommand(cmd))
        return self

    def pip_install_from_requirements(
        self,
        requirements_txt: str,  # Path to a requirements.txt file.
        find_links: Optional[str] = None,  # Passes -f (--find-links) pip install
        *,
        index_url: Optional[str] = None,  # Passes -i (--index-url) to pip install
        extra_index_url: Optional[
            str
        ] = None,  # Passes --extra-index-url to pip install
        pre: bool = False,  # Passes --pre (allow pre-releases) to pip install
    ) -> "Image":
        """Install a list of Python packages from a local `requirements.txt` file.

        Not implemented yet.

        :raises NotImplementedError: always.
        """
        # Fail loudly instead of returning None, which would break chained
        # calls and silently skip the installation.
        raise NotImplementedError()

    def copy_local_file(
        self, local_path: Union[str, Path], remote_path: Union[str, Path] = "./"
    ) -> "Image":
        """Copy a file into the image as a part of building it.

        This works in a similar way to [`COPY`](https://docs.docker.com/engine/reference/builder/#copy) in a `Dockerfile`.

        Not implemented yet.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError()

    def copy_local_dir(
        self, local_path: Union[str, Path], remote_path: Union[str, Path] = "."
    ) -> "Image":
        """Copy a directory into the image as a part of building the image.

        This works in a similar way to [`COPY`](https://docs.docker.com/engine/reference/builder/#copy) in a `Dockerfile`.

        Not implemented yet.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add type annotations.