Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v2-lib: pass kfd device if use_all_gpus is selected and kfd device exists #1105

Merged
merged 19 commits into from
Dec 5, 2024
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 2 additions & 0 deletions library/2.0.32/devices.py → library/2.0.33/devices.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ def _auto_add_devices_from_values(self):

if resources.get("gpus", {}).get("use_all_gpus", False):
self.add_device("/dev/dri", "/dev/dri", allow_disallowed=True)
if resources["gpus"].get("kfd_device_exists", False):
self.add_device("/dev/kfd", "/dev/kfd", allow_disallowed=True) # AMD ROCm

def add_device(self, host_device: str, container_device: str, cgroup_perm: str = "", allow_disallowed=False):
# Host device can be mapped to multiple container devices,
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,16 @@ def test_automatically_add_gpu_devices(mock_values):
assert output["services"]["test_container"]["group_add"] == [44, 107, 568]


def test_automatically_add_gpu_devices_and_kfd(mock_values):
mock_values["resources"] = {"gpus": {"use_all_gpus": True, "kfd_device_exists": True}}
render = Render(mock_values)
c1 = render.add_container("test_container", "test_image")
c1.healthcheck.disable()
output = render.render()
assert output["services"]["test_container"]["devices"] == ["/dev/dri:/dev/dri", "/dev/kfd:/dev/kfd"]
assert output["services"]["test_container"]["group_add"] == [44, 107, 568]


def test_remove_gpu_devices(mock_values):
mock_values["resources"] = {"gpus": {"use_all_gpus": True}}
render = Render(mock_values)
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def _valid_path_or_raise(path: str):


def allowed_device_or_raise(path: str):
disallowed_devices = ["/dev/dri", "/dev/bus/usb", "/dev/snd"]
disallowed_devices = ["/dev/dri", "/dev/kfd", "/dev/bus/usb", "/dev/snd"]
if path in disallowed_devices:
raise RenderError(f"Device [{path}] is not allowed to be manually added.")
return path
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion library/hashes.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
0.0.1: f074617a82a86d2a6cc78a4c8a4296fc9d168e456f12713e50c696557b302133
1.1.7: d05e43e25b7dc1736be6cc1efa4b9255368aa346e3e7a4350a38440f29b73186
2.0.32: 4a0bf69cccda322e191eab36ab81ca6d0c8e5d64a0b2fa117c609804b55b86c6
2.0.33: 167a3532cd10bbb030a484b3b2a84248fad6d59e9ad0338ad0bb6c543dfca949
Loading