From 0c9f29aa17092672f690375d9d07f1d16157d409 Mon Sep 17 00:00:00 2001
From: Peter Andreas Entschev
Date: Fri, 25 Sep 2020 14:00:40 -0700
Subject: [PATCH 1/3] Skip pyNVML to support Tegra devices

---
 dask_cuda/utils.py | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/dask_cuda/utils.py b/dask_cuda/utils.py
index 72c7736c1..c0934afc0 100644
--- a/dask_cuda/utils.py
+++ b/dask_cuda/utils.py
@@ -20,6 +20,15 @@ def nvtx_annotate(message=None, color="blue", domain=None):
         yield
 
 
+@toolz.memoize
+def _is_tegra():
+    import os
+
+    return os.path.isdir("/sys/class/tegra-firmware/") or os.path.isfile(
+        "/etc/nv_tegra_release"
+    )
+
+
 class CPUAffinity:
     def __init__(self, cores):
         self.cores = cores
@@ -96,8 +105,13 @@ def get_cpu_count():
 
 @toolz.memoize
 def get_gpu_count():
-    pynvml.nvmlInit()
-    return pynvml.nvmlDeviceGetCount()
+    if _is_tegra():
+        import numba.cuda
+
+        return len(numba.cuda.gpus)
+    else:
+        pynvml.nvmlInit()
+        return pynvml.nvmlDeviceGetCount()
 
 
 def get_cpu_affinity(device_index):
@@ -153,10 +167,15 @@ def get_device_total_memory(index=0):
     """
     Return total memory of CUDA device with index
     """
-    pynvml.nvmlInit()
-    return pynvml.nvmlDeviceGetMemoryInfo(
-        pynvml.nvmlDeviceGetHandleByIndex(index)
-    ).total
+    if _is_tegra():
+        import numba.cuda
+
+        return numba.cuda.current_context().get_memory_info()[1]
+    else:
+        pynvml.nvmlInit()
+        return pynvml.nvmlDeviceGetMemoryInfo(
+            pynvml.nvmlDeviceGetHandleByIndex(index)
+        ).total
 
 
 def get_ucx_net_devices(

From 287406af01929e2c3c7bd819f23e1492a9072c2b Mon Sep 17 00:00:00 2001
From: Peter Andreas Entschev
Date: Wed, 7 Oct 2020 11:17:00 -0700
Subject: [PATCH 2/3] Support for Tegra in get_cpu_affinity

---
 dask_cuda/utils.py | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/dask_cuda/utils.py b/dask_cuda/utils.py
index c0934afc0..a97135fb6 100644
--- a/dask_cuda/utils.py
+++ b/dask_cuda/utils.py
@@ -139,21 +139,24 @@ def get_cpu_affinity(device_index):
      40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
      60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79]
     """
-    pynvml.nvmlInit()
-
-    try:
-        # Result is a list of 64-bit integers, thus ceil(get_cpu_count() / 64)
-        affinity = pynvml.nvmlDeviceGetCpuAffinity(
-            pynvml.nvmlDeviceGetHandleByIndex(device_index),
-            math.ceil(get_cpu_count() / 64),
-        )
-        return unpack_bitmask(affinity)
-    except pynvml.NVMLError:
-        warnings.warn(
-            "Cannot get CPU affinity for device with index %d, setting default affinity"
-            % device_index
-        )
+    if _is_tegra():
         return list(range(get_cpu_count()))
+    else:
+        pynvml.nvmlInit()
+
+        try:
+            # Result is a list of 64-bit integers, thus ceil(get_cpu_count() / 64)
+            affinity = pynvml.nvmlDeviceGetCpuAffinity(
+                pynvml.nvmlDeviceGetHandleByIndex(device_index),
+                math.ceil(get_cpu_count() / 64),
+            )
+            return unpack_bitmask(affinity)
+        except pynvml.NVMLError:
+            warnings.warn(
+                "Cannot get CPU affinity for device with index %d, setting default affinity"
+                % device_index
+            )
+            return list(range(get_cpu_count()))
 
 
 def get_n_gpus():

From d2d5e30cf55345a149741cb7dd0d5ebb21191564 Mon Sep 17 00:00:00 2001
From: Peter Andreas Entschev
Date: Wed, 7 Oct 2020 12:48:58 -0700
Subject: [PATCH 3/3] Use CUDA driver via Numba to get memory info on Tegra

---
 dask_cuda/utils.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/dask_cuda/utils.py b/dask_cuda/utils.py
index a97135fb6..cb7c58805 100644
--- a/dask_cuda/utils.py
+++ b/dask_cuda/utils.py
@@ -171,9 +171,16 @@ def get_device_total_memory(index=0):
     Return total memory of CUDA device with index
     """
     if _is_tegra():
+        from ctypes import byref, c_size_t
         import numba.cuda
 
-        return numba.cuda.current_context().get_memory_info()[1]
+        driver = numba.cuda.driver.Driver()
+
+        numba.cuda.current_context()
+        free = c_size_t()
+        total = c_size_t()
+        driver.cuMemGetInfo(byref(free), byref(total))
+        return total.value
     else:
         pynvml.nvmlInit()
         return pynvml.nvmlDeviceGetMemoryInfo(