From 9c1f4a26b3c49b875ffc6ab706416105437cb964 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 19 Dec 2024 13:42:51 -0800 Subject: [PATCH 1/3] Avoid private cudf DeviceScalar in favor of using pylibcudf & pyarrow --- .../morpheus/morpheus/_lib/cudf_helpers.pyx | 33 ++++++------------- .../morpheus/_lib/cudf_helpers/__init__.pyi | 2 ++ 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/python/morpheus/morpheus/_lib/cudf_helpers.pyx b/python/morpheus/morpheus/_lib/cudf_helpers.pyx index fe0e96536..a61d7f1c6 100644 --- a/python/morpheus/morpheus/_lib/cudf_helpers.pyx +++ b/python/morpheus/morpheus/_lib/cudf_helpers.pyx @@ -35,15 +35,10 @@ from cudf._lib.column cimport Column # isort: off -# imports needed for get_element, which is required by from_column_view_with_fix -cimport pylibcudf.libcudf.copying as cpp_copying -from pylibcudf.libcudf.column.column_view cimport column_view -from libcpp.memory cimport make_unique, unique_ptr -from pylibcudf.libcudf.scalar.scalar cimport scalar -from pylibcudf cimport Table as plc_Table -from cudf._lib.scalar cimport DeviceScalar - # imports needed for from_column_view_with_fix +import pylibcudf as plc +from pylibcudf cimport Column as plc_Column, Table as plc_Table +from pylibcudf.libcudf.column.column_view cimport column_view import rmm from libc.stdint cimport uintptr_t from cudf.core.buffer import ( @@ -64,18 +59,6 @@ from cudf._lib.null_mask import bitmask_allocation_size_bytes # isort: on -cdef get_element(column_view col_view, size_type index): - - cdef unique_ptr[scalar] c_output - with nogil: - c_output = move( - cpp_copying.get_element(col_view, index) - ) - - return DeviceScalar.from_unique_ptr( - move(c_output), dtype=dtype_from_column_view(col_view) - ) - cdef Column from_column_view_with_fix(column_view cv, object owner): """ Given a ``cudf::column_view``, constructs a ``cudf.Column`` from it, @@ -118,9 +101,13 @@ cdef Column 
from_column_view_with_fix(column_view cv, object owner): if offset_child_column.size() == 0: base_nbytes = 0 else: - chars_size = get_element( - offset_child_column, offset_child_column.size()-1).value - base_nbytes = chars_size + offset_child_column_owner = owner.children[offsets_column_index] + plc_owner = offset_child_column_owner.to_pylibcudf(mode="read") + plc_offsets_col = plc_Column.from_column_view( + offset_child_column, plc_owner + ) + plc_scalar = plc.copying.get_element(plc_offsets_col, offset_child_column.size()-1) + base_nbytes = plc.interop.to_arrow(plc_scalar).as_py() if data_ptr: if data_owner is None: diff --git a/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi b/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi index bece30f67..166b0e42d 100644 --- a/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi +++ b/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi @@ -8,6 +8,7 @@ from cudf.core.dtypes import StructDtype import _cython_3_0_11 import cudf import itertools +import pylibcudf as plc import rmm __all__ = [ @@ -19,6 +20,7 @@ __all__ = [ "bitmask_allocation_size_bytes", "cudf", "itertools", + "plc", "rmm" ] From 70ffce74cefa42dd9f8061c86e9a636ddad54d36 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 3 Jan 2025 13:53:18 -0800 Subject: [PATCH 2/3] Add back get_element --- .../morpheus/morpheus/_lib/cudf_helpers.pyx | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/python/morpheus/morpheus/_lib/cudf_helpers.pyx b/python/morpheus/morpheus/_lib/cudf_helpers.pyx index a61d7f1c6..1611fa222 100644 --- a/python/morpheus/morpheus/_lib/cudf_helpers.pyx +++ b/python/morpheus/morpheus/_lib/cudf_helpers.pyx @@ -35,10 +35,15 @@ from cudf._lib.column cimport Column # isort: off -# imports needed for from_column_view_with_fix -import pylibcudf as plc -from pylibcudf cimport Column as plc_Column, Table as plc_Table +# imports needed for get_element,
which is required by from_column_view_with_fix +cimport pylibcudf.libcudf.copying as cpp_copying from pylibcudf.libcudf.column.column_view cimport column_view +from libcpp.memory cimport make_unique, unique_ptr +from pylibcudf.libcudf.scalar.scalar cimport scalar +from pylibcudf cimport Table as plc_Table, Scalar as plc_Scalar +import pylibcudf as plc + +# imports needed for from_column_view_with_fix import rmm from libc.stdint cimport uintptr_t from cudf.core.buffer import ( @@ -59,6 +64,18 @@ from cudf._lib.null_mask import bitmask_allocation_size_bytes # isort: on +cdef get_element(column_view col_view, size_type index): + + cdef unique_ptr[scalar] c_output + with nogil: + c_output = move( + cpp_copying.get_element(col_view, index) + ) + + plc_scalar = plc_Scalar.from_libcudf(move(c_output)) + return plc.interop.to_arrow(plc_scalar).as_py() + + cdef Column from_column_view_with_fix(column_view cv, object owner): """ Given a ``cudf::column_view``, constructs a ``cudf.Column`` from it, @@ -101,13 +118,9 @@ cdef Column from_column_view_with_fix(column_view cv, object owner): if offset_child_column.size() == 0: base_nbytes = 0 else: - offset_child_column_owner = owner.children[offsets_column_index] - plc_owner = offset_child_column_owner.to_pylibcudf(mode="read") - plc_offsets_col = plc_Column.from_column_view( - offset_child_column, plc_owner - ) - plc_scalar = plc.copying.get_element(plc_offsets_col, offset_child_column.size()-1) - base_nbytes = plc.interop.to_arrow(plc_scalar).as_py() + chars_size = get_element( + offset_child_column, offset_child_column.size()-1) + base_nbytes = chars_size if data_ptr: if data_owner is None: From 4d102ccbe83734a1225e971505a7bb82631c87a2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 3 Jan 2025 14:57:39 -0800 Subject: [PATCH 3/3] Update copyright year --- python/morpheus/morpheus/_lib/cudf_helpers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/python/morpheus/morpheus/_lib/cudf_helpers.pyx b/python/morpheus/morpheus/_lib/cudf_helpers.pyx index 1611fa222..84612b961 100644 --- a/python/morpheus/morpheus/_lib/cudf_helpers.pyx +++ b/python/morpheus/morpheus/_lib/cudf_helpers.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License");