Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Remove cudf._lib.utils in favor of python APIs #17625

Merged
2 commits merged on Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# the License.
# =============================================================================

set(cython_sources column.pyx scalar.pyx strings_udf.pyx types.pyx utils.pyx)
set(cython_sources column.pyx scalar.pyx strings_udf.pyx types.pyx)
set(linked_libraries cudf::cudf)

rapids_cython_create_modules(
Expand Down
Empty file.
6 changes: 0 additions & 6 deletions python/cudf/cudf/_lib/utils.pxd

This file was deleted.

94 changes: 0 additions & 94 deletions python/cudf/cudf/_lib/utils.pyx

This file was deleted.

4 changes: 3 additions & 1 deletion python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,7 +861,9 @@ def _quantile_table(
column_order,
null_precedence,
)
columns = libcudf.utils.columns_from_pylibcudf_table(plc_table)
columns = [
ColumnBase.from_pylibcudf(col) for col in plc_table.columns()
]
return self._from_columns_like_self(
columns,
column_names=self._column_names,
Expand Down
13 changes: 10 additions & 3 deletions python/cudf/cudf/io/avro.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pylibcudf as plc

import cudf
from cudf._lib.utils import data_from_pylibcudf_io
from cudf._lib.column import Column
from cudf.utils import ioutils


Expand Down Expand Up @@ -46,5 +46,12 @@ def read_avro(
options.set_columns(columns)

plc_result = plc.io.avro.read_avro(options)

return cudf.DataFrame._from_data(*data_from_pylibcudf_io(plc_result))
data = {
name: Column.from_pylibcudf(col)
for name, col in zip(
plc_result.column_names(include_children=False),
plc_result.columns,
strict=True,
)
}
return cudf.DataFrame._from_data(data)
16 changes: 12 additions & 4 deletions python/cudf/cudf/io/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
import pylibcudf as plc

import cudf
from cudf._lib.column import Column
from cudf._lib.types import dtype_to_pylibcudf_type
from cudf._lib.utils import data_from_pylibcudf_io
from cudf.api.types import is_hashable, is_scalar
from cudf.core.buffer import acquire_spill_lock
from cudf.utils import ioutils
Expand Down Expand Up @@ -251,9 +251,17 @@ def read_csv(
if na_values is not None:
options.set_na_values([str(val) for val in na_values])

df = cudf.DataFrame._from_data(
*data_from_pylibcudf_io(plc.io.csv.read_csv(options))
)
table_w_meta = plc.io.csv.read_csv(options)
data = {
name: Column.from_pylibcudf(col)
for name, col in zip(
table_w_meta.column_names(include_children=False),
table_w_meta.columns,
strict=True,
)
}

df = cudf.DataFrame._from_data(data)

if isinstance(dtype, abc.Mapping):
for k, v in dtype.items():
Expand Down
26 changes: 14 additions & 12 deletions python/cudf/cudf/io/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import cudf
from cudf._lib.column import Column
from cudf._lib.types import dtype_to_pylibcudf_type
from cudf._lib.utils import _data_from_columns, data_from_pylibcudf_io
from cudf.core.buffer import acquire_spill_lock
from cudf.utils import ioutils
from cudf.utils.dtypes import _maybe_convert_to_default_type
Expand Down Expand Up @@ -172,13 +171,11 @@ def read_json(
)
)
)
df = cudf.DataFrame._from_data(
*_data_from_columns(
columns=[Column.from_pylibcudf(col) for col in res_cols],
column_names=res_col_names,
index_names=None,
)
)
data = {
name: Column.from_pylibcudf(col)
for name, col in zip(res_col_names, res_cols, strict=True)
}
df = cudf.DataFrame._from_data(data)
ioutils._add_df_col_struct_names(df, res_child_names)
return df
else:
Expand All @@ -201,10 +198,15 @@ def read_json(
extra_parameters=kwargs,
)
)

df = cudf.DataFrame._from_data(
*data_from_pylibcudf_io(table_w_meta)
)
data = {
name: Column.from_pylibcudf(col)
for name, col in zip(
table_w_meta.column_names(include_children=False),
table_w_meta.columns,
strict=True,
)
}
df = cudf.DataFrame._from_data(data)

# Post-processing to add in struct column names
ioutils._add_df_col_struct_names(df, table_w_meta.child_names)
Expand Down
37 changes: 31 additions & 6 deletions python/cudf/cudf/io/orc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
import pylibcudf as plc

import cudf
from cudf._lib.column import Column
from cudf._lib.types import dtype_to_pylibcudf_type
from cudf._lib.utils import data_from_pylibcudf_io
from cudf.api.types import is_list_like
from cudf.core.buffer import acquire_spill_lock
from cudf.core.index import _index_from_data
from cudf.utils import ioutils

try:
Expand Down Expand Up @@ -323,11 +324,35 @@ def read_orc(
actual_index_names = list(index_col_names.values())
col_names = names[len(actual_index_names) :]

data, index = data_from_pylibcudf_io(
tbl_w_meta,
col_names if columns is None else names,
actual_index_names,
)
result_col_names = col_names if columns is None else names
if actual_index_names is None:
index = None
data = {
name: Column.from_pylibcudf(col)
for name, col in zip(
result_col_names, tbl_w_meta.columns, strict=True
)
}
else:
result_columns = [
Column.from_pylibcudf(col) for col in tbl_w_meta.columns
]
index = _index_from_data(
dict(
zip(
actual_index_names,
result_columns[: len(actual_index_names)],
strict=True,
)
)
)
data = dict(
zip(
result_col_names,
result_columns[len(actual_index_names) :],
strict=True,
)
)

if is_range_index:
index = range_idx
Expand Down
29 changes: 14 additions & 15 deletions python/cudf/cudf/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,6 @@

import cudf
from cudf._lib.column import Column
from cudf._lib.utils import (
_data_from_columns,
data_from_pylibcudf_io,
)
from cudf.api.types import is_list_like
from cudf.core.buffer import acquire_spill_lock
from cudf.core.column import as_column, column_empty
Expand Down Expand Up @@ -1238,16 +1234,11 @@ def _read_parquet(
# Drop residual columns to save memory
tbl._columns[i] = None

df = cudf.DataFrame._from_data(
*_data_from_columns(
columns=[
Column.from_pylibcudf(plc)
for plc in concatenated_columns
],
column_names=column_names,
index_names=None,
)
)
data = {
name: Column.from_pylibcudf(col)
for name, col in zip(column_names, concatenated_columns)
}
df = cudf.DataFrame._from_data(data)
df = _process_metadata(
df,
column_names,
Expand Down Expand Up @@ -1287,8 +1278,16 @@ def _read_parquet(
options.set_filter(filters)

tbl_w_meta = plc.io.parquet.read_parquet(options)
data = {
name: Column.from_pylibcudf(col)
for name, col in zip(
tbl_w_meta.column_names(include_children=False),
tbl_w_meta.columns,
strict=True,
)
}

df = cudf.DataFrame._from_data(*data_from_pylibcudf_io(tbl_w_meta))
df = cudf.DataFrame._from_data(data)

df = _process_metadata(
df,
Expand Down
Loading