Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into cccl-set-32-bit-offset-type-with-cmake
Browse files Browse the repository at this point in the history
  • Loading branch information
bdice authored Dec 7, 2024
2 parents 816fc09 + 80fc629 commit c6041b3
Show file tree
Hide file tree
Showing 45 changed files with 2,187 additions and 1,597 deletions.
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ dependencies:
- cramjam
- cubinlinker
- cuda-nvtx=11.8
- cuda-python>=11.7.1,<12.0a0
- cuda-python>=11.8.5,<12.0a0
- cuda-sanitizer-api=11.8.86
- cuda-version=11.8
- cudatoolkit
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies:
- cuda-nvcc
- cuda-nvrtc-dev
- cuda-nvtx-dev
- cuda-python>=12.0,<13.0a0
- cuda-python>=12.6.2,<13.0a0
- cuda-sanitizer-api
- cuda-version=12.5
- cupy>=12.0.0
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ requirements:
- cudatoolkit
- ptxcompiler >=0.7.0
- cubinlinker # CUDA enhanced compatibility.
- cuda-python >=11.7.1,<12.0a0
- cuda-python >=11.8.5,<12.0a0
{% else %}
- cuda-cudart
- libcufile # [linux64]
Expand All @@ -100,7 +100,7 @@ requirements:
# TODO: Add nvjitlink here
# xref: https://github.com/rapidsai/cudf/issues/12822
- cuda-nvrtc
- cuda-python >=12.0,<13.0a0
- cuda-python >=12.6.2,<13.0a0
- pynvjitlink
{% endif %}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/pylibcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@ requirements:
- {{ pin_compatible('rmm', max_pin='x.x') }}
- fsspec >=0.6.0
{% if cuda_major == "11" %}
- cuda-python >=11.7.1,<12.0a0
- cuda-python >=11.8.5,<12.0a0
{% else %}
- cuda-python >=12.0,<13.0a0
- cuda-python >=12.6.2,<13.0a0
{% endif %}
- nvtx >=0.2.1
- packaging
Expand Down
59 changes: 39 additions & 20 deletions cpp/benchmarks/string/find.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,19 @@

static void bench_find_string(nvbench::state& state)
{
auto const n_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const max_width = static_cast<cudf::size_type>(state.get_int64("max_width"));
auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));
auto const api = state.get_string("api");

if (static_cast<std::size_t>(n_rows) * static_cast<std::size_t>(row_width) >=
static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
state.skip("Skip benchmarks greater than size_type limit");
}
auto const tgt_type = state.get_string("target");

auto const stream = cudf::get_default_stream();
auto const col = create_string_column(n_rows, row_width, hit_rate);
auto const col = create_string_column(num_rows, max_width, hit_rate);
auto const input = cudf::strings_column_view(col->view());

cudf::string_scalar target("0987 5W43");
auto target = cudf::string_scalar("0987 5W43");
auto targets_col = cudf::make_column_from_scalar(target, num_rows);
auto const targets = cudf::strings_column_view(targets_col->view());

state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
auto const chars_size = input.chars_size(stream);
Expand All @@ -55,23 +53,44 @@ static void bench_find_string(nvbench::state& state)
}

if (api == "find") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::find(input, target); });
if (tgt_type == "scalar") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::find(input, target); });
} else if (tgt_type == "column") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::find(input, targets); });
}
} else if (api == "contains") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::contains(input, target); });
if (tgt_type == "scalar") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::contains(input, target); });
} else if (tgt_type == "column") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::contains(input, targets); });
}
} else if (api == "starts_with") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::starts_with(input, target); });
if (tgt_type == "scalar") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::starts_with(input, target); });
} else if (tgt_type == "column") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::starts_with(input, targets); });
}
} else if (api == "ends_with") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::ends_with(input, target); });
if (tgt_type == "scalar") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::ends_with(input, target); });
} else if (tgt_type == "column") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { cudf::strings::ends_with(input, targets); });
}
}
}

NVBENCH_BENCH(bench_find_string)
.set_name("find_string")
.add_int64_axis("max_width", {32, 64, 128, 256})
.add_int64_axis("num_rows", {32768, 262144, 2097152})
.add_int64_axis("hit_rate", {20, 80}) // percentage
.add_string_axis("api", {"find", "contains", "starts_with", "ends_with"})
.add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024})
.add_int64_axis("num_rows", {260'000, 1'953'000, 16'777'216})
.add_int64_axis("hit_rate", {20, 80}); // percentage
.add_string_axis("target", {"scalar", "column"});
1 change: 1 addition & 0 deletions cpp/include/cudf/io/parquet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,7 @@ class parquet_reader_options_builder {
*
* @param val Boolean value whether to read matching projected and filter columns from mismatched
* Parquet sources.
*
* @return this for chaining.
*/
parquet_reader_options_builder& allow_mismatched_pq_schemas(bool val)
Expand Down
17 changes: 8 additions & 9 deletions cpp/include/cudf/strings/string_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -373,24 +373,23 @@ __device__ inline size_type string_view::find_impl(char const* str,
size_type pos,
size_type count) const
{
auto const nchars = length();
if (!str || pos < 0 || pos > nchars) return npos;
if (count < 0) count = nchars;
if (!str || pos < 0) { return npos; }
if (pos > 0 && pos > length()) { return npos; }

// use iterator to help reduce character/byte counting
auto itr = begin() + pos;
auto const itr = begin() + pos;
auto const spos = itr.byte_offset();
auto const epos = ((pos + count) < nchars) ? (itr + count).byte_offset() : size_bytes();
auto const epos =
(count >= 0) && ((pos + count) < length()) ? (itr + count).byte_offset() : size_bytes();

auto const find_length = (epos - spos) - bytes + 1;
auto const d_target = string_view{str, bytes};

auto ptr = data() + (forward ? spos : (epos - bytes));
for (size_type idx = 0; idx < find_length; ++idx) {
bool match = true;
for (size_type jdx = 0; match && (jdx < bytes); ++jdx) {
match = (ptr[jdx] == str[jdx]);
if (d_target.compare(ptr, bytes) == 0) {
return forward ? pos : character_offset(epos - bytes - idx);
}
if (match) { return forward ? pos : character_offset(epos - bytes - idx); }
// use pos to record the current find position
pos += strings::detail::is_begin_utf8_char(*ptr);
forward ? ++ptr : --ptr;
Expand Down
126 changes: 86 additions & 40 deletions cpp/src/io/json/host_tree_algorithms.cu
Original file line number Diff line number Diff line change
Expand Up @@ -222,18 +222,19 @@ struct json_column_data {
using hashmap_of_device_columns =
std::unordered_map<NodeIndexT, std::reference_wrapper<device_json_column>>;

std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree(
device_json_column& root,
host_span<uint8_t const> is_str_column_all_nulls,
tree_meta_t& d_column_tree,
device_span<NodeIndexT const> d_unique_col_ids,
device_span<size_type const> d_max_row_offsets,
std::vector<std::string> const& column_names,
NodeIndexT row_array_parent_col_id,
bool is_array_of_arrays,
cudf::io::json_reader_options const& options,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);
std::
tuple<cudf::detail::host_vector<bool>, cudf::detail::host_vector<bool>, hashmap_of_device_columns>
build_tree(device_json_column& root,
host_span<uint8_t const> is_str_column_all_nulls,
tree_meta_t& d_column_tree,
device_span<NodeIndexT const> d_unique_col_ids,
device_span<size_type const> d_max_row_offsets,
std::vector<std::string> const& column_names,
NodeIndexT row_array_parent_col_id,
bool is_array_of_arrays,
cudf::io::json_reader_options const& options,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

void scatter_offsets(tree_meta_t const& tree,
device_span<NodeIndexT const> col_ids,
Expand All @@ -242,6 +243,7 @@ void scatter_offsets(tree_meta_t const& tree,
device_span<size_type> sorted_col_ids, // Reuse this for parent_col_ids
tree_meta_t const& d_column_tree,
host_span<const bool> ignore_vals,
host_span<const bool> is_mixed,
hashmap_of_device_columns const& columns,
rmm::cuda_stream_view stream);

Expand Down Expand Up @@ -363,17 +365,17 @@ void make_device_json_column(device_span<SymbolT const> input,
}
return std::vector<uint8_t>();
}();
auto const [ignore_vals, columns] = build_tree(root,
is_str_column_all_nulls,
d_column_tree,
d_unique_col_ids,
d_max_row_offsets,
column_names,
row_array_parent_col_id,
is_array_of_arrays,
options,
stream,
mr);
auto const [ignore_vals, is_mixed_pruned, columns] = build_tree(root,
is_str_column_all_nulls,
d_column_tree,
d_unique_col_ids,
d_max_row_offsets,
column_names,
row_array_parent_col_id,
is_array_of_arrays,
options,
stream,
mr);
if (ignore_vals.empty()) return;
scatter_offsets(tree,
col_ids,
Expand All @@ -382,22 +384,24 @@ void make_device_json_column(device_span<SymbolT const> input,
sorted_col_ids,
d_column_tree,
ignore_vals,
is_mixed_pruned,
columns,
stream);
}

std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree(
device_json_column& root,
host_span<uint8_t const> is_str_column_all_nulls,
tree_meta_t& d_column_tree,
device_span<NodeIndexT const> d_unique_col_ids,
device_span<size_type const> d_max_row_offsets,
std::vector<std::string> const& column_names,
NodeIndexT row_array_parent_col_id,
bool is_array_of_arrays,
cudf::io::json_reader_options const& options,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
std::
tuple<cudf::detail::host_vector<bool>, cudf::detail::host_vector<bool>, hashmap_of_device_columns>
build_tree(device_json_column& root,
host_span<uint8_t const> is_str_column_all_nulls,
tree_meta_t& d_column_tree,
device_span<NodeIndexT const> d_unique_col_ids,
device_span<size_type const> d_max_row_offsets,
std::vector<std::string> const& column_names,
NodeIndexT row_array_parent_col_id,
bool is_array_of_arrays,
cudf::io::json_reader_options const& options,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
bool const is_enabled_lines = options.is_enabled_lines();
bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string();
Expand Down Expand Up @@ -488,7 +492,9 @@ std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree
// NoPruning: iterate through schema and enforce type.

if (adj[parent_node_sentinel].empty())
return {cudf::detail::make_host_vector<bool>(0, stream), {}}; // for empty file
return {cudf::detail::make_host_vector<bool>(0, stream),
cudf::detail::make_host_vector<bool>(0, stream),
{}}; // for empty file
CUDF_EXPECTS(adj[parent_node_sentinel].size() == 1, "Should be 1");
auto expected_types = cudf::detail::make_host_vector<NodeT>(num_columns, stream);
std::fill_n(expected_types.begin(), num_columns, NUM_NODE_CLASSES);
Expand Down Expand Up @@ -551,11 +557,14 @@ std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree
auto list_child = schema.child_types.at(this_list_child_name);
for (auto const& child_id : child_ids)
mark_is_pruned(child_id, list_child);
// TODO: Store null map of non-target types for list children to mark list entry as null.
}
};
if (is_array_of_arrays) {
if (adj[adj[parent_node_sentinel][0]].empty())
return {cudf::detail::make_host_vector<bool>(0, stream), {}};
return {cudf::detail::make_host_vector<bool>(0, stream),
cudf::detail::make_host_vector<bool>(0, stream),
{}};
auto root_list_col_id =
is_enabled_lines ? adj[parent_node_sentinel][0] : adj[adj[parent_node_sentinel][0]][0];
// mark root and row array col_id as not pruned.
Expand Down Expand Up @@ -647,8 +656,12 @@ std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree
? adj[parent_node_sentinel][0]
: (adj[adj[parent_node_sentinel][0]].empty() ? -1 : adj[adj[parent_node_sentinel][0]][0]);

// Flags list children that are pruned mixed types, so that the parent list row can be nullified.
auto is_mixed_pruned = cudf::detail::make_host_vector<bool>(num_columns, stream);
std::fill_n(is_mixed_pruned.begin(), num_columns, false);
auto handle_mixed_types = [&column_categories,
&is_str_column_all_nulls,
&is_mixed_pruned,
&is_pruned,
&expected_types,
&is_enabled_mixed_types_as_string,
Expand Down Expand Up @@ -794,6 +807,14 @@ std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree
"list child column insertion failed, duplicate column name in the parent");
ref.get().column_order.emplace_back(list_child_name);
auto this_ref = std::ref(ref.get().child_columns.at(list_child_name));
if (options.is_enabled_experimental()) {
for (auto const& child_id : child_ids) {
if (is_pruned[child_id]) {
// Record this child_id so that its parent list row can be nullified for mixed types.
is_mixed_pruned[child_id] = is_pruned[child_id];
}
}
}
// Mixed type handling
handle_mixed_types(child_ids);
if (child_ids.empty()) {
Expand Down Expand Up @@ -829,7 +850,7 @@ std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree
[](auto exp, auto cat) { return exp == NUM_NODE_CLASSES ? cat : exp; });
cudf::detail::cuda_memcpy_async<NodeT>(d_column_tree.node_categories, expected_types, stream);

return {is_pruned, columns};
return {is_pruned, is_mixed_pruned, columns};
}

void scatter_offsets(tree_meta_t const& tree,
Expand All @@ -839,6 +860,7 @@ void scatter_offsets(tree_meta_t const& tree,
device_span<size_type> sorted_col_ids, // Reuse this for parent_col_ids
tree_meta_t const& d_column_tree,
host_span<const bool> ignore_vals,
host_span<const bool> is_mixed_pruned,
hashmap_of_device_columns const& columns,
rmm::cuda_stream_view stream)
{
Expand All @@ -857,6 +879,8 @@ void scatter_offsets(tree_meta_t const& tree,

auto d_ignore_vals = cudf::detail::make_device_uvector_async(
ignore_vals, stream, cudf::get_current_device_resource_ref());
auto d_is_mixed_pruned = cudf::detail::make_device_uvector_async(
is_mixed_pruned, stream, cudf::get_current_device_resource_ref());
auto d_columns_data = cudf::detail::make_device_uvector_async(
columns_data, stream, cudf::get_current_device_resource_ref());

Expand Down Expand Up @@ -921,9 +945,31 @@ void scatter_offsets(tree_meta_t const& tree,
column_categories[col_ids[parent_node_id]] == NC_LIST and
(!d_ignore_vals[col_ids[parent_node_id]]);
});
// For children of a list that are in ignore_vals, find each child's parent node id and set the
// corresponding parent's null mask to null, i.e. set mixed-type list rows to null.
auto const num_list_children = thrust::distance(
thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()), list_children_end);
thrust::for_each_n(
rmm::exec_policy_nosync(stream),
thrust::make_counting_iterator<size_type>(0),
num_list_children,
[node_ids = node_ids.begin(),
parent_node_ids = tree.parent_node_ids.begin(),
column_categories = d_column_tree.node_categories.begin(),
col_ids = col_ids.begin(),
row_offsets = row_offsets.begin(),
d_is_mixed_pruned = d_is_mixed_pruned.begin(),
d_ignore_vals = d_ignore_vals.begin(),
d_columns_data = d_columns_data.begin()] __device__(size_type i) {
auto const node_id = node_ids[i];
auto const parent_node_id = parent_node_ids[node_id];
if (parent_node_id == parent_node_sentinel or d_ignore_vals[col_ids[parent_node_id]]) return;
if (column_categories[col_ids[parent_node_id]] == NC_LIST and
d_is_mixed_pruned[col_ids[node_id]]) {
clear_bit(d_columns_data[col_ids[parent_node_id]].validity, row_offsets[parent_node_id]);
}
});

auto const num_list_children =
list_children_end - thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin());
thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream),
parent_col_ids.begin(),
parent_col_ids.begin() + num_list_children,
Expand Down
Loading

0 comments on commit c6041b3

Please sign in to comment.