Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into comp-headers-cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
vuule authored Dec 19, 2024
2 parents 350db40 + d8f469f commit 0cf8375
Show file tree
Hide file tree
Showing 32 changed files with 911 additions and 405 deletions.
39 changes: 24 additions & 15 deletions cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -57,62 +57,71 @@ struct MurmurHash3_x86_32 {
};

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<bool>::operator()(bool const& key) const
MurmurHash3_x86_32<bool>::result_type __device__ inline MurmurHash3_x86_32<bool>::operator()(
bool const& key) const
{
return this->compute(static_cast<uint8_t>(key));
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<float>::operator()(float const& key) const
MurmurHash3_x86_32<float>::result_type __device__ inline MurmurHash3_x86_32<float>::operator()(
float const& key) const
{
return this->compute(normalize_nans_and_zeros(key));
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<double>::operator()(double const& key) const
MurmurHash3_x86_32<double>::result_type __device__ inline MurmurHash3_x86_32<double>::operator()(
double const& key) const
{
return this->compute(normalize_nans_and_zeros(key));
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<cudf::string_view>::operator()(
cudf::string_view const& key) const
MurmurHash3_x86_32<cudf::string_view>::result_type
__device__ inline MurmurHash3_x86_32<cudf::string_view>::operator()(
cudf::string_view const& key) const
{
return this->compute_bytes(reinterpret_cast<cuda::std::byte const*>(key.data()),
key.size_bytes());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal32>::operator()(
numeric::decimal32 const& key) const
MurmurHash3_x86_32<numeric::decimal32>::result_type
__device__ inline MurmurHash3_x86_32<numeric::decimal32>::operator()(
numeric::decimal32 const& key) const
{
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal64>::operator()(
numeric::decimal64 const& key) const
MurmurHash3_x86_32<numeric::decimal64>::result_type
__device__ inline MurmurHash3_x86_32<numeric::decimal64>::operator()(
numeric::decimal64 const& key) const
{
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<numeric::decimal128>::operator()(
numeric::decimal128 const& key) const
MurmurHash3_x86_32<numeric::decimal128>::result_type
__device__ inline MurmurHash3_x86_32<numeric::decimal128>::operator()(
numeric::decimal128 const& key) const
{
return this->compute(key.value());
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<cudf::list_view>::operator()(
cudf::list_view const& key) const
MurmurHash3_x86_32<cudf::list_view>::result_type
__device__ inline MurmurHash3_x86_32<cudf::list_view>::operator()(
cudf::list_view const& key) const
{
CUDF_UNREACHABLE("List column hashing is not supported");
}

template <>
hash_value_type __device__ inline MurmurHash3_x86_32<cudf::struct_view>::operator()(
cudf::struct_view const& key) const
MurmurHash3_x86_32<cudf::struct_view>::result_type
__device__ inline MurmurHash3_x86_32<cudf::struct_view>::operator()(
cudf::struct_view const& key) const
{
CUDF_UNREACHABLE("Direct hashing of struct_view is not supported");
}
Expand Down
6 changes: 6 additions & 0 deletions cpp/src/io/fst/logical_stack.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,12 @@ void sparse_stack_op_to_top_of_stack(StackSymbolItT d_symbols,
stream));
}

// Without reset support, the stack level of the last element of d_kv_ops_current must be 0;
// otherwise the logical stack is not empty and an error is raised.
if (num_symbols_in && !supports_reset_op) {
StackOpT last_symbol = d_kv_ops_current.element(num_symbols_in - 1, stream);
CUDF_EXPECTS(last_symbol.stack_level == 0, "The logical stack is not empty!");
}

// Stable radix sort, sorting by stack level of the operations
d_kv_operations_unsigned = cub::DoubleBuffer<StackOpUnsignedT>{
reinterpret_cast<StackOpUnsignedT*>(d_kv_operations.Current()),
Expand Down
10 changes: 4 additions & 6 deletions cpp/src/io/json/nested_json_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1473,10 +1473,11 @@ void get_stack_context(device_span<SymbolT const> json_in,
to_stack_op::start_state,
stream);

auto stack_ops_bufsize = d_num_stack_ops.value(stream);
// Copy back to actual number of stack operations
auto num_stack_ops = d_num_stack_ops.value(stream);
// Sequence of stack symbols and their position in the original input (sparse representation)
rmm::device_uvector<StackSymbolT> stack_ops{stack_ops_bufsize, stream};
rmm::device_uvector<SymbolOffsetT> stack_op_indices{stack_ops_bufsize, stream};
rmm::device_uvector<StackSymbolT> stack_ops{num_stack_ops, stream};
rmm::device_uvector<SymbolOffsetT> stack_op_indices{num_stack_ops, stream};

// Run bracket-brace FST to retrieve starting positions of structs and lists
json_to_stack_ops_fst.Transduce(json_in.begin(),
Expand All @@ -1487,9 +1488,6 @@ void get_stack_context(device_span<SymbolT const> json_in,
to_stack_op::start_state,
stream);

// Copy back to actual number of stack operations
auto const num_stack_ops = d_num_stack_ops.value(stream);

// Stack operations with indices are converted to top of the stack for each character in the input
if (stack_behavior == stack_behavior_t::ResetOnDelimiter) {
fst::sparse_stack_op_to_top_of_stack<fst::stack_op_support::WITH_RESET_SUPPORT, StackLevelT>(
Expand Down
11 changes: 11 additions & 0 deletions cpp/tests/io/json/json_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3450,4 +3450,15 @@ TEST_P(JsonCompressedIOTest, BasicJsonLines)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(1), float64_wrapper{{1.1, 2.2, 3.3}});
}

TEST_F(JsonReaderTest, MismatchedBeginEndTokens)
{
  // The input opens an object and a string value but closes neither of them.
  std::string malformed_json{R"({"not_valid": "json)"};

  // Read as JSON Lines with recovery mode FAIL.
  auto reader_options =
    cudf::io::json_reader_options::builder(
      cudf::io::source_info{malformed_json.data(), malformed_json.size()})
      .lines(true)
      .recovery_mode(cudf::io::json_recovery_mode_t::FAIL)
      .build();

  // Reading the malformed input is expected to throw cudf::logic_error.
  EXPECT_THROW(cudf::io::read_json(reader_options), cudf::logic_error);
}

CUDF_TEST_PROGRAM_MAIN()
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# the License.
# =============================================================================

set(cython_sources column.pyx groupby.pyx scalar.pyx strings_udf.pyx types.pyx utils.pyx)
set(cython_sources column.pyx scalar.pyx strings_udf.pyx types.pyx utils.pyx)
set(linked_libraries cudf::cudf)

rapids_cython_create_modules(
Expand Down
5 changes: 1 addition & 4 deletions python/cudf/cudf/_lib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
import numpy as np

from . import (
groupby,
strings_udf,
)
from . import strings_udf

MAX_COLUMN_SIZE = np.iinfo(np.int32).max
MAX_COLUMN_SIZE_STR = "INT32_MAX"
Expand Down
Loading

0 comments on commit 0cf8375

Please sign in to comment.