Skip to content

Commit

Permalink
keep backwards compatibility
Browse files Browse the repository at this point in the history
Signed-off-by: Suraj Aralihalli <[email protected]>
  • Loading branch information
SurajAralihalli committed May 1, 2024
1 parent b711b80 commit 26112a5
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 6 deletions.
18 changes: 18 additions & 0 deletions cpp/include/cudf/io/detail/tokenize_json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ enum class LineEndTokenOption { Keep, Discard };
*
* @param json_in The JSON input
* @param options Parsing options specifying the parsing behaviour
 * @param line_end_option Option specifying whether to keep or discard line-end tokens
* @param stream The CUDA stream to which kernels are dispatched
* @param mr Optional, resource with which to allocate
* @return Pair of device vectors, where the first vector represents the token types and the second
Expand All @@ -142,6 +143,23 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

/**
 * @brief Parses the given JSON string and emits a sequence of tokens that demarcate relevant
 * sections from the input.
 *
 * This is the backward-compatible overload: it behaves like the overload above called with
 * `LineEndTokenOption::Discard`, i.e. line-end tokens are dropped from the output stream.
 *
 * @param json_in The JSON input
 * @param options Parsing options specifying the parsing behaviour
 * @param stream The CUDA stream to which kernels are dispatched
 * @param mr Optional, resource with which to allocate
 * @return Pair of device vectors, where the first vector represents the token types and the second
 * vector represents the index within the input corresponding to each token
 */
std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> get_token_stream(
  device_span<SymbolT const> json_in,
  cudf::io::json_reader_options const& options,
  rmm::cuda_stream_view stream,
  rmm::device_async_resource_ref mr);

} // namespace detail

} // namespace cudf::io::json
1 change: 1 addition & 0 deletions cpp/src/io/json/nested_json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ void get_stack_context(device_span<SymbolT const> json_in,
*
* @param tokens The tokens to be post-processed
* @param token_indices The tokens' corresponding indices that are post-processed
 * @param line_end_option Option specifying whether to keep or discard line-end tokens
* @param stream The cuda stream to dispatch GPU kernels to
* @return Returns the post-processed token stream
*/
Expand Down
9 changes: 9 additions & 0 deletions cpp/src/io/json/nested_json_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1656,6 +1656,15 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
return std::make_pair(std::move(tokens), std::move(tokens_indices));
}

/**
 * @brief Backward-compatible overload of `get_token_stream` that omits the line-end token option.
 *
 * Delegates to the primary overload with `LineEndTokenOption::Discard`, preserving the behavior
 * callers relied on before the option was introduced (line-end tokens are discarded).
 *
 * @param json_in The JSON input
 * @param options Parsing options specifying the parsing behaviour
 * @param stream The CUDA stream to which kernels are dispatched
 * @param mr Resource with which to allocate the returned device vectors
 * @return Pair of device vectors: token types and the corresponding indices within the input
 */
std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> get_token_stream(
  device_span<SymbolT const> json_in,
  cudf::io::json_reader_options const& options,
  rmm::cuda_stream_view stream,
  rmm::device_async_resource_ref mr)
{
  // Discard is the historical default behavior, chosen here to keep existing callers unchanged.
  return get_token_stream(json_in, options, LineEndTokenOption::Discard, stream, mr);
}

/**
* @brief Parses the given JSON string and generates a tree representation of the given input.
*
Expand Down
8 changes: 2 additions & 6 deletions cpp/tests/io/nested_json_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,12 +436,8 @@ TEST_F(JsonTest, TokenStream)
cudf::device_span<SymbolT const>{d_scalar.data(), static_cast<size_t>(d_scalar.size())};

// Parse the JSON and get the token stream
auto [d_tokens_gpu, d_token_indices_gpu] =
cuio_json::detail::get_token_stream(d_input,
default_options,
cuio_json::detail::LineEndTokenOption::Discard,
stream,
rmm::mr::get_current_device_resource());
auto [d_tokens_gpu, d_token_indices_gpu] = cuio_json::detail::get_token_stream(
d_input, default_options, stream, rmm::mr::get_current_device_resource());
// Copy back the number of tokens that were written
auto const tokens_gpu = cudf::detail::make_std_vector_async(d_tokens_gpu, stream);
auto const token_indices_gpu = cudf::detail::make_std_vector_async(d_token_indices_gpu, stream);
Expand Down

0 comments on commit 26112a5

Please sign in to comment.