Skip to content

Commit

Permalink
keep backwards compatibility
Browse files Browse the repository at this point in the history
Signed-off-by: Suraj Aralihalli <[email protected]>
  • Loading branch information
SurajAralihalli committed May 1, 2024
1 parent b711b80 commit 26112a5
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 6 deletions.
18 changes: 18 additions & 0 deletions cpp/include/cudf/io/detail/tokenize_json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ enum class LineEndTokenOption { Keep, Discard };
*
* @param json_in The JSON input
* @param options Parsing options specifying the parsing behaviour
 * @param line_end_option Option specifying whether to keep or discard line-end tokens
* @param stream The CUDA stream to which kernels are dispatched
* @param mr Optional, resource with which to allocate
* @return Pair of device vectors, where the first vector represents the token types and the second
Expand All @@ -142,6 +143,23 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

/**
 * @brief Parses the given JSON string and emits a sequence of tokens that demarcate relevant
 * sections from the input.
 *
 * This is the backward-compatible overload: it behaves like the overload above called with
 * `LineEndTokenOption::Discard`, i.e. line-end tokens are dropped from the output stream.
 *
 * @param json_in The JSON input
 * @param options Parsing options specifying the parsing behaviour
 * @param stream The CUDA stream to which kernels are dispatched
 * @param mr Optional, resource with which to allocate
 * @return Pair of device vectors, where the first vector represents the token types and the second
 * vector represents the index within the input corresponding to each token
 */
std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> get_token_stream(
  device_span<SymbolT const> json_in,
  cudf::io::json_reader_options const& options,
  rmm::cuda_stream_view stream,
  rmm::device_async_resource_ref mr);

} // namespace detail

} // namespace cudf::io::json
1 change: 1 addition & 0 deletions cpp/src/io/json/nested_json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ void get_stack_context(device_span<SymbolT const> json_in,
*
* @param tokens The tokens to be post-processed
* @param token_indices The tokens' corresponding indices that are post-processed
 * @param line_end_option Option specifying whether to keep or discard line-end tokens
* @param stream The cuda stream to dispatch GPU kernels to
* @return Returns the post-processed token stream
*/
Expand Down
9 changes: 9 additions & 0 deletions cpp/src/io/json/nested_json_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1656,6 +1656,15 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
return std::make_pair(std::move(tokens), std::move(tokens_indices));
}

/**
 * @brief Backward-compatible overload of `get_token_stream` that omits the line-end token option.
 *
 * Delegates to the primary overload with `LineEndTokenOption::Discard`, preserving the behavior
 * callers relied on before the option was introduced (line-end tokens are discarded).
 *
 * @param json_in The JSON input
 * @param options Parsing options specifying the parsing behaviour
 * @param stream The CUDA stream to which kernels are dispatched
 * @param mr Resource with which to allocate the returned device vectors
 * @return Pair of device vectors: token types and the corresponding indices within the input
 */
std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> get_token_stream(
  device_span<SymbolT const> json_in,
  cudf::io::json_reader_options const& options,
  rmm::cuda_stream_view stream,
  rmm::device_async_resource_ref mr)
{
  // Discard is the historical default behavior, chosen here to keep existing callers unchanged.
  return get_token_stream(json_in, options, LineEndTokenOption::Discard, stream, mr);
}

/**
* @brief Parses the given JSON string and generates a tree representation of the given input.
*
Expand Down
8 changes: 2 additions & 6 deletions cpp/tests/io/nested_json_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,12 +436,8 @@ TEST_F(JsonTest, TokenStream)
cudf::device_span<SymbolT const>{d_scalar.data(), static_cast<size_t>(d_scalar.size())};

// Parse the JSON and get the token stream
auto [d_tokens_gpu, d_token_indices_gpu] =
cuio_json::detail::get_token_stream(d_input,
default_options,
cuio_json::detail::LineEndTokenOption::Discard,
stream,
rmm::mr::get_current_device_resource());
auto [d_tokens_gpu, d_token_indices_gpu] = cuio_json::detail::get_token_stream(
d_input, default_options, stream, rmm::mr::get_current_device_resource());
// Copy back the number of tokens that were written
auto const tokens_gpu = cudf::detail::make_std_vector_async(d_tokens_gpu, stream);
auto const token_indices_gpu = cudf::detail::make_std_vector_async(d_token_indices_gpu, stream);
Expand Down

0 comments on commit 26112a5

Please sign in to comment.