From 65a51ffa364b8a54fadab041cb5c563873303643 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 9 May 2024 19:19:45 -0400 Subject: [PATCH] Add large-strings gtest for cudf::interleave_columns (#15669) Adds a gtest for `cudf::interleave_columns` that tests it can produce large-strings appropriately. Follow on to #15544 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Vyas Ramasubramani (https://github.com/vyasr) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/15669 --- cpp/tests/CMakeLists.txt | 8 ++- cpp/tests/large_strings/reshape_tests.cpp | 64 +++++++++++++++++++++++ 2 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 cpp/tests/large_strings/reshape_tests.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index bbb919aa2d1..e779e1d1410 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -571,8 +571,12 @@ ConfigureTest( # ################################################################################################## # * large strings test ---------------------------------------------------------------------------- ConfigureTest( - LARGE_STRINGS_TEST large_strings/large_strings_fixture.cpp large_strings/merge_tests.cpp - large_strings/concatenate_tests.cpp large_strings/parquet_tests.cpp + LARGE_STRINGS_TEST + large_strings/large_strings_fixture.cpp + large_strings/merge_tests.cpp + large_strings/concatenate_tests.cpp + large_strings/parquet_tests.cpp + large_strings/reshape_tests.cpp GPUS 1 PERCENT 100 ) diff --git a/cpp/tests/large_strings/reshape_tests.cpp b/cpp/tests/large_strings/reshape_tests.cpp new file mode 100644 index 00000000000..b688a40a8d3 --- /dev/null +++ b/cpp/tests/large_strings/reshape_tests.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "large_strings_fixture.hpp"
+
+#include <cudf_test/column_utilities.hpp>
+
+#include <cudf/column/column.hpp>
+#include <cudf/copying.hpp>
+#include <cudf/reshape.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+
+#include <vector>
+
+struct ReshapeTest : public cudf::test::StringsLargeTest {};  // fixture for the reshape tests
+
+TEST_F(ReshapeTest, InterleaveLargeStrings)
+{
+  auto const input = this->long_column();  // strings column supplied by the fixture
+  auto input_views = std::vector<cudf::table_view>();
+  auto const view  = cudf::table_view({input});
+  std::vector<cudf::size_type> splits;  // row indices used to cut the result back apart
+  int const multiplier = 10;
+  for (int i = 0; i < multiplier; ++i) {  // 2500MB > 2GB
+    input_views.push_back(view);
+    splits.push_back(view.num_rows() * (i + 1));
+  }
+  splits.pop_back();  // remove last entry: it equals the total row count of the result
+  // interleaving all `multiplier` copies is expected to yield 64-bit (INT64) offsets
+  auto result = cudf::interleave_columns(input_views);
+  auto sv     = cudf::strings_column_view(result->view());
+  EXPECT_EQ(sv.size(), view.num_rows() * multiplier);
+  EXPECT_EQ(sv.offsets().type(), cudf::data_type{cudf::type_id::INT64});
+  // each piece produced by the split is compared against the original input column
+  auto sliced = cudf::split(sv.parent(), splits);
+  for (auto c : sliced) {
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(c, input);
+  }
+
+  // also check regular sizes returns 32-bit offsets
+  input_views.clear();
+  input_views.push_back(view);
+  input_views.push_back(view);
+  result = cudf::interleave_columns(input_views);
+  sv     = cudf::strings_column_view(result->view());
+  EXPECT_EQ(sv.size(), view.num_rows() * 2);
+  EXPECT_EQ(sv.offsets().type(), cudf::data_type{cudf::type_id::INT32});
+  sliced = cudf::split(sv.parent(), {view.num_rows()});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(sliced[0], input);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(sliced[1], input);
+}