From b051f805129fab36ee5da7299ed0fb98850fa44c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Malte=20F=C3=B6rster?= <97973773+mfoerste4@users.noreply.github.com> Date: Thu, 5 Dec 2024 06:27:33 +0100 Subject: [PATCH] Add C++ API scalar quantization (#494) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First draft for scalar quantization. WIP status: * only int8_t target type * quantile computation inefficient (via sampling & sorting) Authors: - Malte Förster (https://github.com/mfoerste4) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Tamas Bela Feher (https://github.com/tfeher) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/494 --- cpp/CMakeLists.txt | 1 + .../cuvs/preprocessing/quantize/scalar.hpp | 489 ++++++++++++++++++ .../preprocessing/quantize/detail/scalar.cuh | 227 ++++++++ cpp/src/preprocessing/quantize/scalar.cu | 74 +++ cpp/test/CMakeLists.txt | 5 + cpp/test/preprocessing/scalar_quantization.cu | 291 +++++++++++ docs/source/cpp_api.rst | 1 + docs/source/cpp_api/preprocessing.rst | 12 + .../source/cpp_api/preprocessing_quantize.rst | 20 + 9 files changed, 1120 insertions(+) create mode 100644 cpp/include/cuvs/preprocessing/quantize/scalar.hpp create mode 100644 cpp/src/preprocessing/quantize/detail/scalar.cuh create mode 100644 cpp/src/preprocessing/quantize/scalar.cu create mode 100644 cpp/test/preprocessing/scalar_quantization.cu create mode 100644 docs/source/cpp_api/preprocessing.rst create mode 100644 docs/source/cpp_api/preprocessing_quantize.rst diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6af423bd5..199bb232d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -459,6 +459,7 @@ if(BUILD_SHARED_LIBS) src/neighbors/vamana_serialize_float.cu src/neighbors/vamana_serialize_uint8.cu src/neighbors/vamana_serialize_int8.cu + src/preprocessing/quantize/scalar.cu src/selection/select_k_float_int64_t.cu src/selection/select_k_float_int32_t.cu 
src/selection/select_k_float_uint32_t.cu diff --git a/cpp/include/cuvs/preprocessing/quantize/scalar.hpp b/cpp/include/cuvs/preprocessing/quantize/scalar.hpp new file mode 100644 index 000000000..49b4bb7a6 --- /dev/null +++ b/cpp/include/cuvs/preprocessing/quantize/scalar.hpp @@ -0,0 +1,489 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace cuvs::preprocessing::quantize::scalar { + +/** + * @defgroup scalar Scalar quantizer utilities + * @{ + */ + +/** + * @brief quantizer parameters. + */ +struct params { + /* + * fraction of (sampled) values kept inside the quantized interval; the remaining outliers + * (split evenly between top & bottom) are clamped. Needs to be within range of (0, 1] + */ + float quantile = 0.99; +}; + +/** + * @brief Stores the interval [min_, max_] found by training, which defines the quantization + * + * The quantization is performed by a linear mapping of an interval in the + * float data type to the full range of the quantized int type. + * + * @tparam T data element type + * + */ +template +struct quantizer { + T min_; + T max_; +}; + +/** + * @brief Initializes a scalar quantizer to be used later for quantizing the dataset.
+ * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::preprocessing::quantize::scalar::params params; + * auto quantizer = cuvs::preprocessing::quantize::scalar::train(handle, params, + * dataset); + * @endcode + * + * @param[in] res raft resource + * @param[in] params configure scalar quantizer, e.g. quantile + * @param[in] dataset a row-major matrix view on device + * + * @return quantizer + */ +quantizer train(raft::resources const& res, + const params params, + raft::device_matrix_view dataset); + +/** + * @brief Initializes a scalar quantizer to be used later for quantizing the dataset. + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::preprocessing::quantize::scalar::params params; + * auto quantizer = cuvs::preprocessing::quantize::scalar::train(handle, params, + * dataset); + * @endcode + * + * @param[in] res raft resource + * @param[in] params configure scalar quantizer, e.g. quantile + * @param[in] dataset a row-major matrix view on host + * + * @return quantizer + */ +quantizer train(raft::resources const& res, + const params params, + raft::host_matrix_view dataset); + +/** + * @brief Applies quantization transform to given dataset + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::preprocessing::quantize::scalar::params params; + * auto quantizer = cuvs::preprocessing::quantize::scalar::train(handle, params, + * dataset); auto quantized_dataset = raft::make_device_matrix(handle, samples, + * features); cuvs::preprocessing::quantize::scalar::transform(handle, quantizer, dataset, + * quantized_dataset.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] quantizer a scalar quantizer + * @param[in] dataset a row-major matrix view on device + * @param[out] out a row-major matrix view on device + * + */ +void transform(raft::resources const& res, + const quantizer& quantizer, + raft::device_matrix_view dataset, + raft::device_matrix_view out); + +/** + * @brief 
Applies quantization transform to given dataset + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::preprocessing::quantize::scalar::params params; + * auto quantizer = cuvs::preprocessing::quantize::scalar::train(handle, params, + * dataset); auto quantized_dataset = raft::make_host_matrix(samples, features); + * cuvs::preprocessing::quantize::scalar::transform(handle, quantizer, dataset, + * quantized_dataset.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] quantizer a scalar quantizer + * @param[in] dataset a row-major matrix view on host + * @param[out] out a row-major matrix view on host + * + */ +void transform(raft::resources const& res, + const quantizer& quantizer, + raft::host_matrix_view dataset, + raft::host_matrix_view out); + +/** + * @brief Perform inverse quantization step on previously quantized dataset + * + * Note that depending on the chosen data types and the training dataset, the conversion is + * not lossless. + * + * Usage example: + * @code{.cpp} + * auto quantized_dataset = raft::make_device_matrix(handle, samples, features); + * cuvs::preprocessing::quantize::scalar::transform(handle, quantizer, dataset, + * quantized_dataset.view()); auto dataset_revert = raft::make_device_matrix(handle, samples, features); + * cuvs::preprocessing::quantize::scalar::inverse_transform(handle, quantizer, + * quantized_dataset.view(), dataset_revert.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] quantizer a scalar quantizer + * @param[in] dataset a row-major matrix view on device + * @param[out] out a row-major matrix view on device + * + */ +void inverse_transform(raft::resources const& res, + const quantizer& quantizer, + raft::device_matrix_view dataset, + raft::device_matrix_view out); + +/** + * @brief Perform inverse quantization step on previously quantized dataset + * + * Note that depending on the chosen data types and the training dataset, the conversion is + * not lossless.
+ * + * Usage example: + * @code{.cpp} + * auto quantized_dataset = raft::make_host_matrix(samples, features); + * cuvs::preprocessing::quantize::scalar::transform(handle, quantizer, dataset, + * quantized_dataset.view()); auto dataset_revert = raft::make_host_matrix(samples, + * features); cuvs::preprocessing::quantize::scalar::inverse_transform(handle, quantizer, + * quantized_dataset.view(), dataset_revert.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] quantizer a scalar quantizer + * @param[in] dataset a row-major matrix view on host + * @param[out] out a row-major matrix view on host + * + */ +void inverse_transform(raft::resources const& res, + const quantizer& quantizer, + raft::host_matrix_view dataset, + raft::host_matrix_view out); + +/** + * @brief Initializes a scalar quantizer to be used later for quantizing the dataset. + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::preprocessing::quantize::scalar::params params; + * auto quantizer = cuvs::preprocessing::quantize::scalar::train(handle, params, + * dataset); + * @endcode + * + * @param[in] res raft resource + * @param[in] params configure scalar quantizer, e.g. quantile + * @param[in] dataset a row-major matrix view on device + * + * @return quantizer + */ +quantizer train(raft::resources const& res, + const params params, + raft::device_matrix_view dataset); + +/** + * @brief Initializes a scalar quantizer to be used later for quantizing the dataset. + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::preprocessing::quantize::scalar::params params; + * auto quantizer = cuvs::preprocessing::quantize::scalar::train(handle, params, + * dataset); + * @endcode + * + * @param[in] res raft resource + * @param[in] params configure scalar quantizer, e.g.
quantile + * @param[in] dataset a row-major matrix view on host + * + * @return quantizer + */ +quantizer train(raft::resources const& res, + const params params, + raft::host_matrix_view dataset); + +/** + * @brief Applies quantization transform to given dataset + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::preprocessing::quantize::scalar::params params; + * auto quantizer = cuvs::preprocessing::quantize::scalar::train(handle, params, + * dataset); auto quantized_dataset = raft::make_device_matrix(handle, samples, + * features); cuvs::preprocessing::quantize::scalar::transform(handle, quantizer, dataset, + * quantized_dataset.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] quantizer a scalar quantizer + * @param[in] dataset a row-major matrix view on device + * @param[out] out a row-major matrix view on device + * + */ +void transform(raft::resources const& res, + const quantizer& quantizer, + raft::device_matrix_view dataset, + raft::device_matrix_view out); + +/** + * @brief Applies quantization transform to given dataset + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::preprocessing::quantize::scalar::params params; + * auto quantizer = cuvs::preprocessing::quantize::scalar::train(handle, params, + * dataset); auto quantized_dataset = raft::make_host_matrix(samples, features); + * cuvs::preprocessing::quantize::scalar::transform(handle, quantizer, dataset, + * quantized_dataset.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] quantizer a scalar quantizer + * @param[in] dataset a row-major matrix view on host + * @param[out] out a row-major matrix view on host + * + */ +void transform(raft::resources const& res, + const quantizer& quantizer, + raft::host_matrix_view dataset, + raft::host_matrix_view out); + +/** + * @brief Perform inverse quantization step on previously quantized dataset + * + * Note that depending on the chosen data types and the training dataset,
the conversion is + * not lossless. + * + * Usage example: + * @code{.cpp} + * auto quantized_dataset = raft::make_device_matrix(handle, samples, features); + * cuvs::preprocessing::quantize::scalar::transform(handle, quantizer, dataset, + * quantized_dataset.view()); auto dataset_revert = raft::make_device_matrix(handle, + * samples, features); cuvs::preprocessing::quantize::scalar::inverse_transform(handle, quantizer, + * quantized_dataset.view(), dataset_revert.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] quantizer a scalar quantizer + * @param[in] dataset a row-major matrix view on device + * @param[out] out a row-major matrix view on device + * + */ +void inverse_transform(raft::resources const& res, + const quantizer& quantizer, + raft::device_matrix_view dataset, + raft::device_matrix_view out); + +/** + * @brief Perform inverse quantization step on previously quantized dataset + * + * Note that depending on the chosen data types and the training dataset, the conversion is + * not lossless. + * + * Usage example: + * @code{.cpp} + * auto quantized_dataset = raft::make_host_matrix(samples, features); + * cuvs::preprocessing::quantize::scalar::transform(handle, quantizer, dataset, + * quantized_dataset.view()); auto dataset_revert = raft::make_host_matrix(samples, + * features); cuvs::preprocessing::quantize::scalar::inverse_transform(handle, quantizer, + * quantized_dataset.view(), dataset_revert.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] quantizer a scalar quantizer + * @param[in] dataset a row-major matrix view on host + * @param[out] out a row-major matrix view on host + * + */ +void inverse_transform(raft::resources const& res, + const quantizer& quantizer, + raft::host_matrix_view dataset, + raft::host_matrix_view out); + +/** + * @brief Initializes a scalar quantizer to be used later for quantizing the dataset.
+ * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::preprocessing::quantize::scalar::params params; + * auto quantizer = cuvs::preprocessing::quantize::scalar::train(handle, params, + * dataset); + * @endcode + * + * @param[in] res raft resource + * @param[in] params configure scalar quantizer, e.g. quantile + * @param[in] dataset a row-major matrix view on device + * + * @return quantizer + */ +quantizer train(raft::resources const& res, + const params params, + raft::device_matrix_view dataset); + +/** + * @brief Initializes a scalar quantizer to be used later for quantizing the dataset. + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::preprocessing::quantize::scalar::params params; + * auto quantizer = cuvs::preprocessing::quantize::scalar::train(handle, params, + * dataset); + * @endcode + * + * @param[in] res raft resource + * @param[in] params configure scalar quantizer, e.g. quantile + * @param[in] dataset a row-major matrix view on host + * + * @return quantizer + */ +quantizer train(raft::resources const& res, + const params params, + raft::host_matrix_view dataset); + +/** + * @brief Applies quantization transform to given dataset + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::preprocessing::quantize::scalar::params params; + * auto quantizer = cuvs::preprocessing::quantize::scalar::train(handle, params, + * dataset); auto quantized_dataset = raft::make_device_matrix(handle, samples, + * features); cuvs::preprocessing::quantize::scalar::transform(handle, quantizer, dataset, + * quantized_dataset.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] quantizer a scalar quantizer + * @param[in] dataset a row-major matrix view on device + * @param[out] out a row-major matrix view on device + * + */ +void transform(raft::resources const& res, + const quantizer& quantizer, + raft::device_matrix_view dataset, + raft::device_matrix_view out); + +/** + * @brief 
Applies quantization transform to given dataset + * + * Usage example: + * @code{.cpp} + * raft::handle_t handle; + * cuvs::preprocessing::quantize::scalar::params params; + * auto quantizer = cuvs::preprocessing::quantize::scalar::train(handle, params, + * dataset); auto quantized_dataset = raft::make_host_matrix(samples, features); + * cuvs::preprocessing::quantize::scalar::transform(handle, quantizer, dataset, + * quantized_dataset.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] quantizer a scalar quantizer + * @param[in] dataset a row-major matrix view on host + * @param[out] out a row-major matrix view on host + * + */ +void transform(raft::resources const& res, + const quantizer& quantizer, + raft::host_matrix_view dataset, + raft::host_matrix_view out); + +/** + * @brief Perform inverse quantization step on previously quantized dataset + * + * Note that depending on the chosen data types and the training dataset, the conversion is + * not lossless. + * + * Usage example: + * @code{.cpp} + * auto quantized_dataset = raft::make_device_matrix(handle, samples, features); + * cuvs::preprocessing::quantize::scalar::transform(handle, quantizer, dataset, + * quantized_dataset.view()); auto dataset_revert = raft::make_device_matrix(handle, + * samples, features); cuvs::preprocessing::quantize::scalar::inverse_transform(handle, quantizer, + * quantized_dataset.view(), dataset_revert.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] quantizer a scalar quantizer + * @param[in] dataset a row-major matrix view on device + * @param[out] out a row-major matrix view on device + * + */ +void inverse_transform(raft::resources const& res, + const quantizer& quantizer, + raft::device_matrix_view dataset, + raft::device_matrix_view out); + +/** + * @brief Perform inverse quantization step on previously quantized dataset + * + * Note that depending on the chosen data types and the training dataset, the conversion is + * not lossless.
+ * + * Usage example: + * @code{.cpp} + * auto quantized_dataset = raft::make_host_matrix(samples, features); + * cuvs::preprocessing::quantize::scalar::transform(handle, quantizer, dataset, + * quantized_dataset.view()); auto dataset_revert = raft::make_host_matrix(samples, + * features); cuvs::preprocessing::quantize::scalar::inverse_transform(handle, quantizer, + * quantized_dataset.view(), dataset_revert.view()); + * @endcode + * + * @param[in] res raft resource + * @param[in] quantizer a scalar quantizer + * @param[in] dataset a row-major matrix view on host + * @param[out] out a row-major matrix view on host + * + */ +void inverse_transform(raft::resources const& res, + const quantizer& quantizer, + raft::host_matrix_view dataset, + raft::host_matrix_view out); + +/** @} */ // end of group scalar + +} // namespace cuvs::preprocessing::quantize::scalar diff --git a/cpp/src/preprocessing/quantize/detail/scalar.cuh b/cpp/src/preprocessing/quantize/detail/scalar.cuh new file mode 100644 index 000000000..fc132eb7f --- /dev/null +++ b/cpp/src/preprocessing/quantize/detail/scalar.cuh @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cuvs::preprocessing::quantize::detail { + +template +_RAFT_HOST_DEVICE bool fp_lt(const T& a, const T& b) +{ + return a < b; +} + +template <> +_RAFT_HOST_DEVICE bool fp_lt(const half& a, const half& b) +{ + return static_cast(a) < static_cast(b); +} + +template +struct quantize_op { + const T min_; + const T max_; + const QuantI q_type_min_ = std::numeric_limits::min(); + const QuantI q_type_max_ = std::numeric_limits::max(); + const TempT scalar_; + const TempT offset_; + + constexpr explicit quantize_op(T min, T max) + : min_(min), + max_(max), + scalar_(static_cast(max_) > static_cast(min_) + ? ((static_cast(q_type_max_) - static_cast(q_type_min_)) / + (static_cast(max_) - static_cast(min_))) + : static_cast(1)), + offset_(static_cast(q_type_min_) - static_cast(min_) * scalar_) + { + } + + constexpr RAFT_INLINE_FUNCTION QuantI operator()(const T& x) const + { + if (!fp_lt(min_, x)) return q_type_min_; + if (!fp_lt(x, max_)) return q_type_max_; + return static_cast(lroundf(scalar_ * static_cast(x) + offset_)); + } + + constexpr RAFT_INLINE_FUNCTION T operator()(const QuantI& x) const + { + return static_cast((static_cast(x) - offset_) / scalar_); + } +}; + +template +std::tuple quantile_min_max(raft::resources const& res, + raft::device_matrix_view dataset, + double quantile) +{ + // settings for quantile approximation + constexpr size_t max_num_samples = 1000000; + constexpr int seed = 137; + + cudaStream_t stream = raft::resource::get_cuda_stream(res); + + // select subsample + raft::random::RngState rng(seed); + size_t n_elements = dataset.extent(0) * dataset.extent(1); + size_t subset_size = std::min(max_num_samples, n_elements); + auto subset = raft::make_device_vector(res, subset_size); + auto dataset_view = raft::make_device_vector_view(dataset.data_handle(), n_elements); + raft::random::sample_without_replacement( + res, rng, dataset_view, 
std::nullopt, subset.view(), std::nullopt); + + // quantile / sort and pick for now + thrust::sort(raft::resource::get_thrust_policy(res), + subset.data_handle(), + subset.data_handle() + subset_size); + + double half_quantile_pos = (0.5 + 0.5 * quantile) * subset_size; + int pos_max = std::ceil(half_quantile_pos) - 1; + int pos_min = subset_size - pos_max - 1; + + T minmax_h[2]; + raft::update_host(&(minmax_h[0]), subset.data_handle() + pos_min, 1, stream); + raft::update_host(&(minmax_h[1]), subset.data_handle() + pos_max, 1, stream); + raft::resource::sync_stream(res); + + return {minmax_h[0], minmax_h[1]}; +} + +template +std::tuple quantile_min_max(raft::resources const& res, + raft::host_matrix_view dataset, + double quantile) +{ + // settings for quantile approximation + constexpr size_t max_num_samples = 1000000; + constexpr int seed = 137; + + // select subsample + std::mt19937 rng(seed); + size_t n_elements = dataset.extent(0) * dataset.extent(1); + size_t subset_size = std::min(max_num_samples, n_elements); + std::vector subset; + std::sample(dataset.data_handle(), + dataset.data_handle() + n_elements, + std::back_inserter(subset), + subset_size, + rng); + + // quantile / sort and pick for now + thrust::sort(thrust::omp::par, subset.data(), subset.data() + subset_size, fp_lt); + double half_quantile_pos = (0.5 + 0.5 * quantile) * subset_size; + int pos_max = std::ceil(half_quantile_pos) - 1; + int pos_min = subset_size - pos_max - 1; + + return {subset[pos_min], subset[pos_max]}; +} + +template +cuvs::preprocessing::quantize::scalar::quantizer train( + raft::resources const& res, + const cuvs::preprocessing::quantize::scalar::params params, + raft::device_matrix_view dataset) +{ + RAFT_EXPECTS(params.quantile > 0.0 && params.quantile <= 1.0, + "quantile for scalar quantization needs to be within (0, 1] but is %f", + params.quantile); + + auto [min, max] = detail::quantile_min_max(res, dataset, params.quantile); + + RAFT_LOG_DEBUG("quantizer train min=%lf 
max=%lf.", double(min), double(max)); + + return cuvs::preprocessing::quantize::scalar::quantizer{min, max}; +} + +template +cuvs::preprocessing::quantize::scalar::quantizer train( + raft::resources const& res, + const cuvs::preprocessing::quantize::scalar::params params, + raft::host_matrix_view dataset) +{ + RAFT_EXPECTS(params.quantile > 0.0 && params.quantile <= 1.0, + "quantile for scalar quantization needs to be within (0, 1] but is %f", + params.quantile); + + auto [min, max] = detail::quantile_min_max(res, dataset, params.quantile); + + RAFT_LOG_DEBUG("quantizer train min=%lf max=%lf.", double(min), double(max)); + + return cuvs::preprocessing::quantize::scalar::quantizer{min, max}; +} + +template +void transform(raft::resources const& res, + const cuvs::preprocessing::quantize::scalar::quantizer& quantizer, + raft::device_matrix_view dataset, + raft::device_matrix_view out) +{ + cudaStream_t stream = raft::resource::get_cuda_stream(res); + + raft::linalg::map(res, out, quantize_op(quantizer.min_, quantizer.max_), dataset); +} + +template +void transform(raft::resources const& res, + const cuvs::preprocessing::quantize::scalar::quantizer& quantizer, + raft::host_matrix_view dataset, + raft::host_matrix_view out) +{ + auto main_op = quantize_op(quantizer.min_, quantizer.max_); + size_t n_elements = dataset.extent(0) * dataset.extent(1); + +#pragma omp parallel for + for (size_t i = 0; i < n_elements; ++i) { + out.data_handle()[i] = main_op(dataset.data_handle()[i]); + } +} + +template +void inverse_transform(raft::resources const& res, + const cuvs::preprocessing::quantize::scalar::quantizer& quantizer, + raft::device_matrix_view dataset, + raft::device_matrix_view out) +{ + cudaStream_t stream = raft::resource::get_cuda_stream(res); + + raft::linalg::map(res, out, quantize_op(quantizer.min_, quantizer.max_), dataset); +} + +template +void inverse_transform(raft::resources const& res, + const cuvs::preprocessing::quantize::scalar::quantizer& quantizer, + 
raft::host_matrix_view dataset, + raft::host_matrix_view out) +{ + auto main_op = quantize_op(quantizer.min_, quantizer.max_); + size_t n_elements = dataset.extent(0) * dataset.extent(1); + +#pragma omp parallel for + for (size_t i = 0; i < n_elements; ++i) { + out.data_handle()[i] = main_op(dataset.data_handle()[i]); + } +} + +} // namespace cuvs::preprocessing::quantize::detail diff --git a/cpp/src/preprocessing/quantize/scalar.cu b/cpp/src/preprocessing/quantize/scalar.cu new file mode 100644 index 000000000..9624ad4fe --- /dev/null +++ b/cpp/src/preprocessing/quantize/scalar.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "./detail/scalar.cuh" + +#include + +namespace cuvs::preprocessing::quantize::scalar { + +#define CUVS_INST_QUANTIZATION(T, QuantI) \ + auto train(raft::resources const& res, \ + const params params, \ + raft::device_matrix_view dataset) \ + ->quantizer \ + { \ + return detail::train(res, params, dataset); \ + } \ + auto train(raft::resources const& res, \ + const params params, \ + raft::host_matrix_view dataset) \ + ->quantizer \ + { \ + return detail::train(res, params, dataset); \ + } \ + void transform(raft::resources const& res, \ + const quantizer& quantizer, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view out) \ + { \ + detail::transform(res, quantizer, dataset, out); \ + } \ + void transform(raft::resources const& res, \ + const quantizer& quantizer, \ + raft::host_matrix_view dataset, \ + raft::host_matrix_view out) \ + { \ + detail::transform(res, quantizer, dataset, out); \ + } \ + void inverse_transform(raft::resources const& res, \ + const quantizer& quantizer, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view out) \ + { \ + detail::inverse_transform(res, quantizer, dataset, out); \ + } \ + void inverse_transform(raft::resources const& res, \ + const quantizer& quantizer, \ + raft::host_matrix_view dataset, \ + raft::host_matrix_view out) \ + { \ + detail::inverse_transform(res, quantizer, dataset, out); \ + } \ + template struct quantizer; + +CUVS_INST_QUANTIZATION(double, int8_t); +CUVS_INST_QUANTIZATION(float, int8_t); +CUVS_INST_QUANTIZATION(half, int8_t); + +#undef CUVS_INST_QUANTIZATION + +} // namespace cuvs::preprocessing::quantize::scalar \ No newline at end of file diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 1c8de2ad0..0ecac6ec2 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -226,6 +226,11 @@ if(BUILD_TESTS) PERCENT 100 ) + + ConfigureTest( + NAME PREPROCESSING_TEST PATH preprocessing/scalar_quantization.cu GPUS 1 PERCENT 100 + ) + 
ConfigureTest( NAME STATS_TEST PATH stats/trustworthiness.cu stats/silhouette_score.cu GPUS 1 PERCENT 100 ) diff --git a/cpp/test/preprocessing/scalar_quantization.cu b/cpp/test/preprocessing/scalar_quantization.cu new file mode 100644 index 000000000..2fdfe7555 --- /dev/null +++ b/cpp/test/preprocessing/scalar_quantization.cu @@ -0,0 +1,291 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.cuh" +#include +#include +#include +#include +#include +#include +#include + +namespace cuvs::preprocessing::quantize::scalar { + +template +struct QuantizationInputs { + cuvs::preprocessing::quantize::scalar::params quantization_params; + int rows; + int cols; + T min = T(-1.0); + T max = T(1.0); + double threshold = 2e-2; +}; + +template +std::ostream& operator<<(std::ostream& os, const QuantizationInputs& inputs) +{ + return os << "quantization_quantile:<" << inputs.quantization_params.quantile + << "> rows:" << inputs.rows << " cols:" << inputs.cols << " min:" << (double)inputs.min + << " max:" << (double)inputs.max; +} + +template +class QuantizationTest : public ::testing::TestWithParam> { + public: + QuantizationTest() + : params_(::testing::TestWithParam>::GetParam()), + stream(raft::resource::get_cuda_stream(handle)), + input_(0, stream) + { + } + + double getRelativeErrorStddev(const T* array_a, const T* array_b, size_t size, float quantile) + { + // relative error elementwise + 
rmm::device_uvector relative_error(size, stream); + raft::linalg::binaryOp( + relative_error.data(), + array_a, + array_b, + size, + [] __device__(double a, double b) { + return a != b ? (raft::abs(a - b) / raft::max(raft::abs(a), raft::abs(b))) : 0; + }, + stream); + + // sort by size --> remove largest errors to account for quantile chosen + thrust::sort(raft::resource::get_thrust_policy(handle), + relative_error.data(), + relative_error.data() + size); + int elements_to_consider = + std::ceil(double(params_.quantization_params.quantile) * double(size)); + + rmm::device_uvector mu(1, stream); + RAFT_CUDA_TRY(cudaMemsetAsync(mu.data(), 0, sizeof(double), stream)); + + rmm::device_uvector error_stddev(1, stream); + raft::stats::stddev(error_stddev.data(), + relative_error.data(), + mu.data(), + 1, + elements_to_consider, + false, + true, + stream); + + double error_stddev_h; + raft::update_host(&error_stddev_h, error_stddev.data(), 1, stream); + raft::resource::sync_stream(handle, stream); + return error_stddev_h; + } + + protected: + void testScalarQuantization() + { + // dataset identical on host / device + auto dataset = raft::make_device_matrix_view( + (const T*)(input_.data()), rows_, cols_); + auto dataset_h = raft::make_host_matrix_view( + (const T*)(host_input_.data()), rows_, cols_); + + size_t print_size = std::min(input_.size(), 20ul); + + // train quantizer_1 on device + auto quantizer_1 = + cuvs::preprocessing::quantize::scalar::train(handle, params_.quantization_params, dataset); + std::cerr << "Q1: min = " << (double)quantizer_1.min_ << ", max = " << (double)quantizer_1.max_ + << std::endl; + + { + auto quantized_input_h = raft::make_host_matrix(rows_, cols_); + auto quantized_input_d = raft::make_device_matrix(handle, rows_, cols_); + cuvs::preprocessing::quantize::scalar::transform( + handle, quantizer_1, dataset, quantized_input_d.view()); + cuvs::preprocessing::quantize::scalar::transform( + handle, quantizer_1, dataset_h, 
quantized_input_h.view()); + + { + raft::print_device_vector("Input array: ", input_.data(), print_size, std::cerr); + + rmm::device_uvector quantization_for_print(print_size, stream); + raft::linalg::unaryOp(quantization_for_print.data(), + quantized_input_d.data_handle(), + print_size, + raft::cast_op{}, + stream); + raft::resource::sync_stream(handle, stream); + raft::print_device_vector( + "Quantized array 1: ", quantization_for_print.data(), print_size, std::cerr); + } + + // test (inverse) transform host/device equal + ASSERT_TRUE(devArrMatchHost(quantized_input_h.data_handle(), + quantized_input_d.data_handle(), + input_.size(), + cuvs::Compare(), + stream)); + + auto quantized_input_h_const_view = raft::make_host_matrix_view( + quantized_input_h.data_handle(), rows_, cols_); + auto re_transformed_input_h = raft::make_host_matrix(rows_, cols_); + cuvs::preprocessing::quantize::scalar::inverse_transform( + handle, quantizer_1, quantized_input_h_const_view, re_transformed_input_h.view()); + + auto quantized_input_d_const_view = raft::make_device_matrix_view( + quantized_input_d.data_handle(), rows_, cols_); + auto re_transformed_input_d = raft::make_device_matrix(handle, rows_, cols_); + cuvs::preprocessing::quantize::scalar::inverse_transform( + handle, quantizer_1, quantized_input_d_const_view, re_transformed_input_d.view()); + raft::print_device_vector( + "re-transformed array: ", re_transformed_input_d.data_handle(), print_size, std::cerr); + + { + double l2_error = getRelativeErrorStddev(dataset.data_handle(), + re_transformed_input_d.data_handle(), + input_.size(), + params_.quantization_params.quantile); + std::cerr << "error stddev = " << l2_error << ", threshold = " << params_.threshold + << std::endl; + // test (inverse) transform close to original dataset + ASSERT_TRUE(l2_error < params_.threshold); + } + } + + // train quantizer_2 on host + auto quantizer_2 = + cuvs::preprocessing::quantize::scalar::train(handle, params_.quantization_params, 
dataset_h); + std::cerr << "Q2: min = " << (double)quantizer_2.min_ << ", max = " << (double)quantizer_2.max_ + << std::endl; + + // check both quantizers are the same (valid if sampling is identical) + if (input_.size() <= 1000000) { + ASSERT_TRUE((double)quantizer_1.min_ == (double)quantizer_2.min_); + ASSERT_TRUE((double)quantizer_1.max_ == (double)quantizer_2.max_); + } + + { + // test transform host/device equal + auto quantized_input_h = raft::make_host_matrix(rows_, cols_); + auto quantized_input_d = raft::make_device_matrix(handle, rows_, cols_); + cuvs::preprocessing::quantize::scalar::transform( + handle, quantizer_2, dataset, quantized_input_d.view()); + cuvs::preprocessing::quantize::scalar::transform( + handle, quantizer_2, dataset_h, quantized_input_h.view()); + + { + rmm::device_uvector quantization_for_print(print_size, stream); + raft::linalg::unaryOp(quantization_for_print.data(), + quantized_input_d.data_handle(), + print_size, + raft::cast_op{}, + stream); + raft::resource::sync_stream(handle, stream); + raft::print_device_vector( + "Quantized array 2: ", quantization_for_print.data(), print_size, std::cerr); + } + + ASSERT_TRUE(devArrMatchHost(quantized_input_h.data_handle(), + quantized_input_d.data_handle(), + input_.size(), + cuvs::Compare(), + stream)); + } + + // sort_by_key (input, quantization) -- check <= on result + { + auto quantized_input = raft::make_device_matrix(handle, rows_, cols_); + cuvs::preprocessing::quantize::scalar::transform( + handle, quantizer_1, dataset, quantized_input.view()); + thrust::sort_by_key(raft::resource::get_thrust_policy(handle), + input_.data(), + input_.data() + input_.size(), + quantized_input.data_handle()); + std::vector quantized_input_sorted_host(input_.size()); + raft::update_host( + quantized_input_sorted_host.data(), quantized_input.data_handle(), input_.size(), stream); + raft::resource::sync_stream(handle, stream); + + for (size_t i = 0; i < input_.size() - 1; ++i) { + 
ASSERT_TRUE(quantized_input_sorted_host[i] <= quantized_input_sorted_host[i + 1]); + } + } + } + + void SetUp() override + { + rows_ = params_.rows; + cols_ = params_.cols; + + int n_elements = rows_ * cols_; + input_.resize(n_elements, stream); + host_input_.resize(n_elements); + + // random input + unsigned long long int seed = 1234ULL; + raft::random::RngState r(seed); + uniform(handle, r, input_.data(), input_.size(), params_.min, params_.max); + + raft::update_host(host_input_.data(), input_.data(), input_.size(), stream); + + raft::resource::sync_stream(handle, stream); + } + + private: + raft::resources handle; + cudaStream_t stream; + + QuantizationInputs params_; + int rows_; + int cols_; + rmm::device_uvector input_; + std::vector host_input_; +}; + +template +const std::vector> inputs = { + {{1.0}, 5, 5, T(0.0), T(1.0)}, + {{0.98}, 10, 20, T(0.0), T(1.0)}, + {{0.90}, 1000, 1500, T(-500.0), T(100.0)}, + {{0.59}, 100, 200}, + {{0.1}, 1, 1, T(0.0), T(1.0)}, + {{0.01}, 50, 50, T(0.0), T(1.0)}, + {{0.94}, 10, 20, T(-1.0), T(0.0)}, + {{0.95}, 10, 2, T(50.0), T(100.0)}, + {{0.95}, 10, 20, T(-500.0), T(-100.0)}, + {{0.95}, 10, 20, T(5.0), T(5.0)}, +}; + +typedef QuantizationTest QuantizationTest_float_int8t; +TEST_P(QuantizationTest_float_int8t, ScalarQuantizationTest) { this->testScalarQuantization(); } + +typedef QuantizationTest QuantizationTest_double_int8t; +TEST_P(QuantizationTest_double_int8t, ScalarQuantizationTest) { this->testScalarQuantization(); } + +typedef QuantizationTest QuantizationTest_half_int8t; +TEST_P(QuantizationTest_half_int8t, ScalarQuantizationTest) { this->testScalarQuantization(); } + +INSTANTIATE_TEST_CASE_P(QuantizationTest, + QuantizationTest_float_int8t, + ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_CASE_P(QuantizationTest, + QuantizationTest_double_int8t, + ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_CASE_P(QuantizationTest, + QuantizationTest_half_int8t, + ::testing::ValuesIn(inputs)); + +} // namespace 
cuvs::preprocessing::quantize::scalar diff --git a/docs/source/cpp_api.rst b/docs/source/cpp_api.rst index 49732dc92..34f48a88f 100644 --- a/docs/source/cpp_api.rst +++ b/docs/source/cpp_api.rst @@ -10,5 +10,6 @@ C++ API Documentation cpp_api/cluster.rst cpp_api/distance.rst cpp_api/neighbors.rst + cpp_api/preprocessing.rst cpp_api/selection.rst cpp_api/stats.rst diff --git a/docs/source/cpp_api/preprocessing.rst b/docs/source/cpp_api/preprocessing.rst new file mode 100644 index 000000000..1c2b0f051 --- /dev/null +++ b/docs/source/cpp_api/preprocessing.rst @@ -0,0 +1,12 @@ +Preprocessing +============= + +.. role:: py(code) + :language: c++ + :class: highlight + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + preprocessing_quantize.rst diff --git a/docs/source/cpp_api/preprocessing_quantize.rst b/docs/source/cpp_api/preprocessing_quantize.rst new file mode 100644 index 000000000..b660c61c5 --- /dev/null +++ b/docs/source/cpp_api/preprocessing_quantize.rst @@ -0,0 +1,20 @@ +Quantize +======== + +This page provides C++ class references for the publicly-exposed elements of the +`cuvs/preprocessing/quantize` package. + +.. role:: py(code) + :language: c++ + :class: highlight + +Scalar +------ + +``#include <cuvs/preprocessing/quantize/scalar.hpp>`` + +namespace *cuvs::preprocessing::quantize::scalar* + +.. doxygengroup:: scalar + :project: cuvs +