Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up namespaces and improve compression-related headers #17621

Merged
merged 13 commits into from
Dec 20, 2024
Prev Previous commit
Next Next commit
jesus
  • Loading branch information
vuule committed Dec 18, 2024
commit 4f7794d3a67bb6793009a1d53a09799a4169ee9f
4 changes: 2 additions & 2 deletions cpp/include/cudf/io/nvcomp_adapter.hpp
Original file line number Diff line number Diff line change
@@ -22,7 +22,7 @@
#include <string>

namespace CUDF_EXPORT cudf {
namespace io::nvcomp {
namespace io::detail::nvcomp {

enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4, GZIP };

@@ -88,5 +88,5 @@ inline bool operator==(feature_status_parameters const& lhs, feature_status_para
[[nodiscard]] std::optional<std::string> is_decompression_disabled(
compression_type compression, feature_status_parameters params = feature_status_parameters());

} // namespace io::nvcomp
} // namespace io::detail::nvcomp
} // namespace CUDF_EXPORT cudf
37 changes: 37 additions & 0 deletions cpp/src/io/comp/common.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cstddef>

namespace cudf::io::detail {

/**
* @brief The value used for padding a data buffer such that its size will be multiple of it.
vuule marked this conversation as resolved.
Show resolved Hide resolved
*
* Padding is necessary for input/output buffers of several compression/decompression kernels
* (inflate_kernel and nvcomp snappy). Such kernels operate on aligned data pointers, which require
* padding to the buffers so that the pointers can shift along the address space to satisfy their
* alignment requirement.
*
* In the meantime, it is not entirely clear why such padding is needed. We need to further
* investigate and implement a better fix rather than just padding the buffer.
* See https://github.com/rapidsai/cudf/issues/13605.
*/
constexpr std::size_t BUFFER_PADDING_MULTIPLE{8};

} // namespace cudf::io::detail
5 changes: 2 additions & 3 deletions cpp/src/io/comp/comp.cpp
Original file line number Diff line number Diff line change
@@ -87,15 +87,14 @@ std::vector<std::uint8_t> compress_snappy(host_span<uint8_t const> src,
outputs[0] = d_dst;
outputs.host_to_device_async(stream);

cudf::detail::hostdevice_vector<cudf::io::compression_result> hd_status(1, stream);
cudf::detail::hostdevice_vector<compression_result> hd_status(1, stream);
hd_status[0] = {};
hd_status.host_to_device_async(stream);

nvcomp::batched_compress(nvcomp::compression_type::SNAPPY, inputs, outputs, hd_status, stream);

hd_status.device_to_host_sync(stream);
CUDF_EXPECTS(hd_status[0].status == cudf::io::compression_status::SUCCESS,
"snappy compression failed");
CUDF_EXPECTS(hd_status[0].status == compression_status::SUCCESS, "snappy compression failed");
return cudf::detail::make_std_vector_sync<uint8_t>(d_dst, stream);
}

22 changes: 20 additions & 2 deletions cpp/src/io/comp/comp.hpp
Original file line number Diff line number Diff line change
@@ -16,16 +16,34 @@

#pragma once

#include "common.hpp"

#include <cudf/io/types.hpp>
#include <cudf/utilities/span.hpp>

#include <memory>
#include <string>
#include <vector>

namespace CUDF_EXPORT cudf {
namespace io::detail {

/**
* @brief Status of a compression/decompression operation.
*/
enum class compression_status : uint8_t {
SUCCESS, ///< Successful, output is valid
FAILURE, ///< Failed, output is invalid (e.g. input is unsupported in some way)
SKIPPED, ///< Operation skipped (if conversion, uncompressed data can be used)
OUTPUT_OVERFLOW, ///< Output buffer is too small; operation can succeed with larger output
};

/**
* @brief Descriptor of compression/decompression result.
*/
struct compression_result {
uint64_t bytes_written;
compression_status status;
};

/**
* @brief Compresses a system memory buffer.
*
7 changes: 3 additions & 4 deletions cpp/src/io/comp/debrotli.cu
Original file line number Diff line number Diff line change
@@ -63,8 +63,8 @@ THE SOFTWARE.

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace io {
namespace cudf::io::detail {

constexpr uint32_t huffman_lookup_table_width = 8;
constexpr int8_t brotli_code_length_codes = 18;
constexpr uint32_t brotli_num_distance_short_codes = 16;
@@ -2114,5 +2114,4 @@ void gpu_debrotli(device_span<device_span<uint8_t const> const> inputs,
#endif
}

} // namespace io
} // namespace cudf
} // namespace cudf::io::detail
6 changes: 2 additions & 4 deletions cpp/src/io/comp/gpuinflate.cu
Original file line number Diff line number Diff line change
@@ -49,8 +49,7 @@ Mark Adler madler@alumni.caltech.edu

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace io {
namespace cudf::io::detail {

constexpr int max_bits = 15; // maximum bits in a code
constexpr int max_l_codes = 286; // maximum number of literal/length codes
@@ -1223,5 +1222,4 @@ void gpu_copy_uncompressed_blocks(device_span<device_span<uint8_t const> const>
}
}

} // namespace io
} // namespace cudf
} // namespace cudf::io::detail
40 changes: 4 additions & 36 deletions cpp/src/io/comp/gpuinflate.hpp
Original file line number Diff line number Diff line change
@@ -16,6 +16,8 @@

#pragma once

#include "io/comp/comp.hpp"

#include <cudf/io/types.hpp>
#include <cudf/utilities/export.hpp>
#include <cudf/utilities/span.hpp>
@@ -24,43 +26,10 @@

#include <cstdint>

namespace cudf {
namespace io {

/**
* @brief Status of a compression/decompression operation.
*/
enum class compression_status : uint8_t {
SUCCESS, ///< Successful, output is valid
FAILURE, ///< Failed, output is invalid (e.g. input is unsupported in some way)
SKIPPED, ///< Operation skipped (if conversion, uncompressed data can be used)
OUTPUT_OVERFLOW, ///< Output buffer is too small; operation can succeed with larger output
};

/**
* @brief Descriptor of compression/decompression result.
*/
struct compression_result {
uint64_t bytes_written;
compression_status status;
};
namespace cudf::io::detail {

enum class gzip_header_included { NO, YES };

/**
* @brief The value used for padding a data buffer such that its size will be multiple of it.
*
* Padding is necessary for input/output buffers of several compression/decompression kernels
* (inflate_kernel and nvcomp snappy). Such kernels operate on aligned data pointers, which require
* padding to the buffers so that the pointers can shift along the address space to satisfy their
* alignment requirement.
*
* In the meantime, it is not entirely clear why such padding is needed. We need to further
* investigate and implement a better fix rather than just padding the buffer.
* See https://github.com/rapidsai/cudf/issues/13605.
*/
constexpr std::size_t BUFFER_PADDING_MULTIPLE{8};

/**
* @brief Interface for decompressing GZIP-compressed data
*
@@ -168,5 +137,4 @@ void gpu_snap(device_span<device_span<uint8_t const> const> inputs,
device_span<compression_result const> results,
rmm::cuda_stream_view stream);

} // namespace io
} // namespace cudf
} // namespace cudf::io::detail
6 changes: 2 additions & 4 deletions cpp/src/io/comp/io_uncomp.hpp
Original file line number Diff line number Diff line change
@@ -16,15 +16,13 @@

#pragma once

#include "common.hpp"

#include <cudf/io/types.hpp>
#include <cudf/utilities/span.hpp>

#include <memory>
#include <string>
#include <vector>

using cudf::host_span;

namespace CUDF_EXPORT cudf {
namespace io::detail {

4 changes: 2 additions & 2 deletions cpp/src/io/comp/nvcomp_adapter.cpp
Original file line number Diff line number Diff line change
@@ -30,7 +30,7 @@

#include <mutex>

namespace cudf::io::nvcomp {
namespace cudf::io::detail::nvcomp {
namespace {

// Dispatcher for nvcompBatched<format>DecompressGetTempSizeEx
@@ -478,4 +478,4 @@ std::optional<size_t> compress_max_allowed_chunk_size(compression_type compressi
}
}

} // namespace cudf::io::nvcomp
} // namespace cudf::io::detail::nvcomp
4 changes: 2 additions & 2 deletions cpp/src/io/comp/nvcomp_adapter.cu
Original file line number Diff line number Diff line change
@@ -23,7 +23,7 @@
#include <thrust/transform.h>
#include <thrust/tuple.h>

namespace cudf::io::nvcomp {
namespace cudf::io::detail::nvcomp {

batched_args create_batched_nvcomp_args(device_span<device_span<uint8_t const> const> inputs,
device_span<device_span<uint8_t> const> outputs,
@@ -127,4 +127,4 @@ std::pair<size_t, size_t> max_chunk_and_total_input_size(device_span<size_t cons
return {max, sum};
}

} // namespace cudf::io::nvcomp
} // namespace cudf::io::detail::nvcomp
6 changes: 3 additions & 3 deletions cpp/src/io/comp/nvcomp_adapter.cuh
Original file line number Diff line number Diff line change
@@ -16,7 +16,7 @@

#pragma once

#include "gpuinflate.hpp"
#include "comp.hpp"

#include <cudf/utilities/span.hpp>

@@ -27,7 +27,7 @@

#include <optional>

namespace cudf::io::nvcomp {
namespace cudf::io::detail::nvcomp {

struct batched_args {
rmm::device_uvector<void const*> input_data_ptrs;
@@ -76,4 +76,4 @@ void skip_unsupported_inputs(device_span<size_t> input_sizes,
std::pair<size_t, size_t> max_chunk_and_total_input_size(device_span<size_t const> input_sizes,
rmm::cuda_stream_view stream);

} // namespace cudf::io::nvcomp
} // namespace cudf::io::detail::nvcomp
6 changes: 3 additions & 3 deletions cpp/src/io/comp/nvcomp_adapter.hpp
Original file line number Diff line number Diff line change
@@ -16,7 +16,7 @@

#pragma once

#include "gpuinflate.hpp"
#include "io/comp/comp.hpp"

#include <cudf/io/nvcomp_adapter.hpp>
#include <cudf/utilities/span.hpp>
@@ -25,7 +25,7 @@

#include <optional>

namespace cudf::io::nvcomp {
namespace cudf::io::detail::nvcomp {
/**
* @brief Device batch decompression of given type.
*
@@ -103,4 +103,4 @@ void batched_compress(compression_type compression,
device_span<compression_result> results,
rmm::cuda_stream_view stream);

} // namespace cudf::io::nvcomp
} // namespace cudf::io::detail::nvcomp
6 changes: 2 additions & 4 deletions cpp/src/io/comp/snap.cu
Original file line number Diff line number Diff line change
@@ -19,8 +19,7 @@

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace io {
namespace cudf::io::detail {
constexpr int hash_bits = 12;

// TBD: Tentatively limits to 2-byte codes to prevent long copy search followed by long literal
@@ -344,5 +343,4 @@ void gpu_snap(device_span<device_span<uint8_t const> const> inputs,
}
}

} // namespace io
} // namespace cudf
} // namespace cudf::io::detail
4 changes: 2 additions & 2 deletions cpp/src/io/comp/statistics.cu
Original file line number Diff line number Diff line change
@@ -21,7 +21,7 @@
#include <cuda/functional>
#include <thrust/transform_reduce.h>

namespace cudf::io {
namespace cudf::io::detail {

writer_compression_statistics collect_compression_statistics(
device_span<device_span<uint8_t const> const> inputs,
@@ -61,4 +61,4 @@ writer_compression_statistics collect_compression_statistics(
output_size_successful};
}

} // namespace cudf::io
} // namespace cudf::io::detail
6 changes: 2 additions & 4 deletions cpp/src/io/comp/unsnap.cu
Original file line number Diff line number Diff line change
@@ -21,8 +21,7 @@

#include <cub/cub.cuh>

namespace cudf {
namespace io {
namespace cudf::io::detail {
constexpr int32_t batch_size = (1 << 5);
constexpr int32_t batch_count = (1 << 2);
constexpr int32_t prefetch_size = (1 << 9); // 512B, in 32B chunks
@@ -717,5 +716,4 @@ void gpu_unsnap(device_span<device_span<uint8_t const> const> inputs,
unsnap_kernel<128><<<dim_grid, dim_block, 0, stream.value()>>>(inputs, outputs, results);
}

} // namespace io
} // namespace cudf
} // namespace cudf::io::detail
20 changes: 10 additions & 10 deletions cpp/src/io/orc/orc_gpu.hpp
Original file line number Diff line number Diff line change
@@ -16,7 +16,7 @@

#pragma once

#include "io/comp/gpuinflate.hpp"
#include "io/comp/comp.hpp"
#include "io/statistics/statistics.cuh"
#include "io/utilities/column_buffer.hpp"
#include "orc.hpp"
@@ -73,14 +73,14 @@ struct CompressedStreamInfo {
uint8_t const* compressed_data{}; // [in] base ptr to compressed stream data
uint8_t*
uncompressed_data{}; // [in] base ptr to uncompressed stream data or NULL if not known yet
size_t compressed_data_size{}; // [in] compressed data size for this stream
device_span<uint8_t const>* dec_in_ctl{}; // [in] input buffer to decompress
device_span<uint8_t>* dec_out_ctl{}; // [in] output buffer to decompress into
device_span<compression_result> dec_res{}; // [in] results of decompression
device_span<uint8_t const>* copy_in_ctl{}; // [out] input buffer to copy
device_span<uint8_t>* copy_out_ctl{}; // [out] output buffer to copy to
uint32_t num_compressed_blocks{}; // [in,out] number of entries in decctl(in), number of
// compressed blocks(out)
size_t compressed_data_size{}; // [in] compressed data size for this stream
device_span<uint8_t const>* dec_in_ctl{}; // [in] input buffer to decompress
device_span<uint8_t>* dec_out_ctl{}; // [in] output buffer to decompress into
device_span<cudf::io::detail::compression_result> dec_res{}; // [in] results of decompression
device_span<uint8_t const>* copy_in_ctl{}; // [out] input buffer to copy
device_span<uint8_t>* copy_out_ctl{}; // [out] output buffer to copy to
uint32_t num_compressed_blocks{}; // [in,out] number of entries in decctl(in), number of
// compressed blocks(out)
uint32_t num_uncompressed_blocks{}; // [in,out] number of entries in dec_in_ctl(in), number of
// uncompressed blocks(out)
uint64_t max_uncompressed_size{}; // [out] maximum uncompressed data size of stream
@@ -414,7 +414,7 @@ std::optional<writer_compression_statistics> CompressOrcDataStreams(
bool collect_statistics,
device_2dspan<StripeStream> strm_desc,
device_2dspan<encoder_chunk_streams> enc_streams,
device_span<compression_result> comp_res,
device_span<cudf::io::detail::compression_result> comp_res,
rmm::cuda_stream_view stream);

/**
Loading
Loading