Skip to content

Commit

Permalink
Add configuration option for choosing number of GPU streams when they're not per-thread
Browse files Browse the repository at this point in the history
  • Loading branch information
msimberg committed Nov 27, 2024
1 parent 0be8cc9 commit f59aaa9
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 33 deletions.
2 changes: 2 additions & 0 deletions include/dlaf/init.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ struct configuration {
// values
// - getOptionsDescription to add a corresponding command line option
bool print_config = false;
std::size_t num_np_gpu_streams = 32;
std::size_t num_hp_gpu_streams = 32;
std::size_t num_np_gpu_streams_per_thread = 3;
std::size_t num_hp_gpu_streams_per_thread = 3;
std::size_t umpire_host_memory_pool_initial_block_bytes = 1 << 30;
Expand Down
21 changes: 21 additions & 0 deletions src/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,13 @@
namespace dlaf {
std::ostream& operator<<(std::ostream& os, const configuration& cfg) {
// clang-format off
#if PIKA_VERSION_FULL >= 0x001F00 // >= 0.31.0
os << " num_np_gpu_streams = " << cfg.num_np_gpu_streams << std::endl;
os << " num_hp_gpu_streams = " << cfg.num_hp_gpu_streams << std::endl;
#else
os << " num_np_gpu_streams_per_thread = " << cfg.num_np_gpu_streams_per_thread << std::endl;
os << " num_hp_gpu_streams_per_thread = " << cfg.num_hp_gpu_streams_per_thread << std::endl;
#endif
os << " umpire_host_memory_pool_initial_block_bytes = " << cfg.umpire_host_memory_pool_initial_block_bytes << std::endl;
os << " umpire_host_memory_pool_next_block_bytes = " << cfg.umpire_host_memory_pool_next_block_bytes << std::endl;
os << " umpire_host_memory_pool_alignment_bytes = " << cfg.umpire_host_memory_pool_alignment_bytes << std::endl;
Expand Down Expand Up @@ -122,8 +127,13 @@ struct Init<Backend::GPU> {
cfg.umpire_device_memory_pool_initial_block_bytes, cfg.umpire_device_memory_pool_alignment_bytes,
cfg.umpire_host_memory_pool_coalescing_free_ratio,
cfg.umpire_host_memory_pool_coalescing_reallocation_ratio);
#if PIKA_VERSION_FULL >= 0x001F00 // >= 0.31.0
initializeGpuPool(device, cfg.num_np_gpu_streams, cfg.num_hp_gpu_streams,
cfg.num_gpu_blas_handles, cfg.num_gpu_lapack_handles);
#else
initializeGpuPool(device, cfg.num_np_gpu_streams_per_thread, cfg.num_hp_gpu_streams_per_thread,
cfg.num_gpu_blas_handles, cfg.num_gpu_lapack_handles);
#endif
pika::cuda::experimental::detail::register_polling(pika::resource::get_thread_pool("default"));
}

Expand Down Expand Up @@ -240,8 +250,17 @@ void warnUnusedConfigurationOption(const pika::program_options::variables_map& v
void updateConfiguration(const pika::program_options::variables_map& vm, configuration& cfg) {
// clang-format off
updateConfigurationValue(vm, cfg.print_config, "PRINT_CONFIG", "print-config");
#if PIKA_VERSION_FULL >= 0x001F00 // >= 0.31.0
updateConfigurationValue(vm, cfg.num_np_gpu_streams, "NUM_NP_GPU_STREAMS", "num-np-gpu-streams");
updateConfigurationValue(vm, cfg.num_hp_gpu_streams, "NUM_HP_GPU_STREAMS", "num-hp-gpu-streams");
warnUnusedConfigurationOption(vm, "NUM_NP_GPU_STREAMS_PER_THREAD", "num-np-gpu-streams-per-thread", "only supported with pika 0.30.X or older");
warnUnusedConfigurationOption(vm, "NUM_HP_GPU_STREAMS_PER_THREAD", "num-hp-gpu-streams-per-thread", "only supported with pika 0.30.X or older");
#else
updateConfigurationValue(vm, cfg.num_np_gpu_streams_per_thread, "NUM_NP_GPU_STREAMS_PER_THREAD", "num-np-gpu-streams-per-thread");
updateConfigurationValue(vm, cfg.num_hp_gpu_streams_per_thread, "NUM_HP_GPU_STREAMS_PER_THREAD", "num-hp-gpu-streams-per-thread");
warnUnusedConfigurationOption(vm, "NUM_NP_GPU_STREAMS", "num-np-gpu-streams", "only supported with pika 0.31.0 or newer");
warnUnusedConfigurationOption(vm, "NUM_HP_GPU_STREAMS", "num-hp-gpu-streams", "only supported with pika 0.31.0 or newer");
#endif
updateConfigurationValue(vm, cfg.umpire_host_memory_pool_initial_block_bytes, "UMPIRE_HOST_MEMORY_POOL_INITIAL_BLOCK_BYTES", "umpire-host-memory-pool-initial-block-bytes");
updateConfigurationValue(vm, cfg.umpire_host_memory_pool_next_block_bytes, "UMPIRE_HOST_MEMORY_POOL_NEXT_BLOCK_BYTES", "umpire-host-memory-pool-next-block-bytes");
updateConfigurationValue(vm, cfg.umpire_host_memory_pool_alignment_bytes, "UMPIRE_HOST_MEMORY_POOL_ALIGNMENT_BYTES", "umpire-host-memory-pool-alignment-bytes");
Expand Down Expand Up @@ -316,6 +335,8 @@ pika::program_options::options_description getOptionsDescription() {
// clang-format off
desc.add_options()("dlaf:help", "Print help message");
desc.add_options()("dlaf:print-config", "Print the DLA-Future configuration");
desc.add_options()("dlaf:num-np-gpu-streams", pika::program_options::value<std::size_t>(), "Number of normal priority GPU streams");
desc.add_options()("dlaf:num-hp-gpu-streams", pika::program_options::value<std::size_t>(), "Number of high priority GPU streams");
desc.add_options()("dlaf:num-np-gpu-streams-per-thread", pika::program_options::value<std::size_t>(), "Number of normal priority GPU streams per worker thread");
desc.add_options()("dlaf:num-hp-gpu-streams-per-thread", pika::program_options::value<std::size_t>(), "Number of high priority GPU streams per worker thread");
desc.add_options()("dlaf:umpire-host-memory-pool-initial-block-bytes", pika::program_options::value<std::size_t>(), "Number of bytes to preallocate for pinned host memory pool");
Expand Down
66 changes: 33 additions & 33 deletions test/unit/init/test_init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
#include <gtest/gtest.h>

static const char* binary_name = "init_test";
static const char* env_var_name = "DLAF_NUM_HP_GPU_STREAMS_PER_THREAD";
static const char* command_line_option_name = "--dlaf:num-hp-gpu-streams-per-thread";
static const char* env_var_name = "DLAF_NUM_GPU_BLAS_HANDLES";
static const char* command_line_option_name = "--dlaf:num-gpu-blas-handles";
static const char* print_bind = "--pika:print-bind";

static int argc_without_option = 1;
Expand Down Expand Up @@ -91,10 +91,10 @@ class InitTest : public ::testing::TestWithParam<InitializerType> {};

int precedence_main(int, char*[]) {
const dlaf::configuration default_cfg;
const std::size_t default_val = default_cfg.num_hp_gpu_streams_per_thread;
// Note that this test doesn't mean that the default value has to be 3. It is
const std::size_t default_val = default_cfg.num_gpu_blas_handles;
// Note that this test doesn't mean that the default value has to be 16. It is
// included to help catch unexpected changes in the configuration handling.
EXPECT_EQ(default_val, 3);
EXPECT_EQ(default_val, 16);

// Make sure environment is clean for the test.
unsetenv(env_var_name);
Expand All @@ -103,37 +103,37 @@ int precedence_main(int, char*[]) {
{
InitializeTester init(current_initializer_type, argc_without_option, argv_without_option);
dlaf::configuration cfg = dlaf::internal::getConfiguration();
EXPECT_EQ(default_val, cfg.num_hp_gpu_streams_per_thread);
EXPECT_EQ(default_val, cfg.num_gpu_blas_handles);
}

// User configuration should take precedence over default configuration.
{
dlaf::configuration user_cfg = default_cfg;
user_cfg.num_hp_gpu_streams_per_thread = default_val + 1;
user_cfg.num_gpu_blas_handles = default_val + 1;

InitializeTester init(current_initializer_type, argc_without_option, argv_without_option, user_cfg);
dlaf::configuration cfg = dlaf::internal::getConfiguration();
EXPECT_EQ(user_cfg.num_hp_gpu_streams_per_thread, cfg.num_hp_gpu_streams_per_thread);
EXPECT_EQ(user_cfg.num_gpu_blas_handles, cfg.num_gpu_blas_handles);
}

// Environment variables should take precedence over user configuration.
{
dlaf::configuration user_cfg = default_cfg;
user_cfg.num_hp_gpu_streams_per_thread = default_val + 1;
const std::size_t env_var_val = user_cfg.num_hp_gpu_streams_per_thread + 1;
user_cfg.num_gpu_blas_handles = default_val + 1;
const std::size_t env_var_val = user_cfg.num_gpu_blas_handles + 1;
const std::string env_var_val_str = std::to_string(env_var_val);
setenv(env_var_name, env_var_val_str.c_str(), 1);

InitializeTester init(current_initializer_type, argc_without_option, argv_without_option, user_cfg);
dlaf::configuration cfg = dlaf::internal::getConfiguration();
EXPECT_EQ(env_var_val, cfg.num_hp_gpu_streams_per_thread);
EXPECT_EQ(env_var_val, cfg.num_gpu_blas_handles);
}

// Command-line options should take precedence over environment variables.
{
dlaf::configuration user_cfg = default_cfg;
user_cfg.num_hp_gpu_streams_per_thread = default_val + 1;
const std::size_t env_var_val = user_cfg.num_hp_gpu_streams_per_thread + 1;
user_cfg.num_gpu_blas_handles = default_val + 1;
const std::size_t env_var_val = user_cfg.num_gpu_blas_handles + 1;
const std::string env_var_val_str = std::to_string(env_var_val);
setenv(env_var_name, env_var_val_str.c_str(), 1);
const std::size_t command_line_option_val = env_var_val + 1;
Expand All @@ -145,7 +145,7 @@ int precedence_main(int, char*[]) {

InitializeTester init(current_initializer_type, argc_with_option, argv_with_option, user_cfg);
dlaf::configuration cfg = dlaf::internal::getConfiguration();
EXPECT_EQ(command_line_option_val, cfg.num_hp_gpu_streams_per_thread);
EXPECT_EQ(command_line_option_val, cfg.num_gpu_blas_handles);
}

pika::finalize();
Expand All @@ -160,10 +160,10 @@ TEST_P(InitTest, Precedence) {

int vm_no_command_line_option_main(pika::program_options::variables_map& vm) {
const dlaf::configuration default_cfg;
const std::size_t default_val = default_cfg.num_hp_gpu_streams_per_thread;
// Note that this test doesn't mean that the default value has to be 3. It is
const std::size_t default_val = default_cfg.num_gpu_blas_handles;
// Note that this test doesn't mean that the default value has to be 16. It is
// included to help catch unexpected changes in the configuration handling.
EXPECT_EQ(default_val, 3);
EXPECT_EQ(default_val, 16);

// Make sure environment is clean for the test.
unsetenv(env_var_name);
Expand All @@ -172,37 +172,37 @@ int vm_no_command_line_option_main(pika::program_options::variables_map& vm) {
{
InitializeTester init(current_initializer_type, vm);
dlaf::configuration cfg = dlaf::internal::getConfiguration();
EXPECT_EQ(default_val, cfg.num_hp_gpu_streams_per_thread);
EXPECT_EQ(default_val, cfg.num_gpu_blas_handles);
}

// User configuration should take precedence over default configuration.
{
dlaf::configuration user_cfg = default_cfg;
user_cfg.num_hp_gpu_streams_per_thread = default_val + 1;
user_cfg.num_gpu_blas_handles = default_val + 1;

InitializeTester init(current_initializer_type, vm, user_cfg);
dlaf::configuration cfg = dlaf::internal::getConfiguration();
EXPECT_EQ(user_cfg.num_hp_gpu_streams_per_thread, cfg.num_hp_gpu_streams_per_thread);
EXPECT_EQ(user_cfg.num_gpu_blas_handles, cfg.num_gpu_blas_handles);
}

// Environment variables should take precedence over user configuration.
{
dlaf::configuration user_cfg = default_cfg;
user_cfg.num_hp_gpu_streams_per_thread = default_val + 1;
const std::size_t env_var_val = user_cfg.num_hp_gpu_streams_per_thread + 1;
user_cfg.num_gpu_blas_handles = default_val + 1;
const std::size_t env_var_val = user_cfg.num_gpu_blas_handles + 1;
const std::string env_var_val_str = std::to_string(env_var_val);
setenv(env_var_name, env_var_val_str.c_str(), 1);

InitializeTester init(current_initializer_type, vm, user_cfg);
dlaf::configuration cfg = dlaf::internal::getConfiguration();
EXPECT_EQ(env_var_val, cfg.num_hp_gpu_streams_per_thread);
EXPECT_EQ(env_var_val, cfg.num_gpu_blas_handles);
}

// Command-line options should take precedence over environment variables.
{
dlaf::configuration user_cfg = default_cfg;
user_cfg.num_hp_gpu_streams_per_thread = default_val + 1;
const std::size_t env_var_val = user_cfg.num_hp_gpu_streams_per_thread + 1;
user_cfg.num_gpu_blas_handles = default_val + 1;
const std::size_t env_var_val = user_cfg.num_gpu_blas_handles + 1;
const std::string env_var_val_str = std::to_string(env_var_val);
setenv(env_var_name, env_var_val_str.c_str(), 1);
const std::size_t command_line_option_val = env_var_val + 1;
Expand All @@ -214,7 +214,7 @@ int vm_no_command_line_option_main(pika::program_options::variables_map& vm) {

InitializeTester init(current_initializer_type, argc_with_option, argv_with_option, user_cfg);
dlaf::configuration cfg = dlaf::internal::getConfiguration();
EXPECT_EQ(command_line_option_val, cfg.num_hp_gpu_streams_per_thread);
EXPECT_EQ(command_line_option_val, cfg.num_gpu_blas_handles);
}

pika::finalize();
Expand All @@ -229,20 +229,20 @@ TEST_P(InitTest, VariablesMapNoCommandLineOption) {

int vm_command_line_option_main(pika::program_options::variables_map& vm) {
dlaf::configuration default_cfg;
const std::size_t default_val = default_cfg.num_hp_gpu_streams_per_thread;
const std::size_t default_val = default_cfg.num_gpu_blas_handles;

// Command-line options should take precedence over everything else.
{
dlaf::configuration user_cfg = default_cfg;
user_cfg.num_hp_gpu_streams_per_thread = default_val + 1;
const std::size_t env_var_val = user_cfg.num_hp_gpu_streams_per_thread + 1;
user_cfg.num_gpu_blas_handles = default_val + 1;
const std::size_t env_var_val = user_cfg.num_gpu_blas_handles + 1;
const std::string env_var_val_str = std::to_string(env_var_val);
setenv(env_var_name, env_var_val_str.c_str(), 1);
const std::size_t command_line_option_val = env_var_val + 1;

InitializeTester init(current_initializer_type, vm, user_cfg);
dlaf::configuration cfg = dlaf::internal::getConfiguration();
EXPECT_EQ(command_line_option_val, cfg.num_hp_gpu_streams_per_thread);
EXPECT_EQ(command_line_option_val, cfg.num_gpu_blas_handles);
}

pika::finalize();
Expand All @@ -259,10 +259,10 @@ TEST_P(InitTest, VariablesMapCommandLineOption) {
p.desc_cmdline = options;

dlaf::configuration default_cfg;
const std::size_t default_val = default_cfg.num_hp_gpu_streams_per_thread;
// Note that this test doesn't mean that the default value has to be 3. It is
const std::size_t default_val = default_cfg.num_gpu_blas_handles;
// Note that this test doesn't mean that the default value has to be 16. It is
// included to help catch unexpected changes in the configuration handling.
EXPECT_EQ(default_val, 3);
EXPECT_EQ(default_val, 16);

std::size_t command_line_option_val = default_val + 3;
std::string command_line_option_val_str = std::to_string(command_line_option_val);
Expand Down

0 comments on commit f59aaa9

Please sign in to comment.