From f59aaa93c94df07a94f90db481349e14c69fcc27 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 29 Oct 2024 16:07:34 +0100 Subject: [PATCH] Add configuration option for choosing number of GPU streams when they're not per-thread --- include/dlaf/init.h | 2 ++ src/init.cpp | 21 ++++++++++++ test/unit/init/test_init.cpp | 66 ++++++++++++++++++------------------ 3 files changed, 56 insertions(+), 33 deletions(-) diff --git a/include/dlaf/init.h b/include/dlaf/init.h index fe21d84ac5..b8a85e3b97 100644 --- a/include/dlaf/init.h +++ b/include/dlaf/init.h @@ -36,6 +36,8 @@ struct configuration { // values // - getOptionsDescription to add a corresponding command line option bool print_config = false; + std::size_t num_np_gpu_streams = 32; + std::size_t num_hp_gpu_streams = 32; std::size_t num_np_gpu_streams_per_thread = 3; std::size_t num_hp_gpu_streams_per_thread = 3; std::size_t umpire_host_memory_pool_initial_block_bytes = 1 << 30; diff --git a/src/init.cpp b/src/init.cpp index 0d4366e15e..e6aeae85f8 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -30,8 +30,13 @@ namespace dlaf { std::ostream& operator<<(std::ostream& os, const configuration& cfg) { // clang-format off +#if PIKA_VERSION_FULL >= 0x001F00 // >= 0.31.0 + os << " num_np_gpu_streams = " << cfg.num_np_gpu_streams << std::endl; + os << " num_hp_gpu_streams = " << cfg.num_hp_gpu_streams << std::endl; +#else os << " num_np_gpu_streams_per_thread = " << cfg.num_np_gpu_streams_per_thread << std::endl; os << " num_hp_gpu_streams_per_thread = " << cfg.num_hp_gpu_streams_per_thread << std::endl; +#endif os << " umpire_host_memory_pool_initial_block_bytes = " << cfg.umpire_host_memory_pool_initial_block_bytes << std::endl; os << " umpire_host_memory_pool_next_block_bytes = " << cfg.umpire_host_memory_pool_next_block_bytes << std::endl; os << " umpire_host_memory_pool_alignment_bytes = " << cfg.umpire_host_memory_pool_alignment_bytes << std::endl; @@ -122,8 +127,13 @@ struct Init { cfg.umpire_device_memory_pool_initial_block_bytes, cfg.umpire_device_memory_pool_alignment_bytes, cfg.umpire_host_memory_pool_coalescing_free_ratio, cfg.umpire_host_memory_pool_coalescing_reallocation_ratio); +#if PIKA_VERSION_FULL >= 0x001F00 // >= 0.31.0 + initializeGpuPool(device, cfg.num_np_gpu_streams, cfg.num_hp_gpu_streams, + cfg.num_gpu_blas_handles, cfg.num_gpu_lapack_handles); +#else initializeGpuPool(device, cfg.num_np_gpu_streams_per_thread, cfg.num_hp_gpu_streams_per_thread, cfg.num_gpu_blas_handles, cfg.num_gpu_lapack_handles); +#endif pika::cuda::experimental::detail::register_polling(pika::resource::get_thread_pool("default")); } @@ -240,8 +250,17 @@ void warnUnusedConfigurationOption(const pika::program_options::variables_map& v void updateConfiguration(const pika::program_options::variables_map& vm, configuration& cfg) { // clang-format off updateConfigurationValue(vm, cfg.print_config, "PRINT_CONFIG", "print-config"); +#if PIKA_VERSION_FULL >= 0x001F00 // >= 0.31.0 + updateConfigurationValue(vm, cfg.num_np_gpu_streams, "NUM_NP_GPU_STREAMS", "num-np-gpu-streams"); + updateConfigurationValue(vm, cfg.num_hp_gpu_streams, "NUM_HP_GPU_STREAMS", "num-hp-gpu-streams"); + warnUnusedConfigurationOption(vm, "NUM_NP_GPU_STREAMS_PER_THREAD", "num-np-gpu-streams-per-thread", "only supported with pika 0.30.X or older"); + warnUnusedConfigurationOption(vm, "NUM_HP_GPU_STREAMS_PER_THREAD", "num-hp-gpu-streams-per-thread", "only supported with pika 0.30.X or older"); +#else updateConfigurationValue(vm, cfg.num_np_gpu_streams_per_thread, "NUM_NP_GPU_STREAMS_PER_THREAD", "num-np-gpu-streams-per-thread"); updateConfigurationValue(vm, cfg.num_hp_gpu_streams_per_thread, "NUM_HP_GPU_STREAMS_PER_THREAD", "num-hp-gpu-streams-per-thread"); + warnUnusedConfigurationOption(vm, "NUM_NP_GPU_STREAMS", "num-np-gpu-streams", "only supported with pika 0.31.0 or newer"); + warnUnusedConfigurationOption(vm, "NUM_HP_GPU_STREAMS", "num-hp-gpu-streams", "only supported with pika 0.31.0 or newer"); +#endif updateConfigurationValue(vm, cfg.umpire_host_memory_pool_initial_block_bytes, "UMPIRE_HOST_MEMORY_POOL_INITIAL_BLOCK_BYTES", "umpire-host-memory-pool-initial-block-bytes"); updateConfigurationValue(vm, cfg.umpire_host_memory_pool_next_block_bytes, "UMPIRE_HOST_MEMORY_POOL_NEXT_BLOCK_BYTES", "umpire-host-memory-pool-next-block-bytes"); updateConfigurationValue(vm, cfg.umpire_host_memory_pool_alignment_bytes, "UMPIRE_HOST_MEMORY_POOL_ALIGNMENT_BYTES", "umpire-host-memory-pool-alignment-bytes"); @@ -316,6 +335,8 @@ pika::program_options::options_description getOptionsDescription() { // clang-format off desc.add_options()("dlaf:help", "Print help message"); desc.add_options()("dlaf:print-config", "Print the DLA-Future configuration"); + desc.add_options()("dlaf:num-np-gpu-streams", pika::program_options::value(), "Number of normal priority GPU streams"); + desc.add_options()("dlaf:num-hp-gpu-streams", pika::program_options::value(), "Number of high priority GPU streams"); desc.add_options()("dlaf:num-np-gpu-streams-per-thread", pika::program_options::value(), "Number of normal priority GPU streams per worker thread"); desc.add_options()("dlaf:num-hp-gpu-streams-per-thread", pika::program_options::value(), "Number of high priority GPU streams per worker thread"); desc.add_options()("dlaf:umpire-host-memory-pool-initial-block-bytes", pika::program_options::value(), "Number of bytes to preallocate for pinned host memory pool"); diff --git a/test/unit/init/test_init.cpp b/test/unit/init/test_init.cpp index 491c5251da..cc0396d0bb 100644 --- a/test/unit/init/test_init.cpp +++ b/test/unit/init/test_init.cpp @@ -21,8 +21,8 @@ #include static const char* binary_name = "init_test"; -static const char* env_var_name = "DLAF_NUM_HP_GPU_STREAMS_PER_THREAD"; -static const char* command_line_option_name = "--dlaf:num-hp-gpu-streams-per-thread"; +static const char* env_var_name = "DLAF_NUM_GPU_BLAS_HANDLES"; +static const char* command_line_option_name = "--dlaf:num-gpu-blas-handles"; static const char* print_bind = "--pika:print-bind"; static int argc_without_option = 1; @@ -91,10 +91,10 @@ class InitTest : public ::testing::TestWithParam {}; int precedence_main(int, char*[]) { const dlaf::configuration default_cfg; - const std::size_t default_val = default_cfg.num_hp_gpu_streams_per_thread; - // Note that this test doesn't mean that the default value has to be 3. It is + const std::size_t default_val = default_cfg.num_gpu_blas_handles; + // Note that this test doesn't mean that the default value has to be 16. It is // included to help catch unexpected changes in the configuration handling. - EXPECT_EQ(default_val, 3); + EXPECT_EQ(default_val, 16); // Make sure environment is clean for the test. unsetenv(env_var_name); @@ -103,37 +103,37 @@ int precedence_main(int, char*[]) { { InitializeTester init(current_initializer_type, argc_without_option, argv_without_option); dlaf::configuration cfg = dlaf::internal::getConfiguration(); - EXPECT_EQ(default_val, cfg.num_hp_gpu_streams_per_thread); + EXPECT_EQ(default_val, cfg.num_gpu_blas_handles); } // User configuration should take precedence over default configuration. { dlaf::configuration user_cfg = default_cfg; - user_cfg.num_hp_gpu_streams_per_thread = default_val + 1; + user_cfg.num_gpu_blas_handles = default_val + 1; InitializeTester init(current_initializer_type, argc_without_option, argv_without_option, user_cfg); dlaf::configuration cfg = dlaf::internal::getConfiguration(); - EXPECT_EQ(user_cfg.num_hp_gpu_streams_per_thread, cfg.num_hp_gpu_streams_per_thread); + EXPECT_EQ(user_cfg.num_gpu_blas_handles, cfg.num_gpu_blas_handles); } // Environment variables should take precedence over user configuration. { dlaf::configuration user_cfg = default_cfg; - user_cfg.num_hp_gpu_streams_per_thread = default_val + 1; - const std::size_t env_var_val = user_cfg.num_hp_gpu_streams_per_thread + 1; + user_cfg.num_gpu_blas_handles = default_val + 1; + const std::size_t env_var_val = user_cfg.num_gpu_blas_handles + 1; const std::string env_var_val_str = std::to_string(env_var_val); setenv(env_var_name, env_var_val_str.c_str(), 1); InitializeTester init(current_initializer_type, argc_without_option, argv_without_option, user_cfg); dlaf::configuration cfg = dlaf::internal::getConfiguration(); - EXPECT_EQ(env_var_val, cfg.num_hp_gpu_streams_per_thread); + EXPECT_EQ(env_var_val, cfg.num_gpu_blas_handles); } // Command-line options should take precedence over environment variables. { dlaf::configuration user_cfg = default_cfg; - user_cfg.num_hp_gpu_streams_per_thread = default_val + 1; - const std::size_t env_var_val = user_cfg.num_hp_gpu_streams_per_thread + 1; + user_cfg.num_gpu_blas_handles = default_val + 1; + const std::size_t env_var_val = user_cfg.num_gpu_blas_handles + 1; const std::string env_var_val_str = std::to_string(env_var_val); setenv(env_var_name, env_var_val_str.c_str(), 1); const std::size_t command_line_option_val = env_var_val + 1; @@ -145,7 +145,7 @@ int precedence_main(int, char*[]) { InitializeTester init(current_initializer_type, argc_with_option, argv_with_option, user_cfg); dlaf::configuration cfg = dlaf::internal::getConfiguration(); - EXPECT_EQ(command_line_option_val, cfg.num_hp_gpu_streams_per_thread); + EXPECT_EQ(command_line_option_val, cfg.num_gpu_blas_handles); } pika::finalize(); @@ -160,10 +160,10 @@ TEST_P(InitTest, Precedence) { int vm_no_command_line_option_main(pika::program_options::variables_map& vm) { const dlaf::configuration default_cfg; - const std::size_t default_val = default_cfg.num_hp_gpu_streams_per_thread; - // Note that this test doesn't mean that the default value has to be 3. It is + const std::size_t default_val = default_cfg.num_gpu_blas_handles; + // Note that this test doesn't mean that the default value has to be 16. It is // included to help catch unexpected changes in the configuration handling. - EXPECT_EQ(default_val, 3); + EXPECT_EQ(default_val, 16); // Make sure environment is clean for the test. unsetenv(env_var_name); @@ -172,37 +172,37 @@ int vm_no_command_line_option_main(pika::program_options::variables_map& vm) { { InitializeTester init(current_initializer_type, vm); dlaf::configuration cfg = dlaf::internal::getConfiguration(); - EXPECT_EQ(default_val, cfg.num_hp_gpu_streams_per_thread); + EXPECT_EQ(default_val, cfg.num_gpu_blas_handles); } // User configuration should take precedence over default configuration. { dlaf::configuration user_cfg = default_cfg; - user_cfg.num_hp_gpu_streams_per_thread = default_val + 1; + user_cfg.num_gpu_blas_handles = default_val + 1; InitializeTester init(current_initializer_type, vm, user_cfg); dlaf::configuration cfg = dlaf::internal::getConfiguration(); - EXPECT_EQ(user_cfg.num_hp_gpu_streams_per_thread, cfg.num_hp_gpu_streams_per_thread); + EXPECT_EQ(user_cfg.num_gpu_blas_handles, cfg.num_gpu_blas_handles); } // Environment variables should take precedence over user configuration. { dlaf::configuration user_cfg = default_cfg; - user_cfg.num_hp_gpu_streams_per_thread = default_val + 1; - const std::size_t env_var_val = user_cfg.num_hp_gpu_streams_per_thread + 1; + user_cfg.num_gpu_blas_handles = default_val + 1; + const std::size_t env_var_val = user_cfg.num_gpu_blas_handles + 1; const std::string env_var_val_str = std::to_string(env_var_val); setenv(env_var_name, env_var_val_str.c_str(), 1); InitializeTester init(current_initializer_type, vm, user_cfg); dlaf::configuration cfg = dlaf::internal::getConfiguration(); - EXPECT_EQ(env_var_val, cfg.num_hp_gpu_streams_per_thread); + EXPECT_EQ(env_var_val, cfg.num_gpu_blas_handles); } // Command-line options should take precedence over environment variables. { dlaf::configuration user_cfg = default_cfg; - user_cfg.num_hp_gpu_streams_per_thread = default_val + 1; - const std::size_t env_var_val = user_cfg.num_hp_gpu_streams_per_thread + 1; + user_cfg.num_gpu_blas_handles = default_val + 1; + const std::size_t env_var_val = user_cfg.num_gpu_blas_handles + 1; const std::string env_var_val_str = std::to_string(env_var_val); setenv(env_var_name, env_var_val_str.c_str(), 1); const std::size_t command_line_option_val = env_var_val + 1; @@ -214,7 +214,7 @@ int vm_no_command_line_option_main(pika::program_options::variables_map& vm) { InitializeTester init(current_initializer_type, argc_with_option, argv_with_option, user_cfg); dlaf::configuration cfg = dlaf::internal::getConfiguration(); - EXPECT_EQ(command_line_option_val, cfg.num_hp_gpu_streams_per_thread); + EXPECT_EQ(command_line_option_val, cfg.num_gpu_blas_handles); } pika::finalize(); @@ -229,20 +229,20 @@ TEST_P(InitTest, VariablesMapNoCommandLineOption) { int vm_command_line_option_main(pika::program_options::variables_map& vm) { dlaf::configuration default_cfg; - const std::size_t default_val = default_cfg.num_hp_gpu_streams_per_thread; + const std::size_t default_val = default_cfg.num_gpu_blas_handles; // Command-line options should take precedence over everything else. { dlaf::configuration user_cfg = default_cfg; - user_cfg.num_hp_gpu_streams_per_thread = default_val + 1; - const std::size_t env_var_val = user_cfg.num_hp_gpu_streams_per_thread + 1; + user_cfg.num_gpu_blas_handles = default_val + 1; + const std::size_t env_var_val = user_cfg.num_gpu_blas_handles + 1; const std::string env_var_val_str = std::to_string(env_var_val); setenv(env_var_name, env_var_val_str.c_str(), 1); const std::size_t command_line_option_val = env_var_val + 1; InitializeTester init(current_initializer_type, vm, user_cfg); dlaf::configuration cfg = dlaf::internal::getConfiguration(); - EXPECT_EQ(command_line_option_val, cfg.num_hp_gpu_streams_per_thread); + EXPECT_EQ(command_line_option_val, cfg.num_gpu_blas_handles); } pika::finalize(); @@ -259,10 +259,10 @@ TEST_P(InitTest, VariablesMapCommandLineOption) { p.desc_cmdline = options; dlaf::configuration default_cfg; - const std::size_t default_val = default_cfg.num_hp_gpu_streams_per_thread; - // Note that this test doesn't mean that the default value has to be 3. It is + const std::size_t default_val = default_cfg.num_gpu_blas_handles; + // Note that this test doesn't mean that the default value has to be 16. It is // included to help catch unexpected changes in the configuration handling. - EXPECT_EQ(default_val, 3); + EXPECT_EQ(default_val, 16); std::size_t command_line_option_val = default_val + 3; std::string command_line_option_val_str = std::to_string(command_line_option_val);