From d71241bdc535f63f50fccef1e5f16162eae490c4 Mon Sep 17 00:00:00 2001 From: Panlichen Date: Thu, 20 May 2021 03:18:51 -0600 Subject: [PATCH 01/26] json parsing in Conf; template constructor for DefaultSubgroupAllocator --- include/derecho/conf/conf.hpp | 14 ++-- .../core/detail/subgroup_functions_impl.hpp | 64 +++++++++++++++++++ include/derecho/core/subgroup_functions.hpp | 12 ++++ src/conf/conf.cpp | 2 + 4 files changed, 86 insertions(+), 6 deletions(-) create mode 100644 include/derecho/core/detail/subgroup_functions_impl.hpp diff --git a/include/derecho/conf/conf.hpp b/include/derecho/conf/conf.hpp index 0cc603be..ba117133 100644 --- a/include/derecho/conf/conf.hpp +++ b/include/derecho/conf/conf.hpp @@ -1,8 +1,8 @@ #ifndef CONF_HPP #define CONF_HPP -#include #include "getpot/GetPot" +#include #include #include #include @@ -40,6 +40,8 @@ class Conf { #define CONF_DERECHO_MAX_P2P_REQUEST_PAYLOAD_SIZE "DERECHO/max_p2p_request_payload_size" #define CONF_DERECHO_MAX_P2P_REPLY_PAYLOAD_SIZE "DERECHO/max_p2p_reply_payload_size" #define CONF_DERECHO_P2P_WINDOW_SIZE "DERECHO/p2p_window_size" +#define CONF_DERECHO_JSON_LAYOUT "DERECHO/json_layout" +#define CONF_DERECHO_JSON_LAYOUT_PATH "DERECHO/json_layout_path" #define CONF_SUBGROUP_DEFAULT_MAX_PAYLOAD_SIZE "SUBGROUP/DEFAULT/max_payload_size" #define CONF_SUBGROUP_DEFAULT_MAX_REPLY_PAYLOAD_SIZE "SUBGROUP/DEFAULT/max_reply_payload_size" @@ -81,9 +83,9 @@ class Conf { {CONF_DERECHO_RESTART_TIMEOUT_MS, "2000"}, {CONF_DERECHO_DISABLE_PARTITIONING_SAFETY, "true"}, {CONF_DERECHO_ENABLE_BACKUP_RESTART_LEADERS, "false"}, - {CONF_DERECHO_MAX_P2P_REQUEST_PAYLOAD_SIZE, "10240"}, - {CONF_DERECHO_MAX_P2P_REPLY_PAYLOAD_SIZE, "10240"}, - {CONF_DERECHO_P2P_WINDOW_SIZE, "16"}, + {CONF_DERECHO_MAX_P2P_REQUEST_PAYLOAD_SIZE, "10240"}, + {CONF_DERECHO_MAX_P2P_REPLY_PAYLOAD_SIZE, "10240"}, + {CONF_DERECHO_P2P_WINDOW_SIZE, "16"}, {CONF_DERECHO_MAX_NODE_ID, "1024"}, // [SUBGROUP/] {CONF_SUBGROUP_DEFAULT_MAX_PAYLOAD_SIZE, "10240"}, @@ -101,8 
+103,8 @@ class Conf { {CONF_PERS_FILE_PATH, ".plog"}, {CONF_PERS_RAMDISK_PATH, "/dev/shm/volatile_t"}, {CONF_PERS_RESET, "false"}, - {CONF_PERS_MAX_LOG_ENTRY, "1048576"}, // 1M log entries. - {CONF_PERS_MAX_DATA_SIZE, "549755813888"}, // 512G total data size. + {CONF_PERS_MAX_LOG_ENTRY, "1048576"}, // 1M log entries. + {CONF_PERS_MAX_DATA_SIZE, "549755813888"}, // 512G total data size. {CONF_PERS_PRIVATE_KEY_FILE, "private_key.pem"}, // [LOGGER] {CONF_LOGGER_DEFAULT_LOG_NAME, "derecho_debug"}, diff --git a/include/derecho/core/detail/subgroup_functions_impl.hpp b/include/derecho/core/detail/subgroup_functions_impl.hpp new file mode 100644 index 00000000..a178a8a7 --- /dev/null +++ b/include/derecho/core/detail/subgroup_functions_impl.hpp @@ -0,0 +1,64 @@ +/** + * @file subgroup_functions_impl.hpp + * @brief Contains implementations of functions that parse json layout + * @date May 20, 2021 + */ + +#include +#include "../subgroup_functions.hpp" + +namespace derecho { + +/** + * parse_json_subgroup_policy() + * + * Generate a single-type subgroup allocation policy from json string + * @param json_config subgroup configuration represented in json format. 
+ * @return SubgroupAllocationPolicy + */ +SubgroupAllocationPolicy parse_json_subgroup_policy(const json&); + +template +void populate_policy_by_subgroup_type_map( + std::map>& dsa_map, + const json& layout, int type_idx) { + dsa_map.emplace(std::type_index(typeid(ReplicatedType)), parse_json_subgroup_policy(layout[type_idx])); +} + +template +void populate_policy_by_subgroup_type_map( + std::map>& dsa_map, + const json& layout, int type_idx) { + dsa_map.emplace(std::type_index(typeid(FirstReplicatedType)), parse_json_subgroup_policy(layout[type_idx])); + populate_policy_by_subgroup_type_map(dsa_map, layout, type_idx + 1); +} + +template +DefaultSubgroupAllocator::DefaultSubgroupAllocator(const json& layout) { + std::map> dsa_map; + + populate_policy_by_subgroup_type_map(dsa_map, layout, 0); + + policies = std::move(dsa_map); +} + +template +DefaultSubgroupAllocator::DefaultSubgroupAllocator(const std::string& layout_path) { + json layout; + + std::ifstream json_layout_stream(layout_path.c_str()); + if (!json_layout_stream) { + throw derecho_exception("The json layout file " + layout_path + " not found."); + // TODO: do we need further actions like return something? 
+ } + + json_layout_stream >> layout; + + std::map> dsa_map; + + populate_policy_by_subgroup_type_map(dsa_map, layout, 0); + + policies = std::move(dsa_map); +} + +} /* namespace derecho */ \ No newline at end of file diff --git a/include/derecho/core/subgroup_functions.hpp b/include/derecho/core/subgroup_functions.hpp index fdd80db6..4343c42a 100644 --- a/include/derecho/core/subgroup_functions.hpp +++ b/include/derecho/core/subgroup_functions.hpp @@ -6,13 +6,18 @@ #pragma once +#include #include +#include #include +#include "derecho_exception.hpp" #include "derecho_modes.hpp" #include "detail/derecho_internal.hpp" #include "subgroup_info.hpp" +using json = nlohmann::json; + namespace derecho { /** @@ -352,9 +357,16 @@ class DefaultSubgroupAllocator { : policies(to_copy.policies) {} DefaultSubgroupAllocator(DefaultSubgroupAllocator&&) = default; + template + DefaultSubgroupAllocator(const json& layout); + template + DefaultSubgroupAllocator(const std::string& layout_path); + subgroup_allocation_map_t operator()(const std::vector& subgroup_type_order, const std::unique_ptr& prev_view, View& curr_view) const; }; } // namespace derecho + +#include "detail/subgroup_functions_impl.hpp" diff --git a/src/conf/conf.cpp b/src/conf/conf.cpp index f55aec0c..efd94332 100644 --- a/src/conf/conf.cpp +++ b/src/conf/conf.cpp @@ -52,6 +52,8 @@ struct option Conf::long_options[] = { MAKE_LONG_OPT_ENTRY(CONF_DERECHO_MAX_P2P_REPLY_PAYLOAD_SIZE), MAKE_LONG_OPT_ENTRY(CONF_DERECHO_P2P_WINDOW_SIZE), MAKE_LONG_OPT_ENTRY(CONF_DERECHO_MAX_NODE_ID), + MAKE_LONG_OPT_ENTRY(CONF_DERECHO_JSON_LAYOUT), + MAKE_LONG_OPT_ENTRY(CONF_DERECHO_JSON_LAYOUT_PATH), // [SUBGROUP/] MAKE_LONG_OPT_ENTRY(CONF_SUBGROUP_DEFAULT_RDMC_SEND_ALGORITHM), MAKE_LONG_OPT_ENTRY(CONF_SUBGROUP_DEFAULT_MAX_PAYLOAD_SIZE), From 1e85580a6b417ebf50d44d59ee9ad2943b5b0273 Mon Sep 17 00:00:00 2001 From: Panlichen Date: Thu, 20 May 2021 05:52:39 -0600 Subject: [PATCH 02/26] DSA template constructor wrapper; simple tests --- 
CMakeLists.txt | 3 + .../core/detail/subgroup_functions_impl.hpp | 19 ++- include/derecho/core/subgroup_functions.hpp | 11 +- src/applications/demos/CMakeLists.txt | 5 + .../demos/simple_replicated_objects_json.cpp | 121 +++++++++++++++++ .../simple_replicated_objects_json_file.cpp | 123 ++++++++++++++++++ src/core/subgroup_functions.cpp | 68 +++++++--- 7 files changed, 324 insertions(+), 26 deletions(-) create mode 100644 src/applications/demos/simple_replicated_objects_json.cpp create mode 100644 src/applications/demos/simple_replicated_objects_json_file.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c41a9d36..9f1f82a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,6 +60,9 @@ find_package(spdlog 1.3.1 REQUIRED) # find openssl find_package(OpenSSL 1.1.1 REQUIRED) +# json +find_package(nlohmann_json 3.2.0 REQUIRED) + add_subdirectory(src/mutils-serialization) add_subdirectory(src/conf) add_subdirectory(src/utils) diff --git a/include/derecho/core/detail/subgroup_functions_impl.hpp b/include/derecho/core/detail/subgroup_functions_impl.hpp index a178a8a7..00367536 100644 --- a/include/derecho/core/detail/subgroup_functions_impl.hpp +++ b/include/derecho/core/detail/subgroup_functions_impl.hpp @@ -9,6 +9,17 @@ namespace derecho { +/** + * defining key strings used in the layout json file. 
+ */ +#define JSON_CONF_LAYOUT "layout" +#define JSON_CONF_TYPE_ALIAS "type_alias" +#define MIN_NODES_BY_SHARD "min_nodes_by_shard" +#define MAX_NODES_BY_SHARD "max_nodes_by_shard" +#define DELIVERY_MODES_BY_SHARD "delivery_modes_by_shard" +#define DELIVERY_MODE_ORDERED "Ordered" +#define DELIVERY_MODE_RAW "Raw" +#define PROFILES_BY_SHARD "profiles_by_shard" /** * parse_json_subgroup_policy() * @@ -34,16 +45,16 @@ void populate_policy_by_subgroup_type_map( } template -DefaultSubgroupAllocator::DefaultSubgroupAllocator(const json& layout) { +DefaultSubgroupAllocator construct_DSA_with_layout(const json& layout) { std::map> dsa_map; populate_policy_by_subgroup_type_map(dsa_map, layout, 0); - policies = std::move(dsa_map); + return DefaultSubgroupAllocator(dsa_map); } template -DefaultSubgroupAllocator::DefaultSubgroupAllocator(const std::string& layout_path) { +DefaultSubgroupAllocator construct_DSA_with_layout_path(const std::string& layout_path) { json layout; std::ifstream json_layout_stream(layout_path.c_str()); @@ -58,7 +69,7 @@ DefaultSubgroupAllocator::DefaultSubgroupAllocator(const std::string& layout_pat populate_policy_by_subgroup_type_map(dsa_map, layout, 0); - policies = std::move(dsa_map); + return DefaultSubgroupAllocator(dsa_map); } } /* namespace derecho */ \ No newline at end of file diff --git a/include/derecho/core/subgroup_functions.hpp b/include/derecho/core/subgroup_functions.hpp index 4343c42a..0966870a 100644 --- a/include/derecho/core/subgroup_functions.hpp +++ b/include/derecho/core/subgroup_functions.hpp @@ -357,16 +357,17 @@ class DefaultSubgroupAllocator { : policies(to_copy.policies) {} DefaultSubgroupAllocator(DefaultSubgroupAllocator&&) = default; - template - DefaultSubgroupAllocator(const json& layout); - template - DefaultSubgroupAllocator(const std::string& layout_path); - subgroup_allocation_map_t operator()(const std::vector& subgroup_type_order, const std::unique_ptr& prev_view, View& curr_view) const; }; +template 
+DefaultSubgroupAllocator construct_DSA_with_layout(const json& layout); + +template +DefaultSubgroupAllocator construct_DSA_with_layout_path(const std::string& layout_path); + } // namespace derecho #include "detail/subgroup_functions_impl.hpp" diff --git a/src/applications/demos/CMakeLists.txt b/src/applications/demos/CMakeLists.txt index e41285b6..ad017b5f 100644 --- a/src/applications/demos/CMakeLists.txt +++ b/src/applications/demos/CMakeLists.txt @@ -13,6 +13,11 @@ target_link_libraries(random_messages derecho) add_executable(simple_replicated_objects simple_replicated_objects.cpp) target_link_libraries(simple_replicated_objects derecho) +add_executable(simple_replicated_objects_json simple_replicated_objects_json.cpp) +target_link_libraries(simple_replicated_objects_json derecho) +add_executable(simple_replicated_objects_json_file simple_replicated_objects_json_file.cpp) +target_link_libraries(simple_replicated_objects_json_file derecho) + # overlapping_replicated_objects add_executable(overlapping_replicated_objects overlapping_replicated_objects.cpp) target_link_libraries(overlapping_replicated_objects derecho) diff --git a/src/applications/demos/simple_replicated_objects_json.cpp b/src/applications/demos/simple_replicated_objects_json.cpp new file mode 100644 index 00000000..d1a1b2f6 --- /dev/null +++ b/src/applications/demos/simple_replicated_objects_json.cpp @@ -0,0 +1,121 @@ +/** + * @file simple_replicated_objects.cpp + * + * This test creates two subgroups, one of each type Foo and Bar (defined in sample_objects.h). + * It requires at least 6 nodes to join the group; the first three are part of the Foo subgroup, + * while the next three are part of the Bar subgroup. + * Every node (identified by its node_id) makes some calls to ordered_send in their subgroup; + * some also call p2p_send. By these calls they verify that the state machine operations are + * executed properly. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +#include "sample_objects.hpp" +#include +#include + +using derecho::ExternalCaller; +using derecho::Replicated; +using std::cout; +using std::endl; + +int main(int argc, char** argv) { + // Read configurations from the command line options as well as the default config file + derecho::Conf::initialize(argc, argv); + + //Define subgroup membership using the default subgroup allocator function + //Each Replicated type will have one subgroup and one shard, with three members in the shard + derecho::SubgroupInfo subgroup_function{derecho::construct_DSA_with_layout(derecho::getConfString(CONF_DERECHO_JSON_LAYOUT))}; + //Each replicated type needs a factory; this can be used to supply constructor arguments + //for the subgroup's initial state. These must take a PersistentRegistry* argument, but + //in this case we ignore it because the replicated objects aren't persistent. + auto foo_factory = [](persistent::PersistentRegistry*, derecho::subgroup_id_t) { return std::make_unique(-1); }; + auto bar_factory = [](persistent::PersistentRegistry*, derecho::subgroup_id_t) { return std::make_unique(); }; + + derecho::Group group(derecho::UserMessageCallbacks{}, subgroup_function, {}, + std::vector{}, + foo_factory, bar_factory); + + cout << "Finished constructing/joining Group" << endl; + + //Now have each node send some updates to the Replicated objects + //The code must be different depending on which subgroup this node is in, + //which we can determine based on which membership list it appears in + uint32_t my_id = derecho::getConfUInt32(CONF_DERECHO_LOCAL_ID); + std::vector foo_members = group.get_subgroup_members(0)[0]; + std::vector bar_members = group.get_subgroup_members(0)[0]; + auto find_in_foo_results = std::find(foo_members.begin(), foo_members.end(), my_id); + if(find_in_foo_results != foo_members.end()) { + uint32_t rank_in_foo = std::distance(foo_members.begin(), find_in_foo_results); + 
Replicated& foo_rpc_handle = group.get_subgroup(); + if(rank_in_foo == 0) { + int new_value = 1; + cout << "Changing Foo's state to " << new_value << endl; + derecho::rpc::QueryResults results = foo_rpc_handle.ordered_send(new_value); + decltype(results)::ReplyMap& replies = results.get(); + cout << "Got a reply map!" << endl; + for(auto& reply_pair : replies) { + cout << "Reply from node " << reply_pair.first << " was " << std::boolalpha << reply_pair.second.get() << endl; + } + cout << "Reading Foo's state just to allow node 1's message to be delivered" << endl; + foo_rpc_handle.ordered_send(); + } else if(rank_in_foo == 1) { + int new_value = 3; + cout << "Changing Foo's state to " << new_value << endl; + derecho::rpc::QueryResults results = foo_rpc_handle.ordered_send(new_value); + decltype(results)::ReplyMap& replies = results.get(); + cout << "Got a reply map!" << endl; + for(auto& reply_pair : replies) { + cout << "Reply from node " << reply_pair.first << " was " << std::boolalpha << reply_pair.second.get() << endl; + } + } else if(rank_in_foo == 2) { + std::this_thread::sleep_for(std::chrono::seconds(1)); + cout << "Reading Foo's state from the group" << endl; + derecho::rpc::QueryResults foo_results = foo_rpc_handle.ordered_send(); + for(auto& reply_pair : foo_results.get()) { + cout << "Node " << reply_pair.first << " says the state is: " << reply_pair.second.get() << endl; + } + } + } else { + uint32_t rank_in_bar = derecho::index_of(bar_members, my_id); + Replicated& bar_rpc_handle = group.get_subgroup(); + if(rank_in_bar == 0) { + cout << "Appending to Bar." 
<< endl; + derecho::rpc::QueryResults void_future = bar_rpc_handle.ordered_send("Write from 0..."); + derecho::rpc::QueryResults::ReplyMap& sent_nodes = void_future.get(); + cout << "Append delivered to nodes: "; + for(const node_id_t& node : sent_nodes) { + cout << node << " "; + } + cout << endl; + } else if(rank_in_bar == 1) { + cout << "Appending to Bar" << endl; + bar_rpc_handle.ordered_send("Write from 1..."); + node_id_t p2p_target = foo_members[2]; + cout << "Reading Foo's state from node " << p2p_target << endl; + ExternalCaller& p2p_foo_handle = group.get_nonmember_subgroup(); + derecho::rpc::QueryResults foo_results = p2p_foo_handle.p2p_send(p2p_target); + int response = foo_results.get().get(p2p_target); + cout << " Response: " << response << endl; + } else if(rank_in_bar == 2) { + bar_rpc_handle.ordered_send("Write from 2..."); + cout << "Printing log from Bar" << endl; + derecho::rpc::QueryResults bar_results = bar_rpc_handle.ordered_send(); + for(auto& reply_pair : bar_results.get()) { + cout << "Node " << reply_pair.first << " says the log is: " << reply_pair.second.get() << endl; + } + cout << "Clearing Bar's log" << endl; + derecho::rpc::QueryResults void_future = bar_rpc_handle.ordered_send(); + } + } + + cout << "Reached end of main(), entering infinite loop so program doesn't exit" << std::endl; + while(true) { + } +} diff --git a/src/applications/demos/simple_replicated_objects_json_file.cpp b/src/applications/demos/simple_replicated_objects_json_file.cpp new file mode 100644 index 00000000..7e876301 --- /dev/null +++ b/src/applications/demos/simple_replicated_objects_json_file.cpp @@ -0,0 +1,123 @@ +/** + * @file simple_replicated_objects.cpp + * + * This test creates two subgroups, one of each type Foo and Bar (defined in sample_objects.h). + * It requires at least 6 nodes to join the group; the first three are part of the Foo subgroup, + * while the next three are part of the Bar subgroup. 
+ * Every node (identified by its node_id) makes some calls to ordered_send in their subgroup; + * some also call p2p_send. By these calls they verify that the state machine operations are + * executed properly. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "sample_objects.hpp" + +using derecho::ExternalCaller; +using derecho::Replicated; +using std::cout; +using std::endl; + +int main(int argc, char** argv) { + // Read configurations from the command line options as well as the default config file + derecho::Conf::initialize(argc, argv); + + //Define subgroup membership using the default subgroup allocator function + //Each Replicated type will have one subgroup and one shard, with three members in the shard + derecho::SubgroupInfo subgroup_function {derecho::construct_DSA_with_layout_path( + derecho::getConfString(CONF_DERECHO_JSON_LAYOUT_PATH) + )}; + //Each replicated type needs a factory; this can be used to supply constructor arguments + //for the subgroup's initial state. These must take a PersistentRegistry* argument, but + //in this case we ignore it because the replicated objects aren't persistent. 
+ auto foo_factory = [](persistent::PersistentRegistry*,derecho::subgroup_id_t) { return std::make_unique(-1); }; + auto bar_factory = [](persistent::PersistentRegistry*,derecho::subgroup_id_t) { return std::make_unique(); }; + + derecho::Group group(derecho::UserMessageCallbacks{}, subgroup_function, {}, + std::vector{}, + foo_factory, bar_factory); + + cout << "Finished constructing/joining Group" << endl; + + //Now have each node send some updates to the Replicated objects + //The code must be different depending on which subgroup this node is in, + //which we can determine based on which membership list it appears in + uint32_t my_id = derecho::getConfUInt32(CONF_DERECHO_LOCAL_ID); + std::vector foo_members = group.get_subgroup_members(0)[0]; + std::vector bar_members = group.get_subgroup_members(0)[0]; + auto find_in_foo_results = std::find(foo_members.begin(), foo_members.end(), my_id); + if(find_in_foo_results != foo_members.end()) { + uint32_t rank_in_foo = std::distance(foo_members.begin(), find_in_foo_results); + Replicated& foo_rpc_handle = group.get_subgroup(); + if(rank_in_foo == 0) { + int new_value = 1; + cout << "Changing Foo's state to " << new_value << endl; + derecho::rpc::QueryResults results = foo_rpc_handle.ordered_send(new_value); + decltype(results)::ReplyMap& replies = results.get(); + cout << "Got a reply map!" << endl; + for(auto& reply_pair : replies) { + cout << "Reply from node " << reply_pair.first << " was " << std::boolalpha << reply_pair.second.get() << endl; + } + cout << "Reading Foo's state just to allow node 1's message to be delivered" << endl; + foo_rpc_handle.ordered_send(); + } else if(rank_in_foo == 1) { + int new_value = 3; + cout << "Changing Foo's state to " << new_value << endl; + derecho::rpc::QueryResults results = foo_rpc_handle.ordered_send(new_value); + decltype(results)::ReplyMap& replies = results.get(); + cout << "Got a reply map!" 
<< endl; + for(auto& reply_pair : replies) { + cout << "Reply from node " << reply_pair.first << " was " << std::boolalpha << reply_pair.second.get() << endl; + } + } else if(rank_in_foo == 2) { + std::this_thread::sleep_for(std::chrono::seconds(1)); + cout << "Reading Foo's state from the group" << endl; + derecho::rpc::QueryResults foo_results = foo_rpc_handle.ordered_send(); + for(auto& reply_pair : foo_results.get()) { + cout << "Node " << reply_pair.first << " says the state is: " << reply_pair.second.get() << endl; + } + } + } else { + uint32_t rank_in_bar = derecho::index_of(bar_members, my_id); + Replicated& bar_rpc_handle = group.get_subgroup(); + if(rank_in_bar == 0) { + cout << "Appending to Bar." << endl; + derecho::rpc::QueryResults void_future = bar_rpc_handle.ordered_send("Write from 0..."); + derecho::rpc::QueryResults::ReplyMap& sent_nodes = void_future.get(); + cout << "Append delivered to nodes: "; + for(const node_id_t& node : sent_nodes) { + cout << node << " "; + } + cout << endl; + } else if(rank_in_bar == 1) { + cout << "Appending to Bar" << endl; + bar_rpc_handle.ordered_send("Write from 1..."); + node_id_t p2p_target = foo_members[2]; + cout << "Reading Foo's state from node " << p2p_target << endl; + ExternalCaller& p2p_foo_handle = group.get_nonmember_subgroup(); + derecho::rpc::QueryResults foo_results = p2p_foo_handle.p2p_send(p2p_target); + int response = foo_results.get().get(p2p_target); + cout << " Response: " << response << endl; + } else if(rank_in_bar == 2) { + bar_rpc_handle.ordered_send("Write from 2..."); + cout << "Printing log from Bar" << endl; + derecho::rpc::QueryResults bar_results = bar_rpc_handle.ordered_send(); + for(auto& reply_pair : bar_results.get()) { + cout << "Node " << reply_pair.first << " says the log is: " << reply_pair.second.get() << endl; + } + cout << "Clearing Bar's log" << endl; + derecho::rpc::QueryResults void_future = bar_rpc_handle.ordered_send(); + } + } + + cout << "Reached end of main(), 
entering infinite loop so program doesn't exit" << std::endl; + while(true) { + } +} diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index 26ebdde7..f9528ec7 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -39,16 +39,16 @@ subgroup_allocation_map_t one_subgroup_entire_view_raw(const std::vector& delivery_mode std::vector max_nodes_by_shard; for(const std::string& profile : profiles_by_shard) { const std::string conf_profile_prefix = "SUBGROUP/" + profile + "/"; - if (!hasCustomizedConfKey(conf_profile_prefix + min_nodes_profile_field)) { - dbg_default_error("{} not found in config.",conf_profile_prefix + min_nodes_profile_field); + if(!hasCustomizedConfKey(conf_profile_prefix + min_nodes_profile_field)) { + dbg_default_error("{} not found in config.", conf_profile_prefix + min_nodes_profile_field); return {}; } - if (!hasCustomizedConfKey(conf_profile_prefix + max_nodes_profile_field)) { - dbg_default_error("{} not found in config.",conf_profile_prefix + max_nodes_profile_field); + if(!hasCustomizedConfKey(conf_profile_prefix + max_nodes_profile_field)) { + dbg_default_error("{} not found in config.", conf_profile_prefix + max_nodes_profile_field); return {}; } min_nodes_by_shard.emplace_back(getConfUInt32(conf_profile_prefix + min_nodes_profile_field)); @@ -130,8 +129,7 @@ void DefaultSubgroupAllocator::compute_standard_memberships( if(!std::holds_alternative(policies.at(subgroup_type))) { continue; } - subgroup_layouts[subgroup_type] = - allocate_standard_subgroup_type(subgroup_type, curr_view, shard_sizes); + subgroup_layouts[subgroup_type] = allocate_standard_subgroup_type(subgroup_type, curr_view, shard_sizes); } } else { for(uint32_t subgroup_type_id = 0; subgroup_type_id < subgroup_type_order.size(); @@ -141,9 +139,8 @@ void DefaultSubgroupAllocator::compute_standard_memberships( if(!std::holds_alternative(policies.at(subgroup_type))) { continue; } - subgroup_layouts[subgroup_type] = - 
update_standard_subgroup_type(subgroup_type, subgroup_type_id, - prev_view, curr_view, shard_sizes); + subgroup_layouts[subgroup_type] = update_standard_subgroup_type(subgroup_type, subgroup_type_id, + prev_view, curr_view, shard_sizes); } } } @@ -271,7 +268,7 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_typ ? sharding_policy.shards_mode : sharding_policy.modes_by_shard[shard_num]; std::string profile = sharding_policy.shards_profile; - if (!sharding_policy.even_shards) { + if(!sharding_policy.even_shards) { profile = sharding_policy.profiles_by_shard[shard_num]; } //Put the SubView at the end of subgroup_allocation[subgroup_num] @@ -408,4 +405,41 @@ subgroup_allocation_map_t DefaultSubgroupAllocator::operator()( return subgroup_allocations; } +SubgroupAllocationPolicy parse_json_subgroup_policy(const json& jconf) { + if(!jconf.is_object() || !jconf[JSON_CONF_LAYOUT].is_array()) { + dbg_default_error("parse_json_subgroup_policy cannot parse {}.", jconf.get()); + throw derecho::derecho_exception("parse_json_subgroup_policy cannot parse" + jconf.get()); + } + + SubgroupAllocationPolicy subgroup_allocation_policy; + subgroup_allocation_policy.identical_subgroups = false; + subgroup_allocation_policy.num_subgroups = jconf[JSON_CONF_LAYOUT].size(); + subgroup_allocation_policy.shard_policy_by_subgroup = std::vector(); + + for(auto subgroup_it : jconf[JSON_CONF_LAYOUT]) { + ShardAllocationPolicy shard_allocation_policy; + size_t num_shards = subgroup_it[MIN_NODES_BY_SHARD].size(); + if(subgroup_it[MAX_NODES_BY_SHARD].size() != num_shards || subgroup_it[DELIVERY_MODES_BY_SHARD].size() != num_shards || subgroup_it[PROFILES_BY_SHARD].size() != num_shards) { + dbg_default_error("parse_json_subgroup_policy: shards does not match in at least one subgroup: {}", + subgroup_it.get()); + throw derecho::derecho_exception("parse_json_subgroup_policy: shards does not match in at least one subgroup:" + subgroup_it.get()); + } + 
shard_allocation_policy.even_shards = false; + shard_allocation_policy.num_shards = num_shards; + shard_allocation_policy.min_num_nodes_by_shard = subgroup_it[MIN_NODES_BY_SHARD].get>(); + shard_allocation_policy.max_num_nodes_by_shard = subgroup_it[MAX_NODES_BY_SHARD].get>(); + std::vector delivery_modes_by_shard; + for(auto it : subgroup_it[DELIVERY_MODES_BY_SHARD]) { + if(it == DELIVERY_MODE_RAW) { + shard_allocation_policy.modes_by_shard.push_back(Mode::UNORDERED); + } else { + shard_allocation_policy.modes_by_shard.push_back(Mode::ORDERED); + } + } + shard_allocation_policy.profiles_by_shard = subgroup_it[PROFILES_BY_SHARD].get>(); + subgroup_allocation_policy.shard_policy_by_subgroup.emplace_back(std::move(shard_allocation_policy)); + } + return subgroup_allocation_policy; +} + } // namespace derecho From 0356c6510c4799ee441fc10986b1340550d40aac Mon Sep 17 00:00:00 2001 From: Panlichen Date: Thu, 20 May 2021 21:25:09 -0600 Subject: [PATCH 03/26] local test passed --- src/applications/demos/json_cfgs/derecho.cfg | 168 ++++++++++++++++++ .../demos/json_cfgs/derecho.cfg_json_layout | 168 ++++++++++++++++++ .../demos/json_cfgs/derecho.cfg_json_path | 144 +++++++++++++++ src/applications/demos/json_cfgs/layout.json | 25 +++ .../demos/simple_replicated_objects_json.cpp | 12 +- 5 files changed, 516 insertions(+), 1 deletion(-) create mode 100644 src/applications/demos/json_cfgs/derecho.cfg create mode 100644 src/applications/demos/json_cfgs/derecho.cfg_json_layout create mode 100644 src/applications/demos/json_cfgs/derecho.cfg_json_path create mode 100644 src/applications/demos/json_cfgs/layout.json diff --git a/src/applications/demos/json_cfgs/derecho.cfg b/src/applications/demos/json_cfgs/derecho.cfg new file mode 100644 index 00000000..0b09e46e --- /dev/null +++ b/src/applications/demos/json_cfgs/derecho.cfg @@ -0,0 +1,168 @@ +[DERECHO] +# leader ip - the leader's ip address +leader_ip = 192.168.1.1 +# leader gms port - the leader's gms port +leader_gms_port = 
23580 +# leader external client port - the leader's +leader_external_port = 32645 +# my local id - each node should have a different id +local_id = 0 +# my local ip address +local_ip = 192.168.1.1 +# derecho gms port +gms_port = 23580 +# derecho rpc port +state_transfer_port = 28366 +# sst tcp port +sst_port = 37683 +# rdmc tcp port +rdmc_port = 31675 +# external port +external_port = 32645 +# this is the frequency of the failure detector thread. +# It is best to leave this to 1 ms for RDMA. If it is too high, +# you run the risk of overflowing the queue of outstanding sends. +heartbeat_ms = 100 +# sst poll completion queue timeout in millisecond +sst_poll_cq_timeout_ms = 100 +# disable partitioning safety +# By disabling this feature, the derecho is allowed to run when active +# members cannot form a majority. Please be aware of the 'split-brain' +# syndrome:https://en.wikipedia.org/wiki/Split-brain and make sure your +# application is fine with it. +# To help the user play with derecho at beginning, we disabled the +# partitioning safety. We suggest to set it to false for serious deployment +disable_partitioning_safety = false + +# maximum payload size for P2P requests +max_p2p_request_payload_size = 10240 +# maximum payload size for P2P replies +max_p2p_reply_payload_size = 10240 +# window size for P2P requests and replies +p2p_window_size = 16 +# json layout string +json_layout = ' +[ + { + "type_alias": "Foo", + "layout": [ + { + "min_nodes_by_shard": [1], + "max_nodes_by_shard": [1], + "delivery_modes_by_shard": ["Ordered"], + "profiles_by_shard": ["VCS"] + } + ] + }, + { + "type_alias": "Bar", + "layout": [ + { + "min_nodes_by_shard": [1], + "max_nodes_by_shard": [1], + "delivery_modes_by_shard": ["Ordered"], + "profiles_by_shard": ["DEFAULT"] + } + ] + } +]' + +# Subgroup configurations +# - The default subgroup settings +[SUBGROUP/DEFAULT] +# maximum payload size +# Any message with size large than this has to be broken +# down to multiple messages. 
+# Large message consumes memory space because the memory buffers +# have to be pre-allocated. +max_payload_size = 10240 +# maximum reply payload size +# This is for replies generated by ordered sends in the subgroup +max_reply_payload_size = 10240 +# maximum smc (SST's small message multicast) payload size +# If the message size is smaller or equal to this size, +# it will be sent using SST multicast, otherwise it will +# try RDMC if the message size is smaller than max_payload_size. +max_smc_payload_size = 10240 +# block size depends on your max_payload_size. +# It is only relevant if you are ever going to send a message using RDMC. +# In that case, it should be set to the same value as the max_payload_size, +# if the max_payload_size is around 1 MB. For very large messages, the block # size should be a few MBs (1 is fine). +block_size = 1048576 +# message window size +# the length of the message pipeline +window_size = 16 +# the send algorithm for RDMC. Other options are +# chain_send, sequential_send, tree_send +rdmc_send_algorithm = binomial_send +# - SAMPLE for large message settings + +[SUBGROUP/VCS] +max_payload_size = 8192 +max_reply_payload_size = 8192 +max_smc_payload_size = 10240 +# does not matter unless max_payload_size > max_smc_payload_size +block_size = 1048576 +window_size = 50 +rdmc_send_algorithm = binomial_send +num_shards = 1 +min_nodes = 1 +max_nodes = 4 + +# RDMA section contains configurations of the following +# - which RDMA device to use +# - device configurations +[RDMA] +# 1. 
provider = bgq|gni|mlx|netdir|psm|psm2|rxd|rxm|shm|sockets|udp|usnic|verbs +# possible options(only 'sockets' and 'verbs' providers are tested so far): +# bgq - The Blue Gene/Q Fabric Provider +# gni - The GNI Fabric Provider (Cray XC (TM) systems) +# mlx - The MLX Fabric Provider (UCX library) +# netdir - The Network Direct Fabric Provider (Microsoft Network Direct SPI) +# psm - The PSM Fabric Provider +# psm2 - The PSM2 Fabric Provider +# rxd - The RxD (RDM over DGRAM) Utility Provider +# rxm - The RxM (RDM over MSG) Utility Provider +# shm - The SHM Fabric Provider +# sockets - The Sockets Fabric Provider (TCP) +# udp - The UDP Fabric Provider +# usnic - The usNIC Fabric Provider (Cisco VIC) +# verbs - The Verbs Fabric Provider +provider = verbs + +# 2. domain +# For sockets provider, domain is the NIC name (ifconfig | grep -v -e "^ ") +# For verbs provider, domain is the device name (ibv_devices) +domain = mlx5_1 + +# 3. tx_depth +# tx_depth applies to hints->tx_attr->size, where hint is a struct fi_info object. +# see https://ofiwg.github.io/libfabric/master/man/fi_getinfo.3.html +tx_depth = 256 + +# 4. rx_depth: +# rx_depth applies to hints->rx_attr->size, where hint is a struct fi_info object. +# see https://ofiwg.github.io/libfabric/master/man/fi_getinfo.3.html +rx_depth = 256 + +# Persistent configurations +[PERS] +# persistent directory for file system-based logfile. +file_path = .plog +ramdisk_path = /dev/shm/volatile_t +# Reset persistent data +# CAUTION: "reset = true" removes existing persisted data!!! 
+reset = false +# Max number of the log entries in each persistent, default to 1048576 +max_log_entry = 1048576 +# Max data size in bytes for each persistent, default to 512GB +max_data_size = 549755813888 + +# Logger configurations +[LOGGER] +# default log name +default_log_name = derecho_debug +# default log level +# Available options: +# trace,debug,info,warn,error,critical,off +default_log_level = trace diff --git a/src/applications/demos/json_cfgs/derecho.cfg_json_layout b/src/applications/demos/json_cfgs/derecho.cfg_json_layout new file mode 100644 index 00000000..c01b409e --- /dev/null +++ b/src/applications/demos/json_cfgs/derecho.cfg_json_layout @@ -0,0 +1,168 @@ +[DERECHO] +# leader ip - the leader's ip address +leader_ip = 192.168.1.1 +# leader gms port - the leader's gms port +leader_gms_port = 23580 +# leader external client port - the leader's +leader_external_port = 32645 +# my local id - each node should have a different id +local_id = 0 +# my local ip address +local_ip = 192.168.1.1 +# derecho gms port +gms_port = 23580 +# derecho rpc port +state_transfer_port = 28366 +# sst tcp port +sst_port = 37683 +# rdmc tcp port +rdmc_port = 31675 +# external port +external_port = 32645 +# this is the frequency of the failure detector thread. +# It is best to leave this to 1 ms for RDMA. If it is too high, +# you run the risk of overflowing the queue of outstanding sends. +heartbeat_ms = 100 +# sst poll completion queue timeout in millisecond +sst_poll_cq_timeout_ms = 100 +# disable partitioning safety +# By disabling this feature, the derecho is allowed to run when active +# members cannot form a majority. Please be aware of the 'split-brain' +# syndrome:https://en.wikipedia.org/wiki/Split-brain and make sure your +# application is fine with it. +# To help the user play with derecho at beginning, we disabled the +# partitioning safety. 
We suggest to set it to false for serious deployment +disable_partitioning_safety = false + +# maximum payload size for P2P requests +max_p2p_request_payload_size = 10240 +# maximum payload size for P2P replies +max_p2p_reply_payload_size = 10240 +# window size for P2P requests and replies +p2p_window_size = 16 +# json layout string +json_layout = ' +[ + { + "type_alias": "Foo", + "layout": [ + { + "min_nodes_by_shard": [3], + "max_nodes_by_shard": [3], + "delivery_modes_by_shard": ["Ordered"], + "profiles_by_shard": ["VCS"] + } + ] + }, + { + "type_alias": "Bar", + "layout": [ + { + "min_nodes_by_shard": [3], + "max_nodes_by_shard": [3], + "delivery_modes_by_shard": ["Ordered"], + "profiles_by_shard": ["DEFAULT"] + } + ] + } +]' + +# Subgroup configurations +# - The default subgroup settings +[SUBGROUP/DEFAULT] +# maximum payload size +# Any message with size large than this has to be broken +# down to multiple messages. +# Large message consumes memory space because the memory buffers +# have to be pre-allocated. +max_payload_size = 10240 +# maximum reply payload size +# This is for replies generated by ordered sends in the subgroup +max_reply_payload_size = 10240 +# maximum smc (SST's small message multicast) payload size +# If the message size is smaller or equal to this size, +# it will be sent using SST multicast, otherwise it will +# try RDMC if the message size is smaller than max_payload_size. +max_smc_payload_size = 10240 +# block size depends on your max_payload_size. +# It is only relevant if you are ever going to send a message using RDMC. +# In that case, it should be set to the same value as the max_payload_size, +# if the max_payload_size is around 1 MB. For very large messages, the block # size should be a few MBs (1 is fine). +block_size = 1048576 +# message window size +# the length of the message pipeline +window_size = 16 +# the send algorithm for RDMC. 
Other options are +# chain_send, sequential_send, tree_send +rdmc_send_algorithm = binomial_send +# - SAMPLE for large message settings + +[SUBGROUP/VCS] +max_payload_size = 8192 +max_reply_payload_size = 8192 +max_smc_payload_size = 10240 +# does not matter unless max_payload_size > max_smc_payload_size +block_size = 1048576 +window_size = 50 +rdmc_send_algorithm = binomial_send +num_shards = 1 +min_nodes = 1 +max_nodes = 4 + +# RDMA section contains configurations of the following +# - which RDMA device to use +# - device configurations +[RDMA] +# 1. provider = bgq|gni|mlx|netdir|psm|psm2|rxd|rxm|shm|sockets|udp|usnic|verbs +# possible options(only 'sockets' and 'verbs' providers are tested so far): +# bgq - The Blue Gene/Q Fabric Provider +# gni - The GNI Fabric Provider (Cray XC (TM) systems) +# mlx - The MLX Fabric Provider (UCX library) +# netdir - The Network Direct Fabric Provider (Microsoft Network Direct SPI) +# psm - The PSM Fabric Provider +# psm2 - The PSM2 Fabric Provider +# rxd - The RxD (RDM over DGRAM) Utility Provider +# rxm - The RxM (RDM over MSG) Utility Provider +# shm - The SHM Fabric Provider +# sockets - The Sockets Fabric Provider (TCP) +# udp - The UDP Fabric Provider +# usnic - The usNIC Fabric Provider (Cisco VIC) +# verbs - The Verbs Fabric Provider +provider = verbs + +# 2. domain +# For sockets provider, domain is the NIC name (ifconfig | grep -v -e "^ ") +# For verbs provider, domain is the device name (ibv_devices) +domain = mlx5_1 + +# 3. tx_depth +# tx_depth applies to hints->tx_attr->size, where hint is a struct fi_info object. +# see https://ofiwg.github.io/libfabric/master/man/fi_getinfo.3.html +tx_depth = 256 + +# 4. rx_depth: +# rx_depth applies to hints->rx_attr->size, where hint is a struct fi_info object. +# see https://ofiwg.github.io/libfabric/master/man/fi_getinfo.3.html +rx_depth = 256 + +# Persistent configurations +[PERS] +# persistent directory for file system-based logfile. 
+file_path = .plog +ramdisk_path = /dev/shm/volatile_t +# Reset persistent data +# CAUTION: "reset = true" removes existing persisted data!!! +reset = false +# Max number of the log entries in each persistent, default to 1048576 +max_log_entry = 1048576 +# Max data size in bytes for each persistent, default to 512GB +max_data_size = 549755813888 + +# Logger configurations +[LOGGER] +# default log name +default_log_name = derecho_debug +# default log level +# Available options: +# trace,debug,info,warn,error,critical,off +default_log_level = trace diff --git a/src/applications/demos/json_cfgs/derecho.cfg_json_path b/src/applications/demos/json_cfgs/derecho.cfg_json_path new file mode 100644 index 00000000..60119650 --- /dev/null +++ b/src/applications/demos/json_cfgs/derecho.cfg_json_path @@ -0,0 +1,144 @@ +[DERECHO] +# leader ip - the leader's ip address +leader_ip = 192.168.1.1 +# leader gms port - the leader's gms port +leader_gms_port = 23580 +# leader external client port - the leader's +leader_external_port = 32645 +# my local id - each node should have a different id +local_id = 0 +# my local ip address +local_ip = 192.168.1.1 +# derecho gms port +gms_port = 23580 +# derecho rpc port +state_transfer_port = 28366 +# sst tcp port +sst_port = 37683 +# rdmc tcp port +rdmc_port = 31675 +# external port +external_port = 32645 +# this is the frequency of the failure detector thread. +# It is best to leave this to 1 ms for RDMA. If it is too high, +# you run the risk of overflowing the queue of outstanding sends. +heartbeat_ms = 100 +# sst poll completion queue timeout in millisecond +sst_poll_cq_timeout_ms = 100 +# disable partitioning safety +# By disabling this feature, the derecho is allowed to run when active +# members cannot form a majority. Please be aware of the 'split-brain' +# syndrome:https://en.wikipedia.org/wiki/Split-brain and make sure your +# application is fine with it. 
+# To help the user play with derecho at beginning, we disabled the +# partitioning safety. We suggest to set it to false for serious deployment +disable_partitioning_safety = false + +# maximum payload size for P2P requests +max_p2p_request_payload_size = 10240 +# maximum payload size for P2P replies +max_p2p_reply_payload_size = 10240 +# window size for P2P requests and replies +p2p_window_size = 16 +# json layout string, absolute path is more safe, the relative path needs to be calculated based on the location of the executable file +json_layout_path = '/users/Poanpan/mydata/temp/layout.json' + +# Subgroup configurations +# - The default subgroup settings +[SUBGROUP/DEFAULT] +# maximum payload size +# Any message with size large than this has to be broken +# down to multiple messages. +# Large message consumes memory space because the memory buffers +# have to be pre-allocated. +max_payload_size = 10240 +# maximum reply payload size +# This is for replies generated by ordered sends in the subgroup +max_reply_payload_size = 10240 +# maximum smc (SST's small message multicast) payload size +# If the message size is smaller or equal to this size, +# it will be sent using SST multicast, otherwise it will +# try RDMC if the message size is smaller than max_payload_size. +max_smc_payload_size = 10240 +# block size depends on your max_payload_size. +# It is only relevant if you are ever going to send a message using RDMC. +# In that case, it should be set to the same value as the max_payload_size, +# if the max_payload_size is around 1 MB. For very large messages, the block # size should be a few MBs (1 is fine). +block_size = 1048576 +# message window size +# the length of the message pipeline +window_size = 16 +# the send algorithm for RDMC. 
Other options are +# chain_send, sequential_send, tree_send +rdmc_send_algorithm = binomial_send +# - SAMPLE for large message settings + +[SUBGROUP/VCS] +max_payload_size = 8192 +max_reply_payload_size = 8192 +max_smc_payload_size = 10240 +# does not matter unless max_payload_size > max_smc_payload_size +block_size = 1048576 +window_size = 50 +rdmc_send_algorithm = binomial_send +num_shards = 1 +min_nodes = 1 +max_nodes = 4 + +# RDMA section contains configurations of the following +# - which RDMA device to use +# - device configurations +[RDMA] +# 1. provider = bgq|gni|mlx|netdir|psm|psm2|rxd|rxm|shm|sockets|udp|usnic|verbs +# possible options(only 'sockets' and 'verbs' providers are tested so far): +# bgq - The Blue Gene/Q Fabric Provider +# gni - The GNI Fabric Provider (Cray XC (TM) systems) +# mlx - The MLX Fabric Provider (UCX library) +# netdir - The Network Direct Fabric Provider (Microsoft Network Direct SPI) +# psm - The PSM Fabric Provider +# psm2 - The PSM2 Fabric Provider +# rxd - The RxD (RDM over DGRAM) Utility Provider +# rxm - The RxM (RDM over MSG) Utility Provider +# shm - The SHM Fabric Provider +# sockets - The Sockets Fabric Provider (TCP) +# udp - The UDP Fabric Provider +# usnic - The usNIC Fabric Provider (Cisco VIC) +# verbs - The Verbs Fabric Provider +provider = verbs + +# 2. domain +# For sockets provider, domain is the NIC name (ifconfig | grep -v -e "^ ") +# For verbs provider, domain is the device name (ibv_devices) +domain = mlx5_1 + +# 3. tx_depth +# tx_depth applies to hints->tx_attr->size, where hint is a struct fi_info object. +# see https://ofiwg.github.io/libfabric/master/man/fi_getinfo.3.html +tx_depth = 256 + +# 4. rx_depth: +# rx_depth applies to hints->rx_attr->size, where hint is a struct fi_info object. +# see https://ofiwg.github.io/libfabric/master/man/fi_getinfo.3.html +rx_depth = 256 + +# Persistent configurations +[PERS] +# persistent directory for file system-based logfile. 
+file_path = .plog +ramdisk_path = /dev/shm/volatile_t +# Reset persistent data +# CAUTION: "reset = true" removes existing persisted data!!! +reset = false +# Max number of the log entries in each persistent, default to 1048576 +max_log_entry = 1048576 +# Max data size in bytes for each persistent, default to 512GB +max_data_size = 549755813888 + +# Logger configurations +[LOGGER] +# default log name +default_log_name = derecho_debug +# default log level +# Available options: +# trace,debug,info,warn,error,critical,off +default_log_level = trace diff --git a/src/applications/demos/json_cfgs/layout.json b/src/applications/demos/json_cfgs/layout.json new file mode 100644 index 00000000..659c1af7 --- /dev/null +++ b/src/applications/demos/json_cfgs/layout.json @@ -0,0 +1,25 @@ + +[ + { + "type_alias": "Foo", + "layout": [ + { + "min_nodes_by_shard": [1], + "max_nodes_by_shard": [1], + "delivery_modes_by_shard": ["Ordered"], + "profiles_by_shard": ["VCS"] + } + ] + }, + { + "type_alias": "Bar", + "layout": [ + { + "min_nodes_by_shard": [1], + "max_nodes_by_shard": [1], + "delivery_modes_by_shard": ["Ordered"], + "profiles_by_shard": ["DEFAULT"] + } + ] + } +] diff --git a/src/applications/demos/simple_replicated_objects_json.cpp b/src/applications/demos/simple_replicated_objects_json.cpp index d1a1b2f6..f88e2092 100644 --- a/src/applications/demos/simple_replicated_objects_json.cpp +++ b/src/applications/demos/simple_replicated_objects_json.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -24,6 +25,7 @@ using derecho::ExternalCaller; using derecho::Replicated; using std::cout; using std::endl; +using json = nlohmann::json; int main(int argc, char** argv) { // Read configurations from the command line options as well as the default config file @@ -31,7 +33,15 @@ int main(int argc, char** argv) { //Define subgroup membership using the default subgroup allocator function //Each Replicated type will have one subgroup and one shard, with 
three members in the shard - derecho::SubgroupInfo subgroup_function{derecho::construct_DSA_with_layout(derecho::getConfString(CONF_DERECHO_JSON_LAYOUT))}; + + json json_layout = json::parse(derecho::getConfString(CONF_DERECHO_JSON_LAYOUT)); + cout << "json_layout parsed\n"; + auto dsa_object = derecho::construct_DSA_with_layout(json_layout); + cout << "dsa_object constructed\n"; + derecho::SubgroupInfo subgroup_function{dsa_object}; + + // derecho::SubgroupInfo subgroup_function{derecho::construct_DSA_with_layout(json::parse(derecho::getConfString(CONF_DERECHO_JSON_LAYOUT)))}; + //Each replicated type needs a factory; this can be used to supply constructor arguments //for the subgroup's initial state. These must take a PersistentRegistry* argument, but //in this case we ignore it because the replicated objects aren't persistent. From da12dbb1e0dc9939d1a1dc087d05b42d7448f02e Mon Sep 17 00:00:00 2001 From: Panlichen Date: Fri, 21 May 2021 01:03:12 -0600 Subject: [PATCH 04/26] function parse_json_subgroup_policy conflict with cascade --- .../core/detail/subgroup_functions_impl.hpp | 18 +++++++++--------- src/core/subgroup_functions.cpp | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/derecho/core/detail/subgroup_functions_impl.hpp b/include/derecho/core/detail/subgroup_functions_impl.hpp index 00367536..c835ae70 100644 --- a/include/derecho/core/detail/subgroup_functions_impl.hpp +++ b/include/derecho/core/detail/subgroup_functions_impl.hpp @@ -21,34 +21,34 @@ namespace derecho { #define DELIVERY_MODE_RAW "Raw" #define PROFILES_BY_SHARD "profiles_by_shard" /** - * parse_json_subgroup_policy() + * derecho_parse_json_subgroup_policy() * * Generate a single-type subgroup allocation policy from json string * @param json_config subgroup configuration represented in json format. 
* @return SubgroupAllocationPolicy */ -SubgroupAllocationPolicy parse_json_subgroup_policy(const json&); +SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json&); template -void populate_policy_by_subgroup_type_map( +void derecho_populate_policy_by_subgroup_type_map( std::map>& dsa_map, const json& layout, int type_idx) { - dsa_map.emplace(std::type_index(typeid(ReplicatedType)), parse_json_subgroup_policy(layout[type_idx])); + dsa_map.emplace(std::type_index(typeid(ReplicatedType)), derecho_parse_json_subgroup_policy(layout[type_idx])); } template -void populate_policy_by_subgroup_type_map( +void derecho_populate_policy_by_subgroup_type_map( std::map>& dsa_map, const json& layout, int type_idx) { - dsa_map.emplace(std::type_index(typeid(FirstReplicatedType)), parse_json_subgroup_policy(layout[type_idx])); - populate_policy_by_subgroup_type_map(dsa_map, layout, type_idx + 1); + dsa_map.emplace(std::type_index(typeid(FirstReplicatedType)), derecho_parse_json_subgroup_policy(layout[type_idx])); + derecho_populate_policy_by_subgroup_type_map(dsa_map, layout, type_idx + 1); } template DefaultSubgroupAllocator construct_DSA_with_layout(const json& layout) { std::map> dsa_map; - populate_policy_by_subgroup_type_map(dsa_map, layout, 0); + derecho_populate_policy_by_subgroup_type_map(dsa_map, layout, 0); return DefaultSubgroupAllocator(dsa_map); } @@ -67,7 +67,7 @@ DefaultSubgroupAllocator construct_DSA_with_layout_path(const std::string& layou std::map> dsa_map; - populate_policy_by_subgroup_type_map(dsa_map, layout, 0); + derecho_populate_policy_by_subgroup_type_map(dsa_map, layout, 0); return DefaultSubgroupAllocator(dsa_map); } diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index f9528ec7..600a2e05 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -405,7 +405,7 @@ subgroup_allocation_map_t DefaultSubgroupAllocator::operator()( return subgroup_allocations; } -SubgroupAllocationPolicy 
parse_json_subgroup_policy(const json& jconf) { +SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf) { if(!jconf.is_object() || !jconf[JSON_CONF_LAYOUT].is_array()) { dbg_default_error("parse_json_subgroup_policy cannot parse {}.", jconf.get()); throw derecho::derecho_exception("parse_json_subgroup_policy cannot parse" + jconf.get()); From 2a4786e06270f702458217c1058c8503859eba6b Mon Sep 17 00:00:00 2001 From: Panlichen Date: Mon, 24 May 2021 01:36:07 -0600 Subject: [PATCH 05/26] parse node id pool per shards --- .../core/detail/subgroup_functions_impl.hpp | 19 ++++++------ include/derecho/core/subgroup_functions.hpp | 6 ++++ src/applications/demos/json_cfgs/derecho.cfg | 30 ++----------------- .../demos/json_cfgs/layout-with-id-pool.json | 27 +++++++++++++++++ src/core/subgroup_functions.cpp | 7 ++++- 5 files changed, 52 insertions(+), 37 deletions(-) create mode 100644 src/applications/demos/json_cfgs/layout-with-id-pool.json diff --git a/include/derecho/core/detail/subgroup_functions_impl.hpp b/include/derecho/core/detail/subgroup_functions_impl.hpp index c835ae70..d515f4ce 100644 --- a/include/derecho/core/detail/subgroup_functions_impl.hpp +++ b/include/derecho/core/detail/subgroup_functions_impl.hpp @@ -4,22 +4,23 @@ * @date May 20, 2021 */ -#include #include "../subgroup_functions.hpp" +#include namespace derecho { /** * defining key strings used in the layout json file. 
*/ -#define JSON_CONF_LAYOUT "layout" -#define JSON_CONF_TYPE_ALIAS "type_alias" -#define MIN_NODES_BY_SHARD "min_nodes_by_shard" -#define MAX_NODES_BY_SHARD "max_nodes_by_shard" +#define JSON_CONF_LAYOUT "layout" +#define JSON_CONF_TYPE_ALIAS "type_alias" +#define MIN_NODES_BY_SHARD "min_nodes_by_shard" +#define MAX_NODES_BY_SHARD "max_nodes_by_shard" +#define RESERVED_NODE_ID_BY_SHRAD "reserved_node_id_by_shard" #define DELIVERY_MODES_BY_SHARD "delivery_modes_by_shard" -#define DELIVERY_MODE_ORDERED "Ordered" -#define DELIVERY_MODE_RAW "Raw" -#define PROFILES_BY_SHARD "profiles_by_shard" +#define DELIVERY_MODE_ORDERED "Ordered" +#define DELIVERY_MODE_RAW "Raw" +#define PROFILES_BY_SHARD "profiles_by_shard" /** * derecho_parse_json_subgroup_policy() * @@ -58,7 +59,7 @@ DefaultSubgroupAllocator construct_DSA_with_layout_path(const std::string& layou json layout; std::ifstream json_layout_stream(layout_path.c_str()); - if (!json_layout_stream) { + if(!json_layout_stream) { throw derecho_exception("The json layout file " + layout_path + " not found."); // TODO: do we need further actions like return something? } diff --git a/include/derecho/core/subgroup_functions.hpp b/include/derecho/core/subgroup_functions.hpp index 0966870a..7e2dded5 100644 --- a/include/derecho/core/subgroup_functions.hpp +++ b/include/derecho/core/subgroup_functions.hpp @@ -98,6 +98,12 @@ struct ShardAllocationPolicy { * indicating which profile it should use. (Ignored if even_shards is * true). */ std::vector profiles_by_shard; + /** Only used when even_shards is false. + * For each shard, this stores a list of node ids reserved for it. When a + * new node comes with id inside the list, it will be added into the + * dedicated shard directly. Overlapping among shards can be realized by + * this mechanism. 
*/ + std::vector> reserved_node_id_by_shard; }; /** diff --git a/src/applications/demos/json_cfgs/derecho.cfg b/src/applications/demos/json_cfgs/derecho.cfg index 0b09e46e..72ad9f45 100644 --- a/src/applications/demos/json_cfgs/derecho.cfg +++ b/src/applications/demos/json_cfgs/derecho.cfg @@ -6,7 +6,7 @@ leader_gms_port = 23580 # leader external client port - the leader's leader_external_port = 32645 # my local id - each node should have a different id -local_id = 0 +local_id = 2 # my local ip address local_ip = 192.168.1.1 # derecho gms port @@ -40,32 +40,8 @@ max_p2p_request_payload_size = 10240 max_p2p_reply_payload_size = 10240 # window size for P2P requests and replies p2p_window_size = 16 -# json layout string -json_layout = ' -[ - { - "type_alias": "Foo", - "layout": [ - { - "min_nodes_by_shard": [1], - "max_nodes_by_shard": [1], - "delivery_modes_by_shard": ["Ordered"], - "profiles_by_shard": ["VCS"] - } - ] - }, - { - "type_alias": "Bar", - "layout": [ - { - "min_nodes_by_shard": [1], - "max_nodes_by_shard": [1], - "delivery_modes_by_shard": ["Ordered"], - "profiles_by_shard": ["DEFAULT"] - } - ] - } -]' +# json layout string, absolute path is more safe, the relative path needs to be calculated based on the location of the executable file +json_layout_path = '/users/Poanpan/mydata/temp/layout-with-id-pool.json' # Subgroup configurations # - The default subgroup settings diff --git a/src/applications/demos/json_cfgs/layout-with-id-pool.json b/src/applications/demos/json_cfgs/layout-with-id-pool.json new file mode 100644 index 00000000..0baa256b --- /dev/null +++ b/src/applications/demos/json_cfgs/layout-with-id-pool.json @@ -0,0 +1,27 @@ + +[ + { + "type_alias": "Foo", + "layout": [ + { + "min_nodes_by_shard": [2], + "max_nodes_by_shard": [2], + "reserved_node_id_by_shard":[[1, 2, 3]], + "delivery_modes_by_shard": ["Ordered"], + "profiles_by_shard": ["VCS"] + } + ] + }, + { + "type_alias": "Bar", + "layout": [ + { + "min_nodes_by_shard": [2], + 
"max_nodes_by_shard": [2], + "reserved_node_id_by_shard":[[2, 3, 4]], + "delivery_modes_by_shard": ["Ordered"], + "profiles_by_shard": ["DEFAULT"] + } + ] + } +] diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index 600a2e05..04ad966d 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -419,7 +419,7 @@ SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf) { for(auto subgroup_it : jconf[JSON_CONF_LAYOUT]) { ShardAllocationPolicy shard_allocation_policy; size_t num_shards = subgroup_it[MIN_NODES_BY_SHARD].size(); - if(subgroup_it[MAX_NODES_BY_SHARD].size() != num_shards || subgroup_it[DELIVERY_MODES_BY_SHARD].size() != num_shards || subgroup_it[PROFILES_BY_SHARD].size() != num_shards) { + if(subgroup_it[MAX_NODES_BY_SHARD].size() != num_shards || subgroup_it[DELIVERY_MODES_BY_SHARD].size() != num_shards || subgroup_it[PROFILES_BY_SHARD].size() != num_shards || subgroup_it[RESERVED_NODE_ID_BY_SHRAD].size() != num_shards) { dbg_default_error("parse_json_subgroup_policy: shards does not match in at least one subgroup: {}", subgroup_it.get()); throw derecho::derecho_exception("parse_json_subgroup_policy: shards does not match in at least one subgroup:" + subgroup_it.get()); @@ -438,6 +438,11 @@ SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf) { } shard_allocation_policy.profiles_by_shard = subgroup_it[PROFILES_BY_SHARD].get>(); subgroup_allocation_policy.shard_policy_by_subgroup.emplace_back(std::move(shard_allocation_policy)); + + // "reserved_node_id_by_shard" is not a mandatory field + if(subgroup_it.contains(RESERVED_NODE_ID_BY_SHRAD)) { + shard_allocation_policy.reserved_node_id_by_shard = subgroup_it[RESERVED_NODE_ID_BY_SHRAD].get>>(); + } } return subgroup_allocation_policy; } From 44a5783841d3b30622f1884dd3626e54adea784a Mon Sep 17 00:00:00 2001 From: Panlichen Date: Tue, 25 May 2021 04:10:43 -0600 Subject: [PATCH 06/26] node id pool w/o 
prev_view --- .../core/detail/subgroup_functions_impl.hpp | 24 +++-- include/derecho/core/subgroup_functions.hpp | 22 ++++- src/core/subgroup_functions.cpp | 93 ++++++++++++++++++- 3 files changed, 124 insertions(+), 15 deletions(-) diff --git a/include/derecho/core/detail/subgroup_functions_impl.hpp b/include/derecho/core/detail/subgroup_functions_impl.hpp index d515f4ce..3df37089 100644 --- a/include/derecho/core/detail/subgroup_functions_impl.hpp +++ b/include/derecho/core/detail/subgroup_functions_impl.hpp @@ -28,30 +28,35 @@ namespace derecho { * @param json_config subgroup configuration represented in json format. * @return SubgroupAllocationPolicy */ -SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json&); +SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json&, std::set&); template void derecho_populate_policy_by_subgroup_type_map( std::map>& dsa_map, + std::set& all_reserved_node_ids, const json& layout, int type_idx) { - dsa_map.emplace(std::type_index(typeid(ReplicatedType)), derecho_parse_json_subgroup_policy(layout[type_idx])); + dsa_map.emplace(std::type_index(typeid(ReplicatedType)), derecho_parse_json_subgroup_policy(layout[type_idx], all_reserved_node_ids)); } template void derecho_populate_policy_by_subgroup_type_map( std::map>& dsa_map, + std::set& all_reserved_node_ids, const json& layout, int type_idx) { - dsa_map.emplace(std::type_index(typeid(FirstReplicatedType)), derecho_parse_json_subgroup_policy(layout[type_idx])); - derecho_populate_policy_by_subgroup_type_map(dsa_map, layout, type_idx + 1); + dsa_map.emplace(std::type_index(typeid(FirstReplicatedType)), derecho_parse_json_subgroup_policy(layout[type_idx], all_reserved_node_ids)); + derecho_populate_policy_by_subgroup_type_map(dsa_map, all_reserved_node_ids, layout, type_idx + 1); } template DefaultSubgroupAllocator construct_DSA_with_layout(const json& layout) { std::map> dsa_map; - derecho_populate_policy_by_subgroup_type_map(dsa_map, layout, 0); + 
std::set all_reserved_node_ids; - return DefaultSubgroupAllocator(dsa_map); + derecho_populate_policy_by_subgroup_type_map( + dsa_map, all_reserved_node_ids, layout, 0); + + return DefaultSubgroupAllocator(dsa_map, all_reserved_node_ids); } template @@ -68,9 +73,12 @@ DefaultSubgroupAllocator construct_DSA_with_layout_path(const std::string& layou std::map> dsa_map; - derecho_populate_policy_by_subgroup_type_map(dsa_map, layout, 0); + std::set all_reserved_node_ids; + + derecho_populate_policy_by_subgroup_type_map( + dsa_map, all_reserved_node_ids, layout, 0); - return DefaultSubgroupAllocator(dsa_map); + return DefaultSubgroupAllocator(dsa_map, all_reserved_node_ids); } } /* namespace derecho */ \ No newline at end of file diff --git a/include/derecho/core/subgroup_functions.hpp b/include/derecho/core/subgroup_functions.hpp index 7e2dded5..446ce6d5 100644 --- a/include/derecho/core/subgroup_functions.hpp +++ b/include/derecho/core/subgroup_functions.hpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "derecho_exception.hpp" @@ -102,8 +103,10 @@ struct ShardAllocationPolicy { * For each shard, this stores a list of node ids reserved for it. When a * new node comes with id inside the list, it will be added into the * dedicated shard directly. Overlapping among shards can be realized by - * this mechanism. */ - std::vector> reserved_node_id_by_shard; + * this mechanism. + * Need to use std::set instead of std::unordered_set to makesure set functions + * run correctly.*/ + std::vector> reserved_node_id_by_shard; }; /** @@ -268,6 +271,11 @@ class DefaultSubgroupAllocator { */ const std::map> policies; + /** + * The union set of reserved_node_ids from all shards. 
+ */ + const std::set all_reserved_node_ids; + /** * Determines how many members each shard can have in the current view, based * on each shard's policy (minimum and maximum number of nodes) and the size @@ -359,8 +367,16 @@ class DefaultSubgroupAllocator { std::variant>& policies_by_subgroup_type) : policies(policies_by_subgroup_type) {} + + DefaultSubgroupAllocator(const std::map>& + policies_by_subgroup_type, + const std::set& all_reserved_node_ids) + : policies(policies_by_subgroup_type), + all_reserved_node_ids(all_reserved_node_ids) {} DefaultSubgroupAllocator(const DefaultSubgroupAllocator& to_copy) - : policies(to_copy.policies) {} + : policies(to_copy.policies), + all_reserved_node_ids(to_copy.all_reserved_node_ids) {} DefaultSubgroupAllocator(DefaultSubgroupAllocator&&) = default; subgroup_allocation_map_t operator()(const std::vector& subgroup_type_order, diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index 04ad966d..313faefb 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -152,6 +152,18 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( const View& curr_view) const { //First, determine how many nodes we will need for a minimal allocation int nodes_needed = 0; + + // If there are reserved node_ids, and some appear in curr_view, calculate the + // intersection and count them once, in case that we want shard overlapping. 
+ std::set all_active_reserved_node_id_set(curr_view.members.begin(), curr_view.members.end()); + if(all_reserved_node_ids.size() > 0) { + std::set_intersection( + all_active_reserved_node_id_set.begin(), all_active_reserved_node_id_set.end(), + all_reserved_node_ids.begin(), all_reserved_node_ids.end(), + std::inserter(all_active_reserved_node_id_set, all_active_reserved_node_id_set.begin())); + nodes_needed = all_active_reserved_node_id_set.size(); + } + std::map>> shard_sizes; for(uint32_t subgroup_type_id = 0; subgroup_type_id < subgroup_type_order.size(); ++subgroup_type_id) { const std::type_index& subgroup_type = subgroup_type_order[subgroup_type_id]; @@ -192,6 +204,20 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( } shard_sizes[subgroup_type][subgroup_num][shard_num] = min_shard_size; nodes_needed += min_shard_size; + + // If this shard reserve existing nodes, subtract the number of these nodes from nodes_needed + std::set active_reserved_node_id_set( + sharding_policy.reserved_node_id_by_shard[shard_num].begin(), + sharding_policy.reserved_node_id_by_shard[shard_num].end()); + + std::set_intersection( + active_reserved_node_id_set.begin(), + active_reserved_node_id_set.end(), + all_active_reserved_node_id_set.begin(), + all_active_reserved_node_id_set.end(), + std::inserter(active_reserved_node_id_set, active_reserved_node_id_set.begin())); + + nodes_needed -= active_reserved_node_id_set.size(); } } } @@ -248,14 +274,54 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_typ const std::map>>& shard_sizes) const { //The size of shard_sizes[subgroup_type] is the number of subgroups of this type subgroup_shard_layout_t subgroup_allocation(shard_sizes.at(subgroup_type).size()); + + /** This function is invoked when we have no prev_view, and thus next_unassigned_rank is 0. 
+ * If we have reserved node_ids, we need to rearrange node_ids in curr_view.members into two "parts": + * the first part holds current active reserved node_ids, + * while the second part holds normal node_ids. + * We then rearrange next_unassigned_rank to be the length of the first part, for nodes in the first part + * are actually assigned, and sometimes more than once if we want to overlap shards. + */ + // We cannot modify curr_view.members inplace, which will corrupt curr_view.my_rank, curr_view.node_id_to_rank, etc. Besides, View::members is const. + std::vector curr_members; + std::set curr_member_set(curr_view.members.begin(), curr_view.members.end()); + const SubgroupAllocationPolicy& subgroup_type_policy + = std::get(policies.at(subgroup_type)); + if(all_reserved_node_ids.size() > 0) { + std::set_intersection( + curr_member_set.begin(), curr_member_set.end(), + all_reserved_node_ids.begin(), all_reserved_node_ids.end(), + std::inserter(curr_members, curr_members.end())); + curr_view.next_unassigned_rank = curr_members.size(); + std::set_difference( + curr_member_set.begin(), curr_member_set.end(), + all_reserved_node_ids.begin(), all_reserved_node_ids.end(), + std::inserter(curr_members, curr_members.end())); + } else { + curr_members = curr_view.members; + } + for(uint32_t subgroup_num = 0; subgroup_num < subgroup_allocation.size(); ++subgroup_num) { //The size of shard_sizes[subgroup_type][subgroup_num] is the number of shards for(uint32_t shard_num = 0; shard_num < shard_sizes.at(subgroup_type)[subgroup_num].size(); ++shard_num) { uint32_t shard_size = shard_sizes.at(subgroup_type)[subgroup_num][shard_num]; + std::vector desired_nodes; + + // Allocate active reserved nodes first. 
+ const std::set reserved_node_id_set = subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num]; + if(reserved_node_id_set.size() > 0) { + std::set_intersection( + reserved_node_id_set.begin(), reserved_node_id_set.end(), + curr_member_set.begin(), curr_member_set.end(), + std::inserter(desired_nodes, desired_nodes.end())); + shard_size -= desired_nodes.size(); + } + //Grab the next shard_size nodes - std::vector desired_nodes(&curr_view.members[curr_view.next_unassigned_rank], - &curr_view.members[curr_view.next_unassigned_rank + shard_size]); + desired_nodes.insert(desired_nodes.end(), + &curr_members[curr_view.next_unassigned_rank], + &curr_members[curr_view.next_unassigned_rank + shard_size]); curr_view.next_unassigned_rank += shard_size; //Figure out what the Mode policy for this shard is const SubgroupAllocationPolicy& subgroup_type_policy @@ -292,6 +358,21 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( */ const subgroup_id_t previous_assignment_offset = prev_view->subgroup_ids_by_type_id.at(subgroup_type_id)[0]; subgroup_shard_layout_t next_assignment(shard_sizes.at(subgroup_type).size()); + + /** This function is invoked if there is a prev_view, and the curr_view.members is already + * arranged into two parts: the first part hold survive nodes from prev_view, and the second + * part holds newly added nodes. The next_unassigned_rank is the length of the first part. + * If we have reserved node_ids, we need to rearrnage curr_view.members into 3 parts: + * the first part holds survive non-reserved node_ids, + * the second part holds reserved node_ids, no matter it's from prev_view or newly added, + * and the third part holds newly added non-reserved node_ids. + * We then rearrange next_unassigned_rank to be the length of the first two parts, for they are assigned already. 
+ */ + if(all_reserved_node_ids.size() > 0) { + const SubgroupAllocationPolicy& subgroup_type_policy + = std::get(policies.at(subgroup_type)); + } + for(uint32_t subgroup_num = 0; subgroup_num < next_assignment.size(); ++subgroup_num) { //The size of shard_sizes[subgroup_type][subgroup_num] is the number of shards for(uint32_t shard_num = 0; shard_num < shard_sizes.at(subgroup_type)[subgroup_num].size(); @@ -405,7 +486,7 @@ subgroup_allocation_map_t DefaultSubgroupAllocator::operator()( return subgroup_allocations; } -SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf) { +SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf, std::set& all_reserved_node_ids) { if(!jconf.is_object() || !jconf[JSON_CONF_LAYOUT].is_array()) { dbg_default_error("parse_json_subgroup_policy cannot parse {}.", jconf.get()); throw derecho::derecho_exception("parse_json_subgroup_policy cannot parse" + jconf.get()); @@ -441,7 +522,11 @@ SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf) { // "reserved_node_id_by_shard" is not a mandatory field if(subgroup_it.contains(RESERVED_NODE_ID_BY_SHRAD)) { - shard_allocation_policy.reserved_node_id_by_shard = subgroup_it[RESERVED_NODE_ID_BY_SHRAD].get>>(); + shard_allocation_policy.reserved_node_id_by_shard = subgroup_it[RESERVED_NODE_ID_BY_SHRAD].get>>(); + + for(auto reserved_id_set : shard_allocation_policy.reserved_node_id_by_shard) { + std::set_union(all_reserved_node_ids.begin(), all_reserved_node_ids.end(), reserved_id_set.begin(), reserved_id_set.end(), std::inserter(all_reserved_node_ids, all_reserved_node_ids.begin())); + } } } return subgroup_allocation_policy; From 2fec2ffaa3fd23d5cea8381c5ac0bcabd4881066 Mon Sep 17 00:00:00 2001 From: Panlichen Date: Tue, 25 May 2021 06:48:31 -0600 Subject: [PATCH 07/26] redundant debug log --- .../core/detail/subgroup_functions_impl.hpp | 35 +++++++++++++++++ src/core/subgroup_functions.cpp | 39 +++++++++++++++---- 2 files 
changed, 66 insertions(+), 8 deletions(-) diff --git a/include/derecho/core/detail/subgroup_functions_impl.hpp b/include/derecho/core/detail/subgroup_functions_impl.hpp index 3df37089..fd8cecf5 100644 --- a/include/derecho/core/detail/subgroup_functions_impl.hpp +++ b/include/derecho/core/detail/subgroup_functions_impl.hpp @@ -5,7 +5,9 @@ */ #include "../subgroup_functions.hpp" +#include #include +#include namespace derecho { @@ -30,6 +32,17 @@ namespace derecho { */ SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json&, std::set&); +template +void print_set(const std::set uset) { + std::stringstream stream; + for(auto thing : uset) { + stream << thing << ' '; + } + + std::string out = stream.str(); + dbg_default_info(out); +} + template void derecho_populate_policy_by_subgroup_type_map( std::map>& dsa_map, @@ -78,6 +91,28 @@ DefaultSubgroupAllocator construct_DSA_with_layout_path(const std::string& layou derecho_populate_policy_by_subgroup_type_map( dsa_map, all_reserved_node_ids, layout, 0); + dbg_default_info("after parsing, all_reserved_node_ids is :"); + print_set(all_reserved_node_ids); + + auto map_iter = dsa_map.begin(); + int type_ = 0; + while(map_iter != dsa_map.end()) { + dbg_default_info("for type {}", type_++); + SubgroupAllocationPolicy& subgroup_policy + = std::get(map_iter->second); + dbg_default_info("has {} subgroups", subgroup_policy.num_subgroups); + for(int sg_ = 0; sg_ < subgroup_policy.num_subgroups; sg_++) { + dbg_default_info("for subgroup {}", sg_); + auto shard_policy = subgroup_policy.shard_policy_by_subgroup[sg_]; + dbg_default_info("has {} shards", shard_policy.num_shards); + for(int shard_ = 0; shard_ < shard_policy.num_shards; shard_++) { + dbg_default_info("for shard {}, its reserved node_ids are:", shard_); + print_set(shard_policy.reserved_node_id_by_shard[shard_]); + } + } + map_iter++; + } + return DefaultSubgroupAllocator(dsa_map, all_reserved_node_ids); } diff --git a/src/core/subgroup_functions.cpp 
b/src/core/subgroup_functions.cpp index 313faefb..29cd492e 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -119,6 +119,7 @@ void DefaultSubgroupAllocator::compute_standard_memberships( View& curr_view, subgroup_allocation_map_t& subgroup_layouts) const { //First, determine how many nodes each shard can have based on their policies + dbg_default_info("ready to calculate size"); std::map>> shard_sizes = compute_standard_shard_sizes(subgroup_type_order, prev_view, curr_view); //Now we can go through and actually allocate nodes to each shard, @@ -155,13 +156,19 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( // If there are reserved node_ids, and some appear in curr_view, calculate the // intersection and count them once, in case that we want shard overlapping. - std::set all_active_reserved_node_id_set(curr_view.members.begin(), curr_view.members.end()); + std::set all_active_reserved_node_id_set; + std::set temp_node_id(curr_view.members.begin(), curr_view.members.end()); if(all_reserved_node_ids.size() > 0) { std::set_intersection( - all_active_reserved_node_id_set.begin(), all_active_reserved_node_id_set.end(), + temp_node_id.begin(), temp_node_id.end(), all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(all_active_reserved_node_id_set, all_active_reserved_node_id_set.begin())); + + dbg_default_info("all_active_reserved_node_id_set is: "); + print_set(all_active_reserved_node_id_set); + nodes_needed = all_active_reserved_node_id_set.size(); + dbg_default_info("after processing reserve, nodes_needed is {}", nodes_needed); } std::map>> shard_sizes; @@ -204,20 +211,33 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( } shard_sizes[subgroup_type][subgroup_num][shard_num] = min_shard_size; nodes_needed += min_shard_size; + dbg_default_info("after defautl calc, nodes_needed is {}", nodes_needed); // If this shard reserve existing nodes, subtract the number of these nodes from nodes_needed - 
std::set active_reserved_node_id_set( + std::set active_reserved_node_id_set; + + dbg_default_info("sharding_policy.reserved_node_id_by_shard has {} sets", sharding_policy.reserved_node_id_by_shard.size()); + + dbg_default_info("sharding_policy.reserved_node_id_by_shard[{}] for current shard is: ", shard_num); + print_set(sharding_policy.reserved_node_id_by_shard[shard_num]); + + std::set reserved_node_id_set( sharding_policy.reserved_node_id_by_shard[shard_num].begin(), sharding_policy.reserved_node_id_by_shard[shard_num].end()); - + dbg_default_info("reserved_node_id_set for current shard is: "); + print_set(reserved_node_id_set); std::set_intersection( - active_reserved_node_id_set.begin(), - active_reserved_node_id_set.end(), + reserved_node_id_set.begin(), + reserved_node_id_set.end(), all_active_reserved_node_id_set.begin(), all_active_reserved_node_id_set.end(), std::inserter(active_reserved_node_id_set, active_reserved_node_id_set.begin())); + dbg_default_info("active_reserved_node_id_set for current shard is: "); + print_set(active_reserved_node_id_set); + nodes_needed -= active_reserved_node_id_set.size(); + dbg_default_info("after substract reserve, nodes_needed is {}", nodes_needed); } } } @@ -309,7 +329,9 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_typ std::vector desired_nodes; // Allocate active reserved nodes first. 
- const std::set reserved_node_id_set = subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num]; + const std::set reserved_node_id_set( + subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].begin(), + subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].end()); if(reserved_node_id_set.size() > 0) { std::set_intersection( reserved_node_id_set.begin(), reserved_node_id_set.end(), @@ -518,7 +540,6 @@ SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf, s } } shard_allocation_policy.profiles_by_shard = subgroup_it[PROFILES_BY_SHARD].get>(); - subgroup_allocation_policy.shard_policy_by_subgroup.emplace_back(std::move(shard_allocation_policy)); // "reserved_node_id_by_shard" is not a mandatory field if(subgroup_it.contains(RESERVED_NODE_ID_BY_SHRAD)) { @@ -528,6 +549,8 @@ SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf, s std::set_union(all_reserved_node_ids.begin(), all_reserved_node_ids.end(), reserved_id_set.begin(), reserved_id_set.end(), std::inserter(all_reserved_node_ids, all_reserved_node_ids.begin())); } } + + subgroup_allocation_policy.shard_policy_by_subgroup.emplace_back(std::move(shard_allocation_policy)); } return subgroup_allocation_policy; } From 6b63babe6484a8c56a434c1b85ad7d97453d1456 Mon Sep 17 00:00:00 2001 From: Panlichen Date: Tue, 25 May 2021 07:20:50 -0600 Subject: [PATCH 08/26] allocate reserved node_id w/o perv_view done --- .../core/detail/subgroup_functions_impl.hpp | 24 +---------- src/core/subgroup_functions.cpp | 40 +++++++++---------- 2 files changed, 21 insertions(+), 43 deletions(-) diff --git a/include/derecho/core/detail/subgroup_functions_impl.hpp b/include/derecho/core/detail/subgroup_functions_impl.hpp index fd8cecf5..542b64a0 100644 --- a/include/derecho/core/detail/subgroup_functions_impl.hpp +++ 
b/include/derecho/core/detail/subgroup_functions_impl.hpp @@ -40,7 +40,7 @@ void print_set(const std::set uset) { } std::string out = stream.str(); - dbg_default_info(out); + dbg_default_debug(out); } template @@ -91,28 +91,6 @@ DefaultSubgroupAllocator construct_DSA_with_layout_path(const std::string& layou derecho_populate_policy_by_subgroup_type_map( dsa_map, all_reserved_node_ids, layout, 0); - dbg_default_info("after parsing, all_reserved_node_ids is :"); - print_set(all_reserved_node_ids); - - auto map_iter = dsa_map.begin(); - int type_ = 0; - while(map_iter != dsa_map.end()) { - dbg_default_info("for type {}", type_++); - SubgroupAllocationPolicy& subgroup_policy - = std::get(map_iter->second); - dbg_default_info("has {} subgroups", subgroup_policy.num_subgroups); - for(int sg_ = 0; sg_ < subgroup_policy.num_subgroups; sg_++) { - dbg_default_info("for subgroup {}", sg_); - auto shard_policy = subgroup_policy.shard_policy_by_subgroup[sg_]; - dbg_default_info("has {} shards", shard_policy.num_shards); - for(int shard_ = 0; shard_ < shard_policy.num_shards; shard_++) { - dbg_default_info("for shard {}, its reserved node_ids are:", shard_); - print_set(shard_policy.reserved_node_id_by_shard[shard_]); - } - } - map_iter++; - } - return DefaultSubgroupAllocator(dsa_map, all_reserved_node_ids); } diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index 29cd492e..1fd0a584 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -119,7 +119,7 @@ void DefaultSubgroupAllocator::compute_standard_memberships( View& curr_view, subgroup_allocation_map_t& subgroup_layouts) const { //First, determine how many nodes each shard can have based on their policies - dbg_default_info("ready to calculate size"); + dbg_default_debug("ready to calculate size"); std::map>> shard_sizes = compute_standard_shard_sizes(subgroup_type_order, prev_view, curr_view); //Now we can go through and actually allocate nodes to each shard, @@ 
-157,6 +157,7 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( // If there are reserved node_ids, and some appear in curr_view, calculate the // intersection and count them once, in case that we want shard overlapping. std::set all_active_reserved_node_id_set; + // We need a temp_node_id set to hold all active node ids. set_intersection can not work inplace. std::set temp_node_id(curr_view.members.begin(), curr_view.members.end()); if(all_reserved_node_ids.size() > 0) { std::set_intersection( @@ -164,11 +165,11 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(all_active_reserved_node_id_set, all_active_reserved_node_id_set.begin())); - dbg_default_info("all_active_reserved_node_id_set is: "); + dbg_default_debug("all_active_reserved_node_id_set is: "); print_set(all_active_reserved_node_id_set); nodes_needed = all_active_reserved_node_id_set.size(); - dbg_default_info("after processing reserve, nodes_needed is {}", nodes_needed); + dbg_default_debug("after processing reserve, nodes_needed is {}", nodes_needed); } std::map>> shard_sizes; @@ -211,33 +212,27 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( } shard_sizes[subgroup_type][subgroup_num][shard_num] = min_shard_size; nodes_needed += min_shard_size; - dbg_default_info("after defautl calc, nodes_needed is {}", nodes_needed); + dbg_default_debug("after defautl calc, nodes_needed is {}", nodes_needed); // If this shard reserve existing nodes, subtract the number of these nodes from nodes_needed std::set active_reserved_node_id_set; - dbg_default_info("sharding_policy.reserved_node_id_by_shard has {} sets", sharding_policy.reserved_node_id_by_shard.size()); + dbg_default_debug("sharding_policy.reserved_node_id_by_shard has {} sets", sharding_policy.reserved_node_id_by_shard.size()); - dbg_default_info("sharding_policy.reserved_node_id_by_shard[{}] for current shard is: ", shard_num); + 
dbg_default_debug("reserved_node_id_set for shard {} is: ", shard_num); print_set(sharding_policy.reserved_node_id_by_shard[shard_num]); - - std::set reserved_node_id_set( - sharding_policy.reserved_node_id_by_shard[shard_num].begin(), - sharding_policy.reserved_node_id_by_shard[shard_num].end()); - dbg_default_info("reserved_node_id_set for current shard is: "); - print_set(reserved_node_id_set); std::set_intersection( - reserved_node_id_set.begin(), - reserved_node_id_set.end(), + sharding_policy.reserved_node_id_by_shard[shard_num].begin(), + sharding_policy.reserved_node_id_by_shard[shard_num].end(), all_active_reserved_node_id_set.begin(), all_active_reserved_node_id_set.end(), std::inserter(active_reserved_node_id_set, active_reserved_node_id_set.begin())); - dbg_default_info("active_reserved_node_id_set for current shard is: "); + dbg_default_debug("active_reserved_node_id_set for current shard is: "); print_set(active_reserved_node_id_set); nodes_needed -= active_reserved_node_id_set.size(); - dbg_default_info("after substract reserve, nodes_needed is {}", nodes_needed); + dbg_default_debug("after substract reserve, nodes_needed is {}", nodes_needed); } } } @@ -344,6 +339,8 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_typ desired_nodes.insert(desired_nodes.end(), &curr_members[curr_view.next_unassigned_rank], &curr_members[curr_view.next_unassigned_rank + shard_size]); + // NOTE: If there are unassigned reserved nodes(which should not happen in regular use), next_unassigned_rank only points to + // unassigned normal nodes, which I(Lichen) think is just OK and not in conflict with its definition. 
curr_view.next_unassigned_rank += shard_size; //Figure out what the Mode policy for this shard is const SubgroupAllocationPolicy& subgroup_type_policy @@ -385,14 +382,17 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( * arranged into two parts: the first part hold survive nodes from prev_view, and the second * part holds newly added nodes. The next_unassigned_rank is the length of the first part. * If we have reserved node_ids, we need to rearrnage curr_view.members into 3 parts: - * the first part holds survive non-reserved node_ids, - * the second part holds reserved node_ids, no matter it's from prev_view or newly added, + * the first part holds reserved node_ids, no matter it's from prev_view or newly added, + * the second part holds survive non-reserved node_ids, * and the third part holds newly added non-reserved node_ids. * We then rearrange next_unassigned_rank to be the length of the first two parts, for they are assigned already. */ + std::vector curr_members; + std::set curr_member_set(curr_view.members.begin(), curr_view.members.end()); + const SubgroupAllocationPolicy& subgroup_type_policy + = std::get(policies.at(subgroup_type)); if(all_reserved_node_ids.size() > 0) { - const SubgroupAllocationPolicy& subgroup_type_policy - = std::get(policies.at(subgroup_type)); + } for(uint32_t subgroup_num = 0; subgroup_num < next_assignment.size(); ++subgroup_num) { From 01ba208dc62ffe0acde302d393495668db7134d9 Mon Sep 17 00:00:00 2001 From: Panlichen Date: Tue, 25 May 2021 08:23:59 -0600 Subject: [PATCH 09/26] allocate reserved node_id with prev_view --- src/core/subgroup_functions.cpp | 40 +++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index 1fd0a584..53061ccb 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -339,7 +339,7 @@ subgroup_shard_layout_t 
DefaultSubgroupAllocator::allocate_standard_subgroup_typ desired_nodes.insert(desired_nodes.end(), &curr_members[curr_view.next_unassigned_rank], &curr_members[curr_view.next_unassigned_rank + shard_size]); - // NOTE: If there are unassigned reserved nodes(which should not happen in regular use), next_unassigned_rank only points to + // NOTE: If there are unassigned reserved nodes(which should not happen in regular use), next_unassigned_rank only points to // unassigned normal nodes, which I(Lichen) think is just OK and not in conflict with its definition. curr_view.next_unassigned_rank += shard_size; //Figure out what the Mode policy for this shard is @@ -382,17 +382,31 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( * arranged into two parts: the first part hold survive nodes from prev_view, and the second * part holds newly added nodes. The next_unassigned_rank is the length of the first part. * If we have reserved node_ids, we need to rearrnage curr_view.members into 3 parts: - * the first part holds reserved node_ids, no matter it's from prev_view or newly added, - * the second part holds survive non-reserved node_ids, + * the first part holds survive non-reserved node_ids, + * the second part holds reserved node_ids, no matter it's from prev_view or newly added, * and the third part holds newly added non-reserved node_ids. * We then rearrange next_unassigned_rank to be the length of the first two parts, for they are assigned already. 
*/ std::vector curr_members; std::set curr_member_set(curr_view.members.begin(), curr_view.members.end()); + std::set survive_member_set(curr_view.members.begin(), curr_view.members.begin() + curr_view.next_unassigned_rank); + std::set added_member_set(curr_view.members.begin() + curr_view.next_unassigned_rank, curr_view.members.end()); const SubgroupAllocationPolicy& subgroup_type_policy = std::get(policies.at(subgroup_type)); if(all_reserved_node_ids.size() > 0) { - + std::set_difference( + survive_member_set.begin(), survive_member_set.end(), + all_reserved_node_ids.begin(), all_reserved_node_ids.end(), + std::inserter(curr_members, curr_members.end())); + std::set_intersection( + curr_member_set.begin(), curr_member_set.end(), + all_reserved_node_ids.begin(), all_reserved_node_ids.end(), + std::inserter(curr_members, curr_members.end())); + curr_view.next_unassigned_rank = curr_members.size(); + std::set_difference( + added_member_set.begin(), added_member_set.end(), + all_reserved_node_ids.begin(), all_reserved_node_ids.end(), + std::inserter(curr_members, curr_members.end())); } for(uint32_t subgroup_num = 0; subgroup_num < next_assignment.size(); ++subgroup_num) { @@ -405,6 +419,7 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( std::vector next_shard_members; std::vector next_is_sender; uint32_t allocated_shard_size = shard_sizes.at(subgroup_type)[subgroup_num][shard_num]; + //Add all the non-failed nodes from the previous assignment for(std::size_t rank = 0; rank < previous_shard_assignment.members.size(); ++rank) { if(curr_view.rank_of(previous_shard_assignment.members[rank]) == -1) { @@ -413,6 +428,23 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( next_shard_members.push_back(previous_shard_assignment.members[rank]); next_is_sender.push_back(previous_shard_assignment.is_sender[rank]); } + + //Add newly added reserved nodes + const std::set reserved_node_id_set( + 
subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].begin(), + subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].end()); + std::set added_reserved_node_id_set; + std::set_intersection( + added_member_set.begin(), added_member_set.end(), + reserved_node_id_set.begin(), reserved_node_id_set.end(), + std::inserter(added_reserved_node_id_set, added_reserved_node_id_set.end())); + if(added_reserved_node_id_set.size() > 0) { + for(auto node_id : added_reserved_node_id_set) { + next_shard_members.push_back(node_id); + next_is_sender.push_back(true); + } + } + //Add additional members if needed while(next_shard_members.size() < allocated_shard_size) { //This must be true if compute_standard_shard_sizes said our view was adequate From a36d6ca017a095454dad7f79c21c54d2283faa9a Mon Sep 17 00:00:00 2001 From: Panlichen Date: Tue, 25 May 2021 22:55:24 -0600 Subject: [PATCH 10/26] add debug prints; prepare to re-design --- .../core/detail/subgroup_functions_impl.hpp | 12 +++- src/core/subgroup_functions.cpp | 57 +++++++++++++++---- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/include/derecho/core/detail/subgroup_functions_impl.hpp b/include/derecho/core/detail/subgroup_functions_impl.hpp index 542b64a0..8a86aaac 100644 --- a/include/derecho/core/detail/subgroup_functions_impl.hpp +++ b/include/derecho/core/detail/subgroup_functions_impl.hpp @@ -33,7 +33,17 @@ namespace derecho { SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json&, std::set&); template -void print_set(const std::set uset) { +void print_set(const std::set& uset) { + std::stringstream stream; + for(auto thing : uset) { + stream << thing << ' '; + } + + std::string out = stream.str(); + dbg_default_debug(out); +} +template +void print_set(const std::vector& uset) { std::stringstream stream; for(auto thing : uset) { stream << thing << ' '; diff --git a/src/core/subgroup_functions.cpp 
b/src/core/subgroup_functions.cpp index 53061ccb..92ce645b 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -130,6 +130,7 @@ void DefaultSubgroupAllocator::compute_standard_memberships( if(!std::holds_alternative(policies.at(subgroup_type))) { continue; } + dbg_default_debug("Without prev_view, assingn node to type {}", std::string(subgroup_type.name())); subgroup_layouts[subgroup_type] = allocate_standard_subgroup_type(subgroup_type, curr_view, shard_sizes); } } else { @@ -140,6 +141,7 @@ void DefaultSubgroupAllocator::compute_standard_memberships( if(!std::holds_alternative(policies.at(subgroup_type))) { continue; } + dbg_default_debug("With prev_view, assingn node to type {}", std::string(subgroup_type.name())); subgroup_layouts[subgroup_type] = update_standard_subgroup_type(subgroup_type, subgroup_type_id, prev_view, curr_view, shard_sizes); } @@ -165,11 +167,11 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(all_active_reserved_node_id_set, all_active_reserved_node_id_set.begin())); - dbg_default_debug("all_active_reserved_node_id_set is: "); + dbg_default_debug("Parsing all_active_reserved_node_id_set: "); print_set(all_active_reserved_node_id_set); nodes_needed = all_active_reserved_node_id_set.size(); - dbg_default_debug("after processing reserve, nodes_needed is {}", nodes_needed); + dbg_default_debug("After counting all_active_reserved_node_id_set, nodes_needed is {}", nodes_needed); } std::map>> shard_sizes; @@ -212,14 +214,14 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( } shard_sizes[subgroup_type][subgroup_num][shard_num] = min_shard_size; nodes_needed += min_shard_size; - dbg_default_debug("after defautl calc, nodes_needed is {}", nodes_needed); + dbg_default_debug("Counting size in type {}, subgroup {}, shard {}", + std::string(subgroup_type.name()), subgroup_num, shard_num); + dbg_default_debug("Default nodes_needed 
(considering prev_view) is {}", nodes_needed); // If this shard reserve existing nodes, subtract the number of these nodes from nodes_needed std::set active_reserved_node_id_set; - dbg_default_debug("sharding_policy.reserved_node_id_by_shard has {} sets", sharding_policy.reserved_node_id_by_shard.size()); - - dbg_default_debug("reserved_node_id_set for shard {} is: ", shard_num); + dbg_default_debug("Current shard's reserved_node_id_set is: "); print_set(sharding_policy.reserved_node_id_by_shard[shard_num]); std::set_intersection( sharding_policy.reserved_node_id_by_shard[shard_num].begin(), @@ -228,11 +230,11 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( all_active_reserved_node_id_set.end(), std::inserter(active_reserved_node_id_set, active_reserved_node_id_set.begin())); - dbg_default_debug("active_reserved_node_id_set for current shard is: "); + dbg_default_debug("The active_reserved_node_id_set for current shard is: "); print_set(active_reserved_node_id_set); nodes_needed -= active_reserved_node_id_set.size(); - dbg_default_debug("after substract reserve, nodes_needed is {}", nodes_needed); + dbg_default_debug("After substract active reserved node, nodes_needed is {}", nodes_needed); } } } @@ -241,6 +243,12 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( if(nodes_needed > curr_view.num_members) { throw subgroup_provisioning_exception(); } + /** With reserved nodes, even if nodes_needed == curr_view.num_members, all current nodes + * may be occupied by 1 shard because it reserved all of them. Therefore we need to check + * if min_shard_size for each shard is satisfied. 
+ */ + + //Now go back and add one node to each shard evenly, until either they reach max size //or we run out of members in curr_view bool done_adding = false; @@ -308,6 +316,7 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_typ all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(curr_members, curr_members.end())); curr_view.next_unassigned_rank = curr_members.size(); + dbg_default_debug("Initial curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); std::set_difference( curr_member_set.begin(), curr_member_set.end(), all_reserved_node_ids.begin(), all_reserved_node_ids.end(), @@ -323,6 +332,8 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_typ uint32_t shard_size = shard_sizes.at(subgroup_type)[subgroup_num][shard_num]; std::vector desired_nodes; + dbg_default_debug("For subgroup {}, shard {}, it needs {} nodes", subgroup_num, shard_num, shard_size); + // Allocate active reserved nodes first. const std::set reserved_node_id_set( subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].begin(), @@ -333,6 +344,8 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_typ curr_member_set.begin(), curr_member_set.end(), std::inserter(desired_nodes, desired_nodes.end())); shard_size -= desired_nodes.size(); + dbg_default_debug("Assign it {} active reserved nodes:", desired_nodes.size()); + print_set(std::set(desired_nodes.begin(), desired_nodes.end())); } //Grab the next shard_size nodes @@ -342,6 +355,10 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_typ // NOTE: If there are unassigned reserved nodes(which should not happen in regular use), next_unassigned_rank only points to // unassigned normal nodes, which I(Lichen) think is just OK and not in conflict with its definition. 
curr_view.next_unassigned_rank += shard_size; + + dbg_default_debug("Assign it {} nodes in total, with curr_view.next_unassigned_rank {}:", desired_nodes.size(), curr_view.next_unassigned_rank); + print_set(desired_nodes); + //Figure out what the Mode policy for this shard is const SubgroupAllocationPolicy& subgroup_type_policy = std::get(policies.at(subgroup_type)); @@ -398,15 +415,25 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( survive_member_set.begin(), survive_member_set.end(), all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(curr_members, curr_members.end())); + dbg_default_debug("With survive non-reserved nodes, curr_members is:"); + print_set(curr_members); + std::set_intersection( curr_member_set.begin(), curr_member_set.end(), all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(curr_members, curr_members.end())); + dbg_default_debug("Adding reserved nodes(survice or newly added), curr_members is:"); + print_set(curr_members); + curr_view.next_unassigned_rank = curr_members.size(); + dbg_default_debug("Initial curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); + std::set_difference( added_member_set.begin(), added_member_set.end(), all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(curr_members, curr_members.end())); + dbg_default_debug("Adding newly added non-reserved nodes, curr_members is:"); + print_set(curr_members); } for(uint32_t subgroup_num = 0; subgroup_num < next_assignment.size(); ++subgroup_num) { @@ -419,6 +446,7 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( std::vector next_shard_members; std::vector next_is_sender; uint32_t allocated_shard_size = shard_sizes.at(subgroup_type)[subgroup_num][shard_num]; + dbg_default_debug("For subgroup {}, shard {}, it needs {} nodes", subgroup_num, shard_num, allocated_shard_size); //Add all the non-failed nodes from the previous assignment 
for(std::size_t rank = 0; rank < previous_shard_assignment.members.size(); ++rank) { @@ -428,15 +456,15 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( next_shard_members.push_back(previous_shard_assignment.members[rank]); next_is_sender.push_back(previous_shard_assignment.is_sender[rank]); } + dbg_default_debug("Assigning survive nodes, next_shard_members is:"); + print_set(next_shard_members); //Add newly added reserved nodes - const std::set reserved_node_id_set( - subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].begin(), - subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].end()); std::set added_reserved_node_id_set; std::set_intersection( added_member_set.begin(), added_member_set.end(), - reserved_node_id_set.begin(), reserved_node_id_set.end(), + subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].begin(), + subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].end(), std::inserter(added_reserved_node_id_set, added_reserved_node_id_set.end())); if(added_reserved_node_id_set.size() > 0) { for(auto node_id : added_reserved_node_id_set) { @@ -444,6 +472,8 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( next_is_sender.push_back(true); } } + dbg_default_debug("Assigning newly added reserved nodes, next_shard_members is:"); + print_set(next_shard_members); //Add additional members if needed while(next_shard_members.size() < allocated_shard_size) { @@ -454,6 +484,9 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( //All members start out as senders with the default allocator next_is_sender.push_back(true); } + dbg_default_debug("Assign it {} nodes in total, with curr_view.next_unassigned_rank {}:", next_shard_members.size(), curr_view.next_unassigned_rank); + 
print_set(next_shard_members); + next_assignment[subgroup_num].emplace_back(curr_view.make_subview(next_shard_members, previous_shard_assignment.mode, next_is_sender, From d0d1e18e192adcdb8db19e2448e51022fa4bd78c Mon Sep 17 00:00:00 2001 From: Panlichen Date: Tue, 25 May 2021 23:54:05 -0600 Subject: [PATCH 11/26] inherent node_id set --- src/core/subgroup_functions.cpp | 86 ++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index 92ce645b..301dc9e0 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -159,11 +159,10 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( // If there are reserved node_ids, and some appear in curr_view, calculate the // intersection and count them once, in case that we want shard overlapping. std::set all_active_reserved_node_id_set; - // We need a temp_node_id set to hold all active node ids. set_intersection can not work inplace. - std::set temp_node_id(curr_view.members.begin(), curr_view.members.end()); + std::set curr_member_set(curr_view.members.begin(), curr_view.members.end()); if(all_reserved_node_ids.size() > 0) { std::set_intersection( - temp_node_id.begin(), temp_node_id.end(), + curr_member_set.begin(), curr_member_set.end(), all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(all_active_reserved_node_id_set, all_active_reserved_node_id_set.begin())); @@ -193,8 +192,15 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( : subgroup_type_policy.shard_policy_by_subgroup[subgroup_num]; shard_sizes[subgroup_type][subgroup_num].resize(sharding_policy.num_shards); for(int shard_num = 0; shard_num < sharding_policy.num_shards; ++shard_num) { - int min_shard_size = sharding_policy.even_shards ? sharding_policy.min_nodes_per_shard + size_t min_shard_size = sharding_policy.even_shards ? 
sharding_policy.min_nodes_per_shard : sharding_policy.min_num_nodes_by_shard[shard_num]; + + /** With reserved nodes, we do not assign nodes evenly across shards. + * All current nodes may be occupied by 1 shard because it reserved all of them. + * Therefore we need to check if min_shard_size for each shard is satisfied, + * and thus we need to maintain nodes_needed more carefully. + */ + std::set survived_node_set; //If there was a previous view, we must include all non-failed nodes from that view if(prev_view) { const subgroup_id_t previous_assignment_offset @@ -202,23 +208,14 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( const SubView& previous_shard_assignment = prev_view->subgroup_shard_views[previous_assignment_offset + subgroup_num] [shard_num]; - int num_nonfailed_nodes = 0; for(std::size_t rank = 0; rank < previous_shard_assignment.members.size(); ++rank) { if(curr_view.rank_of(previous_shard_assignment.members[rank]) != -1) { - num_nonfailed_nodes++; + survived_node_set.insert(previous_shard_assignment.members[rank]); } } - if(num_nonfailed_nodes > min_shard_size) { - min_shard_size = num_nonfailed_nodes; - } } - shard_sizes[subgroup_type][subgroup_num][shard_num] = min_shard_size; - nodes_needed += min_shard_size; - dbg_default_debug("Counting size in type {}, subgroup {}, shard {}", - std::string(subgroup_type.name()), subgroup_num, shard_num); - dbg_default_debug("Default nodes_needed (considering prev_view) is {}", nodes_needed); - // If this shard reserve existing nodes, subtract the number of these nodes from nodes_needed + // Check whehter this shard reserve existing nodes. 
std::set active_reserved_node_id_set; dbg_default_debug("Current shard's reserved_node_id_set is: "); @@ -226,15 +223,36 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( std::set_intersection( sharding_policy.reserved_node_id_by_shard[shard_num].begin(), sharding_policy.reserved_node_id_by_shard[shard_num].end(), - all_active_reserved_node_id_set.begin(), - all_active_reserved_node_id_set.end(), + curr_member_set.begin(), + curr_member_set.end(), std::inserter(active_reserved_node_id_set, active_reserved_node_id_set.begin())); dbg_default_debug("The active_reserved_node_id_set for current shard is: "); print_set(active_reserved_node_id_set); - nodes_needed -= active_reserved_node_id_set.size(); - dbg_default_debug("After substract active reserved node, nodes_needed is {}", nodes_needed); + /** The inherent_node_id_set holds node_ids that are "inherent" or "intrinsic" + * to the this shard, for the node_ids are either surviving nodes from "the same shard" + * in the prev_view or reserved for this shard, or both. + */ + std::set inherent_node_id_set; + std::set_union( + survived_node_set.begin(), survived_node_set.end(), + active_reserved_node_id_set.begin(), active_reserved_node_id_set.end(), + std::inserter(inherent_node_id_set, inherent_node_id_set.end())); + dbg_default_debug("The inherent_node_id_set for current shard is: "); + print_set(inherent_node_id_set); + // All active reserved nodes just count once. + nodes_needed += inherent_node_id_set.size() - active_reserved_node_id_set.size(); + + if(inherent_node_id_set.size() >= min_shard_size) { + min_shard_size = inherent_node_id_set.size(); + } else { + nodes_needed += min_shard_size - inherent_node_id_set.size(); + } + + // TODO: If we add a lot of nodes reserved for a shard, the number of which is larger than this shard's max_num_nodes, we will still add those nodes to it. + // Seems OK? 
+ shard_sizes[subgroup_type][subgroup_num][shard_num] = min_shard_size; } } } @@ -243,11 +261,6 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( if(nodes_needed > curr_view.num_members) { throw subgroup_provisioning_exception(); } - /** With reserved nodes, even if nodes_needed == curr_view.num_members, all current nodes - * may be occupied by 1 shard because it reserved all of them. Therefore we need to check - * if min_shard_size for each shard is satisfied. - */ - //Now go back and add one node to each shard evenly, until either they reach max size //or we run out of members in curr_view @@ -303,7 +316,8 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_typ * the first part holds current active reserved node_ids, * while the second part holds normal node_ids. * We then rearrange next_unassigned_rank to be the length of the first part, for nodes in the first part - * are actually assigned, and sometimes more than once if we want to overlap shards. + * are inherent nodes for some shards, and sure will be assigned, + * sometimes more than once if we want to overlap shards. */ // We cannot modify curr_view.members inplace, which will corrupt curr_view.my_rank, curr_view.node_id_to_rank, etc. Besides, View::members is const. std::vector curr_members; @@ -398,11 +412,10 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( /** This function is invoked if there is a prev_view, and the curr_view.members is already * arranged into two parts: the first part hold survive nodes from prev_view, and the second * part holds newly added nodes. The next_unassigned_rank is the length of the first part. - * If we have reserved node_ids, we need to rearrnage curr_view.members into 3 parts: - * the first part holds survive non-reserved node_ids, - * the second part holds reserved node_ids, no matter it's from prev_view or newly added, - * and the third part holds newly added non-reserved node_ids. 
- * We then rearrange next_unassigned_rank to be the length of the first two parts, for they are assigned already. + * If we have reserved node_ids, we need to rearrnage curr_view.members into 2 parts: + * the first part holds inherent node_ids for shards, which is composed with survive node_ids and reserved node_ids + * the second part holds newly added non-reserved node_ids. + * We then rearrange next_unassigned_rank to be the length of the first part, for they will sure be assigned. */ std::vector curr_members; std::set curr_member_set(curr_view.members.begin(), curr_view.members.end()); @@ -411,18 +424,11 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( const SubgroupAllocationPolicy& subgroup_type_policy = std::get(policies.at(subgroup_type)); if(all_reserved_node_ids.size() > 0) { - std::set_difference( + std::set_union( survive_member_set.begin(), survive_member_set.end(), all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(curr_members, curr_members.end())); - dbg_default_debug("With survive non-reserved nodes, curr_members is:"); - print_set(curr_members); - - std::set_intersection( - curr_member_set.begin(), curr_member_set.end(), - all_reserved_node_ids.begin(), all_reserved_node_ids.end(), - std::inserter(curr_members, curr_members.end())); - dbg_default_debug("Adding reserved nodes(survice or newly added), curr_members is:"); + dbg_default_debug("With inherent nodes, curr_members is:"); print_set(curr_members); curr_view.next_unassigned_rank = curr_members.size(); @@ -472,7 +478,7 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( next_is_sender.push_back(true); } } - dbg_default_debug("Assigning newly added reserved nodes, next_shard_members is:"); + dbg_default_debug("After assigning newly added reserved nodes, we get {} inherent node_id(s) assigned, next_shard_members is:", next_shard_members.size()); print_set(next_shard_members); //Add additional members if 
needed From bb715b023625822d0b96a86239652385fb61690b Mon Sep 17 00:00:00 2001 From: Panlichen Date: Wed, 26 May 2021 01:00:14 -0600 Subject: [PATCH 12/26] bug fix in update_standard_subgroup_type --- src/core/subgroup_functions.cpp | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index 301dc9e0..8c485e46 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -130,7 +130,7 @@ void DefaultSubgroupAllocator::compute_standard_memberships( if(!std::holds_alternative(policies.at(subgroup_type))) { continue; } - dbg_default_debug("Without prev_view, assingn node to type {}", std::string(subgroup_type.name())); + dbg_default_debug("Without prev_view, assign node to type {}", std::string(subgroup_type.name())); subgroup_layouts[subgroup_type] = allocate_standard_subgroup_type(subgroup_type, curr_view, shard_sizes); } } else { @@ -193,13 +193,16 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( shard_sizes[subgroup_type][subgroup_num].resize(sharding_policy.num_shards); for(int shard_num = 0; shard_num < sharding_policy.num_shards; ++shard_num) { size_t min_shard_size = sharding_policy.even_shards ? sharding_policy.min_nodes_per_shard - : sharding_policy.min_num_nodes_by_shard[shard_num]; + : sharding_policy.min_num_nodes_by_shard[shard_num]; /** With reserved nodes, we do not assign nodes evenly across shards. * All current nodes may be occupied by 1 shard because it reserved all of them. * Therefore we need to check if min_shard_size for each shard is satisfied, * and thus we need to maintain nodes_needed more carefully. 
*/ + + dbg_default_debug("Calculate node size for type {}, subgroup_num {}, shard_num {}", std::string(subgroup_type.name()), subgroup_num, shard_num); + std::set survived_node_set; //If there was a previous view, we must include all non-failed nodes from that view if(prev_view) { @@ -324,13 +327,15 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_typ std::set curr_member_set(curr_view.members.begin(), curr_view.members.end()); const SubgroupAllocationPolicy& subgroup_type_policy = std::get(policies.at(subgroup_type)); + + dbg_default_debug("Initial curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); if(all_reserved_node_ids.size() > 0) { std::set_intersection( curr_member_set.begin(), curr_member_set.end(), all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(curr_members, curr_members.end())); curr_view.next_unassigned_rank = curr_members.size(); - dbg_default_debug("Initial curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); + dbg_default_debug("After rearranging inherent node_ids, curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); std::set_difference( curr_member_set.begin(), curr_member_set.end(), all_reserved_node_ids.begin(), all_reserved_node_ids.end(), @@ -346,7 +351,7 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_typ uint32_t shard_size = shard_sizes.at(subgroup_type)[subgroup_num][shard_num]; std::vector desired_nodes; - dbg_default_debug("For subgroup {}, shard {}, it needs {} nodes", subgroup_num, shard_num, shard_size); + dbg_default_debug("For subgroup {}, shard {}, it is assigned {} nodes", subgroup_num, shard_num, shard_size); // Allocate active reserved nodes first. 
const std::set reserved_node_id_set( @@ -423,16 +428,22 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( std::set added_member_set(curr_view.members.begin() + curr_view.next_unassigned_rank, curr_view.members.end()); const SubgroupAllocationPolicy& subgroup_type_policy = std::get(policies.at(subgroup_type)); + dbg_default_debug("Initial curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); if(all_reserved_node_ids.size() > 0) { + std::set active_reserved_node_id_set; + std::set_intersection( + curr_member_set.begin(), curr_member_set.end(), + all_reserved_node_ids.begin(), all_reserved_node_ids.end(), + std::inserter(active_reserved_node_id_set, active_reserved_node_id_set.end())); std::set_union( survive_member_set.begin(), survive_member_set.end(), - all_reserved_node_ids.begin(), all_reserved_node_ids.end(), + active_reserved_node_id_set.begin(), active_reserved_node_id_set.end(), std::inserter(curr_members, curr_members.end())); dbg_default_debug("With inherent nodes, curr_members is:"); print_set(curr_members); curr_view.next_unassigned_rank = curr_members.size(); - dbg_default_debug("Initial curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); + dbg_default_debug("After rearranging inherent node_ids, curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); std::set_difference( added_member_set.begin(), added_member_set.end(), @@ -452,7 +463,7 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( std::vector next_shard_members; std::vector next_is_sender; uint32_t allocated_shard_size = shard_sizes.at(subgroup_type)[subgroup_num][shard_num]; - dbg_default_debug("For subgroup {}, shard {}, it needs {} nodes", subgroup_num, shard_num, allocated_shard_size); + dbg_default_debug("For subgroup {}, shard {}, it is assigned {} nodes", subgroup_num, shard_num, allocated_shard_size); //Add all the non-failed nodes from the previous assignment 
for(std::size_t rank = 0; rank < previous_shard_assignment.members.size(); ++rank) { @@ -462,7 +473,7 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( next_shard_members.push_back(previous_shard_assignment.members[rank]); next_is_sender.push_back(previous_shard_assignment.is_sender[rank]); } - dbg_default_debug("Assigning survive nodes, next_shard_members is:"); + dbg_default_debug("After assigning survive nodes, next_shard_members is:"); print_set(next_shard_members); //Add newly added reserved nodes From 738dbf48eabce5ce1699094d199dee9bd64f2c67 Mon Sep 17 00:00:00 2001 From: Panlichen Date: Wed, 26 May 2021 01:44:18 -0600 Subject: [PATCH 13/26] add print --- src/core/subgroup_functions.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index 8c485e46..757d57bd 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -119,7 +119,7 @@ void DefaultSubgroupAllocator::compute_standard_memberships( View& curr_view, subgroup_allocation_map_t& subgroup_layouts) const { //First, determine how many nodes each shard can have based on their policies - dbg_default_debug("ready to calculate size"); + dbg_default_debug("Ready to calculate size"); std::map>> shard_sizes = compute_standard_shard_sizes(subgroup_type_order, prev_view, curr_view); //Now we can go through and actually allocate nodes to each shard, @@ -221,8 +221,6 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( // Check whehter this shard reserve existing nodes. 
std::set active_reserved_node_id_set; - dbg_default_debug("Current shard's reserved_node_id_set is: "); - print_set(sharding_policy.reserved_node_id_by_shard[shard_num]); std::set_intersection( sharding_policy.reserved_node_id_by_shard[shard_num].begin(), sharding_policy.reserved_node_id_by_shard[shard_num].end(), @@ -426,6 +424,12 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( std::set curr_member_set(curr_view.members.begin(), curr_view.members.end()); std::set survive_member_set(curr_view.members.begin(), curr_view.members.begin() + curr_view.next_unassigned_rank); std::set added_member_set(curr_view.members.begin() + curr_view.next_unassigned_rank, curr_view.members.end()); + + dbg_default_debug("The survive_member_set is:"); + print_set(survive_member_set); + dbg_default_debug("The added_member_set is:"); + print_set(added_member_set); + const SubgroupAllocationPolicy& subgroup_type_policy = std::get(policies.at(subgroup_type)); dbg_default_debug("Initial curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); @@ -477,6 +481,12 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( print_set(next_shard_members); //Add newly added reserved nodes + + dbg_default_debug("The total added_member_set is:"); + print_set(added_member_set); + dbg_default_debug("Current shard reserved node_id set is:"); + print_set(subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num]); + std::set added_reserved_node_id_set; std::set_intersection( added_member_set.begin(), added_member_set.end(), @@ -484,6 +494,9 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].end(), std::inserter(added_reserved_node_id_set, added_reserved_node_id_set.end())); if(added_reserved_node_id_set.size() > 0) { + dbg_default_debug("The 
added_reserved_node_id_set is not empty:"); + print_set(added_reserved_node_id_set); + for(auto node_id : added_reserved_node_id_set) { next_shard_members.push_back(node_id); next_is_sender.push_back(true); From c81262ded5b46bd56d8c9d9972afb2b2891adb6d Mon Sep 17 00:00:00 2001 From: Panlichen Date: Wed, 26 May 2021 02:19:10 -0600 Subject: [PATCH 14/26] bug fix for added_member_set and survive_member_set --- include/derecho/core/subgroup_functions.hpp | 4 +++- src/core/subgroup_functions.cpp | 15 +++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/derecho/core/subgroup_functions.hpp b/include/derecho/core/subgroup_functions.hpp index 446ce6d5..f84ffe3f 100644 --- a/include/derecho/core/subgroup_functions.hpp +++ b/include/derecho/core/subgroup_functions.hpp @@ -329,7 +329,9 @@ class DefaultSubgroupAllocator { const subgroup_type_id_t subgroup_type_id, const std::unique_ptr& prev_view, View& curr_view, - const std::map>>& shard_sizes) const; + const std::map>>& shard_sizes, + const std::set& survive_member_set, + const std::set& added_member_set) const; /** * Helper function that implements the subgroup allocation algorithm for all diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index 757d57bd..8941cf71 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -134,6 +134,11 @@ void DefaultSubgroupAllocator::compute_standard_memberships( subgroup_layouts[subgroup_type] = allocate_standard_subgroup_type(subgroup_type, curr_view, shard_sizes); } } else { + /** survive_member_set holds non-failed node_ids from prev_view, added_member_set holds + * newly added node_ids in curr_view. 
+ */ + std::set survive_member_set(curr_view.members.begin(), curr_view.members.begin() + curr_view.next_unassigned_rank); + std::set added_member_set(curr_view.members.begin() + curr_view.next_unassigned_rank, curr_view.members.end()); for(uint32_t subgroup_type_id = 0; subgroup_type_id < subgroup_type_order.size(); ++subgroup_type_id) { //We need to both iterate through this vector and keep the counter in order to know the type IDs @@ -143,7 +148,9 @@ void DefaultSubgroupAllocator::compute_standard_memberships( } dbg_default_debug("With prev_view, assingn node to type {}", std::string(subgroup_type.name())); subgroup_layouts[subgroup_type] = update_standard_subgroup_type(subgroup_type, subgroup_type_id, - prev_view, curr_view, shard_sizes); + prev_view, curr_view, shard_sizes, + survive_member_set, + added_member_set); } } } @@ -404,7 +411,9 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( const subgroup_type_id_t subgroup_type_id, const std::unique_ptr& prev_view, View& curr_view, - const std::map>>& shard_sizes) const { + const std::map>>& shard_sizes, + const std::set& survive_member_set, + const std::set& added_member_set) const { /* Subgroups of the same type will have contiguous IDs because they were created in order. * So the previous assignment is the slice of the previous subgroup_shard_views vector * starting at the first subgroup's ID, and extending for num_subgroups entries. 
@@ -422,8 +431,6 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( */ std::vector curr_members; std::set curr_member_set(curr_view.members.begin(), curr_view.members.end()); - std::set survive_member_set(curr_view.members.begin(), curr_view.members.begin() + curr_view.next_unassigned_rank); - std::set added_member_set(curr_view.members.begin() + curr_view.next_unassigned_rank, curr_view.members.end()); dbg_default_debug("The survive_member_set is:"); print_set(survive_member_set); From 2621851f7d3775a0c43810e7ee9f98cf2abdb361 Mon Sep 17 00:00:00 2001 From: Panlichen Date: Wed, 26 May 2021 02:43:38 -0600 Subject: [PATCH 15/26] add simple overlap tester --- src/applications/demos/CMakeLists.txt | 4 +- ...e_replicated_objects_overlap_json_file.cpp | 87 +++++++++++++++++++ 2 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 src/applications/demos/simple_replicated_objects_overlap_json_file.cpp diff --git a/src/applications/demos/CMakeLists.txt b/src/applications/demos/CMakeLists.txt index ad017b5f..8df9c9ee 100644 --- a/src/applications/demos/CMakeLists.txt +++ b/src/applications/demos/CMakeLists.txt @@ -19,8 +19,8 @@ add_executable(simple_replicated_objects_json_file simple_replicated_objects_jso target_link_libraries(simple_replicated_objects_json_file derecho) # overlapping_replicated_objects -add_executable(overlapping_replicated_objects overlapping_replicated_objects.cpp) -target_link_libraries(overlapping_replicated_objects derecho) +add_executable(simple_replicated_objects_overlap_json_file simple_replicated_objects_overlap_json_file.cpp) +target_link_libraries(simple_replicated_objects_overlap_json_file derecho) add_executable(signed_store_mockup signed_store_mockup.cpp) target_link_libraries(signed_store_mockup derecho) diff --git a/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp b/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp new file mode 100644 index 
00000000..4473fbde --- /dev/null +++ b/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp @@ -0,0 +1,87 @@ +/** + * @file simple_replicated_objects.cpp + * + * This test creates two subgroups, one of each type Foo and Bar (defined in sample_objects.h). + * It requires at least 6 nodes to join the group; the first three are part of the Foo subgroup, + * while the next three are part of the Bar subgroup. + * Every node (identified by its node_id) makes some calls to ordered_send in their subgroup; + * some also call p2p_send. By these calls they verify that the state machine operations are + * executed properly. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sample_objects.hpp" +#include +#include + +using derecho::ExternalCaller; +using derecho::Replicated; +using std::cout; +using std::endl; + +int main(int argc, char** argv) { + // Read configurations from the command line options as well as the default config file + derecho::Conf::initialize(argc, argv); + + //Define subgroup membership using the default subgroup allocator function + //Each Replicated type will have one subgroup and one shard, with three members in the shard + derecho::SubgroupInfo subgroup_function{derecho::construct_DSA_with_layout_path( + derecho::getConfString(CONF_DERECHO_JSON_LAYOUT_PATH))}; + //Each replicated type needs a factory; this can be used to supply constructor arguments + //for the subgroup's initial state. These must take a PersistentRegistry* argument, but + //in this case we ignore it because the replicated objects aren't persistent. 
+ auto foo_factory = [](persistent::PersistentRegistry*, derecho::subgroup_id_t) { return std::make_unique(-1); }; + auto bar_factory = [](persistent::PersistentRegistry*, derecho::subgroup_id_t) { return std::make_unique(); }; + + derecho::Group group(derecho::UserMessageCallbacks{}, subgroup_function, {}, + std::vector{}, + foo_factory, bar_factory); + + cout << "Finished constructing/joining Group" << endl; + + //Now have each node send some updates to the Replicated objects + //The code must be different depending on which subgroup this node is in, + //which we can determine based on which membership list it appears in + uint32_t my_id = derecho::getConfUInt32(CONF_DERECHO_LOCAL_ID); + std::vector foo_members = group.get_subgroup_members(0)[0]; + std::vector bar_members = group.get_subgroup_members(0)[0]; + auto find_in_foo_results = std::find(foo_members.begin(), foo_members.end(), my_id); + if(find_in_foo_results != foo_members.end()) { + uint32_t rank_in_foo = std::distance(foo_members.begin(), find_in_foo_results); + Replicated& foo_rpc_handle = group.get_subgroup(); + if(rank_in_foo == 0) { + dbg_default_crit("Here is FOO {}!", rank_in_foo); + } else if(rank_in_foo == 1) { + dbg_default_crit("Here is FOO {}!", rank_in_foo); + } else if(rank_in_foo == 2) { + dbg_default_crit("Here is FOO {}!", rank_in_foo); + } else if(rank_in_foo == 3) { + dbg_default_crit("Here is FOO {}!", rank_in_foo); + } + } + auto find_in_bar_results = std::find(bar_members.begin(), bar_members.end(), my_id); + if(find_in_bar_results != bar_members.end()) { + uint32_t rank_in_bar = derecho::index_of(bar_members, my_id); + Replicated& bar_rpc_handle = group.get_subgroup(); + if(rank_in_bar == 0) { + dbg_default_crit("Here is BAR {}!", rank_in_bar); + } else if(rank_in_bar == 1) { + dbg_default_crit("Here is BAR {}!", rank_in_bar); + } else if(rank_in_bar == 2) { + dbg_default_crit("Here is BAR {}!", rank_in_bar); + } else if(rank_in_bar == 3) { + dbg_default_crit("Here is BAR {}!", 
rank_in_bar); + } + } + + cout << "Reached end of main(), entering infinite loop so program doesn't exit" << std::endl; + while(true) { + } +} From 36fe0050cce687477e29cfd4f49b3125d06278f1 Mon Sep 17 00:00:00 2001 From: Panlichen Date: Wed, 26 May 2021 03:31:13 -0600 Subject: [PATCH 16/26] bug fix with curr_members curr_member_set --- include/derecho/core/subgroup_functions.hpp | 8 +- ...e_replicated_objects_overlap_json_file.cpp | 4 +- src/core/subgroup_functions.cpp | 149 ++++++++++-------- 3 files changed, 88 insertions(+), 73 deletions(-) diff --git a/include/derecho/core/subgroup_functions.hpp b/include/derecho/core/subgroup_functions.hpp index f84ffe3f..15c534f3 100644 --- a/include/derecho/core/subgroup_functions.hpp +++ b/include/derecho/core/subgroup_functions.hpp @@ -311,7 +311,9 @@ class DefaultSubgroupAllocator { subgroup_shard_layout_t allocate_standard_subgroup_type( const std::type_index subgroup_type, View& curr_view, - const std::map>>& shard_sizes) const; + const std::map>>& shard_sizes, + const std::vector& curr_members, + const std::set& curr_member_set) const; /** * Creates and returns a new membership allocation for a single subgroup @@ -331,7 +333,9 @@ class DefaultSubgroupAllocator { View& curr_view, const std::map>>& shard_sizes, const std::set& survive_member_set, - const std::set& added_member_set) const; + const std::set& added_member_set, + const std::vector& curr_members, + const std::set& curr_member_set) const; /** * Helper function that implements the subgroup allocation algorithm for all diff --git a/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp b/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp index 4473fbde..bee1d75f 100644 --- a/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp +++ b/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp @@ -55,7 +55,7 @@ int main(int argc, char** argv) { auto find_in_foo_results = std::find(foo_members.begin(), 
foo_members.end(), my_id); if(find_in_foo_results != foo_members.end()) { uint32_t rank_in_foo = std::distance(foo_members.begin(), find_in_foo_results); - Replicated& foo_rpc_handle = group.get_subgroup(); + // Replicated& foo_rpc_handle = group.get_subgroup(); if(rank_in_foo == 0) { dbg_default_crit("Here is FOO {}!", rank_in_foo); } else if(rank_in_foo == 1) { @@ -69,7 +69,7 @@ int main(int argc, char** argv) { auto find_in_bar_results = std::find(bar_members.begin(), bar_members.end(), my_id); if(find_in_bar_results != bar_members.end()) { uint32_t rank_in_bar = derecho::index_of(bar_members, my_id); - Replicated& bar_rpc_handle = group.get_subgroup(); + // Replicated& bar_rpc_handle = group.get_subgroup(); if(rank_in_bar == 0) { dbg_default_crit("Here is BAR {}!", rank_in_bar); } else if(rank_in_bar == 1) { diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index 8941cf71..28c42d74 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -124,14 +124,45 @@ void DefaultSubgroupAllocator::compute_standard_memberships( = compute_standard_shard_sizes(subgroup_type_order, prev_view, curr_view); //Now we can go through and actually allocate nodes to each shard, //knowing exactly how many nodes they will get + + dbg_default_debug("Ready to really assign nodes"); if(!prev_view) { + /** allocate_standard_subgroup_type is invoked when we have no prev_view, and thus next_unassigned_rank is 0. + * If we have reserved node_ids, we need to rearrange node_ids in curr_view.members into two "parts": + * the first part holds current active reserved node_ids, + * while the second part holds normal node_ids. + * We then rearrange next_unassigned_rank to be the length of the first part, for nodes in the first part + * are inherent nodes for some shards, and sure will be assigned, + * sometimes more than once if we want to overlap shards. 
+ */ + // We cannot modify curr_view.members inplace, which will corrupt curr_view.my_rank, curr_view.node_id_to_rank, etc. Besides, View::members is const. + std::vector curr_members; + std::set curr_member_set(curr_view.members.begin(), curr_view.members.end()); + dbg_default_debug("Initial curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); + if(all_reserved_node_ids.size() > 0) { + std::set_intersection( + curr_member_set.begin(), curr_member_set.end(), + all_reserved_node_ids.begin(), all_reserved_node_ids.end(), + std::inserter(curr_members, curr_members.end())); + curr_view.next_unassigned_rank = curr_members.size(); + dbg_default_debug("After rearranging inherent node_ids, curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); + std::set_difference( + curr_member_set.begin(), curr_member_set.end(), + all_reserved_node_ids.begin(), all_reserved_node_ids.end(), + std::inserter(curr_members, curr_members.end())); + } else { + curr_members = curr_view.members; + } + for(const auto& subgroup_type : subgroup_type_order) { //Ignore cross-product-allocated types if(!std::holds_alternative(policies.at(subgroup_type))) { continue; } dbg_default_debug("Without prev_view, assign node to type {}", std::string(subgroup_type.name())); - subgroup_layouts[subgroup_type] = allocate_standard_subgroup_type(subgroup_type, curr_view, shard_sizes); + + subgroup_layouts[subgroup_type] = allocate_standard_subgroup_type( + subgroup_type, curr_view, shard_sizes, curr_members, curr_member_set); } } else { /** survive_member_set holds non-failed node_ids from prev_view, added_member_set holds @@ -139,6 +170,42 @@ void DefaultSubgroupAllocator::compute_standard_memberships( */ std::set survive_member_set(curr_view.members.begin(), curr_view.members.begin() + curr_view.next_unassigned_rank); std::set added_member_set(curr_view.members.begin() + curr_view.next_unassigned_rank, curr_view.members.end()); + + /** update_standard_subgroup_type is invoked 
if there is a prev_view, and the curr_view.members is already + * arranged into two parts: the first part hold survive nodes from prev_view, and the second + * part holds newly added nodes. The next_unassigned_rank is the length of the first part. + * If we have reserved node_ids, we need to rearrnage curr_view.members into 2 parts: + * the first part holds inherent node_ids for shards, which is composed with survive node_ids and reserved node_ids + * the second part holds newly added non-reserved node_ids. + * We then rearrange next_unassigned_rank to be the length of the first part, for they will sure be assigned. + */ + std::vector curr_members; + std::set curr_member_set(curr_view.members.begin(), curr_view.members.end()); + dbg_default_debug("Initial curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); + if(all_reserved_node_ids.size() > 0) { + std::set active_reserved_node_id_set; + std::set_intersection( + curr_member_set.begin(), curr_member_set.end(), + all_reserved_node_ids.begin(), all_reserved_node_ids.end(), + std::inserter(active_reserved_node_id_set, active_reserved_node_id_set.end())); + std::set_union( + survive_member_set.begin(), survive_member_set.end(), + active_reserved_node_id_set.begin(), active_reserved_node_id_set.end(), + std::inserter(curr_members, curr_members.end())); + dbg_default_debug("With inherent nodes, curr_members is:"); + print_set(curr_members); + + curr_view.next_unassigned_rank = curr_members.size(); + dbg_default_debug("After rearranging inherent node_ids, curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); + + std::set_difference( + added_member_set.begin(), added_member_set.end(), + all_reserved_node_ids.begin(), all_reserved_node_ids.end(), + std::inserter(curr_members, curr_members.end())); + dbg_default_debug("Adding newly added non-reserved nodes, curr_members is:"); + print_set(curr_members); + } + for(uint32_t subgroup_type_id = 0; subgroup_type_id < 
subgroup_type_order.size(); ++subgroup_type_id) { //We need to both iterate through this vector and keep the counter in order to know the type IDs @@ -147,10 +214,12 @@ void DefaultSubgroupAllocator::compute_standard_memberships( continue; } dbg_default_debug("With prev_view, assingn node to type {}", std::string(subgroup_type.name())); - subgroup_layouts[subgroup_type] = update_standard_subgroup_type(subgroup_type, subgroup_type_id, - prev_view, curr_view, shard_sizes, - survive_member_set, - added_member_set); + subgroup_layouts[subgroup_type] = update_standard_subgroup_type( + subgroup_type, subgroup_type_id, + prev_view, curr_view, shard_sizes, + survive_member_set, + added_member_set, + curr_members, curr_member_set); } } } @@ -315,40 +384,15 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_type( const std::type_index subgroup_type, View& curr_view, - const std::map>>& shard_sizes) const { + const std::map>>& shard_sizes, + const std::vector& curr_members, + const std::set& curr_member_set) const { //The size of shard_sizes[subgroup_type] is the number of subgroups of this type subgroup_shard_layout_t subgroup_allocation(shard_sizes.at(subgroup_type).size()); - /** This function is invoked when we have no prev_view, and thus next_unassigned_rank is 0. - * If we have reserved node_ids, we need to rearrange node_ids in curr_view.members into two "parts": - * the first part holds current active reserved node_ids, - * while the second part holds normal node_ids. - * We then rearrange next_unassigned_rank to be the length of the first part, for nodes in the first part - * are inherent nodes for some shards, and sure will be assigned, - * sometimes more than once if we want to overlap shards. - */ - // We cannot modify curr_view.members inplace, which will corrupt curr_view.my_rank, curr_view.node_id_to_rank, etc. Besides, View::members is const. 
- std::vector curr_members; - std::set curr_member_set(curr_view.members.begin(), curr_view.members.end()); const SubgroupAllocationPolicy& subgroup_type_policy = std::get(policies.at(subgroup_type)); - dbg_default_debug("Initial curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); - if(all_reserved_node_ids.size() > 0) { - std::set_intersection( - curr_member_set.begin(), curr_member_set.end(), - all_reserved_node_ids.begin(), all_reserved_node_ids.end(), - std::inserter(curr_members, curr_members.end())); - curr_view.next_unassigned_rank = curr_members.size(); - dbg_default_debug("After rearranging inherent node_ids, curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); - std::set_difference( - curr_member_set.begin(), curr_member_set.end(), - all_reserved_node_ids.begin(), all_reserved_node_ids.end(), - std::inserter(curr_members, curr_members.end())); - } else { - curr_members = curr_view.members; - } - for(uint32_t subgroup_num = 0; subgroup_num < subgroup_allocation.size(); ++subgroup_num) { //The size of shard_sizes[subgroup_type][subgroup_num] is the number of shards for(uint32_t shard_num = 0; shard_num < shard_sizes.at(subgroup_type)[subgroup_num].size(); @@ -413,7 +457,9 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( View& curr_view, const std::map>>& shard_sizes, const std::set& survive_member_set, - const std::set& added_member_set) const { + const std::set& added_member_set, + const std::vector& curr_members, + const std::set& curr_member_set) const { /* Subgroups of the same type will have contiguous IDs because they were created in order. * So the previous assignment is the slice of the previous subgroup_shard_views vector * starting at the first subgroup's ID, and extending for num_subgroups entries. 
@@ -421,17 +467,6 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( const subgroup_id_t previous_assignment_offset = prev_view->subgroup_ids_by_type_id.at(subgroup_type_id)[0]; subgroup_shard_layout_t next_assignment(shard_sizes.at(subgroup_type).size()); - /** This function is invoked if there is a prev_view, and the curr_view.members is already - * arranged into two parts: the first part hold survive nodes from prev_view, and the second - * part holds newly added nodes. The next_unassigned_rank is the length of the first part. - * If we have reserved node_ids, we need to rearrnage curr_view.members into 2 parts: - * the first part holds inherent node_ids for shards, which is composed with survive node_ids and reserved node_ids - * the second part holds newly added non-reserved node_ids. - * We then rearrange next_unassigned_rank to be the length of the first part, for they will sure be assigned. - */ - std::vector curr_members; - std::set curr_member_set(curr_view.members.begin(), curr_view.members.end()); - dbg_default_debug("The survive_member_set is:"); print_set(survive_member_set); dbg_default_debug("The added_member_set is:"); @@ -439,30 +474,6 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( const SubgroupAllocationPolicy& subgroup_type_policy = std::get(policies.at(subgroup_type)); - dbg_default_debug("Initial curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); - if(all_reserved_node_ids.size() > 0) { - std::set active_reserved_node_id_set; - std::set_intersection( - curr_member_set.begin(), curr_member_set.end(), - all_reserved_node_ids.begin(), all_reserved_node_ids.end(), - std::inserter(active_reserved_node_id_set, active_reserved_node_id_set.end())); - std::set_union( - survive_member_set.begin(), survive_member_set.end(), - active_reserved_node_id_set.begin(), active_reserved_node_id_set.end(), - std::inserter(curr_members, curr_members.end())); - 
dbg_default_debug("With inherent nodes, curr_members is:"); - print_set(curr_members); - - curr_view.next_unassigned_rank = curr_members.size(); - dbg_default_debug("After rearranging inherent node_ids, curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); - - std::set_difference( - added_member_set.begin(), added_member_set.end(), - all_reserved_node_ids.begin(), all_reserved_node_ids.end(), - std::inserter(curr_members, curr_members.end())); - dbg_default_debug("Adding newly added non-reserved nodes, curr_members is:"); - print_set(curr_members); - } for(uint32_t subgroup_num = 0; subgroup_num < next_assignment.size(); ++subgroup_num) { //The size of shard_sizes[subgroup_type][subgroup_num] is the number of shards From a1a6e82048067a5ba96fc6aa01926e4ec0bb9835 Mon Sep 17 00:00:00 2001 From: Panlichen Date: Wed, 26 May 2021 04:56:38 -0600 Subject: [PATCH 17/26] polish overlap test --- ...e_replicated_objects_overlap_json_file.cpp | 38 +++++++++---------- src/core/subgroup_functions.cpp | 5 --- 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp b/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp index bee1d75f..f17aa560 100644 --- a/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp +++ b/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp @@ -10,10 +10,10 @@ */ #include #include -#include #include #include #include +#include #include #include @@ -26,6 +26,16 @@ using derecho::Replicated; using std::cout; using std::endl; +void print_set(const std::vector& uset) { + std::stringstream stream; + for(auto thing : uset) { + stream << thing << ' '; + } + + std::string out = stream.str(); + dbg_default_debug(out); +} + int main(int argc, char** argv) { // Read configurations from the command line options as well as the default config file derecho::Conf::initialize(argc, argv); @@ -56,29 +66,17 @@ int main(int 
argc, char** argv) { if(find_in_foo_results != foo_members.end()) { uint32_t rank_in_foo = std::distance(foo_members.begin(), find_in_foo_results); // Replicated& foo_rpc_handle = group.get_subgroup(); - if(rank_in_foo == 0) { - dbg_default_crit("Here is FOO {}!", rank_in_foo); - } else if(rank_in_foo == 1) { - dbg_default_crit("Here is FOO {}!", rank_in_foo); - } else if(rank_in_foo == 2) { - dbg_default_crit("Here is FOO {}!", rank_in_foo); - } else if(rank_in_foo == 3) { - dbg_default_crit("Here is FOO {}!", rank_in_foo); - } - } + dbg_default_crit("Here is FOO {}!", rank_in_foo); + dbg_default_crit("I see members of my shard:"); + print_set(foo_members); + } auto find_in_bar_results = std::find(bar_members.begin(), bar_members.end(), my_id); if(find_in_bar_results != bar_members.end()) { uint32_t rank_in_bar = derecho::index_of(bar_members, my_id); // Replicated& bar_rpc_handle = group.get_subgroup(); - if(rank_in_bar == 0) { - dbg_default_crit("Here is BAR {}!", rank_in_bar); - } else if(rank_in_bar == 1) { - dbg_default_crit("Here is BAR {}!", rank_in_bar); - } else if(rank_in_bar == 2) { - dbg_default_crit("Here is BAR {}!", rank_in_bar); - } else if(rank_in_bar == 3) { - dbg_default_crit("Here is BAR {}!", rank_in_bar); - } + dbg_default_crit("Here is BAR {}!", rank_in_bar); + dbg_default_crit("I see members of my shard:"); + print_set(bar_members); } cout << "Reached end of main(), entering infinite loop so program doesn't exit" << std::endl; diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index 28c42d74..ce1db26f 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -500,11 +500,6 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( //Add newly added reserved nodes - dbg_default_debug("The total added_member_set is:"); - print_set(added_member_set); - dbg_default_debug("Current shard reserved node_id set is:"); - 
print_set(subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num]); - std::set added_reserved_node_id_set; std::set_intersection( added_member_set.begin(), added_member_set.end(), From 7f2ea4d36d0afde51f0cac557e1d87bcf9a40f3c Mon Sep 17 00:00:00 2001 From: Panlichen Date: Wed, 26 May 2021 05:08:20 -0600 Subject: [PATCH 18/26] print in overlap test --- .../demos/simple_replicated_objects_overlap_json_file.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp b/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp index f17aa560..2d3e9864 100644 --- a/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp +++ b/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp @@ -33,7 +33,7 @@ void print_set(const std::vector& uset) { } std::string out = stream.str(); - dbg_default_debug(out); + dbg_default_crit(out); } int main(int argc, char** argv) { From 02096d62bc2023296ec78bbc0e968db5e2618992 Mon Sep 17 00:00:00 2001 From: Panlichen Date: Mon, 31 May 2021 12:20:51 -0400 Subject: [PATCH 19/26] bug fix when reserved_node_id_by_shard is not configured --- src/core/subgroup_functions.cpp | 83 +++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 34 deletions(-) diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index ce1db26f..45361e51 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -297,12 +297,16 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( // Check whehter this shard reserve existing nodes. 
std::set active_reserved_node_id_set; - std::set_intersection( - sharding_policy.reserved_node_id_by_shard[shard_num].begin(), - sharding_policy.reserved_node_id_by_shard[shard_num].end(), - curr_member_set.begin(), - curr_member_set.end(), - std::inserter(active_reserved_node_id_set, active_reserved_node_id_set.begin())); + if(sharding_policy.reserved_node_id_by_shard.size() > 0) { + std::set_intersection( + sharding_policy.reserved_node_id_by_shard[shard_num].begin(), + sharding_policy.reserved_node_id_by_shard[shard_num].end(), + curr_member_set.begin(), + curr_member_set.end(), + std::inserter(active_reserved_node_id_set, active_reserved_node_id_set.begin())); + } else { + dbg_default_debug("There is no reserved node_id configured."); + } dbg_default_debug("The active_reserved_node_id_set for current shard is: "); print_set(active_reserved_node_id_set); @@ -403,17 +407,21 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_typ dbg_default_debug("For subgroup {}, shard {}, it is assigned {} nodes", subgroup_num, shard_num, shard_size); // Allocate active reserved nodes first. 
- const std::set reserved_node_id_set( - subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].begin(), - subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].end()); - if(reserved_node_id_set.size() > 0) { - std::set_intersection( - reserved_node_id_set.begin(), reserved_node_id_set.end(), - curr_member_set.begin(), curr_member_set.end(), - std::inserter(desired_nodes, desired_nodes.end())); - shard_size -= desired_nodes.size(); - dbg_default_debug("Assign it {} active reserved nodes:", desired_nodes.size()); - print_set(std::set(desired_nodes.begin(), desired_nodes.end())); + if(subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard.size() > 0) { + const std::set reserved_node_id_set( + subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].begin(), + subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].end()); + if(reserved_node_id_set.size() > 0) { + std::set_intersection( + reserved_node_id_set.begin(), reserved_node_id_set.end(), + curr_member_set.begin(), curr_member_set.end(), + std::inserter(desired_nodes, desired_nodes.end())); + shard_size -= desired_nodes.size(); + dbg_default_debug("Assign it {} active reserved nodes:", desired_nodes.size()); + print_set(std::set(desired_nodes.begin(), desired_nodes.end())); + } + } else { + dbg_default_debug("There is no reserved node_id configured."); } //Grab the next shard_size nodes @@ -499,24 +507,27 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( print_set(next_shard_members); //Add newly added reserved nodes - - std::set added_reserved_node_id_set; - std::set_intersection( - added_member_set.begin(), added_member_set.end(), - subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].begin(), - 
subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].end(), - std::inserter(added_reserved_node_id_set, added_reserved_node_id_set.end())); - if(added_reserved_node_id_set.size() > 0) { - dbg_default_debug("The added_reserved_node_id_set is not empty:"); - print_set(added_reserved_node_id_set); - - for(auto node_id : added_reserved_node_id_set) { - next_shard_members.push_back(node_id); - next_is_sender.push_back(true); + if(subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard.size() > 0) { + std::set added_reserved_node_id_set; + std::set_intersection( + added_member_set.begin(), added_member_set.end(), + subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].begin(), + subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].end(), + std::inserter(added_reserved_node_id_set, added_reserved_node_id_set.end())); + if(added_reserved_node_id_set.size() > 0) { + dbg_default_debug("The added_reserved_node_id_set is not empty:"); + print_set(added_reserved_node_id_set); + + for(auto node_id : added_reserved_node_id_set) { + next_shard_members.push_back(node_id); + next_is_sender.push_back(true); + } } + dbg_default_debug("After assigning newly added reserved nodes, we get {} inherent node_id(s) assigned, next_shard_members is:", next_shard_members.size()); + print_set(next_shard_members); + } else { + dbg_default_debug("There is no reserved node_id configured."); } - dbg_default_debug("After assigning newly added reserved nodes, we get {} inherent node_id(s) assigned, next_shard_members is:", next_shard_members.size()); - print_set(next_shard_members); //Add additional members if needed while(next_shard_members.size() < allocated_shard_size) { @@ -630,7 +641,9 @@ SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf, s for(auto subgroup_it : jconf[JSON_CONF_LAYOUT]) { ShardAllocationPolicy 
shard_allocation_policy; size_t num_shards = subgroup_it[MIN_NODES_BY_SHARD].size(); - if(subgroup_it[MAX_NODES_BY_SHARD].size() != num_shards || subgroup_it[DELIVERY_MODES_BY_SHARD].size() != num_shards || subgroup_it[PROFILES_BY_SHARD].size() != num_shards || subgroup_it[RESERVED_NODE_ID_BY_SHRAD].size() != num_shards) { + if(subgroup_it[MAX_NODES_BY_SHARD].size() != num_shards || subgroup_it[DELIVERY_MODES_BY_SHARD].size() != num_shards || subgroup_it[PROFILES_BY_SHARD].size() != num_shards || + // "reserved_node_id_by_shard" is not a mandatory field + (subgroup_it[RESERVED_NODE_ID_BY_SHRAD].size() != 0 && subgroup_it[RESERVED_NODE_ID_BY_SHRAD].size() != num_shards)) { dbg_default_error("parse_json_subgroup_policy: shards does not match in at least one subgroup: {}", subgroup_it.get()); throw derecho::derecho_exception("parse_json_subgroup_policy: shards does not match in at least one subgroup:" + subgroup_it.get()); @@ -656,6 +669,8 @@ SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf, s for(auto reserved_id_set : shard_allocation_policy.reserved_node_id_by_shard) { std::set_union(all_reserved_node_ids.begin(), all_reserved_node_ids.end(), reserved_id_set.begin(), reserved_id_set.end(), std::inserter(all_reserved_node_ids, all_reserved_node_ids.begin())); } + } else { + dbg_default_debug("There is no reserved node_id configured."); } subgroup_allocation_policy.shard_policy_by_subgroup.emplace_back(std::move(shard_allocation_policy)); From 9dd8b526395964f4a6651351c33528f79f6c313c Mon Sep 17 00:00:00 2001 From: Panlichen Date: Mon, 7 Jun 2021 11:27:48 -0400 Subject: [PATCH 20/26] Check whether shards in the same subgroup share reserved node_ids. 
--- .../core/detail/subgroup_functions_impl.hpp | 21 +++++-- src/core/subgroup_functions.cpp | 63 ++++++++++++++++++- 2 files changed, 76 insertions(+), 8 deletions(-) diff --git a/include/derecho/core/detail/subgroup_functions_impl.hpp b/include/derecho/core/detail/subgroup_functions_impl.hpp index 8a86aaac..b691a544 100644 --- a/include/derecho/core/detail/subgroup_functions_impl.hpp +++ b/include/derecho/core/detail/subgroup_functions_impl.hpp @@ -24,14 +24,26 @@ namespace derecho { #define DELIVERY_MODE_RAW "Raw" #define PROFILES_BY_SHARD "profiles_by_shard" /** - * derecho_parse_json_subgroup_policy() - * * Generate a single-type subgroup allocation policy from json string - * @param json_config subgroup configuration represented in json format. + * @param jconf subgroup configuration represented in json format. + * @param all_reserved_node_ids a set that holds the union of all reserved node_ids. * @return SubgroupAllocationPolicy */ SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json&, std::set&); +/** + * TODO: If we just need to check shards within one subgroup, this function is redundant. + * Make sure that no shards inside a subgroup reserve same node_ids. Shards in + * different subgroups of one same type or from different types can share nodes, + * and this why we use the reserved_node_id feature. + * For example, we can assign 2 subgroups for type "PersistentCascadeStoreWithStringKey" + * to store data and model respectively for an ML application, and actually reserve + * the same node_ids for shards in this two subgroup. This way the data and the model + * coexist in the same node, thus delivering performance gains. + * @param dsa_map the subgroup allocation map derived from json configuration. 
+ */ +void check_reserved_node_id_pool(const std::map>&); + template void print_set(const std::set& uset) { std::stringstream stream; @@ -89,7 +101,6 @@ DefaultSubgroupAllocator construct_DSA_with_layout_path(const std::string& layou std::ifstream json_layout_stream(layout_path.c_str()); if(!json_layout_stream) { throw derecho_exception("The json layout file " + layout_path + " not found."); - // TODO: do we need further actions like return something? } json_layout_stream >> layout; @@ -104,4 +115,4 @@ DefaultSubgroupAllocator construct_DSA_with_layout_path(const std::string& layou return DefaultSubgroupAllocator(dsa_map, all_reserved_node_ids); } -} /* namespace derecho */ \ No newline at end of file +} /* namespace derecho */ diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index 45361e51..d44c22e4 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -4,6 +4,7 @@ * @date Feb 28, 2017 */ +#include #include #include @@ -132,7 +133,7 @@ void DefaultSubgroupAllocator::compute_standard_memberships( * the first part holds current active reserved node_ids, * while the second part holds normal node_ids. * We then rearrange next_unassigned_rank to be the length of the first part, for nodes in the first part - * are inherent nodes for some shards, and sure will be assigned, + * are inherent nodes for some shards, and sure will be assigned, * sometimes more than once if we want to overlap shards. */ // We cannot modify curr_view.members inplace, which will corrupt curr_view.my_rank, curr_view.node_id_to_rank, etc. Besides, View::members is const. @@ -166,7 +167,7 @@ void DefaultSubgroupAllocator::compute_standard_memberships( } } else { /** survive_member_set holds non-failed node_ids from prev_view, added_member_set holds - * newly added node_ids in curr_view. + * newly added node_ids in curr_view. 
*/ std::set survive_member_set(curr_view.members.begin(), curr_view.members.begin() + curr_view.next_unassigned_rank); std::set added_member_set(curr_view.members.begin() + curr_view.next_unassigned_rank, curr_view.members.end()); @@ -272,7 +273,7 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( : sharding_policy.min_num_nodes_by_shard[shard_num]; /** With reserved nodes, we do not assign nodes evenly across shards. - * All current nodes may be occupied by 1 shard because it reserved all of them. + * All current nodes may be occupied by 1 shard because it reserved all of them. * Therefore we need to check if min_shard_size for each shard is satisfied, * and thus we need to maintain nodes_needed more carefully. */ @@ -646,6 +647,7 @@ SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf, s (subgroup_it[RESERVED_NODE_ID_BY_SHRAD].size() != 0 && subgroup_it[RESERVED_NODE_ID_BY_SHRAD].size() != num_shards)) { dbg_default_error("parse_json_subgroup_policy: shards does not match in at least one subgroup: {}", subgroup_it.get()); + throw derecho::derecho_exception("parse_json_subgroup_policy: shards does not match in at least one subgroup:" + subgroup_it.get()); } shard_allocation_policy.even_shards = false; @@ -669,6 +671,31 @@ SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf, s for(auto reserved_id_set : shard_allocation_policy.reserved_node_id_by_shard) { std::set_union(all_reserved_node_ids.begin(), all_reserved_node_ids.end(), reserved_id_set.begin(), reserved_id_set.end(), std::inserter(all_reserved_node_ids, all_reserved_node_ids.begin())); } + /** + * Make sure that no shards inside a subgroup reserve same node_ids. Shards in + * different subgroups of one same type or from different types can share nodes, + * and this why we use the reserved_node_id feature. 
+ * For example, we can assign 2 subgroups for type "PersistentCascadeStoreWithStringKey" + * to store data and model respectively for an ML application, and actually reserve + * the same node_ids for shards in this two subgroup. This way the data and the model + * coexist in the same node, thus delivering performance gains. + * @param dsa_map the subgroup allocation map derived from json configuration. + */ + std::set intersect_reserved_node_ids_in_subgroup; + std::set temp(shard_allocation_policy.reserved_node_id_by_shard[0]); + for(int shard_num = 1; shard_num < shard_allocation_policy.num_shards; ++shard_num) { + intersect_reserved_node_ids_in_subgroup = std::set(); + std::set_intersection( + temp.begin(), temp.end(), + shard_allocation_policy.reserved_node_id_by_shard[shard_num].begin(), + shard_allocation_policy.reserved_node_id_by_shard[shard_num].end(), + std::inserter(intersect_reserved_node_ids_in_subgroup, + intersect_reserved_node_ids_in_subgroup.begin())); + } + // Shards in this subgroup have same reserved node_ids. 
+ if(intersect_reserved_node_ids_in_subgroup.size() > 0) { + throw derecho_exception("Shards in one subgroup have same reserved node_ids!"); + } } else { dbg_default_debug("There is no reserved node_id configured."); } @@ -678,4 +705,34 @@ SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf, s return subgroup_allocation_policy; } +void check_reserved_node_id_pool(const std::map>& dsa_map) { + for(auto& item : dsa_map) { + if(!std::holds_alternative(item.second)) { + continue; + } + const SubgroupAllocationPolicy& subgroup_type_policy + = std::get(item.second); + for(int subgroup_num = 0; subgroup_num < subgroup_type_policy.num_subgroups; ++subgroup_num) { + const ShardAllocationPolicy& sharding_policy = subgroup_type_policy.shard_policy_by_subgroup[subgroup_num]; + if(sharding_policy.reserved_node_id_by_shard.size() > 0) { + std::set intersect_reserved_node_ids_in_subgroup; + std::set temp(sharding_policy.reserved_node_id_by_shard[0]); + for(int shard_num = 1; shard_num < sharding_policy.num_shards; ++shard_num) { + intersect_reserved_node_ids_in_subgroup = std::set(); + std::set_intersection( + temp.begin(), temp.end(), + sharding_policy.reserved_node_id_by_shard[shard_num].begin(), + sharding_policy.reserved_node_id_by_shard[shard_num].end(), + std::inserter(intersect_reserved_node_ids_in_subgroup, + intersect_reserved_node_ids_in_subgroup.begin())); + } + // Shards in this subgroup have same reserved node_ids. 
+ if(intersect_reserved_node_ids_in_subgroup.size() > 0) { + throw derecho_exception("Shards in one subgroup have same reserved node_ids!"); + } + } + } + } +} + } // namespace derecho From f9ad56780ca7cdb3f05d29098cc8b2e15847801e Mon Sep 17 00:00:00 2001 From: Panlichen Date: Mon, 7 Jun 2021 11:32:35 -0400 Subject: [PATCH 21/26] fix careless comment --- src/core/subgroup_functions.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index d44c22e4..7a30d089 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -679,7 +679,6 @@ SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf, s * to store data and model respectively for an ML application, and actually reserve * the same node_ids for shards in this two subgroup. This way the data and the model * coexist in the same node, thus delivering performance gains. - * @param dsa_map the subgroup allocation map derived from json configuration. */ std::set intersect_reserved_node_ids_in_subgroup; std::set temp(shard_allocation_policy.reserved_node_id_by_shard[0]); From ebe3972abe5d4d691e0eb015037975d4bb40c38b Mon Sep 17 00:00:00 2001 From: Edward Tremel Date: Tue, 15 Jun 2021 17:01:03 -0400 Subject: [PATCH 22/26] Added the json library to official prerequisites Since we now depend on the nlohmann-json library, we should add it to CMakeLists as a dependency and include an install script in the prerequisites directory. Also, updated the README to describe this dependency, and fixed the version number in the description of libfabric. 
--- CMakeLists.txt | 5 +++-- README.md | 5 +++-- scripts/prerequisites/install-json.sh | 18 ++++++++++++++++++ src/core/git_version.cpp | 4 ++-- 4 files changed, 26 insertions(+), 6 deletions(-) create mode 100755 scripts/prerequisites/install-json.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 9f1f82a5..d0e7dfa2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,7 +61,7 @@ find_package(spdlog 1.3.1 REQUIRED) find_package(OpenSSL 1.1.1 REQUIRED) # json -find_package(nlohmann_json 3.2.0 REQUIRED) +find_package(nlohmann_json 3.9.0 REQUIRED) add_subdirectory(src/mutils-serialization) add_subdirectory(src/conf) @@ -90,7 +90,8 @@ target_link_libraries(derecho ${mutils_LIBRARIES} ${mutils-containers_LIBRARIES} ${mutils-tasks_LIBRARIES} - ${OPENSSL_LIBRARIES}) + ${OPENSSL_LIBRARIES} + nlohmann_json::nlohmann_json) set_target_properties(derecho PROPERTIES SOVERSION ${derecho_VERSION} VERSION ${derecho_build_VERSION} diff --git a/README.md b/README.md index adef4582..b84c5fcf 100644 --- a/README.md +++ b/README.md @@ -52,8 +52,9 @@ Derecho is a library that helps you build replicated, fault-tolerant services in * The OpenSSL SSL/TLS Library. On Ubuntu and other Debian-like systems, you can install package `libssl-dev`. We tested with v1.1.1f. But it should work for any version >= 1.1.1. * The "rdmacm" and "ibverbs" system libraries for Linux, at version 17.1 or higher. On Ubuntu and other Debian-like systems, these are in the packages `librdmacm-dev` and `libibverbs-dev`. * [`spdlog`](https://github.com/gabime/spdlog), a logging library, v1.3.1 or newer. On Ubuntu 19.04 and later this can be installed with the package `libspdlog-dev`. The version of spdlog in Ubuntu 18.04's repositories is too old, but if you are running Ubuntu 18.04 you can download the `libspdlog-dev` package [here](http://old-releases.ubuntu.com/ubuntu/pool/universe/s/spdlog/libspdlog-dev_1.3.1-1_amd64.deb) and install it manually with no other dependencies needed. 
-* The Open Fabric Interface (OFI) library: [`libfabric`](https://github.com/ofiwg/libfabric). To avoid compatibility issue, please install `v1.7.0` from source code. ([Installation script](https://github.com/Derecho-Project/derecho/blob/master/scripts/prerequisites/install-libfabric.sh)) -* Matthew's C++ utilities +* The Open Fabric Interface (OFI) library: [`libfabric`](https://github.com/ofiwg/libfabric). Since this library's interface changes significantly between versions, please install `v1.12.1` from source rather than any packaged version. ([Installation script](https://github.com/Derecho-Project/derecho/blob/master/scripts/prerequisites/install-libfabric.sh)) +* Lohmann's [JSON for Modern C++](https://github.com/nlohmann/json) library, v3.9 or newer. This library is not packaged for Ubuntu, but can easily be installed with our [installation script](https://github.com/Derecho-Project/derecho/blob/master/scripts/prerequisites/install-json.sh). +* Matthew Milano's C++ utilities, which are all CMake libraries that can be installed with "make install": - [`mutils`](https://github.com/mpmilano/mutils) ([Installation script](https://github.com/Derecho-Project/derecho/blob/master/scripts/prerequisites/install-mutils.sh)) - [`mutils-containers`](https://github.com/mpmilano/mutils-containers) ([Installation script](https://github.com/Derecho-Project/derecho/blob/master/scripts/prerequisites/install-mutils-containers.sh)) - [`mutils-tasks`](https://github.com/mpmilano/mutils-tasks) ([Installation script](https://github.com/Derecho-Project/derecho/blob/master/scripts/prerequisites/install-mutils-tasks.sh)) diff --git a/scripts/prerequisites/install-json.sh b/scripts/prerequisites/install-json.sh new file mode 100755 index 00000000..362ef6e2 --- /dev/null +++ b/scripts/prerequisites/install-json.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -eu +export TMPDIR=/var/tmp +INSTALL_PREFIX="/usr/local" +if [[ $# -gt 0 ]]; then + INSTALL_PREFIX=$1 +fi + +echo "Using 
INSTALL_PREFIX=${INSTALL_PREFIX}" + +WORKPATH=`mktemp -d` +cd ${WORKPATH} +git clone https://github.com/nlohmann/json.git +cd json +git checkout v3.9.1 +cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} . +make -j `lscpu | grep "^CPU(" | awk '{print $2}'` +make install diff --git a/src/core/git_version.cpp b/src/core/git_version.cpp index d6f4b5ee..a5761dab 100644 --- a/src/core/git_version.cpp +++ b/src/core/git_version.cpp @@ -13,8 +13,8 @@ namespace derecho { const int MAJOR_VERSION = 2; const int MINOR_VERSION = 1; const int PATCH_VERSION = 0; -const int COMMITS_AHEAD_OF_VERSION = 101; +const int COMMITS_AHEAD_OF_VERSION = 146; const char* VERSION_STRING = "2.1.0"; -const char* VERSION_STRING_PLUS_COMMITS = "2.1.0+101"; +const char* VERSION_STRING_PLUS_COMMITS = "2.1.0+146"; } From fb24abcea8d125eb78c75cda11fc85f570d2d03d Mon Sep 17 00:00:00 2001 From: Edward Tremel Date: Tue, 29 Jun 2021 00:54:08 -0400 Subject: [PATCH 23/26] Assorted cosmetic improvements to the JSON layout system The "construct_DSA_xxx" functions have been replaced with similar functions named "make_subgroup_allocator" that are polymorphic on their argument type. Their implementation is now contained within DefaultSubgroupAllocator constructors that use vectors of type_index instead of template parameters; the make_subgroup_allocator functions serve only to convert the template parameters to vectors of type_index. I also added a new constructor that takes only the subgroup types, and guesses whether to use a JSON file path or a JSON string based on which option exists in the config file. This should allow you to write something like auto allocator = make_subgroup_allocator(); and let the config file do the rest. The Conf::initialize function now checks the JSON configuration options to see if the JSON file exists, or the JSON string is valid. 
This should catch configuration errors earlier and make it less likely that the DefaultSubgroupAllocator has to throw an exception because it can't use the configured JSON. Also, string macros have been replaced with string constants, and some low-level debugging messages have been changed from dbg_default_debug to dbg_default_trace. --- .../core/detail/subgroup_functions_impl.hpp | 118 -------- include/derecho/core/subgroup_functions.hpp | 206 ++++++++++++-- .../demos/simple_replicated_objects_json.cpp | 4 +- .../simple_replicated_objects_json_file.cpp | 2 +- ...e_replicated_objects_overlap_json_file.cpp | 2 +- src/conf/conf.cpp | 26 ++ src/core/subgroup_functions.cpp | 264 +++++++++++------- 7 files changed, 371 insertions(+), 251 deletions(-) delete mode 100644 include/derecho/core/detail/subgroup_functions_impl.hpp diff --git a/include/derecho/core/detail/subgroup_functions_impl.hpp b/include/derecho/core/detail/subgroup_functions_impl.hpp deleted file mode 100644 index b691a544..00000000 --- a/include/derecho/core/detail/subgroup_functions_impl.hpp +++ /dev/null @@ -1,118 +0,0 @@ -/** - * @file subgroup_functions_impl.h - * @brief Contains implementations of functions that parse json layout - * @date May 20, 2021 - */ - -#include "../subgroup_functions.hpp" -#include -#include -#include - -namespace derecho { - -/** - * defining key strings used in the layout json file. - */ -#define JSON_CONF_LAYOUT "layout" -#define JSON_CONF_TYPE_ALIAS "type_alias" -#define MIN_NODES_BY_SHARD "min_nodes_by_shard" -#define MAX_NODES_BY_SHARD "max_nodes_by_shard" -#define RESERVED_NODE_ID_BY_SHRAD "reserved_node_id_by_shard" -#define DELIVERY_MODES_BY_SHARD "delivery_modes_by_shard" -#define DELIVERY_MODE_ORDERED "Ordered" -#define DELIVERY_MODE_RAW "Raw" -#define PROFILES_BY_SHARD "profiles_by_shard" -/** - * Generate a single-type subgroup allocation policy from json string - * @param jconf subgroup configuration represented in json format. 
- * @param all_reserved_node_ids a set that holds the union of all reserved node_ids. - * @return SubgroupAllocationPolicy - */ -SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json&, std::set&); - -/** - * TODO: If we just need to check shards within one subgroup, this function is redundant. - * Make sure that no shards inside a subgroup reserve same node_ids. Shards in - * different subgroups of one same type or from different types can share nodes, - * and this why we use the reserved_node_id feature. - * For example, we can assign 2 subgroups for type "PersistentCascadeStoreWithStringKey" - * to store data and model respectively for an ML application, and actually reserve - * the same node_ids for shards in this two subgroup. This way the data and the model - * coexist in the same node, thus delivering performance gains. - * @param dsa_map the subgroup allocation map derived from json configuration. - */ -void check_reserved_node_id_pool(const std::map>&); - -template -void print_set(const std::set& uset) { - std::stringstream stream; - for(auto thing : uset) { - stream << thing << ' '; - } - - std::string out = stream.str(); - dbg_default_debug(out); -} -template -void print_set(const std::vector& uset) { - std::stringstream stream; - for(auto thing : uset) { - stream << thing << ' '; - } - - std::string out = stream.str(); - dbg_default_debug(out); -} - -template -void derecho_populate_policy_by_subgroup_type_map( - std::map>& dsa_map, - std::set& all_reserved_node_ids, - const json& layout, int type_idx) { - dsa_map.emplace(std::type_index(typeid(ReplicatedType)), derecho_parse_json_subgroup_policy(layout[type_idx], all_reserved_node_ids)); -} - -template -void derecho_populate_policy_by_subgroup_type_map( - std::map>& dsa_map, - std::set& all_reserved_node_ids, - const json& layout, int type_idx) { - dsa_map.emplace(std::type_index(typeid(FirstReplicatedType)), derecho_parse_json_subgroup_policy(layout[type_idx], all_reserved_node_ids)); - 
derecho_populate_policy_by_subgroup_type_map(dsa_map, all_reserved_node_ids, layout, type_idx + 1); -} - -template -DefaultSubgroupAllocator construct_DSA_with_layout(const json& layout) { - std::map> dsa_map; - - std::set all_reserved_node_ids; - - derecho_populate_policy_by_subgroup_type_map( - dsa_map, all_reserved_node_ids, layout, 0); - - return DefaultSubgroupAllocator(dsa_map, all_reserved_node_ids); -} - -template -DefaultSubgroupAllocator construct_DSA_with_layout_path(const std::string& layout_path) { - json layout; - - std::ifstream json_layout_stream(layout_path.c_str()); - if(!json_layout_stream) { - throw derecho_exception("The json layout file " + layout_path + " not found."); - } - - json_layout_stream >> layout; - - std::map> dsa_map; - - std::set all_reserved_node_ids; - - derecho_populate_policy_by_subgroup_type_map( - dsa_map, all_reserved_node_ids, layout, 0); - - return DefaultSubgroupAllocator(dsa_map, all_reserved_node_ids); -} - -} /* namespace derecho */ diff --git a/include/derecho/core/subgroup_functions.hpp b/include/derecho/core/subgroup_functions.hpp index 15c534f3..208dc6ff 100644 --- a/include/derecho/core/subgroup_functions.hpp +++ b/include/derecho/core/subgroup_functions.hpp @@ -1,7 +1,5 @@ /** - * @file subgroup_functions.h - * - * @date Feb 28, 2017 + * @file subgroup_functions.hpp */ #pragma once @@ -16,6 +14,7 @@ #include "derecho_modes.hpp" #include "detail/derecho_internal.hpp" #include "subgroup_info.hpp" +#include using json = nlohmann::json; @@ -47,6 +46,20 @@ constexpr char max_nodes_profile_field[] = "max_nodes"; /* It would be really nice if we could group these together in an enumerated class * called ProfileFields or something, but there's no way to do that with strings. 
*/ +/* + * String constants for the names of JSON object fields that the JSON-based + * default subgroup allocator will look up + */ +constexpr char json_layout_field[] = "layout"; +constexpr char json_type_alias_field[] = "type_alias"; +constexpr char min_nodes_by_shard_field[] = "min_nodes_by_shard"; +constexpr char max_nodes_by_shard_field[] = "max_nodes_by_shard"; +constexpr char reserved_node_ids_by_shard_field[] = "reserved_node_id_by_shard"; +constexpr char delivery_modes_by_shard_field[] = "delivery_modes_by_shard"; +constexpr char delivery_mode_ordered[] = "Ordered"; +constexpr char delivery_mode_raw[] = "Raw"; +constexpr char profiles_by_shard_field[] = "profiles_by_shard"; + /** * A simple implementation of shard_view_generator_t that creates a single, * un-sharded subgroup containing all the members of curr_view for every subgroup @@ -99,14 +112,15 @@ struct ShardAllocationPolicy { * indicating which profile it should use. (Ignored if even_shards is * true). */ std::vector profiles_by_shard; - /** Only used when even_shards is false. - * For each shard, this stores a list of node ids reserved for it. When a - * new node comes with id inside the list, it will be added into the - * dedicated shard directly. Overlapping among shards can be realized by - * this mechanism. - * Need to use std::set instead of std::unordered_set to makesure set functions - * run correctly.*/ - std::vector> reserved_node_id_by_shard; + /** + * Only used when even_shards is false. + * For each shard, this stores a list of node IDs reserved for it. When a + * new node is added to the View with an ID on the list, it will always be + * added to its dedicated shard. A node ID can be reserved by more than one + * shard, as long as they are in different subgroups; this will make the + * subgroups overlap (colocate). 
+ */ + std::vector> reserved_node_ids_by_shard; }; /** @@ -146,6 +160,11 @@ struct CrossProductPolicy { std::pair target_subgroup; }; +/** + * A type alias for a std::variant containing one of the possible subgroup policies. + */ +using SubgroupPolicyVariant = std::variant; + /* Helper functions that construct ShardAllocationPolicy values for common cases. */ /** @@ -269,12 +288,12 @@ class DefaultSubgroupAllocator { * CrossProductPolicy if that type should use the "cross-product" allocator * instead. */ - const std::map> policies; + std::map policies; /** * The union set of reserved_node_ids from all shards. */ - const std::set all_reserved_node_ids; + std::set all_reserved_node_ids; /** * Determines how many members each shard can have in the current view, based @@ -306,6 +325,8 @@ class DefaultSubgroupAllocator { * @param subgroup_type The subgroup type to allocate members for * @param curr_view The current view, whose next_unassigned_rank will be updated * @param shard_sizes The map of membership sizes for every subgroup and shard + * @param curr_members + * @param curr_member_set * @return A subgroup layout for this subgroup type */ subgroup_shard_layout_t allocate_standard_subgroup_type( @@ -324,6 +345,10 @@ class DefaultSubgroupAllocator { * @param prev_view The previous View, now known to be non-null * @param curr_view The current View, whose next_unassigned_rank will be updated * @param shard_sizes The map of membership sizes for every subgroup and shard in curr_view + * @param surviving_member_set + * @param added_member_set + * @param curr_members + * @param curr_member_set * @return A subgroup layout for this subgroup type. 
*/ subgroup_shard_layout_t update_standard_subgroup_type( @@ -332,7 +357,7 @@ class DefaultSubgroupAllocator { const std::unique_ptr& prev_view, View& curr_view, const std::map>>& shard_sizes, - const std::set& survive_member_set, + const std::set& surviving_member_set, const std::set& added_member_set, const std::vector& curr_members, const std::set& curr_member_set) const; @@ -369,20 +394,70 @@ class DefaultSubgroupAllocator { subgroup_allocation_map_t& subgroup_layouts) const; public: - DefaultSubgroupAllocator(const std::map>& + /** + * Constructs a subgroup allocator from a map of subgroup policies. + * @param policies_by_subgroup_type A map containing one entry for each + * subgroup type, whose value is the allocation policy to use for that + * subgroup type (either a SubgroupAllocationPolicy or a CrossProductPolicy) + */ + DefaultSubgroupAllocator(const std::map& policies_by_subgroup_type) : policies(policies_by_subgroup_type) {} - - DefaultSubgroupAllocator(const std::map>& + /** + * Constructs a subgroup allocator with policies that include reserved node + * IDs. In this case the allocator must be initialized with the set of all + * reserved node IDs used by any policy in the map. + * @param policies_by_subgroup_type A map containing one entry for each + * subgroup type, whose value is the allocation policy to use for that + * subgroup type (either a SubgroupAllocationPolicy or a CrossProductPolicy) + * @param all_reserved_node_ids The set of all reserved node IDs used by any + * policy in the map. + */ + DefaultSubgroupAllocator(const std::map& policies_by_subgroup_type, const std::set& all_reserved_node_ids) : policies(policies_by_subgroup_type), all_reserved_node_ids(all_reserved_node_ids) {} + + /** + * Constructs a subgroup allocator from a vector of subgroup types and a + * JSON object containing layout policy information for each subgroup type. 
+ * This assumes that the JSON object is an array with one entry for each + * subgroup type, in the same order as the types in the vector. + * @param subgroup_types A vector of subgroup types (as type_indexes) + * @param layout_array A JSON object containing a policy entry for each subgroup type + */ + DefaultSubgroupAllocator(std::vector subgroup_types, const json& layout_array); + + /** + * Constructs a subgroup allocator from a vector of subgroup types and a + * path to a file containing a JSON string with layout information for each + * type. + * @param subgroup_types A vector of subgroup types (as type_indexes) + * @param json_file_path A path to a file containing a JSON string + */ + DefaultSubgroupAllocator(std::vector subgroup_types, const std::string& json_file_path); + + /** + * Constructs a subgroup allocator from a vector of subgroup types, assuming + * that a JSON layout object has been configured for these subgroup types. + * This will use either the json_layout or json_layout_path config option + * (whichever one is present) to load a JSON object, and assume that it is + * an array with one entry for each subgroup type in the same order as the + * types in the vector. + * @param subgroup_types A vector of subgroup types (as type_indexes) + */ + DefaultSubgroupAllocator(std::vector subgroup_types); + + /** + * Copy constructor + */ DefaultSubgroupAllocator(const DefaultSubgroupAllocator& to_copy) : policies(to_copy.policies), all_reserved_node_ids(to_copy.all_reserved_node_ids) {} + /** + * Move constructor + */ DefaultSubgroupAllocator(DefaultSubgroupAllocator&&) = default; subgroup_allocation_map_t operator()(const std::vector& subgroup_type_order, @@ -390,12 +465,97 @@ class DefaultSubgroupAllocator { View& curr_view) const; }; +/* + * Since constructors can't take template parameters, these free functions + * allow you to construct a DefaultSubgroupAllocator by specifying the + * subgroup types as template parameters rather than type_indexes. 
This is + * modeled after the same workaround used by std::make_unique + */ + +/** + * Constructs a subgroup allocator using information in the Derecho config + * file, as long as the template parameters are the subgroup types in the + * correct order. This will use either the json_layout or json_layout_path + * config option (whichever one is present) to load a JSON object, then + * assume that it is an array with one policy entry for each subgroup type, + * in the same order as the template parameters. + * @tparam The subgroup types, in the same order as the layout policies in + * the JSON object. + */ template -DefaultSubgroupAllocator construct_DSA_with_layout(const json& layout); +DefaultSubgroupAllocator make_subgroup_allocator() { + return DefaultSubgroupAllocator({std::type_index(typeid(ReplicatedTypes))...}); +} +/** + * Constructs a subgroup allocator from a JSON object describing layout + * policies for each subgroup type. + * @param json_file_path A path to a file containing a JSON string + * @tparam The subgroup types, in the same order as the layout policies + * in the JSON object + */ template -DefaultSubgroupAllocator construct_DSA_with_layout_path(const std::string& layout_path); +DefaultSubgroupAllocator make_subgroup_allocator(const std::string& json_file_path) { + return DefaultSubgroupAllocator({std::type_index(typeid(ReplicatedTypes))...}, json_file_path); +} -} // namespace derecho +/** + * Constructs a subgroup allocator from a JSON object describing layout + * policies for each subgroup type. 
+ * @param layout A JSON object that is an array with one entry for each + * subgroup type + * @tparam The subgroup types, in the same order as the layout policies + * in the JSON object + */ +template +DefaultSubgroupAllocator make_subgroup_allocator(const json& layout) { + return DefaultSubgroupAllocator({std::type_index(typeid(ReplicatedTypes))...}, layout); +} -#include "detail/subgroup_functions_impl.hpp" + + +/** + * Generate a single-type subgroup allocation policy from a JSON object + * @param jconf A subgroup configuration represented in json format. + * @param all_reserved_node_ids A set that holds the union of all reserved node_ids. + * @return SubgroupAllocationPolicy for that subgroup + */ +SubgroupAllocationPolicy parse_json_subgroup_policy(const json&, std::set&); + +/** + * TODO: If we just need to check shards within one subgroup, this function is redundant. + * Make sure that no shards inside a subgroup reserve same node_ids. Shards in + * different subgroups of one same type or from different types can share nodes, + * and this why we use the reserved_node_id feature. + * For example, we can assign 2 subgroups for type "PersistentCascadeStoreWithStringKey" + * to store data and model respectively for an ML application, and actually reserve + * the same node_ids for shards in this two subgroup. This way the data and the model + * coexist in the same node, thus delivering performance gains. + * @param dsa_map the subgroup allocation map derived from json configuration. 
+ */ +void check_reserved_node_id_pool(const std::map& dsa_map); + +/* Debugging functions */ + +template +void print_set(const std::set& uset) { + std::stringstream stream; + for(auto thing : uset) { + stream << thing << ' '; + } + + std::string out = stream.str(); + dbg_default_debug(out); +} +template +void print_set(const std::vector& uset) { + std::stringstream stream; + for(auto thing : uset) { + stream << thing << ' '; + } + + std::string out = stream.str(); + dbg_default_debug(out); +} + +} // namespace derecho diff --git a/src/applications/demos/simple_replicated_objects_json.cpp b/src/applications/demos/simple_replicated_objects_json.cpp index f88e2092..ec90eb93 100644 --- a/src/applications/demos/simple_replicated_objects_json.cpp +++ b/src/applications/demos/simple_replicated_objects_json.cpp @@ -33,10 +33,10 @@ int main(int argc, char** argv) { //Define subgroup membership using the default subgroup allocator function //Each Replicated type will have one subgroup and one shard, with three members in the shard - + json json_layout = json::parse(derecho::getConfString(CONF_DERECHO_JSON_LAYOUT)); cout << "json_layout parsed\n"; - auto dsa_object = derecho::construct_DSA_with_layout(json_layout); + auto dsa_object = derecho::make_subgroup_allocator(json_layout); cout << "dsa_object constructed\n"; derecho::SubgroupInfo subgroup_function{dsa_object}; diff --git a/src/applications/demos/simple_replicated_objects_json_file.cpp b/src/applications/demos/simple_replicated_objects_json_file.cpp index 7e876301..2f73a3e7 100644 --- a/src/applications/demos/simple_replicated_objects_json_file.cpp +++ b/src/applications/demos/simple_replicated_objects_json_file.cpp @@ -31,7 +31,7 @@ int main(int argc, char** argv) { //Define subgroup membership using the default subgroup allocator function //Each Replicated type will have one subgroup and one shard, with three members in the shard - derecho::SubgroupInfo subgroup_function {derecho::construct_DSA_with_layout_path( + 
derecho::SubgroupInfo subgroup_function {derecho::make_subgroup_allocator( derecho::getConfString(CONF_DERECHO_JSON_LAYOUT_PATH) )}; //Each replicated type needs a factory; this can be used to supply constructor arguments diff --git a/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp b/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp index 2d3e9864..354cc515 100644 --- a/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp +++ b/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp @@ -42,7 +42,7 @@ int main(int argc, char** argv) { //Define subgroup membership using the default subgroup allocator function //Each Replicated type will have one subgroup and one shard, with three members in the shard - derecho::SubgroupInfo subgroup_function{derecho::construct_DSA_with_layout_path( + derecho::SubgroupInfo subgroup_function{derecho::make_subgroup_allocator( derecho::getConfString(CONF_DERECHO_JSON_LAYOUT_PATH))}; //Each replicated type needs a factory; this can be used to supply constructor arguments //for the subgroup's initial state. These must take a PersistentRegistry* argument, but diff --git a/src/conf/conf.cpp b/src/conf/conf.cpp index efd94332..cdcc0e57 100644 --- a/src/conf/conf.cpp +++ b/src/conf/conf.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #ifndef NDEBUG #include #endif //NDEBUG @@ -106,6 +107,31 @@ void Conf::initialize(int argc, char* argv[], const char* conf_file) { // 3 - set the flag to initialized Conf::singleton_initialized_flag.store(CONF_INITIALIZED, std::memory_order_acq_rel); + // 4 - check the configuration for sanity + if(hasCustomizedConfKey(CONF_DERECHO_JSON_LAYOUT) && hasCustomizedConfKey(CONF_DERECHO_JSON_LAYOUT_PATH)) { + throw std::logic_error("Configuration error: Both " CONF_DERECHO_JSON_LAYOUT " and " CONF_DERECHO_JSON_LAYOUT_PATH " were specified. 
These options are mutually exclusive"); + } + if(hasCustomizedConfKey(CONF_DERECHO_JSON_LAYOUT_PATH)) { + std::ifstream json_file_stream(getConfString(CONF_DERECHO_JSON_LAYOUT_PATH)); + if(!json_file_stream) { + throw std::logic_error("Configuration error: The JSON layout file could not be opened for reading"); + } + nlohmann::json json_obj; + try { + json_file_stream >> json_obj; + } catch(nlohmann::json::exception& ex) { + //Wrap the JSON-specific exception in a logic_error to add a message + std::throw_with_nested(std::logic_error("Configuration error: The JSON layout file does not contain valid JSON")); + } + } + if(hasCustomizedConfKey(CONF_DERECHO_JSON_LAYOUT)) { + nlohmann::json json_obj; + try { + json_obj = nlohmann::json::parse(getConfString(CONF_DERECHO_JSON_LAYOUT)); + } catch(nlohmann::json::exception& ex) { + std::throw_with_nested(std::logic_error("Configuration error: The JSON layout string is not valid JSON")); + } + } if(getConfUInt32(CONF_DERECHO_LOCAL_ID) >= getConfUInt32(CONF_DERECHO_MAX_NODE_ID)) { throw std::logic_error("Configuration error: Local node ID must be less than max node ID"); diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index 7a30d089..64b42bbc 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -1,7 +1,5 @@ /** * @file subgroup_functions.cpp - * - * @date Feb 28, 2017 */ #include @@ -12,6 +10,7 @@ #include #include #include +#include namespace derecho { @@ -120,33 +119,33 @@ void DefaultSubgroupAllocator::compute_standard_memberships( View& curr_view, subgroup_allocation_map_t& subgroup_layouts) const { //First, determine how many nodes each shard can have based on their policies - dbg_default_debug("Ready to calculate size"); + dbg_default_trace("Ready to calculate size"); std::map>> shard_sizes = compute_standard_shard_sizes(subgroup_type_order, prev_view, curr_view); //Now we can go through and actually allocate nodes to each shard, //knowing exactly how many 
nodes they will get - dbg_default_debug("Ready to really assign nodes"); + dbg_default_trace("Ready to really assign nodes"); if(!prev_view) { - /** allocate_standard_subgroup_type is invoked when we have no prev_view, and thus next_unassigned_rank is 0. - * If we have reserved node_ids, we need to rearrange node_ids in curr_view.members into two "parts": - * the first part holds current active reserved node_ids, - * while the second part holds normal node_ids. - * We then rearrange next_unassigned_rank to be the length of the first part, for nodes in the first part - * are inherent nodes for some shards, and sure will be assigned, - * sometimes more than once if we want to overlap shards. + /* allocate_standard_subgroup_type is invoked when we have no prev_view, and thus + * next_unassigned_rank is 0. If we have reserved node_ids, we need to rearrange + * node_ids in curr_view.members into two "parts": the first part holds current + * active reserved node_ids, while the second part holds normal node_ids. We then + * rearrange next_unassigned_rank to be the length of the first part, since nodes + * in the first part are inherent nodes for some shards, and definitely will be + * assigned, sometimes more than once if we want to overlap shards. */ - // We cannot modify curr_view.members inplace, which will corrupt curr_view.my_rank, curr_view.node_id_to_rank, etc. Besides, View::members is const. + // We cannot modify curr_view.members in-place, which will corrupt curr_view.my_rank, curr_view.node_id_to_rank, etc. Besides, View::members is const. 
std::vector curr_members; std::set curr_member_set(curr_view.members.begin(), curr_view.members.end()); - dbg_default_debug("Initial curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); + dbg_default_trace("Initial curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); if(all_reserved_node_ids.size() > 0) { std::set_intersection( curr_member_set.begin(), curr_member_set.end(), all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(curr_members, curr_members.end())); curr_view.next_unassigned_rank = curr_members.size(); - dbg_default_debug("After rearranging inherent node_ids, curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); + dbg_default_trace("After rearranging inherent node_ids, curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); std::set_difference( curr_member_set.begin(), curr_member_set.end(), all_reserved_node_ids.begin(), all_reserved_node_ids.end(), @@ -160,29 +159,30 @@ void DefaultSubgroupAllocator::compute_standard_memberships( if(!std::holds_alternative(policies.at(subgroup_type))) { continue; } - dbg_default_debug("Without prev_view, assign node to type {}", std::string(subgroup_type.name())); + dbg_default_trace("Without prev_view, assign node to type {}", std::string(subgroup_type.name())); subgroup_layouts[subgroup_type] = allocate_standard_subgroup_type( subgroup_type, curr_view, shard_sizes, curr_members, curr_member_set); } } else { - /** survive_member_set holds non-failed node_ids from prev_view, added_member_set holds - * newly added node_ids in curr_view. - */ - std::set survive_member_set(curr_view.members.begin(), curr_view.members.begin() + curr_view.next_unassigned_rank); + // surviving_member_set holds non-failed node_ids from prev_view, + // added_member_set holds newly added node_ids in curr_view. 
+ std::set surviving_member_set(curr_view.members.begin(), curr_view.members.begin() + curr_view.next_unassigned_rank); std::set added_member_set(curr_view.members.begin() + curr_view.next_unassigned_rank, curr_view.members.end()); - /** update_standard_subgroup_type is invoked if there is a prev_view, and the curr_view.members is already - * arranged into two parts: the first part hold survive nodes from prev_view, and the second - * part holds newly added nodes. The next_unassigned_rank is the length of the first part. - * If we have reserved node_ids, we need to rearrnage curr_view.members into 2 parts: - * the first part holds inherent node_ids for shards, which is composed with survive node_ids and reserved node_ids - * the second part holds newly added non-reserved node_ids. - * We then rearrange next_unassigned_rank to be the length of the first part, for they will sure be assigned. + /* update_standard_subgroup_type is invoked if there is a prev_view, and the + * curr_view.members is already arranged into two parts: the first part holds + * surviving nodes from prev_view, and the second part holds newly added nodes. + * The next_unassigned_rank is the length of the first part. If we have reserved + * node_ids, we need to rearrange curr_view.members into 2 parts: the first part + * holds inherent node_ids for shards, which is composed of surviving node_ids + * and reserved node_ids; the second part holds newly added non-reserved node_ids. + * We then rearrange next_unassigned_rank to be the length of the first part, + * since they will definitely be assigned.
*/ std::vector curr_members; std::set curr_member_set(curr_view.members.begin(), curr_view.members.end()); - dbg_default_debug("Initial curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); + dbg_default_trace("Initial curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); if(all_reserved_node_ids.size() > 0) { std::set active_reserved_node_id_set; std::set_intersection( @@ -190,20 +190,20 @@ void DefaultSubgroupAllocator::compute_standard_memberships( all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(active_reserved_node_id_set, active_reserved_node_id_set.end())); std::set_union( - survive_member_set.begin(), survive_member_set.end(), + surviving_member_set.begin(), surviving_member_set.end(), active_reserved_node_id_set.begin(), active_reserved_node_id_set.end(), std::inserter(curr_members, curr_members.end())); - dbg_default_debug("With inherent nodes, curr_members is:"); + dbg_default_trace("With inherent nodes, curr_members is:"); print_set(curr_members); curr_view.next_unassigned_rank = curr_members.size(); - dbg_default_debug("After rearranging inherent node_ids, curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); + dbg_default_trace("After rearranging inherent node_ids, curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); std::set_difference( added_member_set.begin(), added_member_set.end(), all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(curr_members, curr_members.end())); - dbg_default_debug("Adding newly added non-reserved nodes, curr_members is:"); + dbg_default_trace("Adding newly added non-reserved nodes, curr_members is:"); print_set(curr_members); } @@ -214,11 +214,11 @@ void DefaultSubgroupAllocator::compute_standard_memberships( if(!std::holds_alternative(policies.at(subgroup_type))) { continue; } - dbg_default_debug("With prev_view, assingn node to type {}", std::string(subgroup_type.name())); + dbg_default_trace("With 
prev_view, assigning node to type {}", std::string(subgroup_type.name())); subgroup_layouts[subgroup_type] = update_standard_subgroup_type( subgroup_type, subgroup_type_id, prev_view, curr_view, shard_sizes, - survive_member_set, + surviving_member_set, added_member_set, curr_members, curr_member_set); } @@ -243,11 +243,11 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(all_active_reserved_node_id_set, all_active_reserved_node_id_set.begin())); - dbg_default_debug("Parsing all_active_reserved_node_id_set: "); + dbg_default_trace("Parsing all_active_reserved_node_id_set: "); print_set(all_active_reserved_node_id_set); nodes_needed = all_active_reserved_node_id_set.size(); - dbg_default_debug("After counting all_active_reserved_node_id_set, nodes_needed is {}", nodes_needed); + dbg_default_trace("After counting all_active_reserved_node_id_set, nodes_needed is {}", nodes_needed); } std::map>> shard_sizes; @@ -272,13 +272,13 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( size_t min_shard_size = sharding_policy.even_shards ? sharding_policy.min_nodes_per_shard : sharding_policy.min_num_nodes_by_shard[shard_num]; - /** With reserved nodes, we do not assign nodes evenly across shards. + /* With reserved nodes, we do not assign nodes evenly across shards. * All current nodes may be occupied by 1 shard because it reserved all of them. * Therefore we need to check if min_shard_size for each shard is satisfied, * and thus we need to maintain nodes_needed more carefully. 
*/ - dbg_default_debug("Calculate node size for type {}, subgroup_num {}, shard_num {}", std::string(subgroup_type.name()), subgroup_num, shard_num); + dbg_default_trace("Calculate node size for type {}, subgroup_num {}, shard_num {}", std::string(subgroup_type.name()), subgroup_num, shard_num); std::set survived_node_set; //If there was a previous view, we must include all non-failed nodes from that view @@ -295,24 +295,24 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( } } - // Check whehter this shard reserve existing nodes. + // Check whether this shard reserves existing nodes. std::set active_reserved_node_id_set; - if(sharding_policy.reserved_node_id_by_shard.size() > 0) { + if(sharding_policy.reserved_node_ids_by_shard.size() > 0) { std::set_intersection( - sharding_policy.reserved_node_id_by_shard[shard_num].begin(), - sharding_policy.reserved_node_id_by_shard[shard_num].end(), + sharding_policy.reserved_node_ids_by_shard[shard_num].begin(), + sharding_policy.reserved_node_ids_by_shard[shard_num].end(), curr_member_set.begin(), curr_member_set.end(), std::inserter(active_reserved_node_id_set, active_reserved_node_id_set.begin())); } else { - dbg_default_debug("There is no reserved node_id configured."); + dbg_default_trace("There is no reserved node_id configured."); } - dbg_default_debug("The active_reserved_node_id_set for current shard is: "); + dbg_default_trace("The active_reserved_node_id_set for current shard is: "); print_set(active_reserved_node_id_set); - /** The inherent_node_id_set holds node_ids that are "inherent" or "intrinsic" + /* The inherent_node_id_set holds node_ids that are "inherent" or "intrinsic" * to the this shard, for the node_ids are either surviving nodes from "the same shard" * in the prev_view or reserved for this shard, or both. 
*/ @@ -321,7 +321,7 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( survived_node_set.begin(), survived_node_set.end(), active_reserved_node_id_set.begin(), active_reserved_node_id_set.end(), std::inserter(inherent_node_id_set, inherent_node_id_set.end())); - dbg_default_debug("The inherent_node_id_set for current shard is: "); + dbg_default_trace("The inherent_node_id_set for current shard is: "); print_set(inherent_node_id_set); // All active reserved nodes just count once. nodes_needed += inherent_node_id_set.size() - active_reserved_node_id_set.size(); @@ -405,35 +405,35 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_typ uint32_t shard_size = shard_sizes.at(subgroup_type)[subgroup_num][shard_num]; std::vector desired_nodes; - dbg_default_debug("For subgroup {}, shard {}, it is assigned {} nodes", subgroup_num, shard_num, shard_size); + dbg_default_trace("Subgroup {}, shard {}, is assigned {} nodes", subgroup_num, shard_num, shard_size); // Allocate active reserved nodes first. 
- if(subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard.size() > 0) { + if(subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_ids_by_shard.size() > 0) { const std::set reserved_node_id_set( - subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].begin(), - subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].end()); + subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_ids_by_shard[shard_num].begin(), + subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_ids_by_shard[shard_num].end()); if(reserved_node_id_set.size() > 0) { std::set_intersection( reserved_node_id_set.begin(), reserved_node_id_set.end(), curr_member_set.begin(), curr_member_set.end(), std::inserter(desired_nodes, desired_nodes.end())); shard_size -= desired_nodes.size(); - dbg_default_debug("Assign it {} active reserved nodes:", desired_nodes.size()); + dbg_default_trace("Assigning shard {} active reserved nodes:", desired_nodes.size()); print_set(std::set(desired_nodes.begin(), desired_nodes.end())); } } else { - dbg_default_debug("There is no reserved node_id configured."); + dbg_default_trace("There is no reserved node_id configured."); } //Grab the next shard_size nodes desired_nodes.insert(desired_nodes.end(), &curr_members[curr_view.next_unassigned_rank], &curr_members[curr_view.next_unassigned_rank + shard_size]); - // NOTE: If there are unassigned reserved nodes(which should not happen in regular use), next_unassigned_rank only points to - // unassigned normal nodes, which I(Lichen) think is just OK and not in conflict with its definition. + // NOTE: If there are unassigned reserved nodes (which should not happen in regular use), next_unassigned_rank only points to + // unassigned normal nodes, which I (Lichen) think is just OK and not in conflict with its definition. 
curr_view.next_unassigned_rank += shard_size; - dbg_default_debug("Assign it {} nodes in total, with curr_view.next_unassigned_rank {}:", desired_nodes.size(), curr_view.next_unassigned_rank); + dbg_default_trace("Assigning shard {} nodes in total, with curr_view.next_unassigned_rank {}:", desired_nodes.size(), curr_view.next_unassigned_rank); print_set(desired_nodes); //Figure out what the Mode policy for this shard is @@ -465,7 +465,7 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( const std::unique_ptr& prev_view, View& curr_view, const std::map>>& shard_sizes, - const std::set& survive_member_set, + const std::set& surviving_member_set, const std::set& added_member_set, const std::vector& curr_members, const std::set& curr_member_set) const { @@ -476,9 +476,9 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( const subgroup_id_t previous_assignment_offset = prev_view->subgroup_ids_by_type_id.at(subgroup_type_id)[0]; subgroup_shard_layout_t next_assignment(shard_sizes.at(subgroup_type).size()); - dbg_default_debug("The survive_member_set is:"); - print_set(survive_member_set); - dbg_default_debug("The added_member_set is:"); + dbg_default_trace("The surviving_member_set is:"); + print_set(surviving_member_set); + dbg_default_trace("The added_member_set is:"); print_set(added_member_set); const SubgroupAllocationPolicy& subgroup_type_policy @@ -494,7 +494,7 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( std::vector next_shard_members; std::vector next_is_sender; uint32_t allocated_shard_size = shard_sizes.at(subgroup_type)[subgroup_num][shard_num]; - dbg_default_debug("For subgroup {}, shard {}, it is assigned {} nodes", subgroup_num, shard_num, allocated_shard_size); + dbg_default_trace("Subgroup {}, shard {}, is assigned {} nodes", subgroup_num, shard_num, allocated_shard_size); //Add all the non-failed nodes from the previous assignment for(std::size_t 
rank = 0; rank < previous_shard_assignment.members.size(); ++rank) { @@ -504,19 +504,19 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( next_shard_members.push_back(previous_shard_assignment.members[rank]); next_is_sender.push_back(previous_shard_assignment.is_sender[rank]); } - dbg_default_debug("After assigning survive nodes, next_shard_members is:"); + dbg_default_trace("After assigning surviving nodes, next_shard_members is:"); print_set(next_shard_members); //Add newly added reserved nodes - if(subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard.size() > 0) { + if(subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_ids_by_shard.size() > 0) { std::set added_reserved_node_id_set; std::set_intersection( added_member_set.begin(), added_member_set.end(), - subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].begin(), - subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_id_by_shard[shard_num].end(), + subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_ids_by_shard[shard_num].begin(), + subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_ids_by_shard[shard_num].end(), std::inserter(added_reserved_node_id_set, added_reserved_node_id_set.end())); if(added_reserved_node_id_set.size() > 0) { - dbg_default_debug("The added_reserved_node_id_set is not empty:"); + dbg_default_trace("The added_reserved_node_id_set is not empty:"); print_set(added_reserved_node_id_set); for(auto node_id : added_reserved_node_id_set) { @@ -524,10 +524,10 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( next_is_sender.push_back(true); } } - dbg_default_debug("After assigning newly added reserved nodes, we get {} inherent node_id(s) assigned, next_shard_members is:", next_shard_members.size()); + dbg_default_trace("After assigning newly added reserved nodes, we 
get {} inherent node_id(s) assigned, next_shard_members is:", next_shard_members.size()); print_set(next_shard_members); } else { - dbg_default_debug("There is no reserved node_id configured."); + dbg_default_trace("There is no reserved node_id configured."); } //Add additional members if needed @@ -539,7 +539,7 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( //All members start out as senders with the default allocator next_is_sender.push_back(true); } - dbg_default_debug("Assign it {} nodes in total, with curr_view.next_unassigned_rank {}:", next_shard_members.size(), curr_view.next_unassigned_rank); + dbg_default_trace("Assigned shard {} nodes in total, with curr_view.next_unassigned_rank {}:", next_shard_members.size(), curr_view.next_unassigned_rank); print_set(next_shard_members); next_assignment[subgroup_num].emplace_back(curr_view.make_subview(next_shard_members, @@ -628,75 +628,127 @@ subgroup_allocation_map_t DefaultSubgroupAllocator::operator()( return subgroup_allocations; } -SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf, std::set& all_reserved_node_ids) { - if(!jconf.is_object() || !jconf[JSON_CONF_LAYOUT].is_array()) { +DefaultSubgroupAllocator::DefaultSubgroupAllocator(std::vector subgroup_types, const json& layout_array) { + for(std::size_t subgroup_type_index = 0; subgroup_type_index < subgroup_types.size(); ++subgroup_type_index) { + policies.emplace(subgroup_types[subgroup_type_index], + parse_json_subgroup_policy(layout_array[subgroup_type_index], all_reserved_node_ids)); + } +} + +DefaultSubgroupAllocator::DefaultSubgroupAllocator(std::vector subgroup_types, const std::string& json_file_path) { + json layout_array; + + std::ifstream json_file_stream(json_file_path); + if(!json_file_stream) { + //Hopefully this won't be necessary because Conf::initialize checked that the file exists + throw derecho_exception("Failed to initialize subgroup allocator! 
JSON layout file " + json_file_path + " not found"); + } + + json_file_stream >> layout_array; + for(std::size_t subgroup_type_index = 0; subgroup_type_index < subgroup_types.size(); ++subgroup_type_index) { + policies.emplace(subgroup_types[subgroup_type_index], + parse_json_subgroup_policy(layout_array[subgroup_type_index], all_reserved_node_ids)); + } +} + +DefaultSubgroupAllocator::DefaultSubgroupAllocator(std::vector subgroup_types) { + //It's not possible to delegate to a different constructor based on a boolean, + //so I have to copy and paste from the other two constructors + if(hasCustomizedConfKey(CONF_DERECHO_JSON_LAYOUT)) { + json layout_array = json::parse(getConfString(CONF_DERECHO_JSON_LAYOUT)); + for(std::size_t subgroup_type_index = 0; subgroup_type_index < subgroup_types.size(); ++subgroup_type_index) { + policies.emplace(subgroup_types[subgroup_type_index], + parse_json_subgroup_policy(layout_array[subgroup_type_index], all_reserved_node_ids)); + } + } else if(hasCustomizedConfKey(CONF_DERECHO_JSON_LAYOUT_PATH)) { + json layout_array; + + std::ifstream json_file_stream(getConfString(CONF_DERECHO_JSON_LAYOUT_PATH)); + if(!json_file_stream) { + throw derecho_exception("Failed to initialize subgroup allocator! 
JSON layout file " + getConfString(CONF_DERECHO_JSON_LAYOUT_PATH) + " not found"); + } + + json_file_stream >> layout_array; + for(std::size_t subgroup_type_index = 0; subgroup_type_index < subgroup_types.size(); ++subgroup_type_index) { + policies.emplace(subgroup_types[subgroup_type_index], + parse_json_subgroup_policy(layout_array[subgroup_type_index], all_reserved_node_ids)); + } + } else { + throw derecho_exception("Either json_layout or json_layout_path is required when constructing DefaultSubgroupAllocator with no arguments"); + } +} + +SubgroupAllocationPolicy parse_json_subgroup_policy(const json& jconf, std::set& all_reserved_node_ids) { + if(!jconf.is_object() || !jconf[json_layout_field].is_array()) { dbg_default_error("parse_json_subgroup_policy cannot parse {}.", jconf.get()); - throw derecho::derecho_exception("parse_json_subgroup_policy cannot parse" + jconf.get()); + throw derecho_exception("parse_json_subgroup_policy cannot parse" + jconf.get()); } SubgroupAllocationPolicy subgroup_allocation_policy; subgroup_allocation_policy.identical_subgroups = false; - subgroup_allocation_policy.num_subgroups = jconf[JSON_CONF_LAYOUT].size(); + subgroup_allocation_policy.num_subgroups = jconf[json_layout_field].size(); subgroup_allocation_policy.shard_policy_by_subgroup = std::vector(); - for(auto subgroup_it : jconf[JSON_CONF_LAYOUT]) { + for(auto subgroup_it : jconf[json_layout_field]) { ShardAllocationPolicy shard_allocation_policy; - size_t num_shards = subgroup_it[MIN_NODES_BY_SHARD].size(); - if(subgroup_it[MAX_NODES_BY_SHARD].size() != num_shards || subgroup_it[DELIVERY_MODES_BY_SHARD].size() != num_shards || subgroup_it[PROFILES_BY_SHARD].size() != num_shards || - // "reserved_node_id_by_shard" is not a mandatory field - (subgroup_it[RESERVED_NODE_ID_BY_SHRAD].size() != 0 && subgroup_it[RESERVED_NODE_ID_BY_SHRAD].size() != num_shards)) { + size_t num_shards = subgroup_it[min_nodes_by_shard_field].size(); + 
if(subgroup_it[max_nodes_by_shard_field].size() != num_shards + || subgroup_it[delivery_modes_by_shard_field].size() != num_shards + || subgroup_it[profiles_by_shard_field].size() != num_shards || + // "reserved_node_ids_by_shard" is not a mandatory field + (subgroup_it[reserved_node_ids_by_shard_field].size() != 0 + && subgroup_it[reserved_node_ids_by_shard_field].size() != num_shards)) { dbg_default_error("parse_json_subgroup_policy: shards does not match in at least one subgroup: {}", subgroup_it.get()); - throw derecho::derecho_exception("parse_json_subgroup_policy: shards does not match in at least one subgroup:" + subgroup_it.get()); + throw derecho_exception("parse_json_subgroup_policy: shards does not match in at least one subgroup:" + subgroup_it.get()); } shard_allocation_policy.even_shards = false; shard_allocation_policy.num_shards = num_shards; - shard_allocation_policy.min_num_nodes_by_shard = subgroup_it[MIN_NODES_BY_SHARD].get>(); - shard_allocation_policy.max_num_nodes_by_shard = subgroup_it[MAX_NODES_BY_SHARD].get>(); + shard_allocation_policy.min_num_nodes_by_shard = subgroup_it[min_nodes_by_shard_field].get>(); + shard_allocation_policy.max_num_nodes_by_shard = subgroup_it[max_nodes_by_shard_field].get>(); std::vector delivery_modes_by_shard; - for(auto it : subgroup_it[DELIVERY_MODES_BY_SHARD]) { - if(it == DELIVERY_MODE_RAW) { + for(auto it : subgroup_it[delivery_modes_by_shard_field]) { + if(it == delivery_mode_raw) { shard_allocation_policy.modes_by_shard.push_back(Mode::UNORDERED); } else { shard_allocation_policy.modes_by_shard.push_back(Mode::ORDERED); } } - shard_allocation_policy.profiles_by_shard = subgroup_it[PROFILES_BY_SHARD].get>(); + shard_allocation_policy.profiles_by_shard = subgroup_it[profiles_by_shard_field].get>(); - // "reserved_node_id_by_shard" is not a mandatory field - if(subgroup_it.contains(RESERVED_NODE_ID_BY_SHRAD)) { - shard_allocation_policy.reserved_node_id_by_shard = 
subgroup_it[RESERVED_NODE_ID_BY_SHRAD].get>>(); + // "reserved_node_ids_by_shard" is not a mandatory field + if(subgroup_it.contains(reserved_node_ids_by_shard_field)) { + shard_allocation_policy.reserved_node_ids_by_shard = subgroup_it[reserved_node_ids_by_shard_field].get>>(); - for(auto reserved_id_set : shard_allocation_policy.reserved_node_id_by_shard) { + for(auto reserved_id_set : shard_allocation_policy.reserved_node_ids_by_shard) { std::set_union(all_reserved_node_ids.begin(), all_reserved_node_ids.end(), reserved_id_set.begin(), reserved_id_set.end(), std::inserter(all_reserved_node_ids, all_reserved_node_ids.begin())); } - /** - * Make sure that no shards inside a subgroup reserve same node_ids. Shards in - * different subgroups of one same type or from different types can share nodes, - * and this why we use the reserved_node_id feature. - * For example, we can assign 2 subgroups for type "PersistentCascadeStoreWithStringKey" - * to store data and model respectively for an ML application, and actually reserve - * the same node_ids for shards in this two subgroup. This way the data and the model - * coexist in the same node, thus delivering performance gains. + /* + * Make sure that no shards inside a subgroup reserve the same node IDs. Shards in + * different subgroups of the same type or from different types can share nodes, + * which is one reason to use the reserved_node_id feature. For example, we can + * assign 2 subgroups for type "PersistentCascadeStoreWithStringKey" to store the + * data and model respectively for an ML application, and actually reserve the + * same node IDs for shards in these two subgroups. This way the data and the model + * coexist on the same node, reducing the amount of network traffic between them. 
*/ std::set intersect_reserved_node_ids_in_subgroup; - std::set temp(shard_allocation_policy.reserved_node_id_by_shard[0]); + std::set temp(shard_allocation_policy.reserved_node_ids_by_shard[0]); for(int shard_num = 1; shard_num < shard_allocation_policy.num_shards; ++shard_num) { intersect_reserved_node_ids_in_subgroup = std::set(); std::set_intersection( temp.begin(), temp.end(), - shard_allocation_policy.reserved_node_id_by_shard[shard_num].begin(), - shard_allocation_policy.reserved_node_id_by_shard[shard_num].end(), + shard_allocation_policy.reserved_node_ids_by_shard[shard_num].begin(), + shard_allocation_policy.reserved_node_ids_by_shard[shard_num].end(), std::inserter(intersect_reserved_node_ids_in_subgroup, intersect_reserved_node_ids_in_subgroup.begin())); } - // Shards in this subgroup have same reserved node_ids. if(intersect_reserved_node_ids_in_subgroup.size() > 0) { - throw derecho_exception("Shards in one subgroup have same reserved node_ids!"); + throw derecho_exception("Shards in one subgroup have the same reserved node_ids!"); } } else { - dbg_default_debug("There is no reserved node_id configured."); + dbg_default_trace("There is no reserved node_id configured."); } subgroup_allocation_policy.shard_policy_by_subgroup.emplace_back(std::move(shard_allocation_policy)); @@ -704,7 +756,7 @@ SubgroupAllocationPolicy derecho_parse_json_subgroup_policy(const json& jconf, s return subgroup_allocation_policy; } -void check_reserved_node_id_pool(const std::map>& dsa_map) { +void check_reserved_node_id_pool(const std::map& dsa_map) { for(auto& item : dsa_map) { if(!std::holds_alternative(item.second)) { continue; @@ -713,19 +765,19 @@ void check_reserved_node_id_pool(const std::map(item.second); for(int subgroup_num = 0; subgroup_num < subgroup_type_policy.num_subgroups; ++subgroup_num) { const ShardAllocationPolicy& sharding_policy = subgroup_type_policy.shard_policy_by_subgroup[subgroup_num]; - if(sharding_policy.reserved_node_id_by_shard.size() > 0) { + 
if(sharding_policy.reserved_node_ids_by_shard.size() > 0) { + //Make sure that no shards inside a subgroup reserve the same node IDs std::set intersect_reserved_node_ids_in_subgroup; - std::set temp(sharding_policy.reserved_node_id_by_shard[0]); + std::set temp(sharding_policy.reserved_node_ids_by_shard[0]); for(int shard_num = 1; shard_num < sharding_policy.num_shards; ++shard_num) { intersect_reserved_node_ids_in_subgroup = std::set(); std::set_intersection( temp.begin(), temp.end(), - sharding_policy.reserved_node_id_by_shard[shard_num].begin(), - sharding_policy.reserved_node_id_by_shard[shard_num].end(), + sharding_policy.reserved_node_ids_by_shard[shard_num].begin(), + sharding_policy.reserved_node_ids_by_shard[shard_num].end(), std::inserter(intersect_reserved_node_ids_in_subgroup, intersect_reserved_node_ids_in_subgroup.begin())); } - // Shards in this subgroup have same reserved node_ids. if(intersect_reserved_node_ids_in_subgroup.size() > 0) { throw derecho_exception("Shards in one subgroup have same reserved node_ids!"); } From 5edd9923ece45c5d7204946c9f4d3ed8b2ad1d63 Mon Sep 17 00:00:00 2001 From: Edward Tremel Date: Fri, 9 Jul 2021 11:56:09 -0400 Subject: [PATCH 24/26] Added tests for JSON-based layout, and fixed a bug I made some general improvements to subgroup_function_tester and added a new test case to test how the JSON-based layout with reserved node IDs works. In the process, I discovered and fixed a bug in the new JSON-based layout system: It did not account for the identical_subgroups flag and would crash if the number of ShardAllocationPolicy objects was not equal to num_subgroups. I also moved container_template_functions.hpp to the utils folder, and added spdlog/fmt/ostr.h to our "standard" set of includes (from logger.hpp), so that logging functions in our code can print out sets and vectors. 
--- include/derecho/core/detail/group_impl.hpp | 3 +- include/derecho/core/subgroup_functions.hpp | 23 -- .../container_template_functions.hpp | 17 ++ include/derecho/utils/logger.hpp | 3 +- .../unit_tests/subgroup_function_tester.cpp | 216 +++++++++++++----- .../unit_tests/subgroup_function_tester.hpp | 15 +- src/core/git_version.cpp | 4 +- src/core/restart_state.cpp | 2 +- src/core/subgroup_functions.cpp | 78 +++---- src/core/view_manager.cpp | 2 +- 10 files changed, 225 insertions(+), 138 deletions(-) rename include/derecho/{core/detail => utils}/container_template_functions.hpp (89%) diff --git a/include/derecho/core/detail/group_impl.hpp b/include/derecho/core/detail/group_impl.hpp index 40f7cb81..eccce01b 100644 --- a/include/derecho/core/detail/group_impl.hpp +++ b/include/derecho/core/detail/group_impl.hpp @@ -1,6 +1,5 @@ /** * @file group_impl.h - * @brief Contains implementations of all the ManagedGroup functions * @date Apr 22, 2016 */ @@ -10,7 +9,7 @@ #include #include "../group.hpp" -#include "container_template_functions.hpp" +#include #include "derecho_internal.hpp" #include "make_kind_map.hpp" #include diff --git a/include/derecho/core/subgroup_functions.hpp b/include/derecho/core/subgroup_functions.hpp index 208dc6ff..85d4149e 100644 --- a/include/derecho/core/subgroup_functions.hpp +++ b/include/derecho/core/subgroup_functions.hpp @@ -535,27 +535,4 @@ SubgroupAllocationPolicy parse_json_subgroup_policy(const json&, std::set& dsa_map); -/* Debugging functions */ - -template -void print_set(const std::set& uset) { - std::stringstream stream; - for(auto thing : uset) { - stream << thing << ' '; - } - - std::string out = stream.str(); - dbg_default_debug(out); -} -template -void print_set(const std::vector& uset) { - std::stringstream stream; - for(auto thing : uset) { - stream << thing << ' '; - } - - std::string out = stream.str(); - dbg_default_debug(out); -} - } // namespace derecho diff --git 
a/include/derecho/core/detail/container_template_functions.hpp b/include/derecho/utils/container_template_functions.hpp similarity index 89% rename from include/derecho/core/detail/container_template_functions.hpp rename to include/derecho/utils/container_template_functions.hpp index 172d748a..6c4356ea 100644 --- a/include/derecho/core/detail/container_template_functions.hpp +++ b/include/derecho/utils/container_template_functions.hpp @@ -103,3 +103,20 @@ std::size_t index_of(const Container& container, const typename Container::value } } // namespace derecho + +//This needs to be in namespace std to allow std::sets to be printed out in the obvious way +namespace std { + +/** + * Writes a string representation of a std::set to an output stream. + */ +template +std::ostream& operator<<(std::ostream& out, const std::set& s) { + if(!s.empty()) { + out << '{'; + std::copy(s.begin(), s.end(), std::ostream_iterator(out, ", ")); + out << "\b\b}"; + } + return out; +} +} // namespace std diff --git a/include/derecho/utils/logger.hpp b/include/derecho/utils/logger.hpp index 27dfc239..e3497261 100644 --- a/include/derecho/utils/logger.hpp +++ b/include/derecho/utils/logger.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #ifndef NDEBUG #undef NOLOG @@ -21,7 +22,7 @@ class LoggerFactory { public: // create the logger // @PARAM logger_name - // Name of the logger. The log file would be created as + // Name of the logger. The log file would be created as // ".log" // @PARAM log_level // The level of the logger. 
diff --git a/src/applications/tests/unit_tests/subgroup_function_tester.cpp b/src/applications/tests/unit_tests/subgroup_function_tester.cpp index 4b014d1c..9d4f64a6 100644 --- a/src/applications/tests/unit_tests/subgroup_function_tester.cpp +++ b/src/applications/tests/unit_tests/subgroup_function_tester.cpp @@ -1,8 +1,9 @@ #include +#include #include -#include #include "subgroup_function_tester.hpp" +#include derecho::IpAndPorts ip_and_ports_generator() { static int invocation_count = 0; @@ -12,6 +13,7 @@ derecho::IpAndPorts ip_and_ports_generator() { return {string_generator.str(), 35465, 35465, 35465, 35465, 35465}; } +//We're really just testing the allocation functions, so each "subgroup" will be a dummy Replicated type struct TestType1 {}; struct TestType2 {}; struct TestType3 {}; @@ -19,41 +21,27 @@ struct TestType4 {}; struct TestType5 {}; struct TestType6 {}; -int main(int argc, char* argv[]) { +void test_fixed_allocation_functions() { using derecho::CrossProductPolicy; using derecho::DefaultSubgroupAllocator; using derecho::SubgroupAllocationPolicy; - //Reduce the verbosity of specifying "ordered" for three custom subgroups std::vector three_ordered(3, derecho::Mode::ORDERED); std::vector three_default_profiles(3, "default"); + SubgroupAllocationPolicy sharded_policy = derecho::one_subgroup_policy(derecho::fixed_even_shards(5, 3)); SubgroupAllocationPolicy unsharded_policy = derecho::one_subgroup_policy(derecho::fixed_even_shards(1, 5)); SubgroupAllocationPolicy uneven_sharded_policy = derecho::one_subgroup_policy( - derecho::custom_shards_policy({2, 5, 3}, {2, 5, 3}, three_ordered,three_default_profiles)); + derecho::custom_shards_policy({2, 5, 3}, {2, 5, 3}, three_ordered, three_default_profiles)); SubgroupAllocationPolicy multiple_copies_policy = derecho::identical_subgroups_policy( 2, derecho::fixed_even_shards(3, 4)); - SubgroupAllocationPolicy multiple_subgroups_policy{3, false, {derecho::fixed_even_shards(3, 3), - derecho::custom_shards_policy({4, 
3, 4}, {4, 3, 4}, three_ordered, three_default_profiles), - derecho::fixed_even_shards(2, 2)}}; - - SubgroupAllocationPolicy flexible_shards_policy = derecho::one_subgroup_policy( - derecho::flexible_even_shards(5, 2, 3)); - SubgroupAllocationPolicy uneven_flexible_shards = derecho::one_subgroup_policy( - derecho::custom_shards_policy({2, 5, 3}, {3, 6, 5}, three_ordered, three_default_profiles)); - SubgroupAllocationPolicy multiple_copies_flexible = derecho::identical_subgroups_policy( - 2, derecho::flexible_even_shards(3, 4, 5)); - SubgroupAllocationPolicy multiple_fault_tolerant_subgroups{3, false, - {derecho::flexible_even_shards(3, 2, 4), - derecho::custom_shards_policy({4, 3, 4}, {5, 4, 5}, three_ordered, three_default_profiles), - derecho::flexible_even_shards(2, 2, 4)}}; + SubgroupAllocationPolicy multiple_subgroups_policy{3, false, {derecho::fixed_even_shards(3, 3), derecho::custom_shards_policy({4, 3, 4}, {4, 3, 4}, three_ordered, three_default_profiles), derecho::fixed_even_shards(2, 2)}}; //This will create subgroups that are the cross product of the "uneven_sharded_policy" and "sharded_policy" groups CrossProductPolicy uneven_to_even_cp{ {std::type_index(typeid(TestType3)), 0}, {std::type_index(typeid(TestType1)), 0}}; - //We're really just testing the allocation functions, so assign each one to a dummy Replicated type derecho::SubgroupInfo test_fixed_subgroups( DefaultSubgroupAllocator({{std::type_index(typeid(TestType1)), sharded_policy}, {std::type_index(typeid(TestType2)), unsharded_policy}, @@ -70,25 +58,25 @@ int main(int argc, char* argv[]) { std::type_index(typeid(TestType6))}; std::vector members(100); std::iota(members.begin(), members.end(), 0); - std::vector member_ips_and_ports(100); + std::vector member_ips_and_ports(members.size()); std::generate(member_ips_and_ports.begin(), member_ips_and_ports.end(), ip_and_ports_generator); - std::vector none_failed(100, 0); + std::vector none_failed(members.size(), 0); auto curr_view = 
std::make_unique(0, members, member_ips_and_ports, none_failed, std::vector{}, std::vector{}, 0, 0, subgroup_type_order); - std::cout << "TEST 1: Initial allocation" << std::endl; + rls_default_info("TEST 1: Initial allocation"); derecho::test_provision_subgroups(test_fixed_subgroups, nullptr, *curr_view); std::set ranks_to_fail{1, 3, 17, 38, 40}; - std::cout << "TEST 2: Failing some nodes that are in subgroups: " << ranks_to_fail << std::endl; + rls_default_info("TEST 2: Failing some nodes that are in subgroups: {}", ranks_to_fail); std::unique_ptr prev_view(std::move(curr_view)); curr_view = derecho::make_next_view(*prev_view, ranks_to_fail, {}, {}); derecho::test_provision_subgroups(test_fixed_subgroups, prev_view, *curr_view); std::set more_ranks_to_fail{13, 20, 59, 78, 89}; - std::cout << "TEST 3: Failing nodes both before and after the pointer. Ranks are " << more_ranks_to_fail << std::endl; + rls_default_info("TEST 3: Failing nodes both before and after the pointer. Ranks are {}", more_ranks_to_fail); prev_view.swap(curr_view); curr_view = derecho::make_next_view(*prev_view, more_ranks_to_fail, {}, {}); @@ -98,7 +86,7 @@ int main(int argc, char* argv[]) { std::vector range_39_to_89(50); std::iota(range_39_to_89.begin(), range_39_to_89.end(), 39); std::set lots_of_members_to_fail(range_39_to_89.begin(), range_39_to_89.end()); - std::cout << "TEST 4: Failing 50 nodes so the next view is inadequate" << std::endl; + rls_default_info("TEST 4: Failing 50 nodes so the next view is inadequate"); prev_view.swap(curr_view); curr_view = derecho::make_next_view(*prev_view, lots_of_members_to_fail, {}, {}); @@ -106,47 +94,162 @@ int main(int argc, char* argv[]) { std::vector new_members(40); std::iota(new_members.begin(), new_members.end(), 100); - std::vector new_member_ips_and_ports(40); + std::vector new_member_ips_and_ports(new_members.size()); std::generate(new_member_ips_and_ports.begin(), new_member_ips_and_ports.end(), ip_and_ports_generator); - std::cout << 
"TEST 5: Adding new members 100-140" << std::endl; + rls_default_info("TEST 5: Adding new members 100-140"); //Since an inadequate view will never be installed, keep the same prev_view from before the failures curr_view = derecho::make_next_view(*prev_view, lots_of_members_to_fail, new_members, new_member_ips_and_ports); derecho::test_provision_subgroups(test_fixed_subgroups, prev_view, *curr_view); +} + +void test_flexible_allocation_functions() { + using derecho::DefaultSubgroupAllocator; + using derecho::SubgroupAllocationPolicy; + + //Reduce the verbosity of specifying "ordered" for three custom subgroups + std::vector three_ordered(3, derecho::Mode::ORDERED); + std::vector three_default_profiles(3, "default"); + + SubgroupAllocationPolicy flexible_shards_policy = derecho::one_subgroup_policy( + derecho::flexible_even_shards(5, 2, 3)); + SubgroupAllocationPolicy uneven_flexible_shards = derecho::one_subgroup_policy( + derecho::custom_shards_policy({2, 5, 3}, {3, 6, 5}, three_ordered, three_default_profiles)); + SubgroupAllocationPolicy multiple_copies_flexible = derecho::identical_subgroups_policy( + 2, derecho::flexible_even_shards(3, 4, 5)); + SubgroupAllocationPolicy multiple_fault_tolerant_subgroups{3, false, {derecho::flexible_even_shards(3, 2, 4), derecho::custom_shards_policy({4, 3, 4}, {5, 4, 5}, three_ordered, three_default_profiles), derecho::flexible_even_shards(2, 2, 4)}}; - //Now test the flexible allocation functions derecho::SubgroupInfo test_flexible_subgroups( DefaultSubgroupAllocator({{std::type_index(typeid(TestType1)), flexible_shards_policy}, {std::type_index(typeid(TestType2)), uneven_flexible_shards}, {std::type_index(typeid(TestType3)), multiple_copies_flexible}, {std::type_index(typeid(TestType4)), multiple_fault_tolerant_subgroups}})); - std::vector flexible_subgroup_type_order = { - std::type_index(typeid(TestType1)), std::type_index(typeid(TestType2)), - std::type_index(typeid(TestType3)), std::type_index(typeid(TestType4))}; - 
curr_view = std::make_unique(0, members, member_ips_and_ports, none_failed, - std::vector{}, std::vector{}, - 0, 0, flexible_subgroup_type_order); - std::cout << "Now testing flexible subgroup allocation" << std::endl; - std::cout << "TEST 6: Initial allocation" << std::endl; + std::vector subgroup_type_order = {std::type_index(typeid(TestType1)), + std::type_index(typeid(TestType2)), + std::type_index(typeid(TestType3)), + std::type_index(typeid(TestType4))}; + std::vector members(100); + std::iota(members.begin(), members.end(), 0); + std::vector member_ips_and_ports(members.size()); + std::generate(member_ips_and_ports.begin(), member_ips_and_ports.end(), ip_and_ports_generator); + std::vector none_failed(members.size(), 0); + auto curr_view = std::make_unique(0, members, member_ips_and_ports, none_failed, + std::vector{}, std::vector{}, + 0, 0, subgroup_type_order); + rls_default_info("Now testing flexible subgroup allocation"); + rls_default_info("TEST 6: Initial allocation"); derecho::test_provision_subgroups(test_flexible_subgroups, nullptr, *curr_view); std::set flexible_ranks_to_fail{3, 6, 31, 45, 57}; - std::cout << "TEST 7: Failing some nodes that are in subgroups: " << flexible_ranks_to_fail << std::endl; - prev_view = std::move(curr_view); + rls_default_info("TEST 7: Failing some nodes that are in subgroups: {}", flexible_ranks_to_fail); + std::unique_ptr prev_view(std::move(curr_view)); curr_view = derecho::make_next_view(*prev_view, flexible_ranks_to_fail, {}, {}); derecho::test_provision_subgroups(test_flexible_subgroups, prev_view, *curr_view); std::set flexible_ranks_to_fail_2{7, 8, 17, 18, 40, 41, 51, 61, 62}; - std::cout << "TEST 8: Failing more nodes so that shards must shrink. Ranks are: " << flexible_ranks_to_fail_2 << std::endl; + rls_default_info("TEST 8: Failing more nodes so that shards must shrink. 
Ranks are: {}", flexible_ranks_to_fail_2); prev_view.swap(curr_view); curr_view = derecho::make_next_view(*prev_view, flexible_ranks_to_fail_2, {}, {}); derecho::test_provision_subgroups(test_flexible_subgroups, prev_view, *curr_view); - std::cout << "TEST 9: Adding new members 100-140 so shards can re-expand." << std::endl; + std::vector new_members(40); + std::iota(new_members.begin(), new_members.end(), 100); + std::vector new_member_ips_and_ports(new_members.size()); + std::generate(new_member_ips_and_ports.begin(), new_member_ips_and_ports.end(), ip_and_ports_generator); + rls_default_info("TEST 9: Adding new members 100-140 so shards can re-expand."); prev_view.swap(curr_view); curr_view = derecho::make_next_view(*prev_view, {}, new_members, new_member_ips_and_ports); derecho::test_provision_subgroups(test_flexible_subgroups, prev_view, *curr_view); +} + +void test_json_layout() { + const char* json_layout_string = + R"|([ + { + "type_alias": "TestType1", + "layout": [ + { + "min_nodes_by_shard": [2], + "max_nodes_by_shard": [3], + "reserved_node_id_by_shard":[[1, 2, 3]], + "delivery_modes_by_shard": ["Ordered"], + "profiles_by_shard": ["Default"] + } + ] + }, + { + "type_alias": "TestType2", + "layout": [ + { + "min_nodes_by_shard": [2], + "max_nodes_by_shard": [3], + "reserved_node_id_by_shard":[[2, 3, 4]], + "delivery_modes_by_shard": ["Ordered"], + "profiles_by_shard": ["Default"] + } + ] + } +])|"; + + derecho::SubgroupInfo test_json_overlapping( + derecho::make_subgroup_allocator(nlohmann::json::parse(json_layout_string))); + + std::vector subgroup_type_order = {std::type_index(typeid(TestType1)), + std::type_index(typeid(TestType2))}; + + std::vector members(3); + std::iota(members.begin(), members.end(), 0); + std::vector ips_and_ports(3); + std::generate(ips_and_ports.begin(), ips_and_ports.end(), ip_and_ports_generator); + std::vector none_failed(3, 0); + auto curr_view = std::make_unique(0, members, ips_and_ports, none_failed, + std::vector{}, 
std::vector{}, + 0, 0, subgroup_type_order); + + rls_default_info("Now testing JSON-based allocation with overlapping reserved nodes"); + rls_default_info("TEST 10: Initial allocation"); + derecho::test_provision_subgroups(test_json_overlapping, nullptr, *curr_view); + + std::vector new_members_in_reservation{3, 4}; + std::vector new_member_ips_and_ports(new_members_in_reservation.size()); + std::generate(new_member_ips_and_ports.begin(), new_member_ips_and_ports.end(), ip_and_ports_generator); + rls_default_info("TEST 11: Nodes 3 and 4 join, which are in a reserved node list"); + std::unique_ptr prev_view(std::move(curr_view)); + curr_view = derecho::make_next_view(*prev_view, {}, new_members_in_reservation, new_member_ips_and_ports); + derecho::test_provision_subgroups(test_json_overlapping, prev_view, *curr_view); + + std::set ranks_to_fail{0, 2}; + rls_default_info("TEST 12: Nodes 0 and 2 fail; 2 is in both reserved node lists"); + prev_view.swap(curr_view); + curr_view = derecho::make_next_view(*prev_view, ranks_to_fail, {}, {}); + dbg_default_debug("New view has members: {}", curr_view->members); + derecho::test_provision_subgroups(test_json_overlapping, prev_view, *curr_view); + + std::vector new_members_outside_reservation{5, 6}; + std::vector ips_and_ports_2(new_members_outside_reservation.size()); + std::generate(ips_and_ports_2.begin(), ips_and_ports_2.end(), ip_and_ports_generator); + rls_default_info("TEST 13: Nodes 5 and 6 join"); + prev_view.swap(curr_view); + curr_view = derecho::make_next_view(*prev_view, {}, new_members_outside_reservation, ips_and_ports_2); + dbg_default_debug("New view has members: {}", curr_view->members); + derecho::test_provision_subgroups(test_json_overlapping, prev_view, *curr_view); + + std::set ranks_to_fail_2{1, 3}; + std::vector node_rejoined{2}; + std::vector node_2_ip{ips_and_ports[2]}; + rls_default_info("TEST 14: Nodes 3 and 5 fail, node 2 rejoins"); + prev_view.swap(curr_view); + curr_view = 
derecho::make_next_view(*prev_view, ranks_to_fail_2, node_rejoined, node_2_ip); + dbg_default_debug("New view has members: {}", curr_view->members); + derecho::test_provision_subgroups(test_json_overlapping, prev_view, *curr_view); +} + +int main(int argc, char* argv[]) { + + test_fixed_allocation_functions(); + test_flexible_allocation_functions(); + test_json_layout(); return 0; } @@ -154,13 +257,21 @@ int main(int argc, char* argv[]) { namespace derecho { void print_subgroup_layout(const subgroup_shard_layout_t& layout) { - using std::cout; + std::stringstream string_builder; for(std::size_t subgroup_num = 0; subgroup_num < layout.size(); ++subgroup_num) { - cout << "Subgroup " << subgroup_num << ": "; + string_builder << "Subgroup " << subgroup_num << ": "; for(std::size_t shard_num = 0; shard_num < layout[subgroup_num].size(); ++shard_num) { - cout << layout[subgroup_num][shard_num].members << ", "; + string_builder << layout[subgroup_num][shard_num].members << ", "; } - cout << "\b\b" << std::endl; + string_builder << "\b\b"; + rls_default_info(string_builder.str()); + } +} + +void print_allocations(const std::map& subgroup_allocations) { + for(const auto& subgroup_type_allocation : subgroup_allocations) { + rls_default_info("Subgroup type {} got assignment: ", subgroup_type_allocation.first.name()); + print_subgroup_layout(subgroup_type_allocation.second); } } @@ -187,14 +298,13 @@ void test_provision_subgroups(const SubgroupInfo& subgroup_info, << std::endl; return; } + print_allocations(subgroup_allocations); //Go through subgroup_allocations and initialize curr_view for(subgroup_type_id_t subgroup_type_id = 0; subgroup_type_id < curr_view.subgroup_type_order.size(); ++subgroup_type_id) { const std::type_index& subgroup_type = curr_view.subgroup_type_order[subgroup_type_id]; subgroup_shard_layout_t& curr_type_subviews = subgroup_allocations[subgroup_type]; - std::cout << "Subgroup type " << subgroup_type.name() << " got assignment: " << std::endl; - 
print_subgroup_layout(curr_type_subviews); std::size_t num_subgroups = curr_type_subviews.size(); curr_view.subgroup_ids_by_type_id.emplace(subgroup_type_id, std::vector(num_subgroups)); for(uint32_t subgroup_index = 0; subgroup_index < num_subgroups; ++subgroup_index) { @@ -250,14 +360,14 @@ std::unique_ptr make_next_view(const View& curr_view, } } //Copy member information, excluding the members that have failed - int m = 0; - for(int n = 0; n < curr_view.num_members; n++) { + int new_rank = 0; + for(int old_rank = 0; old_rank < curr_view.num_members; old_rank++) { //This is why leave_ranks needs to be a set - if(leave_ranks.find(n) == leave_ranks.end()) { - members[m] = curr_view.members[n]; - member_ips_and_ports[m] = curr_view.member_ips_and_ports[n]; - failed[m] = curr_view.failed[n]; - ++m; + if(leave_ranks.find(old_rank) == leave_ranks.end()) { + members[new_rank] = curr_view.members[old_rank]; + member_ips_and_ports[new_rank] = curr_view.member_ips_and_ports[old_rank]; + failed[new_rank] = curr_view.failed[old_rank]; + ++new_rank; } } diff --git a/src/applications/tests/unit_tests/subgroup_function_tester.hpp b/src/applications/tests/unit_tests/subgroup_function_tester.hpp index ef416e0d..10584e75 100644 --- a/src/applications/tests/unit_tests/subgroup_function_tester.hpp +++ b/src/applications/tests/unit_tests/subgroup_function_tester.hpp @@ -18,20 +18,7 @@ #include #include #include - -namespace std { - -//This allows std::sets to be printed out in the obvious way -template -std::ostream& operator<<(std::ostream& out, const std::set& s) { - if(!s.empty()) { - out << '{'; - std::copy(s.begin(), s.end(), std::ostream_iterator(out, ", ")); - out << "\b\b}"; - } - return out; -} -} // namespace std +#include namespace derecho { //Functions that assist with testing subgroup layout allocation diff --git a/src/core/git_version.cpp b/src/core/git_version.cpp index a5761dab..1406be03 100644 --- a/src/core/git_version.cpp +++ b/src/core/git_version.cpp @@ -13,8 
+13,8 @@ namespace derecho { const int MAJOR_VERSION = 2; const int MINOR_VERSION = 1; const int PATCH_VERSION = 0; -const int COMMITS_AHEAD_OF_VERSION = 146; +const int COMMITS_AHEAD_OF_VERSION = 147; const char* VERSION_STRING = "2.1.0"; -const char* VERSION_STRING_PLUS_COMMITS = "2.1.0+146"; +const char* VERSION_STRING_PLUS_COMMITS = "2.1.0+147"; } diff --git a/src/core/restart_state.cpp b/src/core/restart_state.cpp index d0d1fc4c..6a88a754 100644 --- a/src/core/restart_state.cpp +++ b/src/core/restart_state.cpp @@ -1,9 +1,9 @@ #include #include -#include #include #include +#include #include //This code needs access to ViewManager's static methods #include diff --git a/src/core/subgroup_functions.cpp b/src/core/subgroup_functions.cpp index 64b42bbc..1ea43a36 100644 --- a/src/core/subgroup_functions.cpp +++ b/src/core/subgroup_functions.cpp @@ -6,10 +6,10 @@ #include #include -#include #include #include #include +#include #include namespace derecho { @@ -193,8 +193,7 @@ void DefaultSubgroupAllocator::compute_standard_memberships( surviving_member_set.begin(), surviving_member_set.end(), active_reserved_node_id_set.begin(), active_reserved_node_id_set.end(), std::inserter(curr_members, curr_members.end())); - dbg_default_trace("With inherent nodes, curr_members is:"); - print_set(curr_members); + dbg_default_trace("With inherent nodes, curr_members is: {}", curr_members); curr_view.next_unassigned_rank = curr_members.size(); dbg_default_trace("After rearranging inherent node_ids, curr_view.next_unassigned_rank is {}", curr_view.next_unassigned_rank); @@ -203,8 +202,7 @@ void DefaultSubgroupAllocator::compute_standard_memberships( added_member_set.begin(), added_member_set.end(), all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(curr_members, curr_members.end())); - dbg_default_trace("Adding newly added non-reserved nodes, curr_members is:"); - print_set(curr_members); + dbg_default_trace("Adding newly added non-reserved nodes, curr_members 
is: {}", curr_members); } for(uint32_t subgroup_type_id = 0; subgroup_type_id < subgroup_type_order.size(); @@ -243,8 +241,7 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( all_reserved_node_ids.begin(), all_reserved_node_ids.end(), std::inserter(all_active_reserved_node_id_set, all_active_reserved_node_id_set.begin())); - dbg_default_trace("Parsing all_active_reserved_node_id_set: "); - print_set(all_active_reserved_node_id_set); + dbg_default_trace("Parsing all_active_reserved_node_id_set: {}", all_active_reserved_node_id_set); nodes_needed = all_active_reserved_node_id_set.size(); dbg_default_trace("After counting all_active_reserved_node_id_set, nodes_needed is {}", nodes_needed); @@ -309,8 +306,7 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( dbg_default_trace("There is no reserved node_id configured."); } - dbg_default_trace("The active_reserved_node_id_set for current shard is: "); - print_set(active_reserved_node_id_set); + dbg_default_trace("The active_reserved_node_id_set for current shard is: {}", active_reserved_node_id_set); /* The inherent_node_id_set holds node_ids that are "inherent" or "intrinsic" * to the this shard, for the node_ids are either surviving nodes from "the same shard" @@ -321,8 +317,7 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( survived_node_set.begin(), survived_node_set.end(), active_reserved_node_id_set.begin(), active_reserved_node_id_set.end(), std::inserter(inherent_node_id_set, inherent_node_id_set.end())); - dbg_default_trace("The inherent_node_id_set for current shard is: "); - print_set(inherent_node_id_set); + dbg_default_trace("The inherent_node_id_set for current shard is: {}", inherent_node_id_set); // All active reserved nodes just count once. 
nodes_needed += inherent_node_id_set.size() - active_reserved_node_id_set.size(); @@ -332,7 +327,7 @@ DefaultSubgroupAllocator::compute_standard_shard_sizes( nodes_needed += min_shard_size - inherent_node_id_set.size(); } - // TODO: If we add a lot of nodes reserved for a shard, the number of which is larger than this shard's max_num_nodes, we will still add those nodes to it. + // If we add a lot of nodes reserved for a shard, the number of which is larger than this shard's max_num_nodes, we will still add those nodes to it. // Seems OK? shard_sizes[subgroup_type][subgroup_num][shard_num] = min_shard_size; } @@ -405,21 +400,27 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_typ uint32_t shard_size = shard_sizes.at(subgroup_type)[subgroup_num][shard_num]; std::vector desired_nodes; + //The allocation policy for this subgroup is either the shard_policy_by_subgroup entry at subgroup_num, + //or the first entry in shard_policy_by_subgroup if identical_subgroups is true + const ShardAllocationPolicy& sharding_policy + = subgroup_type_policy.identical_subgroups + ? subgroup_type_policy.shard_policy_by_subgroup[0] + : subgroup_type_policy.shard_policy_by_subgroup[subgroup_num]; + dbg_default_trace("Subgroup {}, shard {}, is assigned {} nodes", subgroup_num, shard_num, shard_size); // Allocate active reserved nodes first. 
- if(subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_ids_by_shard.size() > 0) { + if(sharding_policy.reserved_node_ids_by_shard.size() > 0) { const std::set reserved_node_id_set( - subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_ids_by_shard[shard_num].begin(), - subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_ids_by_shard[shard_num].end()); + sharding_policy.reserved_node_ids_by_shard[shard_num].begin(), + sharding_policy.reserved_node_ids_by_shard[shard_num].end()); if(reserved_node_id_set.size() > 0) { std::set_intersection( reserved_node_id_set.begin(), reserved_node_id_set.end(), curr_member_set.begin(), curr_member_set.end(), std::inserter(desired_nodes, desired_nodes.end())); shard_size -= desired_nodes.size(); - dbg_default_trace("Assigning shard {} active reserved nodes:", desired_nodes.size()); - print_set(std::set(desired_nodes.begin(), desired_nodes.end())); + dbg_default_trace("Assigning shard {} active reserved nodes: {}", desired_nodes.size(), desired_nodes); } } else { dbg_default_trace("There is no reserved node_id configured."); @@ -433,16 +434,9 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::allocate_standard_subgroup_typ // unassigned normal nodes, which I (Lichen) think is just OK and not in conflict with its definition. curr_view.next_unassigned_rank += shard_size; - dbg_default_trace("Assigning shard {} nodes in total, with curr_view.next_unassigned_rank {}:", desired_nodes.size(), curr_view.next_unassigned_rank); - print_set(desired_nodes); + dbg_default_trace("Assigning shard {} nodes in total, with curr_view.next_unassigned_rank {}: {}", desired_nodes.size(), curr_view.next_unassigned_rank, desired_nodes); //Figure out what the Mode policy for this shard is - const SubgroupAllocationPolicy& subgroup_type_policy - = std::get(policies.at(subgroup_type)); - const ShardAllocationPolicy& sharding_policy - = subgroup_type_policy.identical_subgroups - ? 
subgroup_type_policy.shard_policy_by_subgroup[0] - : subgroup_type_policy.shard_policy_by_subgroup[subgroup_num]; Mode delivery_mode = sharding_policy.even_shards ? sharding_policy.shards_mode : sharding_policy.modes_by_shard[shard_num]; @@ -476,10 +470,8 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( const subgroup_id_t previous_assignment_offset = prev_view->subgroup_ids_by_type_id.at(subgroup_type_id)[0]; subgroup_shard_layout_t next_assignment(shard_sizes.at(subgroup_type).size()); - dbg_default_trace("The surviving_member_set is:"); - print_set(surviving_member_set); - dbg_default_trace("The added_member_set is:"); - print_set(added_member_set); + dbg_default_trace("The surviving_member_set is: {}", surviving_member_set); + dbg_default_trace("The added_member_set is: {}", added_member_set); const SubgroupAllocationPolicy& subgroup_type_policy = std::get(policies.at(subgroup_type)); @@ -504,28 +496,30 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( next_shard_members.push_back(previous_shard_assignment.members[rank]); next_is_sender.push_back(previous_shard_assignment.is_sender[rank]); } - dbg_default_trace("After assigning surviving nodes, next_shard_members is:"); - print_set(next_shard_members); + dbg_default_trace("After assigning surviving nodes, next_shard_members is: {}", next_shard_members); + + const ShardAllocationPolicy& sharding_policy + = subgroup_type_policy.identical_subgroups + ? 
subgroup_type_policy.shard_policy_by_subgroup[0] + : subgroup_type_policy.shard_policy_by_subgroup[subgroup_num]; //Add newly added reserved nodes - if(subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_ids_by_shard.size() > 0) { + if(sharding_policy.reserved_node_ids_by_shard.size() > 0) { std::set added_reserved_node_id_set; std::set_intersection( added_member_set.begin(), added_member_set.end(), - subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_ids_by_shard[shard_num].begin(), - subgroup_type_policy.shard_policy_by_subgroup[subgroup_num].reserved_node_ids_by_shard[shard_num].end(), + sharding_policy.reserved_node_ids_by_shard[shard_num].begin(), + sharding_policy.reserved_node_ids_by_shard[shard_num].end(), std::inserter(added_reserved_node_id_set, added_reserved_node_id_set.end())); if(added_reserved_node_id_set.size() > 0) { - dbg_default_trace("The added_reserved_node_id_set is not empty:"); - print_set(added_reserved_node_id_set); + dbg_default_trace("The added_reserved_node_id_set is not empty: {}", added_reserved_node_id_set); for(auto node_id : added_reserved_node_id_set) { next_shard_members.push_back(node_id); next_is_sender.push_back(true); } } - dbg_default_trace("After assigning newly added reserved nodes, we get {} inherent node_id(s) assigned, next_shard_members is:", next_shard_members.size()); - print_set(next_shard_members); + dbg_default_trace("After assigning newly added reserved nodes, we get {} inherent node_id(s) assigned, next_shard_members is: {}", next_shard_members.size(), next_shard_members); } else { dbg_default_trace("There is no reserved node_id configured."); } @@ -539,8 +533,7 @@ subgroup_shard_layout_t DefaultSubgroupAllocator::update_standard_subgroup_type( //All members start out as senders with the default allocator next_is_sender.push_back(true); } - dbg_default_trace("Assigned shard {} nodes in total, with curr_view.next_unassigned_rank {}:", next_shard_members.size(), 
curr_view.next_unassigned_rank); - print_set(next_shard_members); + dbg_default_trace("Assigned shard {} nodes in total, with curr_view.next_unassigned_rank {}: {}", next_shard_members.size(), curr_view.next_unassigned_rank, next_shard_members); next_assignment[subgroup_num].emplace_back(curr_view.make_subview(next_shard_members, previous_shard_assignment.mode, @@ -764,7 +757,10 @@ void check_reserved_node_id_pool(const std::map(item.second); for(int subgroup_num = 0; subgroup_num < subgroup_type_policy.num_subgroups; ++subgroup_num) { - const ShardAllocationPolicy& sharding_policy = subgroup_type_policy.shard_policy_by_subgroup[subgroup_num]; + const ShardAllocationPolicy& sharding_policy + = subgroup_type_policy.identical_subgroups + ? subgroup_type_policy.shard_policy_by_subgroup[0] + : subgroup_type_policy.shard_policy_by_subgroup[subgroup_num]; if(sharding_policy.reserved_node_ids_by_shard.size() > 0) { //Make sure that no shards inside a subgroup reserve the same node IDs std::set intersect_reserved_node_ids_in_subgroup; diff --git a/src/core/view_manager.cpp b/src/core/view_manager.cpp index 3a3532b4..b4ec10b2 100644 --- a/src/core/view_manager.cpp +++ b/src/core/view_manager.cpp @@ -8,12 +8,12 @@ #include #include -#include #include #include #include #include #include +#include #include #include From bb8f7cc881c1bd6be7be9367044367e99540bfeb Mon Sep 17 00:00:00 2001 From: Weijia Song Date: Fri, 9 Jul 2021 21:27:44 -0400 Subject: [PATCH 25/26] reserved_node_id_by_shard --> reserved_node_ids_by_shard --- include/derecho/core/subgroup_functions.hpp | 2 +- src/applications/demos/json_cfgs/layout-with-id-pool.json | 4 ++-- .../tests/unit_tests/subgroup_function_tester.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/derecho/core/subgroup_functions.hpp b/include/derecho/core/subgroup_functions.hpp index 85d4149e..7d2716c1 100644 --- a/include/derecho/core/subgroup_functions.hpp +++ b/include/derecho/core/subgroup_functions.hpp @@ 
-54,7 +54,7 @@ constexpr char json_layout_field[] = "layout"; constexpr char json_type_alias_field[] = "type_alias"; constexpr char min_nodes_by_shard_field[] = "min_nodes_by_shard"; constexpr char max_nodes_by_shard_field[] = "max_nodes_by_shard"; -constexpr char reserved_node_ids_by_shard_field[] = "reserved_node_id_by_shard"; +constexpr char reserved_node_ids_by_shard_field[] = "reserved_node_ids_by_shard"; constexpr char delivery_modes_by_shard_field[] = "delivery_modes_by_shard"; constexpr char delivery_mode_ordered[] = "Ordered"; constexpr char delivery_mode_raw[] = "Raw"; diff --git a/src/applications/demos/json_cfgs/layout-with-id-pool.json b/src/applications/demos/json_cfgs/layout-with-id-pool.json index 0baa256b..bd6e92d5 100644 --- a/src/applications/demos/json_cfgs/layout-with-id-pool.json +++ b/src/applications/demos/json_cfgs/layout-with-id-pool.json @@ -6,7 +6,7 @@ { "min_nodes_by_shard": [2], "max_nodes_by_shard": [2], - "reserved_node_id_by_shard":[[1, 2, 3]], + "reserved_node_ids_by_shard":[[1, 2, 3]], "delivery_modes_by_shard": ["Ordered"], "profiles_by_shard": ["VCS"] } @@ -18,7 +18,7 @@ { "min_nodes_by_shard": [2], "max_nodes_by_shard": [2], - "reserved_node_id_by_shard":[[2, 3, 4]], + "reserved_node_ids_by_shard":[[2, 3, 4]], "delivery_modes_by_shard": ["Ordered"], "profiles_by_shard": ["DEFAULT"] } diff --git a/src/applications/tests/unit_tests/subgroup_function_tester.cpp b/src/applications/tests/unit_tests/subgroup_function_tester.cpp index 9d4f64a6..027d3a29 100644 --- a/src/applications/tests/unit_tests/subgroup_function_tester.cpp +++ b/src/applications/tests/unit_tests/subgroup_function_tester.cpp @@ -172,7 +172,7 @@ void test_json_layout() { { "min_nodes_by_shard": [2], "max_nodes_by_shard": [3], - "reserved_node_id_by_shard":[[1, 2, 3]], + "reserved_node_ids_by_shard": [[1, 2, 3]], "delivery_modes_by_shard": ["Ordered"], "profiles_by_shard": ["Default"] } @@ -184,7 +184,7 @@ void test_json_layout() { { "min_nodes_by_shard": [2], 
"max_nodes_by_shard": [3], - "reserved_node_id_by_shard":[[2, 3, 4]], + "reserved_node_ids_by_shard": [[2, 3, 4]], "delivery_modes_by_shard": ["Ordered"], "profiles_by_shard": ["Default"] } From 040bfce3ba2f230a7848f96e157fade4d780ca2f Mon Sep 17 00:00:00 2001 From: Edward Tremel Date: Tue, 20 Jul 2021 12:34:20 -0400 Subject: [PATCH 26/26] Added documentation on the JSON layout format I cleaned up the JSON examples now that there's no need to have a separate executable for the "from file" and "from string" versions (the make_subgroup_allocator function can choose automatically) and added a section to the README describing how to use JSON layout. Also, fixed a bug in the make_subgroup_allocator function: the brace-initializer syntax is ambiguous between std::vector and std::map unless you explicitly name the type. --- README.md | 88 ++++++++- include/derecho/core/subgroup_functions.hpp | 2 +- src/applications/demos/CMakeLists.txt | 8 +- src/applications/demos/json_cfgs/derecho.cfg | 41 ++++- .../demos/json_cfgs/derecho.cfg_json_layout | 168 ------------------ .../demos/json_cfgs/derecho.cfg_json_path | 144 --------------- .../demos/simple_replicated_objects_json.cpp | 13 +- .../simple_replicated_objects_json_file.cpp | 123 ------------- ... 
=> simple_replicated_objects_overlap.cpp} | 29 +-- src/core/git_version.cpp | 4 +- 10 files changed, 134 insertions(+), 486 deletions(-) delete mode 100644 src/applications/demos/json_cfgs/derecho.cfg_json_layout delete mode 100644 src/applications/demos/json_cfgs/derecho.cfg_json_path delete mode 100644 src/applications/demos/simple_replicated_objects_json_file.cpp rename src/applications/demos/{simple_replicated_objects_overlap_json_file.cpp => simple_replicated_objects_overlap.cpp} (73%) diff --git a/README.md b/README.md index b84c5fcf..059c8b31 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Derecho is a library that helps you build replicated, fault-tolerant services in * [`spdlog`](https://github.com/gabime/spdlog), a logging library, v1.3.1 or newer. On Ubuntu 19.04 and later this can be installed with the package `libspdlog-dev`. The version of spdlog in Ubuntu 18.04's repositories is too old, but if you are running Ubuntu 18.04 you can download the `libspdlog-dev` package [here](http://old-releases.ubuntu.com/ubuntu/pool/universe/s/spdlog/libspdlog-dev_1.3.1-1_amd64.deb) and install it manually with no other dependencies needed. * The Open Fabric Interface (OFI) library: [`libfabric`](https://github.com/ofiwg/libfabric). Since this library's interface changes significantly between versions, please install `v1.12.1` from source rather than any packaged version. ([Installation script](https://github.com/Derecho-Project/derecho/blob/master/scripts/prerequisites/install-libfabric.sh)) * Lohmann's [JSON for Modern C++](https://github.com/nlohmann/json) library, v3.9 or newer. This library is not packaged for Ubuntu, but can easily be installed with our [installation script](https://github.com/Derecho-Project/derecho/blob/master/scripts/prerequisites/install-json.sh). 
-* Matthew Milano's C++ utilities, which are all CMake libraries that can be installed with "make install": +* @mpmilano's C++ utilities, which are all CMake libraries that can be installed with "make install": - [`mutils`](https://github.com/mpmilano/mutils) ([Installation script](https://github.com/Derecho-Project/derecho/blob/master/scripts/prerequisites/install-mutils.sh)) - [`mutils-containers`](https://github.com/mpmilano/mutils-containers) ([Installation script](https://github.com/Derecho-Project/derecho/blob/master/scripts/prerequisites/install-mutils-containers.sh)) - [`mutils-tasks`](https://github.com/mpmilano/mutils-tasks) ([Installation script](https://github.com/Derecho-Project/derecho/blob/master/scripts/prerequisites/install-mutils-tasks.sh)) @@ -247,9 +247,9 @@ std::unique_ptr> group; In order to start or join a Group, all members (including processes that join later) must define a function that provides the membership (as a subset of the current View) for each subgroup. The membership function's input is the list of Replicated Object types, the current View, and the previous View if there was one. Its return type is a `std::map` mapping each Replicated Object type to a vector representing all the subgroups of that type (since there can be more than one subgroup that implements the same Replicated Object type). Each entry in this vector is another vector, whose size indicates the number of shards the subgroup should be divided into, and whose entries are SubViews describing the membership of each shard. For example, if the membership function's return value is named `members`, then `members[std::type_index(typeid(Cache))][0][2]` is a SubView identifying the members of the third shard of the first subgroup of type "Cache." -Derecho provides a default subgroup membership function that automatically assigns nodes from the Group into disjoint subgroups and shards, given a policy that describes the desired number of nodes in each subgroup/shard. 
It assigns nodes in ascending rank order, and leaves any "extra" nodes (not needed to fully populate all subgroups) at the end (highest rank) of the membership list. During a View change, this function attempts to preserve the correct number of nodes in each shard without re-assigning any nodes to new roles. It does this by copying the subgroup membership from the previous View as much as possible, and assigning idle nodes from the end of the Group's membership list to replace failed members of subgroups. +**The default membership function:** Derecho provides a default subgroup membership function that automatically assigns nodes from the Group into disjoint subgroups and shards, given a policy (an instance of SubgroupAllocationPolicy or ShardAllocationPolicy) that describes the desired number of nodes in each subgroup/shard. The function assigns nodes in ascending rank order, and leaves any "extra" nodes (not needed to fully populate all subgroups) at the end (highest rank) of the membership list. During a View change, it attempts to preserve the correct number of nodes in each shard without re-assigning any nodes to new roles. It does this by copying the subgroup membership from the previous View as much as possible, and assigning idle nodes from the end of the Group's membership list to replace failed members of subgroups. -There are several helper functions in `subgroup_functions.hpp` that construct AllocationPolicy objects for different scenarios, to make it easier to set up the default subgroup membership function. Here is an example of how the default membership function could be configured for two types of Replicated Objects using these functions: +There are several helper functions in `subgroup_functions.hpp` that construct SubgroupAllocationPolicy objects for different scenarios, to make it easier to set up the default subgroup membership function. 
Here is an example of how the default membership function could be configured for two types of Replicated Objects using these functions: ```cpp derecho::SubgroupInfo subgroup_function {derecho::DefaultSubgroupAllocator({ @@ -260,7 +260,87 @@ derecho::SubgroupInfo subgroup_function {derecho::DefaultSubgroupAllocator({ ``` Based on the policies constructed for the constructor argument of DefaultSubgroupAllocator, the function will create one subgroup of type Foo, with two shards of 3 members each. Next, it will create two subgroups of type Bar, each of which has only one shard of size 3. Note that the order in which subgroups are allocated is the order in which their Replicated Object types are listed in the Group's template parameters, so this instance of the default subgroup allocator will assign the first 6 nodes to the Foo subgroup and the second 6 nodes to the Bar subgroups the first time it runs. -More advanced users may, of course, want to define their own subgroup membership functions. The demo program `overlapping_replicated_objects.cpp` shows a relatively simple example of a user-defined membership function. In this program, the SubgroupInfo contains a C++ lambda function that implements the `shard_view_generator_t` type signature and handles subgroup assignment for Replicated Objects of type Foo, Bar, and Cache: +The subgroup membership function's SubgroupAllocationPolicy objects can also be constructed from JSON strings, which allows you to change the allocation policy without recompiling code. To use this feature, add the option **json_layout_path** to the Derecho config file, specifying the (relative or absolute) path to a JSON file, and construct the DefaultSubgroupAllocator using the templated `make_subgroup_allocator` function. 
The template parameters to this function must be the same Replicated Objects as the template parameters for the Group object, in the same order; for example, this subgroup function would be used for `derecho::Group<Foo,Bar>`:
+
+```cpp
+derecho::SubgroupInfo subgroup_function{derecho::make_subgroup_allocator<Foo,Bar>()};
+
+```
+
+The JSON file specified with the **json_layout_path** option should contain an array with one entry per Replicated Object type, in the same order as these types are listed in the template parameters. Each array entry should be a (JSON) object with a property named "layout", whose value is an array of one or more objects describing allocation policies for each subgroup of this type. An allocation policy object in JSON has fields very similar to the fields of a ShardAllocationPolicy object: `min_nodes_by_shard`, `max_nodes_by_shard`, `delivery_modes_by_shard`, and `profiles_by_shard`. Unlike a ShardAllocationPolicy object, however, identical_subgroups and even_shards cannot be used; each of these fields must be an array of length equal to the number of shards desired. Here is an example of a JSON file that creates the same allocation policies as the arguments to the DefaultSubgroupAllocator constructor above:
+
+```json
+[
+    {
+        "type_alias": "Foo",
+        "layout": [
+            {
+                "min_nodes_by_shard": [2, 2],
+                "max_nodes_by_shard": [3, 3],
+                "delivery_modes_by_shard": ["Ordered", "Ordered"],
+                "profiles_by_shard": ["DEFAULT", "DEFAULT"]
+            }
+        ]
+    },
+    {
+        "type_alias": "Bar",
+        "layout": [
+            {
+                "min_nodes_by_shard": [1],
+                "max_nodes_by_shard": [3],
+                "delivery_modes_by_shard": ["Ordered"],
+                "profiles_by_shard": ["DEFAULT"]
+            },
+            {
+                "min_nodes_by_shard": [1],
+                "max_nodes_by_shard": [3],
+                "delivery_modes_by_shard": ["Ordered"],
+                "profiles_by_shard": ["DEFAULT"]
+            }
+        ]
+    }
+]
+
+```
+
+The field "type_alias" is optional for Derecho (it is only used by Cascade), but it is useful to help keep track of which array entry corresponds to which subgroup type.
The layout policies will actually be applied to Replicated Object types in the order they appear in the template parameters for Group and `make_subgroup_allocator`, regardless of what name is used in "type_alias." + +Instead of using a separate JSON file, the same JSON layout policy can be embedded directly in the Derecho config file using the option **json_layout**, whose value will be interpreted as a JSON string. It is an error to use both **json_layout** and **json_layout_path** in the same config file. + +**Reserved node IDs**: The default subgroup allocation function's behavior can be significantly changed by using the optional field `reserved_node_ids_by_shard` in ShardAllocationPolicy, which has an equivalent optional field "reserved_node_ids_by_shard" in the JSON layout syntax. This field specifies a set of node IDs that should always be assigned to each shard (if they exist in the current View), regardless of where those nodes appear in the rank order. If shards have reserved node IDs, the allocation function will always assign those node IDs to the shards that reserved them, and then assign any remaining nodes in the default fashion (round robin in ascending rank order). If multiple shards from different subgroups reserve the same node IDs, those nodes will be assigned to all of the shards that reserved them, and thus be members of more than one subgroup. However, multiple shards in the same subgroup cannot reserve the same node ID (this will result in a configuration error), since shards by definition must be disjoint. 
+
+Here is an example of a JSON layout string that uses "reserved_node_ids_by_shard" to make the Bar subgroup's (only) shard co-resident with members of both shards of the Foo subgroup:
+
+```json
+[
+    {
+        "type_alias": "Foo",
+        "layout": [
+            {
+                "min_nodes_by_shard": [2, 2],
+                "max_nodes_by_shard": [3, 3],
+                "reserved_node_ids_by_shard": [[1, 2, 3], [4, 5, 6]],
+                "delivery_modes_by_shard": ["Ordered", "Ordered"],
+                "profiles_by_shard": ["DEFAULT", "DEFAULT"]
+            }
+        ]
+    },
+    {
+        "type_alias": "Bar",
+        "layout": [
+            {
+                "min_nodes_by_shard": [1],
+                "max_nodes_by_shard": [3],
+                "reserved_node_ids_by_shard": [[3, 4, 5]],
+                "delivery_modes_by_shard": ["Ordered"],
+                "profiles_by_shard": ["DEFAULT"]
+            }
+        ]
+    }
+]
+
+```
+
+**Defining a custom membership function:** If the default membership function's node-allocation algorithm doesn't fit your needs, you can define your own subgroup membership function. The demo program `overlapping_replicated_objects.cpp` shows a relatively simple example of a user-defined membership function.
In this program, the SubgroupInfo contains a C++ lambda function that implements the `shard_view_generator_t` type signature and handles subgroup assignment for Replicated Objects of type Foo, Bar, and Cache: ```cpp [](const std::vector& subgroup_type_order, diff --git a/include/derecho/core/subgroup_functions.hpp b/include/derecho/core/subgroup_functions.hpp index 7d2716c1..7b583943 100644 --- a/include/derecho/core/subgroup_functions.hpp +++ b/include/derecho/core/subgroup_functions.hpp @@ -484,7 +484,7 @@ class DefaultSubgroupAllocator { */ template DefaultSubgroupAllocator make_subgroup_allocator() { - return DefaultSubgroupAllocator({std::type_index(typeid(ReplicatedTypes))...}); + return DefaultSubgroupAllocator(std::vector{std::type_index(typeid(ReplicatedTypes))...}); } /** diff --git a/src/applications/demos/CMakeLists.txt b/src/applications/demos/CMakeLists.txt index 8df9c9ee..67604a5d 100644 --- a/src/applications/demos/CMakeLists.txt +++ b/src/applications/demos/CMakeLists.txt @@ -15,12 +15,10 @@ target_link_libraries(simple_replicated_objects derecho) add_executable(simple_replicated_objects_json simple_replicated_objects_json.cpp) target_link_libraries(simple_replicated_objects_json derecho) -add_executable(simple_replicated_objects_json_file simple_replicated_objects_json_file.cpp) -target_link_libraries(simple_replicated_objects_json_file derecho) -# overlapping_replicated_objects -add_executable(simple_replicated_objects_overlap_json_file simple_replicated_objects_overlap_json_file.cpp) -target_link_libraries(simple_replicated_objects_overlap_json_file derecho) +# overlapping replicated objects +add_executable(simple_replicated_objects_overlap simple_replicated_objects_overlap.cpp) +target_link_libraries(simple_replicated_objects_overlap derecho) add_executable(signed_store_mockup signed_store_mockup.cpp) target_link_libraries(signed_store_mockup derecho) diff --git a/src/applications/demos/json_cfgs/derecho.cfg 
b/src/applications/demos/json_cfgs/derecho.cfg index 72ad9f45..c3f78f98 100644 --- a/src/applications/demos/json_cfgs/derecho.cfg +++ b/src/applications/demos/json_cfgs/derecho.cfg @@ -1,9 +1,11 @@ +## Use this config file with simple_replicated_objects_json + [DERECHO] # leader ip - the leader's ip address leader_ip = 192.168.1.1 # leader gms port - the leader's gms port leader_gms_port = 23580 -# leader external client port - the leader's +# leader external client port - the leader's leader_external_port = 32645 # my local id - each node should have a different id local_id = 2 @@ -29,7 +31,7 @@ sst_poll_cq_timeout_ms = 100 # By disabling this feature, the derecho is allowed to run when active # members cannot form a majority. Please be aware of the 'split-brain' # syndrome:https://en.wikipedia.org/wiki/Split-brain and make sure your -# application is fine with it. +# application is fine with it. # To help the user play with derecho at beginning, we disabled the # partitioning safety. We suggest to set it to false for serious deployment disable_partitioning_safety = false @@ -40,8 +42,37 @@ max_p2p_request_payload_size = 10240 max_p2p_reply_payload_size = 10240 # window size for P2P requests and replies p2p_window_size = 16 -# json layout string, absolute path is more safe, the relative path needs to be calculated based on the location of the executable file -json_layout_path = '/users/Poanpan/mydata/temp/layout-with-id-pool.json' + +# Path to a JSON layout file. Can be used instead of the json_layout option. +# Note that relative paths will be interpreted as relative to the executable file's location. 
+#json_layout_path = './json_cfgs/layout.json' + +# JSON layout string +json_layout = ' +[ + { + "type_alias": "Foo", + "layout": [ + { + "min_nodes_by_shard": [3], + "max_nodes_by_shard": [3], + "delivery_modes_by_shard": ["Ordered"], + "profiles_by_shard": ["VCS"] + } + ] + }, + { + "type_alias": "Bar", + "layout": [ + { + "min_nodes_by_shard": [3], + "max_nodes_by_shard": [3], + "delivery_modes_by_shard": ["Ordered"], + "profiles_by_shard": ["DEFAULT"] + } + ] + } +]' # Subgroup configurations # - The default subgroup settings @@ -111,7 +142,7 @@ provider = verbs # For verbs provider, domain is the device name (ibv_devices) domain = mlx5_1 -# 3. tx_depth +# 3. tx_depth # tx_depth applies to hints->tx_attr->size, where hint is a struct fi_info object. # see https://ofiwg.github.io/libfabric/master/man/fi_getinfo.3.html tx_depth = 256 diff --git a/src/applications/demos/json_cfgs/derecho.cfg_json_layout b/src/applications/demos/json_cfgs/derecho.cfg_json_layout deleted file mode 100644 index c01b409e..00000000 --- a/src/applications/demos/json_cfgs/derecho.cfg_json_layout +++ /dev/null @@ -1,168 +0,0 @@ -[DERECHO] -# leader ip - the leader's ip address -leader_ip = 192.168.1.1 -# leader gms port - the leader's gms port -leader_gms_port = 23580 -# leader external client port - the leader's -leader_external_port = 32645 -# my local id - each node should have a different id -local_id = 0 -# my local ip address -local_ip = 192.168.1.1 -# derecho gms port -gms_port = 23580 -# derecho rpc port -state_transfer_port = 28366 -# sst tcp port -sst_port = 37683 -# rdmc tcp port -rdmc_port = 31675 -# external port -external_port = 32645 -# this is the frequency of the failure detector thread. -# It is best to leave this to 1 ms for RDMA. If it is too high, -# you run the risk of overflowing the queue of outstanding sends. 
-heartbeat_ms = 100 -# sst poll completion queue timeout in millisecond -sst_poll_cq_timeout_ms = 100 -# disable partitioning safety -# By disabling this feature, the derecho is allowed to run when active -# members cannot form a majority. Please be aware of the 'split-brain' -# syndrome:https://en.wikipedia.org/wiki/Split-brain and make sure your -# application is fine with it. -# To help the user play with derecho at beginning, we disabled the -# partitioning safety. We suggest to set it to false for serious deployment -disable_partitioning_safety = false - -# maximum payload size for P2P requests -max_p2p_request_payload_size = 10240 -# maximum payload size for P2P replies -max_p2p_reply_payload_size = 10240 -# window size for P2P requests and replies -p2p_window_size = 16 -# json layout string -json_layout = ' -[ - { - "type_alias": "Foo", - "layout": [ - { - "min_nodes_by_shard": [3], - "max_nodes_by_shard": [3], - "delivery_modes_by_shard": ["Ordered"], - "profiles_by_shard": ["VCS"] - } - ] - }, - { - "type_alias": "Bar", - "layout": [ - { - "min_nodes_by_shard": [3], - "max_nodes_by_shard": [3], - "delivery_modes_by_shard": ["Ordered"], - "profiles_by_shard": ["DEFAULT"] - } - ] - } -]' - -# Subgroup configurations -# - The default subgroup settings -[SUBGROUP/DEFAULT] -# maximum payload size -# Any message with size large than this has to be broken -# down to multiple messages. -# Large message consumes memory space because the memory buffers -# have to be pre-allocated. -max_payload_size = 10240 -# maximum reply payload size -# This is for replies generated by ordered sends in the subgroup -max_reply_payload_size = 10240 -# maximum smc (SST's small message multicast) payload size -# If the message size is smaller or equal to this size, -# it will be sent using SST multicast, otherwise it will -# try RDMC if the message size is smaller than max_payload_size. -max_smc_payload_size = 10240 -# block size depends on your max_payload_size. 
-# It is only relevant if you are ever going to send a message using RDMC. -# In that case, it should be set to the same value as the max_payload_size, -# if the max_payload_size is around 1 MB. For very large messages, the block # size should be a few MBs (1 is fine). -block_size = 1048576 -# message window size -# the length of the message pipeline -window_size = 16 -# the send algorithm for RDMC. Other options are -# chain_send, sequential_send, tree_send -rdmc_send_algorithm = binomial_send -# - SAMPLE for large message settings - -[SUBGROUP/VCS] -max_payload_size = 8192 -max_reply_payload_size = 8192 -max_smc_payload_size = 10240 -# does not matter unless max_payload_size > max_smc_payload_size -block_size = 1048576 -window_size = 50 -rdmc_send_algorithm = binomial_send -num_shards = 1 -min_nodes = 1 -max_nodes = 4 - -# RDMA section contains configurations of the following -# - which RDMA device to use -# - device configurations -[RDMA] -# 1. provider = bgq|gni|mlx|netdir|psm|psm2|rxd|rxm|shm|sockets|udp|usnic|verbs -# possible options(only 'sockets' and 'verbs' providers are tested so far): -# bgq - The Blue Gene/Q Fabric Provider -# gni - The GNI Fabric Provider (Cray XC (TM) systems) -# mlx - The MLX Fabric Provider (UCX library) -# netdir - The Network Direct Fabric Provider (Microsoft Network Direct SPI) -# psm - The PSM Fabric Provider -# psm2 - The PSM2 Fabric Provider -# rxd - The RxD (RDM over DGRAM) Utility Provider -# rxm - The RxM (RDM over MSG) Utility Provider -# shm - The SHM Fabric Provider -# sockets - The Sockets Fabric Provider (TCP) -# udp - The UDP Fabric Provider -# usnic - The usNIC Fabric Provider (Cisco VIC) -# verbs - The Verbs Fabric Provider -provider = verbs - -# 2. domain -# For sockets provider, domain is the NIC name (ifconfig | grep -v -e "^ ") -# For verbs provider, domain is the device name (ibv_devices) -domain = mlx5_1 - -# 3. tx_depth -# tx_depth applies to hints->tx_attr->size, where hint is a struct fi_info object. 
-# see https://ofiwg.github.io/libfabric/master/man/fi_getinfo.3.html -tx_depth = 256 - -# 4. rx_depth: -# rx_depth applies to hints->rx_attr->size, where hint is a struct fi_info object. -# see https://ofiwg.github.io/libfabric/master/man/fi_getinfo.3.html -rx_depth = 256 - -# Persistent configurations -[PERS] -# persistent directory for file system-based logfile. -file_path = .plog -ramdisk_path = /dev/shm/volatile_t -# Reset persistent data -# CAUTION: "reset = true" removes existing persisted data!!! -reset = false -# Max number of the log entries in each persistent, default to 1048576 -max_log_entry = 1048576 -# Max data size in bytes for each persistent, default to 512GB -max_data_size = 549755813888 - -# Logger configurations -[LOGGER] -# default log name -default_log_name = derecho_debug -# default log level -# Available options: -# trace,debug,info,warn,error,critical,off -default_log_level = trace diff --git a/src/applications/demos/json_cfgs/derecho.cfg_json_path b/src/applications/demos/json_cfgs/derecho.cfg_json_path deleted file mode 100644 index 60119650..00000000 --- a/src/applications/demos/json_cfgs/derecho.cfg_json_path +++ /dev/null @@ -1,144 +0,0 @@ -[DERECHO] -# leader ip - the leader's ip address -leader_ip = 192.168.1.1 -# leader gms port - the leader's gms port -leader_gms_port = 23580 -# leader external client port - the leader's -leader_external_port = 32645 -# my local id - each node should have a different id -local_id = 0 -# my local ip address -local_ip = 192.168.1.1 -# derecho gms port -gms_port = 23580 -# derecho rpc port -state_transfer_port = 28366 -# sst tcp port -sst_port = 37683 -# rdmc tcp port -rdmc_port = 31675 -# external port -external_port = 32645 -# this is the frequency of the failure detector thread. -# It is best to leave this to 1 ms for RDMA. If it is too high, -# you run the risk of overflowing the queue of outstanding sends. 
-heartbeat_ms = 100 -# sst poll completion queue timeout in millisecond -sst_poll_cq_timeout_ms = 100 -# disable partitioning safety -# By disabling this feature, the derecho is allowed to run when active -# members cannot form a majority. Please be aware of the 'split-brain' -# syndrome:https://en.wikipedia.org/wiki/Split-brain and make sure your -# application is fine with it. -# To help the user play with derecho at beginning, we disabled the -# partitioning safety. We suggest to set it to false for serious deployment -disable_partitioning_safety = false - -# maximum payload size for P2P requests -max_p2p_request_payload_size = 10240 -# maximum payload size for P2P replies -max_p2p_reply_payload_size = 10240 -# window size for P2P requests and replies -p2p_window_size = 16 -# json layout string, absolute path is more safe, the relative path needs to be calculated based on the location of the executable file -json_layout_path = '/users/Poanpan/mydata/temp/layout.json' - -# Subgroup configurations -# - The default subgroup settings -[SUBGROUP/DEFAULT] -# maximum payload size -# Any message with size large than this has to be broken -# down to multiple messages. -# Large message consumes memory space because the memory buffers -# have to be pre-allocated. -max_payload_size = 10240 -# maximum reply payload size -# This is for replies generated by ordered sends in the subgroup -max_reply_payload_size = 10240 -# maximum smc (SST's small message multicast) payload size -# If the message size is smaller or equal to this size, -# it will be sent using SST multicast, otherwise it will -# try RDMC if the message size is smaller than max_payload_size. -max_smc_payload_size = 10240 -# block size depends on your max_payload_size. -# It is only relevant if you are ever going to send a message using RDMC. -# In that case, it should be set to the same value as the max_payload_size, -# if the max_payload_size is around 1 MB. 
For very large messages, the block # size should be a few MBs (1 is fine). -block_size = 1048576 -# message window size -# the length of the message pipeline -window_size = 16 -# the send algorithm for RDMC. Other options are -# chain_send, sequential_send, tree_send -rdmc_send_algorithm = binomial_send -# - SAMPLE for large message settings - -[SUBGROUP/VCS] -max_payload_size = 8192 -max_reply_payload_size = 8192 -max_smc_payload_size = 10240 -# does not matter unless max_payload_size > max_smc_payload_size -block_size = 1048576 -window_size = 50 -rdmc_send_algorithm = binomial_send -num_shards = 1 -min_nodes = 1 -max_nodes = 4 - -# RDMA section contains configurations of the following -# - which RDMA device to use -# - device configurations -[RDMA] -# 1. provider = bgq|gni|mlx|netdir|psm|psm2|rxd|rxm|shm|sockets|udp|usnic|verbs -# possible options(only 'sockets' and 'verbs' providers are tested so far): -# bgq - The Blue Gene/Q Fabric Provider -# gni - The GNI Fabric Provider (Cray XC (TM) systems) -# mlx - The MLX Fabric Provider (UCX library) -# netdir - The Network Direct Fabric Provider (Microsoft Network Direct SPI) -# psm - The PSM Fabric Provider -# psm2 - The PSM2 Fabric Provider -# rxd - The RxD (RDM over DGRAM) Utility Provider -# rxm - The RxM (RDM over MSG) Utility Provider -# shm - The SHM Fabric Provider -# sockets - The Sockets Fabric Provider (TCP) -# udp - The UDP Fabric Provider -# usnic - The usNIC Fabric Provider (Cisco VIC) -# verbs - The Verbs Fabric Provider -provider = verbs - -# 2. domain -# For sockets provider, domain is the NIC name (ifconfig | grep -v -e "^ ") -# For verbs provider, domain is the device name (ibv_devices) -domain = mlx5_1 - -# 3. tx_depth -# tx_depth applies to hints->tx_attr->size, where hint is a struct fi_info object. -# see https://ofiwg.github.io/libfabric/master/man/fi_getinfo.3.html -tx_depth = 256 - -# 4. rx_depth: -# rx_depth applies to hints->rx_attr->size, where hint is a struct fi_info object. 
-# see https://ofiwg.github.io/libfabric/master/man/fi_getinfo.3.html -rx_depth = 256 - -# Persistent configurations -[PERS] -# persistent directory for file system-based logfile. -file_path = .plog -ramdisk_path = /dev/shm/volatile_t -# Reset persistent data -# CAUTION: "reset = true" removes existing persisted data!!! -reset = false -# Max number of the log entries in each persistent, default to 1048576 -max_log_entry = 1048576 -# Max data size in bytes for each persistent, default to 512GB -max_data_size = 549755813888 - -# Logger configurations -[LOGGER] -# default log name -default_log_name = derecho_debug -# default log level -# Available options: -# trace,debug,info,warn,error,critical,off -default_log_level = trace diff --git a/src/applications/demos/simple_replicated_objects_json.cpp b/src/applications/demos/simple_replicated_objects_json.cpp index ec90eb93..b59c753f 100644 --- a/src/applications/demos/simple_replicated_objects_json.cpp +++ b/src/applications/demos/simple_replicated_objects_json.cpp @@ -32,15 +32,10 @@ int main(int argc, char** argv) { derecho::Conf::initialize(argc, argv); //Define subgroup membership using the default subgroup allocator function - //Each Replicated type will have one subgroup and one shard, with three members in the shard - - json json_layout = json::parse(derecho::getConfString(CONF_DERECHO_JSON_LAYOUT)); - cout << "json_layout parsed\n"; - auto dsa_object = derecho::make_subgroup_allocator(json_layout); - cout << "dsa_object constructed\n"; - derecho::SubgroupInfo subgroup_function{dsa_object}; - - // derecho::SubgroupInfo subgroup_function{derecho::construct_DSA_with_layout(json::parse(derecho::getConfString(CONF_DERECHO_JSON_LAYOUT)))}; + //When constructed using make_subgroup_allocator with no arguments, this will check the config file + //for either the json_layout or json_layout_path options, and use whichever one is present to define + //the mapping from types to subgroup allocation parameters. 
+ derecho::SubgroupInfo subgroup_function{derecho::make_subgroup_allocator()}; //Each replicated type needs a factory; this can be used to supply constructor arguments //for the subgroup's initial state. These must take a PersistentRegistry* argument, but diff --git a/src/applications/demos/simple_replicated_objects_json_file.cpp b/src/applications/demos/simple_replicated_objects_json_file.cpp deleted file mode 100644 index 2f73a3e7..00000000 --- a/src/applications/demos/simple_replicated_objects_json_file.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/** - * @file simple_replicated_objects.cpp - * - * This test creates two subgroups, one of each type Foo and Bar (defined in sample_objects.h). - * It requires at least 6 nodes to join the group; the first three are part of the Foo subgroup, - * while the next three are part of the Bar subgroup. - * Every node (identified by its node_id) makes some calls to ordered_send in their subgroup; - * some also call p2p_send. By these calls they verify that the state machine operations are - * executed properly. - */ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include "sample_objects.hpp" - -using derecho::ExternalCaller; -using derecho::Replicated; -using std::cout; -using std::endl; - -int main(int argc, char** argv) { - // Read configurations from the command line options as well as the default config file - derecho::Conf::initialize(argc, argv); - - //Define subgroup membership using the default subgroup allocator function - //Each Replicated type will have one subgroup and one shard, with three members in the shard - derecho::SubgroupInfo subgroup_function {derecho::make_subgroup_allocator( - derecho::getConfString(CONF_DERECHO_JSON_LAYOUT_PATH) - )}; - //Each replicated type needs a factory; this can be used to supply constructor arguments - //for the subgroup's initial state. 
These must take a PersistentRegistry* argument, but - //in this case we ignore it because the replicated objects aren't persistent. - auto foo_factory = [](persistent::PersistentRegistry*,derecho::subgroup_id_t) { return std::make_unique(-1); }; - auto bar_factory = [](persistent::PersistentRegistry*,derecho::subgroup_id_t) { return std::make_unique(); }; - - derecho::Group group(derecho::UserMessageCallbacks{}, subgroup_function, {}, - std::vector{}, - foo_factory, bar_factory); - - cout << "Finished constructing/joining Group" << endl; - - //Now have each node send some updates to the Replicated objects - //The code must be different depending on which subgroup this node is in, - //which we can determine based on which membership list it appears in - uint32_t my_id = derecho::getConfUInt32(CONF_DERECHO_LOCAL_ID); - std::vector foo_members = group.get_subgroup_members(0)[0]; - std::vector bar_members = group.get_subgroup_members(0)[0]; - auto find_in_foo_results = std::find(foo_members.begin(), foo_members.end(), my_id); - if(find_in_foo_results != foo_members.end()) { - uint32_t rank_in_foo = std::distance(foo_members.begin(), find_in_foo_results); - Replicated& foo_rpc_handle = group.get_subgroup(); - if(rank_in_foo == 0) { - int new_value = 1; - cout << "Changing Foo's state to " << new_value << endl; - derecho::rpc::QueryResults results = foo_rpc_handle.ordered_send(new_value); - decltype(results)::ReplyMap& replies = results.get(); - cout << "Got a reply map!" 
<< endl; - for(auto& reply_pair : replies) { - cout << "Reply from node " << reply_pair.first << " was " << std::boolalpha << reply_pair.second.get() << endl; - } - cout << "Reading Foo's state just to allow node 1's message to be delivered" << endl; - foo_rpc_handle.ordered_send(); - } else if(rank_in_foo == 1) { - int new_value = 3; - cout << "Changing Foo's state to " << new_value << endl; - derecho::rpc::QueryResults results = foo_rpc_handle.ordered_send(new_value); - decltype(results)::ReplyMap& replies = results.get(); - cout << "Got a reply map!" << endl; - for(auto& reply_pair : replies) { - cout << "Reply from node " << reply_pair.first << " was " << std::boolalpha << reply_pair.second.get() << endl; - } - } else if(rank_in_foo == 2) { - std::this_thread::sleep_for(std::chrono::seconds(1)); - cout << "Reading Foo's state from the group" << endl; - derecho::rpc::QueryResults foo_results = foo_rpc_handle.ordered_send(); - for(auto& reply_pair : foo_results.get()) { - cout << "Node " << reply_pair.first << " says the state is: " << reply_pair.second.get() << endl; - } - } - } else { - uint32_t rank_in_bar = derecho::index_of(bar_members, my_id); - Replicated& bar_rpc_handle = group.get_subgroup(); - if(rank_in_bar == 0) { - cout << "Appending to Bar." 
<< endl; - derecho::rpc::QueryResults void_future = bar_rpc_handle.ordered_send("Write from 0..."); - derecho::rpc::QueryResults::ReplyMap& sent_nodes = void_future.get(); - cout << "Append delivered to nodes: "; - for(const node_id_t& node : sent_nodes) { - cout << node << " "; - } - cout << endl; - } else if(rank_in_bar == 1) { - cout << "Appending to Bar" << endl; - bar_rpc_handle.ordered_send("Write from 1..."); - node_id_t p2p_target = foo_members[2]; - cout << "Reading Foo's state from node " << p2p_target << endl; - ExternalCaller& p2p_foo_handle = group.get_nonmember_subgroup(); - derecho::rpc::QueryResults foo_results = p2p_foo_handle.p2p_send(p2p_target); - int response = foo_results.get().get(p2p_target); - cout << " Response: " << response << endl; - } else if(rank_in_bar == 2) { - bar_rpc_handle.ordered_send("Write from 2..."); - cout << "Printing log from Bar" << endl; - derecho::rpc::QueryResults bar_results = bar_rpc_handle.ordered_send(); - for(auto& reply_pair : bar_results.get()) { - cout << "Node " << reply_pair.first << " says the log is: " << reply_pair.second.get() << endl; - } - cout << "Clearing Bar's log" << endl; - derecho::rpc::QueryResults void_future = bar_rpc_handle.ordered_send(); - } - } - - cout << "Reached end of main(), entering infinite loop so program doesn't exit" << std::endl; - while(true) { - } -} diff --git a/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp b/src/applications/demos/simple_replicated_objects_overlap.cpp similarity index 73% rename from src/applications/demos/simple_replicated_objects_overlap_json_file.cpp rename to src/applications/demos/simple_replicated_objects_overlap.cpp index 354cc515..e27c6b0f 100644 --- a/src/applications/demos/simple_replicated_objects_overlap_json_file.cpp +++ b/src/applications/demos/simple_replicated_objects_overlap.cpp @@ -1,13 +1,3 @@ -/** - * @file simple_replicated_objects.cpp - * - * This test creates two subgroups, one of each type Foo and Bar (defined 
in sample_objects.h). - * It requires at least 6 nodes to join the group; the first three are part of the Foo subgroup, - * while the next three are part of the Bar subgroup. - * Every node (identified by its node_id) makes some calls to ordered_send in their subgroup; - * some also call p2p_send. By these calls they verify that the state machine operations are - * executed properly. - */ #include #include #include @@ -26,22 +16,13 @@ using derecho::Replicated; using std::cout; using std::endl; -void print_set(const std::vector& uset) { - std::stringstream stream; - for(auto thing : uset) { - stream << thing << ' '; - } - - std::string out = stream.str(); - dbg_default_crit(out); -} - int main(int argc, char** argv) { // Read configurations from the command line options as well as the default config file derecho::Conf::initialize(argc, argv); //Define subgroup membership using the default subgroup allocator function - //Each Replicated type will have one subgroup and one shard, with three members in the shard + //This test assumes derecho.cfg specifies a JSON layout path to a file that allocates + //Foo and Bar overlapping sets of node IDs. 
derecho::SubgroupInfo subgroup_function{derecho::make_subgroup_allocator( derecho::getConfString(CONF_DERECHO_JSON_LAYOUT_PATH))}; //Each replicated type needs a factory; this can be used to supply constructor arguments @@ -67,16 +48,14 @@ int main(int argc, char** argv) { uint32_t rank_in_foo = std::distance(foo_members.begin(), find_in_foo_results); // Replicated& foo_rpc_handle = group.get_subgroup(); dbg_default_crit("Here is FOO {}!", rank_in_foo); - dbg_default_crit("I see members of my shard:"); - print_set(foo_members); + dbg_default_crit("I see members of my shard: {}", foo_members); } auto find_in_bar_results = std::find(bar_members.begin(), bar_members.end(), my_id); if(find_in_bar_results != bar_members.end()) { uint32_t rank_in_bar = derecho::index_of(bar_members, my_id); // Replicated& bar_rpc_handle = group.get_subgroup(); dbg_default_crit("Here is BAR {}!", rank_in_bar); - dbg_default_crit("I see members of my shard:"); - print_set(bar_members); + dbg_default_crit("I see members of my shard: {}", bar_members); } cout << "Reached end of main(), entering infinite loop so program doesn't exit" << std::endl; diff --git a/src/core/git_version.cpp b/src/core/git_version.cpp index 1406be03..95949ffa 100644 --- a/src/core/git_version.cpp +++ b/src/core/git_version.cpp @@ -13,8 +13,8 @@ namespace derecho { const int MAJOR_VERSION = 2; const int MINOR_VERSION = 1; const int PATCH_VERSION = 0; -const int COMMITS_AHEAD_OF_VERSION = 147; +const int COMMITS_AHEAD_OF_VERSION = 149; const char* VERSION_STRING = "2.1.0"; -const char* VERSION_STRING_PLUS_COMMITS = "2.1.0+147"; +const char* VERSION_STRING_PLUS_COMMITS = "2.1.0+149"; }