Skip to content

Commit

Permalink
#1934: Add parameter to control minimal retention of historical LB data
Browse files Browse the repository at this point in the history
  • Loading branch information
thearusable committed Dec 6, 2022
1 parent 5dff872 commit b4a686a
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 2 deletions.
2 changes: 2 additions & 0 deletions src/vt/configs/arguments/app_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ struct AppConfig {
bool vt_lb_keep_last_elm = false;
bool vt_lb_data = false;
bool vt_lb_data_compress = true;
uint32_t vt_lb_data_retention = 0;
std::string vt_lb_data_dir = "vt_lb_data";
std::string vt_lb_data_file = "data.%p.json";
std::string vt_lb_data_dir_in = "vt_lb_data_in";
Expand Down Expand Up @@ -317,6 +318,7 @@ struct AppConfig {
| vt_lb_interval
| vt_lb_data
| vt_lb_data_compress
| vt_lb_data_retention
| vt_lb_data_dir
| vt_lb_data_file
| vt_lb_data_dir_in
Expand Down
3 changes: 3 additions & 0 deletions src/vt/configs/arguments/args.cc
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) {
auto lb_keep_last_elm = "Do not migrate last element in collection";
auto lb_data = "Enable load balancing data";
auto lb_data_comp = "Compress load balancing data output with brotli";
auto lb_data_hist = "Minimal number of historical LB data phases to retain";
auto lb_data_dir = "Load balancing data output directory";
auto lb_data_file = "Load balancing data output file name";
auto lb_data_dir_in = "Load balancing data input directory";
Expand All @@ -490,6 +491,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) {
auto wl = app.add_flag("--vt_lb_keep_last_elm", appConfig.vt_lb_keep_last_elm, lb_keep_last_elm);
auto ww = app.add_flag("--vt_lb_data", appConfig.vt_lb_data, lb_data);
auto xz = app.add_flag("--vt_lb_data_compress", appConfig.vt_lb_data_compress, lb_data_comp);
auto dr = app.add_option("--vt_lb_data_retention", appConfig.vt_lb_data_retention, lb_data_hist);
auto wx = app.add_option("--vt_lb_data_dir", appConfig.vt_lb_data_dir, lb_data_dir)->capture_default_str();
auto wy = app.add_option("--vt_lb_data_file", appConfig.vt_lb_data_file, lb_data_file)->capture_default_str();
auto xx = app.add_option("--vt_lb_data_dir_in", appConfig.vt_lb_data_dir_in, lb_data_dir_in)->capture_default_str();
Expand Down Expand Up @@ -517,6 +519,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) {
xx->group(debugLB);
xy->group(debugLB);
xz->group(debugLB);
dr->group(debugLB);
yx->group(debugLB);
yy->group(debugLB);
yz->group(debugLB);
Expand Down
6 changes: 5 additions & 1 deletion src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@
#include "vt/vrt/collection/manager.h"
#include "vt/utils/json/json_appender.h"

#include <algorithm>

namespace vt { namespace vrt { namespace collection { namespace balance {

/*static*/ std::unique_ptr<LBManager> LBManager::construct() {
Expand Down Expand Up @@ -159,6 +161,8 @@ LBType LBManager::decideLBToRun(PhaseType phase, bool try_file) {
// Install a new load model and recompute how many past phases of LB data
// must be retained: the larger of what the model reports through
// getNumPastPhasesNeeded() and the user-configured vt_lb_data_retention.
// The result is cached in min_hist_lb_data_ and forwarded to the node LB
// data component before the model is pointed at that component's load and
// communication data.
void LBManager::setLoadModel(std::shared_ptr<LoadModel> model) {
  model_ = model;

  auto node_lb_data = theNodeLBData();

  // Two independent lower bounds on retention; keep whichever is larger.
  auto const phases_for_model = model->getNumPastPhasesNeeded();
  auto const phases_from_config = theConfig()->vt_lb_data_retention;
  min_hist_lb_data_ = std::max(phases_for_model, phases_from_config);

  node_lb_data->setMinLBDataHistory(min_hist_lb_data_);
  model_->setLoads(
    node_lb_data->getNodeLoad(),
    node_lb_data->getNodeComm()
  );
}
Expand Down Expand Up @@ -465,7 +469,7 @@ void LBManager::finishedLB(PhaseType phase) {
"finishedLB\n"
);

theNodeLBData()->startIterCleanup(phase, model_->getNumPastPhasesNeeded());
theNodeLBData()->startIterCleanup(phase, min_hist_lb_data_);
theNodeLBData()->outputLBDataForPhase(phase);

destroyLB();
Expand Down
2 changes: 2 additions & 0 deletions src/vt/vrt/collection/balance/lb_invoke/lb_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,8 @@ struct LBManager : runtime::component::Component<LBManager> {
std::unique_ptr<util::json::BaseAppender> statistics_writer_ = nullptr;
/// Whether the LB statistics directory has been created
bool created_lbstats_dir_ = false;
/// The minimum number of phases of historical LB data to hold
uint32_t min_hist_lb_data_ = 0;
};

void makeGraphSymmetric(
Expand Down
6 changes: 5 additions & 1 deletion src/vt/vrt/collection/balance/node_lb_data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,11 @@ void NodeLBData::addNodeLBData(
in->updatePhase(1);

auto model = theLBManager()->getLoadModel();
in->releaseLBDataFromUnneededPhases(phase, model->getNumPastPhasesNeeded());
if(min_hist_lb_data_ > 0){
in->releaseLBDataFromUnneededPhases(phase, min_hist_lb_data_);
} else {
in->releaseLBDataFromUnneededPhases(phase, model->getNumPastPhasesNeeded());
}
}

VirtualProxyType NodeLBData::getCollectionProxyForElement(
Expand Down
9 changes: 9 additions & 0 deletions src/vt/vrt/collection/balance/node_lb_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,13 @@ struct NodeLBData : runtime::component::Component<NodeLBData> {
*/
LBDataHolder* getLBData() { return lb_data_.get(); }

/**
 * \internal \brief Set the minimum number of phases of historical LB data
 * that should be held
 *
 * \param[in] hist_len the minimum number of phases of LB data to hold
 */
void setMinLBDataHistory(uint32_t hist_len) {
  min_hist_lb_data_ = hist_len;
}

template <typename SerializerT>
void serialize(SerializerT& s) {
s | proxy_
Expand Down Expand Up @@ -292,6 +299,8 @@ struct NodeLBData : runtime::component::Component<NodeLBData> {
std::unique_ptr<util::json::BaseAppender> lb_data_writer_ = nullptr;
/// The struct that holds all the LB data
std::unique_ptr<LBDataHolder> lb_data_ = nullptr;
/// The minimum number of phases of historical LB data to hold
uint32_t min_hist_lb_data_ = 0;
};

}}}} /* end namespace vt::vrt::collection::balance */
Expand Down

0 comments on commit b4a686a

Please sign in to comment.