Skip to content

Commit

Permalink
#1560: LB: change confusing arguments to be more explicit
Browse files Browse the repository at this point in the history
  • Loading branch information
lifflander committed Sep 23, 2021
1 parent 3af5300 commit 44d1a8b
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 102 deletions.
61 changes: 21 additions & 40 deletions src/vt/vrt/collection/balance/greedylb/greedylb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,46 +76,32 @@ void GreedyLB::init(objgroup::proxy::Proxy<GreedyLB> in_proxy) {
GreedyLB::getInputKeysWithHelp() {
std::unordered_map<std::string, std::string> const keys_help = {
{
"min",
"I_tolerance",
R"(
Values: <double>
Default: 0.8
Default: 0.05
Description:
The load threshold for objects to consider on each node. The default value of
0.8 will consider 80% of the average load for re-balancing on each node. The
order these will be selected is based on the value passed to "strategy". If
the parameter "auto" is set to "true", this will be the minimum threshold;
otherwise, it sets the threshold directly.
If the imbalance metric, I, is greater than I_tolerance, the load balancer
will run.
)"
},
{
"max",
"threshold",
R"(
Values: <double>
Default: 1.004
Default: 0.5
Description:
The maximum load threshold for objects to consider on each node which is only
used if "auto" is "true".
The load threshold of objects to consider for potential migration on each
rank. All objects over threshold * average_load on each rank will be considered.
)"
},
{
"auto",
R"(
Values: {true, false}
Default: true
Description:
Automatically determine the threshold between "min" and "max" using
calculated I (imbalance metric) with the formula
min(max(1-I, min), max).
)"
},
{
"strategy",
"data_dist",
R"(
Values: {scatter, bcast, pt2pt}
Default: scatter
Description:
How to distribute the data after the centralized LB makes a decision
How to distribute the migration decisions after the centralized LB runs.
)"
}
};
Expand All @@ -130,18 +116,17 @@ void GreedyLB::inputParams(balance::SpecEntry* spec) {
allowed.push_back(elm.first);
}
spec->checkAllowedKeys(allowed);
min_threshold = spec->getOrDefault<double>("min", greedy_threshold_p);
max_threshold = spec->getOrDefault<double>("max", greedy_max_threshold_p);
auto_threshold = spec->getOrDefault<bool>("auto", greedy_auto_threshold_p);
I_tolerance = spec->getOrDefault<double>("I_tolerance", I_tolerance);
this_threshold = spec->getOrDefault<double>("threshold", this_threshold);

balance::LBArgsEnumConverter<DataDistStrategy> strategy_converter_(
"strategy", "DataDistStrategy", {
balance::LBArgsEnumConverter<DataDistStrategy> data_dist_converter_(
"data_dist", "DataDistStrategy", {
{DataDistStrategy::scatter, "scatter"},
{DataDistStrategy::pt2pt, "pt2pt"},
{DataDistStrategy::bcast, "bcast"}
}
);
strat_ = strategy_converter_.getFromSpec(spec, strat_);
data_dist_ = data_dist_converter_.getFromSpec(spec, data_dist_);
}

void GreedyLB::runLB() {
Expand All @@ -158,19 +143,15 @@ void GreedyLB::loadStats() {
this_load_begin = this_load;

if (avg_load > 0.0000000001) {
should_lb = I > greedy_tolerance;
}

if (auto_threshold) {
this_threshold = std::min(std::max(1.0f - I, min_threshold), max_threshold);
should_lb = I > I_tolerance;
}

if (this_node == 0) {
vt_print(
lb,
"loadStats: load={:.2f}, total={:.2f}, avg={:.2f}, I={:.2f},"
"should_lb={}, auto={}, threshold={}\n",
this_load, total_load, avg_load, I, should_lb, auto_threshold,
"should_lb={}, I_tolerance={}, threshold={}\n",
this_load, total_load, avg_load, I, should_lb, I_tolerance,
this_threshold
);
fflush(stdout);
Expand Down Expand Up @@ -344,7 +325,7 @@ void GreedyLB::transferObjs(std::vector<GreedyProc>&& in_load) {
}
}

if (strat_ == DataDistStrategy::scatter) {
if (data_dist_ == DataDistStrategy::scatter) {
std::size_t max_bytes = max_recs * sizeof(GreedyLBTypes::ObjIDType);
vt_debug_print(
normal, lb,
Expand All @@ -362,15 +343,15 @@ void GreedyLB::transferObjs(std::vector<GreedyProc>&& in_load) {
}
}
);
} else if (strat_ == DataDistStrategy::pt2pt) {
} else if (data_dist_ == DataDistStrategy::pt2pt) {
for (NodeType n = 0; n < theContext()->getNumNodes(); n++) {
vtAssert(
node_transfer.size() == static_cast<size_t>(theContext()->getNumNodes()),
"Must contain all nodes"
);
proxy[n].send<GreedySendMsg, &GreedyLB::recvObjs>(node_transfer[n]);
}
} else if (strat_ == DataDistStrategy::bcast) {
} else if (data_dist_ == DataDistStrategy::bcast) {
proxy.broadcast<GreedyBcastMsg, &GreedyLB::recvObjsBcast>(node_transfer);
}
}
Expand Down
11 changes: 3 additions & 8 deletions src/vt/vrt/collection/balance/greedylb/greedylb.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,17 +111,12 @@ struct GreedyLB : BaseLB {
static objgroup::proxy::Proxy<GreedyLB> scatter_proxy;

private:
double this_threshold = 0.0f;
double I_tolerance = 0.05f;
double this_threshold = 0.5f;
LoadType this_load_begin = 0.0f;
ObjSampleType load_over;
objgroup::proxy::Proxy<GreedyLB> proxy = {};

// Parameters read from LB spec file
double max_threshold = 0.0f;
double min_threshold = 0.0f;
bool auto_threshold = true;

DataDistStrategy strat_ = DataDistStrategy::scatter;
DataDistStrategy data_dist_ = DataDistStrategy::scatter;
};

}}}} /* end namespace vt::vrt::collection::lb */
Expand Down
8 changes: 2 additions & 6 deletions src/vt/vrt/collection/balance/greedylb/greedylb_constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,8 @@

namespace vt { namespace vrt { namespace collection { namespace lb {

static constexpr NodeType const greedy_root = 0;
static constexpr int32_t const greedy_bin_size = 10;
static constexpr bool const greedy_auto_threshold_p = true;
static constexpr double const greedy_tolerance = 0.05f;
static constexpr double const greedy_threshold_p = 0.3f;
static constexpr double const greedy_max_threshold_p = 1.004f;
static constexpr NodeType const greedy_root = 0;
static constexpr int32_t const greedy_bin_size = 10;

}}}} /* end namespace vt::vrt::collection::lb */

Expand Down
57 changes: 18 additions & 39 deletions src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,41 +75,27 @@ void HierarchicalLB::init(objgroup::proxy::Proxy<HierarchicalLB> in_proxy) {
HierarchicalLB::getInputKeysWithHelp() {
std::unordered_map<std::string, std::string> const keys_help = {
{
"min",
"I_tolerance",
R"(
Values: <double>
Default: 0.8
Default: 0.05
Description:
The load threshold for objects to consider on each node. The default value of
0.8 will consider 80% of the average load for re-balancing on each node. The
order these will be selected is based on the value passed to "strategy". If
the parameter "auto" is set to "true", this will be the minimum threshold;
otherwise, it sets the threshold directly.
If the imbalance metric, I, is greater than I_tolerance, the load balancer
will run.
)"
},
{
"max",
"threshold",
R"(
Values: <double>
Default: 1.004
Default: 0.5
Description:
The maximum load threshold for objects to consider on each node which is only
used if "auto" is "true".
The load threshold of objects to consider for potential migration on each
rank. All objects over threshold * average_load on each rank will be considered.
)"
},
{
"auto",
R"(
Values: {true, false}
Default: true
Description:
Automatically determine the threshold between "min" and "max" using
calculated I (imbalance metric) with the formula
min(max(1-I, min), max).
)"
},
{
"strategy",
"object_selection",
R"(
Values: {LoadOverLessThan, LoadOverGreaterThan, LoadOverOneEach}
Default: LoadOverLessThan
Expand All @@ -134,12 +120,11 @@ void HierarchicalLB::inputParams(balance::SpecEntry* spec) {
allowed.push_back(elm.first);
}
spec->checkAllowedKeys(allowed);
min_threshold = spec->getOrDefault<double>("min", hierlb_threshold_p);
max_threshold = spec->getOrDefault<double>("max", hierlb_max_threshold_p);
auto_threshold = spec->getOrDefault<bool>("auto", hierlb_auto_threshold_p);
this_threshold = spec->getOrDefault<double>("threshold", this_threshold);
I_tolerance = spec->getOrDefault<double>("I_tolerance", I_tolerance);

std::string extract = spec->getOrDefault<std::string>(
"strategy", "LoadOverLessThan"
"object_selection", "LoadOverLessThan"
);
if (extract.compare("LoadOverLessThan") == 0) {
extract_strategy = HeapExtractEnum::LoadOverLessThan;
Expand All @@ -154,7 +139,7 @@ void HierarchicalLB::inputParams(balance::SpecEntry* spec) {
}
}

void HierarchicalLB::setupTree(double const threshold) {
void HierarchicalLB::setupTree() {
vtAssert(
tree_setup == false,
"Tree must not already be set up when is this called"
Expand All @@ -163,12 +148,10 @@ void HierarchicalLB::setupTree(double const threshold) {
auto const& this_node = theContext()->getNode();
auto const& num_nodes = theContext()->getNumNodes();

this_threshold = threshold;

vt_debug_print(
terse, hierlb,
"HierarchicalLB: setupTree: threshold={}\n",
threshold
this_threshold
);

for (NodeType node = 0; node < hierlb_nary; node++) {
Expand Down Expand Up @@ -256,19 +239,15 @@ void HierarchicalLB::loadStats() {
this_load_begin = this_load;

if (avg_load > 0.0000000001) {
should_lb = I > hierlb_tolerance;
}

if (auto_threshold) {
this_threshold = std::min(std::max(1.0f - I, min_threshold), max_threshold);
should_lb = I > I_tolerance;
}

if (this_node == 0) {
vt_print(
hierlb,
"loadStats: load={:.2f}, total={:.2f}, avg={:.2f}, I={:.2f},"
"should_lb={}, auto={}, threshold={}\n",
this_load, total_load, avg_load, I, should_lb, auto_threshold,
"should_lb={}, I_tolerance={}, threshold={}\n",
this_load, total_load, avg_load, I, should_lb, I_tolerance,
this_threshold
);
fflush(stdout);
Expand Down Expand Up @@ -758,7 +737,7 @@ void HierarchicalLB::clearObj(ObjSampleType& objs) {
}

void HierarchicalLB::runLB() {
setupTree(min_threshold);
setupTree();

auto cb = vt::theCB()->makeBcast<
HierarchicalLB, SetupDoneMsg, &HierarchicalLB::setupDone
Expand Down
8 changes: 3 additions & 5 deletions src/vt/vrt/collection/balance/hierarchicallb/hierlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ struct HierarchicalLB : BaseLB {

static std::unordered_map<std::string, std::string> getInputKeysWithHelp();

void setupTree(double const threshold);
void setupTree();
void calcLoadOver(HeapExtractEnum const extract);
void loadOverBin(ObjBinType bin, ObjBinListType& bin_list);
void procDataIn(ElementLoadType const& data_in);
Expand Down Expand Up @@ -121,7 +121,8 @@ struct HierarchicalLB : BaseLB {
void loadStats();

private:
double this_threshold = 0.0f;
double this_threshold = 0.5f;
double I_tolerance = 0.05f;
bool tree_setup = false;
NodeType parent = uninitialized_destination;
NodeType bottom_parent = uninitialized_destination;
Expand All @@ -132,9 +133,6 @@ struct HierarchicalLB : BaseLB {
int64_t migrates_expected = 0, transfer_count = 0;
TransferType transfers;
objgroup::proxy::Proxy<HierarchicalLB> proxy = {};
double max_threshold = 0.0f;
double min_threshold = 0.0f;
bool auto_threshold = true;
HeapExtractEnum extract_strategy = HeapExtractEnum::LoadOverLessThan;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,10 @@

namespace vt { namespace vrt { namespace collection { namespace lb {

static constexpr double const hierlb_threshold_p = 0.8f;
static constexpr double const hierlb_max_threshold_p = 1.004f;
static constexpr NodeType const hierlb_nary = 2;
static constexpr NodeType const hierlb_root = 0;
static constexpr int32_t const hierlb_bin_size = 10;
static constexpr double const hierlb_no_load_sentinel = -1.0f;
static constexpr double const hierlb_tolerance = 0.05f;
static constexpr bool const hierlb_auto_threshold_p = true;

}}}} /* end namespace vt::vrt::collection::lb */

Expand Down

0 comments on commit 44d1a8b

Please sign in to comment.