Skip to content

Commit

Permalink
Merge pull request #1720 from DARMA-tasking/1265-stats-replay-without-collection
Browse files Browse the repository at this point in the history

1265 lb data replay without collection
  • Loading branch information
lifflander authored Nov 9, 2023
2 parents 09159cc + 794299d commit 65e7ebc
Show file tree
Hide file tree
Showing 14 changed files with 1,637 additions and 6 deletions.
20 changes: 20 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ set(PROJECT_BIN_DIR ${CMAKE_CURRENT_BINARY_DIR})
set(PROJECT_BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PROJECT_LIB_DIR ${CMAKE_CURRENT_SOURCE_DIR}/lib)
set(PROJECT_EXAMPLE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/examples)
set(PROJECT_TOOLS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/tools)

# Import the linking macros for VT-related targets
include(cmake/link_vt.cmake)
Expand Down Expand Up @@ -114,6 +115,25 @@ if (VT_BUILD_TESTS
include(CTest)
endif()

#
# Tools
#
# User-facing switch for the tools build; enabled unless overridden.
option(vt_build_tools "Build VT tools" ON)

if (NOT vt_build_tools)
  # Tools were explicitly disabled: report it and skip the subdirectory.
  message(STATUS "VT: NOT building tools because vt_build_tools is not set.")
else()
  message(STATUS "VT: building tools")

  # Declare the `tools` target before descending into tools/.
  # NOTE(review): presumably tools/CMakeLists.txt attaches the individual
  # tool executables to this target -- confirm there.
  add_custom_target(tools)
  add_subdirectory(tools)
endif()

#
# Examples
#
Expand Down
2 changes: 1 addition & 1 deletion scripts/check_license.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
path_to_vt=${1}
cd "$path_to_vt" || exit 1

for sub_dir in "src" "tests/unit" "tests/perf" "tutorial" "examples"
for sub_dir in "src" "tests/unit" "tests/perf" "tutorial" "examples" "tools"
do
"$path_to_vt/scripts/add-license-perl.pl" "$path_to_vt/$sub_dir" "$path_to_vt/scripts/license-template"
done
Expand Down
2 changes: 2 additions & 0 deletions src/vt/configs/arguments/app_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ struct AppConfig {
bool vt_debug_phase = false;
bool vt_debug_context = false;
bool vt_debug_epoch = false;
bool vt_debug_replay = false;

bool vt_debug_print_flush = false;

Expand Down Expand Up @@ -386,6 +387,7 @@ struct AppConfig {
| vt_debug_phase
| vt_debug_context
| vt_debug_epoch
| vt_debug_replay

| vt_debug_print_flush

Expand Down
3 changes: 3 additions & 0 deletions src/vt/configs/arguments/args.cc
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@ void addDebugPrintArgs(CLI::App& app, AppConfig& appConfig) {
auto dcp = "Enable debug_phase = \"" debug_pp(phase) "\"";
auto ddp = "Enable debug_context = \"" debug_pp(context) "\"";
auto dep = "Enable debug_epoch = \"" debug_pp(epoch) "\"";
auto dfp = "Enable debug_replay = \"" debug_pp(replay) "\"";

auto r1 = app.add_option("--vt_debug_level", appConfig.vt_debug_level, rq);

Expand Down Expand Up @@ -410,6 +411,7 @@ void addDebugPrintArgs(CLI::App& app, AppConfig& appConfig) {
auto dc = app.add_flag("--vt_debug_phase", appConfig.vt_debug_phase, dcp);
auto dd = app.add_flag("--vt_debug_context", appConfig.vt_debug_context, ddp);
auto de = app.add_flag("--vt_debug_epoch", appConfig.vt_debug_epoch, dep);
auto df = app.add_flag("--vt_debug_replay", appConfig.vt_debug_replay, dfp);

auto debugGroup = "Debug Print Configuration (must be compile-time enabled)";
r->group(debugGroup);
Expand Down Expand Up @@ -446,6 +448,7 @@ void addDebugPrintArgs(CLI::App& app, AppConfig& appConfig) {
dc->group(debugGroup);
dd->group(debugGroup);
de->group(debugGroup);
df->group(debugGroup);

auto dbq = "Always flush VT runtime prints";
auto eb = app.add_flag("--vt_debug_print_flush", appConfig.vt_debug_print_flush, dbq);
Expand Down
4 changes: 3 additions & 1 deletion src/vt/configs/debug/debug_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ enum CatEnum : uint64_t {
phase = 1ull<<28,
context = 1ull<<29,
epoch = 1ull<<30,
temperedwmin = 1ull<<31
temperedwmin = 1ull<<31,
replay = 1ull<<32
};

enum CtxEnum : uint64_t {
Expand Down Expand Up @@ -138,6 +139,7 @@ vt_option_category_pretty_print(reduce, "reduce")
vt_option_category_pretty_print(rdma, "RDMA")
vt_option_category_pretty_print(rdma_channel, "RDMA Channel")
vt_option_category_pretty_print(rdma_state, "RDMA State")
vt_option_category_pretty_print(replay, "replay")
vt_option_category_pretty_print(runtime, "runtime")
vt_option_category_pretty_print(scatter, "scatter")
vt_option_category_pretty_print(serial_msg, "serialized-msg")
Expand Down
8 changes: 4 additions & 4 deletions src/vt/phase/phase_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ void PhaseManager::printSummary(vrt::collection::lb::PhaseInfo* last_phase_info)
phase,
"phase={}, duration={}, rank_max_compute_time={}, rank_avg_compute_time={}, imbalance={:.3f}, "
"grain_max_time={}, migration count={}, lb_name={}\n",
cur_phase_,
last_phase_info->phase,
total_time,
TimeType(last_phase_info->max_load),
TimeType(last_phase_info->avg_load),
Expand All @@ -313,7 +313,7 @@ void PhaseManager::printSummary(vrt::collection::lb::PhaseInfo* last_phase_info)
// vt_print(
// phase,
// "POST phase={}, total time={}, max_load={}, avg_load={}, imbalance={:.3f}, migration count={}\n",
// cur_phase_,
// last_phase_info->phase,
// total_time,
// TimeType(last_phase_info->max_load_post_lb),
// TimeType(last_phase_info->avg_load_post_lb),
Expand All @@ -336,7 +336,7 @@ void PhaseManager::printSummary(vrt::collection::lb::PhaseInfo* last_phase_info)
auto percent_improvement = compute_percent_improvement(
last_phase_info->max_load, last_phase_info->avg_load
);
if (percent_improvement > 3.0 and cur_phase_ > 0) {
if (percent_improvement > 3.0 and last_phase_info->phase > 0) {
if (grain_percent_improvement < 0.5) {
// grain size is blocking improvement
vt_print(
Expand Down Expand Up @@ -395,7 +395,7 @@ void PhaseManager::printSummary(vrt::collection::lb::PhaseInfo* last_phase_info)
}
}
}
} else if (cur_phase_ == 0) {
} else if (last_phase_info->phase == 0) {
// ran the lb on a phase that may have included initialization costs
vt_print(
phase,
Expand Down
1 change: 1 addition & 0 deletions src/vt/runtime/runtime_banner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -904,6 +904,7 @@ void Runtime::printStartupBanner() {
vt_runtime_debug_warn_compile(phase)
vt_runtime_debug_warn_compile(context)
vt_runtime_debug_warn_compile(epoch)
vt_runtime_debug_warn_compile(replay)

auto arg_str = [](std::vector<char*> const& args) -> std::string {
std::stringstream ss;
Expand Down
4 changes: 4 additions & 0 deletions src/vt/vrt/collection/balance/lb_invoke/lb_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,10 @@ struct LBManager : runtime::component::Component<LBManager> {

void statsHandler(std::vector<balance::LoadData> const& in_stat_vec);

/**
 * \brief Get the phase info currently held by the manager
 *
 * \return the pointer obtained from \c last_phase_info_.get()
 */
lb::PhaseInfo *getPhaseInfo() {
  lb::PhaseInfo* info = last_phase_info_.get();
  return info;
}

void setComputingBeforeLBStats(bool before_lb) { before_lb_stats_ = before_lb; }

private:
bool isCollectiveComm(elm::CommCategory cat) const;

Expand Down
Loading

0 comments on commit 65e7ebc

Please sign in to comment.