forked from flexflow/flexflow-train
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add multi-objective global memory search algorithm (flexflow#493)
* Initial change of search procedure with memory consideration (flexflow#278)
* [Memory] Add necessary types to support memory search. WIP.
* [Memory] Implement modified DP search algorithm with memory cost. Missing base solution. WIP.
* [Memory] Complete all changes to the search procedure to support multi-objective search with global memory. A search procedure refactor is in the future plan.
* Add line to export clang compilation database, but not enable that.
* [Memory] Save some work
* [Memory] Allow different run time cost factor
* Update format
* Update format again
* Resolve compile error due to merge conflict
* Sync the changes again (flexflow#296)
* Save some more expressive logging
* Update format
* [Memory] Correct memory cost calculation
* Fix the build with CUDA_TOOLKIT_ROOT_DIR
* [Memory] Update calculation of memory cost
* Add logs folder to gitignore
* Improve dot graph representation
* [Dot] Update dot graph representation
* Move changes
* Quick fix to avoid bert segfault
* Grid search of lambda
* [WIP] Update
* [Interface] Add --memory-search argument
* [Memory] Update memory search
* [Interface] Save -ll:fsize info
* [WIP] Save per-device memory change
* Finalize per-device max memory threshold
* Update format
* Update comments to prepare for merging
* [WIP] Experiments to clear the caches
* Fixed a memory calculation bug
* Update minor issues
* Update based on review comments
* Remove unnecessary include
* Update based on review
* Update based on review
* Factor out lambda helper functions
* Fix a bug due to moving lambda function out
* Fix memory leak of the cached_simulator

---------

Co-authored-by: Gabriele Oliaro <[email protected]>
Co-authored-by: Colin Unger <[email protected]>
Co-authored-by: Zhihao Jia <[email protected]>
- Loading branch information
1 parent
c122eb2
commit 2c4d257
Showing
18 changed files
with
1,439 additions
and
62 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
/* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#ifndef _FLEXFLOW_MEMORY_OPTIMIZATION_H_ | ||
#define _FLEXFLOW_MEMORY_OPTIMIZATION_H_ | ||
|
||
#include <cassert>
#include <iosfwd>
#include <string>
|
||
namespace FlexFlow { | ||
|
||
/// Selects which quantity is used as the measure of memory usage of a PCG.
enum class MemoryUsageType {
  /// Use global memory of a PCG as the measure of memory usage. Device
  /// mapping is not taken into consideration.
  GLOBAL,

  /// Use the max of peak per-device memory usage among devices as the
  /// measure. Needs the associated device mapping views.
  PER_DEVICE_MAX,
};
|
||
/// Search algorithms available for the memory-aware search.
enum class MemorySearchAlgo {
  /// Multiple objective DP search: memory cost and run time cost are combined
  /// into one single cost function, with a factor added to balance them.
  MULTI_OBJECTIVE,
};
|
||
/** | ||
* @brief Config class to control memory optimizations. This should be put into | ||
* config.h and be stored in FFConfig. But for easy turnaround, put this here | ||
* for now. | ||
*/ | ||
class MemoryOptimConfig { | ||
public: | ||
MemoryUsageType mem_usage_type; ///< How to represent memory cost | ||
MemorySearchAlgo mem_search_algo; ///< How to search for the optimal schedule | ||
float run_time_cost_factor; ///< The weight factor of run time cost in the | ||
///< overall cost function; used in | ||
///< MULTI_OBJECTIVE algorithm | ||
///< Valid between and including 0 and 1 | ||
|
||
MemoryOptimConfig() | ||
: mem_usage_type{MemoryUsageType::GLOBAL}, | ||
mem_search_algo{MemorySearchAlgo::MULTI_OBJECTIVE}, | ||
run_time_cost_factor{0.5} {} | ||
MemoryOptimConfig(float factor) | ||
: mem_usage_type{MemoryUsageType::GLOBAL}, | ||
mem_search_algo{MemorySearchAlgo::MULTI_OBJECTIVE}, | ||
run_time_cost_factor{factor} {} | ||
}; | ||
|
||
/**
 * @brief Hold the result (including memory information) of a graph_optimize on
 * a PCG.
 *
 * All fields are zero until assigned.
 */
class MemorySearchResult {
public:
  float run_time_cost = 0.0f;
  float memory_cost = 0.0f;
  float search_time = 0.0f;
  /// The max of per-device memory usage among all devices.
  /// The "deivces" spelling is part of the public member name and is kept
  /// unchanged so existing users of this field keep compiling.
  float max_per_device_mem_all_deivces = 0.0f;
};
|
||
namespace PCG { | ||
|
||
/** | ||
* @brief Class to hold memory usage information of a (sub-)PCG. | ||
*/ | ||
class MemoryUsage { | ||
public: | ||
MemoryUsageType usage_type; ///< What "num" means | ||
float num; ///< The numerical number of memory usage | ||
|
||
MemoryUsage() : usage_type{MemoryUsageType::GLOBAL}, num{0.0} {} | ||
MemoryUsage(MemoryUsageType _usage_type, float _num) | ||
: usage_type{_usage_type}, num{_num} {} | ||
|
||
std::string to_string() const; | ||
|
||
MemoryUsage &operator+=(MemoryUsage const &rhs); | ||
|
||
/** | ||
* @brief Combine the memory usage of two PCGs flexibly based on | ||
* MemoryUsageType. | ||
*/ | ||
friend MemoryUsage operator+(MemoryUsage lhs, MemoryUsage const &rhs); | ||
|
||
friend std::ostream &operator<<(std::ostream &s, MemoryUsage const &usage); | ||
}; | ||
|
||
} // namespace PCG | ||
} // namespace FlexFlow | ||
|
||
#endif // _FLEXFLOW_MEMORY_OPTIMIZATION_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.