diff --git a/cmake/configure_options.cmake b/cmake/configure_options.cmake index 76d9caeb14..2d8315a1f4 100644 --- a/cmake/configure_options.cmake +++ b/cmake/configure_options.cmake @@ -99,7 +99,7 @@ define_option(vt_rdma_tests_enabled "RDMA tests" "Build VT with RDMA tests enabl ) define_option(vt_papi_enabled "PAPI task measurement" "Build VT with PAPI to measure low level metrics of interest" OFF vt_feature_cmake_papi) -define_option(vt_papi_enabled "perf task measurement" "Build VT with perf to measure low level metrics of interest" OFF vt_feature_cmake_perf) +define_option(vt_perf_enabled "perf task measurement" "Build VT with perf to measure low level metrics of interest" OFF vt_feature_cmake_perf) ##################################################### #################### DIAGNOSTICS #################### diff --git a/cmake/load_packages.cmake b/cmake/load_packages.cmake index ff8a2a8a16..a3744d784a 100644 --- a/cmake/load_packages.cmake +++ b/cmake/load_packages.cmake @@ -25,7 +25,7 @@ include(cmake/load_zoltan_package.cmake) include(cmake/load_papi.cmake) # If enabled, test if perf is available and works -include(cmake/load_papi.cmake) +include(cmake/load_perf.cmake) # Tests include(cmake/test_vt.cmake) diff --git a/cmake/load_perf.cmake b/cmake/load_perf.cmake index d23c396493..5dc194b69a 100644 --- a/cmake/load_perf.cmake +++ b/cmake/load_perf.cmake @@ -14,21 +14,22 @@ if (vt_perf_enabled) string(REPLACE "." 
";" VERSION_LIST ${CMAKE_SYSTEM_VERSION}) list(GET VERSION_LIST 0 KERNEL_MAJOR_VERSION) if (KERNEL_MAJOR_VERSION GREATER_EQUAL 4) - # check if a simple perf stat runs without issues - execute_process( - COMMAND "perf stat which cmake" - RESULT_VARIABLE PERF_STAT_RESULT - OUTPUT_QUIET - ERROR_QUIET - ) - if (PERF_STAT_RESULT EQUAL 0) + # # check if a simple perf stat runs without issues + # execute_process( + # COMMAND "perf stat pwd" + # RESULT_VARIABLE PERF_STAT_RESULT + # OUTPUT_QUIET + # ERROR_QUIET + # ) + # if (PERF_STAT_RESULT EQUAL 0) + message(STATUS "Perf measurements enabled.") set(vt_perf_enabled "1") set(vt_perf_found "1") - else () - message(WARNING "Perf measurements enabled but couldn't run perf stat successfully. Disabling perf measurements.") - set(vt_perf_enabled "0") - set(vt_perf_found "0") - endif () + # else () + # message(WARNING "Perf measurements enabled but couldn't run perf stat successfully. Disabling perf measurements.") + # set(vt_perf_enabled "0") + # set(vt_perf_found "0") + # endif () else () message(WARNING "Perf measurements enabled but Kernel major version is less than 4. 
Disabling perf measurements.") set(vt_perf_enabled "0") diff --git a/examples/collection/CMakeLists.txt b/examples/collection/CMakeLists.txt index a8b54b84a1..e0d7122a47 100644 --- a/examples/collection/CMakeLists.txt +++ b/examples/collection/CMakeLists.txt @@ -11,8 +11,11 @@ set( reduce_integral transpose ) -if ((vt_papi_enabled AND vt_papi_found) OR (vt_perf_enabled AND vt_perf_found)) - list(APPEND COLLECTION_EXAMPLES do_flops) +if ((vt_papi_enabled AND vt_papi_found)) + list(APPEND COLLECTION_EXAMPLES do_flops_papi) +endif() +if ((vt_perf_enabled AND vt_perf_found)) + list(APPEND COLLECTION_EXAMPLES do_flops_perf) endif() foreach(EXAMPLE_NAME ${COLLECTION_EXAMPLES}) diff --git a/examples/collection/do_flops.cc b/examples/collection/do_flops_papi.cc similarity index 99% rename from examples/collection/do_flops.cc rename to examples/collection/do_flops_papi.cc index 4565aad21d..8748002386 100644 --- a/examples/collection/do_flops.cc +++ b/examples/collection/do_flops_papi.cc @@ -2,7 +2,7 @@ //@HEADER // ***************************************************************************** // -// jacobi2d_vt.cc +// do_flops_papi.cc // DARMA/vt => Virtual Transport // // Copyright 2019-2021 National Technology & Engineering Solutions of Sandia, LLC diff --git a/examples/collection/do_flops_perf.cc b/examples/collection/do_flops_perf.cc new file mode 100644 index 0000000000..12b9959a74 --- /dev/null +++ b/examples/collection/do_flops_perf.cc @@ -0,0 +1,315 @@ +/* +//@HEADER +// ***************************************************************************** +// +// do_flops_perf.cc +// DARMA/vt => Virtual Transport +// +// Copyright 2019-2021 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#include +#include + +#include +#include +#include + +/// [Do Flops example] + +#include +#include + +#include +#include +#include + +static constexpr std::size_t const default_nrow_object = 8; +static constexpr std::size_t const default_num_objs = 1; +static constexpr double const default_tol = 1.0e-02; +static constexpr std::size_t const default_flops_per_iter = 100000; + +volatile double a = 0.5, b = 2.2; + +void +dummy( void *array ) +{ +/* Confuse the compiler so as not to optimize + away the flops in the calling routine */ +/* Cast the array as a void to eliminate unused argument warning */ + ( void ) array; +} + +void +do_flops( int n ) +{ + int i; + double c = 0.11; + + for ( i = 0; i < n; i++ ) { + c += a * b; + } + dummy( ( void * ) &c ); +} + +double pi(uint64_t n) { + double sum = 0.0; + int sign = 1; + for (uint64_t i = 0; i < n; ++i) { + sum += sign/(2.0*i+1.0); + sign *= -1; + } + return 4.0*sum; +} + +struct NodeObj { + bool is_finished_ = false; + void workFinishedHandler() { is_finished_ = true; } + bool isWorkFinished() { return is_finished_; } +}; +using NodeObjProxy = vt::objgroup::proxy::Proxy; + +struct GenericWork : vt::Collection { + +private: + size_t iter_ = 0; + size_t msgReceived_ = 0, totalReceive_ = 0; + size_t numObjs_ = 1; + size_t flopsPerIter_ = default_flops_per_iter; + size_t maxIter_ = 8; + NodeObjProxy objProxy_; + +public: + explicit GenericWork() : + iter_(0), msgReceived_(0), totalReceive_(0), + numObjs_(1), flopsPerIter_(default_flops_per_iter), maxIter_(8) + { } + + using BlankMsg = vt::CollectionMessage; + + struct WorkMsg : vt::CollectionMessage { + size_t numObjects = 0; + size_t flopsPerIter = 0; + size_t iterMax = 0; + NodeObjProxy objProxy; + + WorkMsg() = default; + + WorkMsg(const size_t nobjs, const size_t flops, const size_t itMax, NodeObjProxy proxy) : + numObjects(nobjs), 
flopsPerIter(flops), iterMax(itMax), objProxy(proxy) + { } + }; + + void checkCompleteCB() { + auto const iter_max_reached = iter_ > maxIter_; + + if (iter_max_reached) { + fmt::print("\n Maximum Number of Iterations Reached. \n\n"); + objProxy_.broadcast<&NodeObj::workFinishedHandler>(); + } else { + fmt::print(" ## ITER {} completed. \n", iter_); + } + } + + void doIteration() { + iter_ += 1; + fmt::print("-- Starting Iteration --\n"); + + vt::theContext()->getTask()->startPerfMeasurements(); + + // ---------------------------------------------------------- + // test non packed double precision floating point operations + // should result in ~4*n of these operations + + double p; + p = pi(10000000); + fmt::print("pi: {}\n", p); + // ---------------------------------------------------------- + + auto proxy = this->getCollectionProxy(); + proxy.reduce<&GenericWork::checkCompleteCB, vt::collective::MaxOp>( + proxy[0] + ); + + vt::theContext()->getTask()->stopPerfMeasurements(); + std::unordered_map res = vt::theContext()->getTask()->getPerfMeasurements(); + for (auto [name, value] : res) { + fmt::print(" {}: {}\n", name, value); + } + + fmt::print("-- Stopping Iteration --\n"); + } + + struct VecMsg : vt::CollectionMessage { + using MessageParentType = vt::CollectionMessage; + vt_msg_serialize_if_needed_by_parent_or_type1(vt::IdxBase); + + VecMsg() = default; + + VecMsg(vt::IdxBase const& in_index) : + vt::CollectionMessage(), + from_index(in_index) + { } + + template + void serialize(Serializer& s) { + MessageParentType::serialize(s); + s | from_index; + } + + vt::IdxBase from_index = 0; + }; + + void exchange(VecMsg *msg) { + msgReceived_ += 1; + + if (msgReceived_ == totalReceive_) { + msgReceived_ = 0; + doIteration(); + } + } + + void doIter([[maybe_unused]] BlankMsg *msg) { + if (numObjs_ == 1) { + doIteration(); + return; + } + + vt::IdxBase const myIdx = getIndex().x(); + auto proxy = this->getCollectionProxy(); + + + if (myIdx > 0) { + proxy[myIdx - 
1].send( + myIdx + ); + } + + if (size_t(myIdx) < numObjs_ - 1) { + proxy[myIdx + 1].send( + myIdx + ); + } + } + + void init() { + totalReceive_ = 2; + + if (getIndex().x() == 0) { + totalReceive_ -= 1; + } + + if (getIndex().x() == numObjs_ - 1) { + totalReceive_ -= 1; + } + } + + void init(WorkMsg* msg) { + numObjs_ = msg->numObjects; + flopsPerIter_ = msg->flopsPerIter; + maxIter_ = msg->iterMax; + objProxy_ = msg->objProxy; + + init(); + } +}; + +bool isWorkDone(vt::objgroup::proxy::Proxy const& proxy) { + auto const this_node = vt::theContext()->getNode(); + return proxy[this_node].invoke<&NodeObj::isWorkFinished>(); +} + +int main(int argc, char** argv) { + size_t num_objs = default_num_objs; + size_t flopsPerIter = default_flops_per_iter; + size_t maxIter = 8; + + std::string name(argv[0]); + + vt::initialize(argc, argv); + + vt::NodeType this_node = vt::theContext()->getNode(); + vt::NodeType num_nodes = vt::theContext()->getNumNodes(); + + if (argc == 1) { + if (this_node == 0) { + fmt::print(stderr, "{}: using default arguments since none provided\n", name); + } + num_objs = default_num_objs * num_nodes; + } else if (argc == 2) { + num_objs = static_cast(strtol(argv[1], nullptr, 10)); + } else if (argc == 3) { + num_objs = static_cast(strtol(argv[1], nullptr, 10)); + flopsPerIter = static_cast(strtol(argv[2], nullptr, 10)); + } else if (argc == 4) { + num_objs = static_cast(strtol(argv[1], nullptr, 10)); + flopsPerIter = static_cast(strtol(argv[2], nullptr, 10)); + maxIter = static_cast(strtol(argv[3], nullptr, 10)); + } else { + fmt::print(stderr, "usage: {} \n", name); + return 1; + } + + auto grp_proxy = vt::theObjGroup()->makeCollective("examples_generic_work"); + using BaseIndexType = typename vt::Index1D::DenseIndexType; + auto range = vt::Index1D(static_cast(num_objs)); + + auto col_proxy = vt::makeCollection("examples_generic_work") + .bounds(range) + .bulkInsert() + .wait(); + + vt::runInEpochCollective([col_proxy, grp_proxy, num_objs, 
flopsPerIter, maxIter]{ + col_proxy.broadcastCollective( + num_objs, flopsPerIter, maxIter, grp_proxy + ); + }); + + while(!isWorkDone(grp_proxy)) { + vt::runInEpochCollective([col_proxy]{ + col_proxy.broadcastCollective< + GenericWork::BlankMsg, &GenericWork::doIter + >(); + }); + + vt::thePhase()->nextPhaseCollective(); + } + + vt::finalize(); + + return 0; +} +/// [Do Flops example] diff --git a/examples/collection/manzano.h b/examples/collection/manzano.h new file mode 100644 index 0000000000..2994a551c2 --- /dev/null +++ b/examples/collection/manzano.h @@ -0,0 +1,21 @@ +#include +#include +#include + +std::unordered_map> manzano_event_map = { + {"cycles", std::make_pair(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES)}, + {"instructions", std::make_pair(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS)}, + {"cache_references", std::make_pair(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES)}, + {"cache_misses", std::make_pair(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES)}, + {"branch_instructions", std::make_pair(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS)}, + {"branch_misses", std::make_pair(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES)}, + {"fp_arith_inst_retired_scalar_double", std::make_pair(PERF_TYPE_RAW, 0x5301c7)}, + {"fp_arith_inst_retired_scalar_single", std::make_pair(PERF_TYPE_RAW, 0x5302c7)}, + {"fp_arith_inst_retired_128b_packed_double", std::make_pair(PERF_TYPE_RAW, 0x5304c7)}, + {"fp_arith_inst_retired_128b_packed_single", std::make_pair(PERF_TYPE_RAW, 0x5308c7)}, + {"fp_arith_inst_retired_256b_packed_double", std::make_pair(PERF_TYPE_RAW, 0x5310c7)}, + {"fp_arith_inst_retired_256b_packed_single", std::make_pair(PERF_TYPE_RAW, 0x5320c7)}, + {"fp_arith_inst_retired_512b_packed_double", std::make_pair(PERF_TYPE_RAW, 0x5340c7)}, + {"fp_arith_inst_retired_512b_packed_single", std::make_pair(PERF_TYPE_RAW, 0x5380c7)} +}; + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 380c8d4a4b..a9695c2d56 100644 --- a/src/CMakeLists.txt 
+++ b/src/CMakeLists.txt @@ -20,7 +20,7 @@ set(TOP_LEVEL_SUBDIRS activefn # Add single-directory components context event handler sequence termination - scheduler standalone runtime trace timing demangle rdmahandle + scheduler standalone runtime trace timing demangle rdmahandle metrics ) set( PROJECT_SUBDIRS_LIST diff --git a/src/vt/context/runnable_context/lb_data.h b/src/vt/context/runnable_context/lb_data.h index 8c26d6756b..0a5c860f91 100644 --- a/src/vt/context/runnable_context/lb_data.h +++ b/src/vt/context/runnable_context/lb_data.h @@ -51,6 +51,10 @@ #include "vt/context/runnable_context/papi_data.h" #endif +#if vt_check_enabled(perf) +#include "vt/context/runnable_context/perf_data.h" +#endif + namespace vt { namespace ctx { /** @@ -86,6 +90,9 @@ struct LBData { { #if vt_check_enabled(papi) papiData_ = std::make_unique(); +#endif +#if vt_check_enabled(perf) + perfData_ = std::make_unique(); #endif } @@ -128,9 +135,7 @@ struct LBData { * \brief Start PAPI metrics map for the running context */ void startPAPIMetrics() { papiData_->start(); } -#endif -#if vt_check_enabled(papi) /** * \brief Stop PAPI metrics map for the running context * @@ -138,9 +143,7 @@ struct LBData { * */ void stopPAPIMetrics() { papiData_->stop(); } -#endif -#if vt_check_enabled(papi) /** * \brief Get the current PAPI metrics map for the running context * @@ -149,6 +152,28 @@ struct LBData { std::unordered_map getPAPIMetrics(); #endif +#if vt_check_enabled(perf) + /** + * \brief Start perf measurements for the running context (forwards to PerfData::start) + */ + void startPerfMeasurements() { perfData_->start(); } + + /** + * \brief Stop perf measurements for the running context (forwards to PerfData::stop) + * + * \note has to be called after startPerfMeasurements + * + */ + void stopPerfMeasurements() { perfData_->stop(); } + + /** + * \brief Get the current perf measurements for the running context + * + * \return the map returned by PerfData::getMeasurements + */ + std::unordered_map getPerfMeasurements() { return perfData_->getMeasurements(); } +#endif + private: 
ElementLBData* lb_data_ = nullptr; /**< Element LB data */ ElementIDStruct cur_elm_id_ = {}; /**< Current element ID */ @@ -156,6 +181,9 @@ struct LBData { #if vt_check_enabled(papi) std::unique_ptr papiData_; #endif +#if vt_check_enabled(perf) + std::unique_ptr perfData_; +#endif }; }} /* end namespace vt::ctx */ diff --git a/src/vt/context/runnable_context/lb_data.impl.h b/src/vt/context/runnable_context/lb_data.impl.h index c9f075a129..860d6ed199 100644 --- a/src/vt/context/runnable_context/lb_data.impl.h +++ b/src/vt/context/runnable_context/lb_data.impl.h @@ -64,6 +64,9 @@ LBData::LBData(ElmT* in_elm, MsgT* msg) { #if vt_check_enabled(papi) papiData_ = std::make_unique(); +#endif +#if vt_check_enabled(perf) + perfData_ = std::make_unique(); #endif // record the communication LB data right away! theCollection()->recordLBData(in_elm, msg); diff --git a/src/vt/context/runnable_context/perf_data.h b/src/vt/context/runnable_context/perf_data.h new file mode 100644 index 0000000000..4ca87c9346 --- /dev/null +++ b/src/vt/context/runnable_context/perf_data.h @@ -0,0 +1,176 @@ +/* +//@HEADER +// ***************************************************************************** +// +// perf_data.h +// DARMA/vt => Virtual Transport +// +// Copyright 2019-2021 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#if !defined INCLUDED_VT_CONTEXT_RUNNABLE_CONTEXT_LB_DATA_PERF_DATA_H +#define INCLUDED_VT_CONTEXT_RUNNABLE_CONTEXT_LB_DATA_PERF_DATA_H + +#include +#include +#include +#include +#include +#include "vt/metrics/perf_event_map.h" + +namespace vt { namespace ctx { + +/** + * \struct PerfData + * + * \brief Opens one Linux perf counter per event named in the VT_EVENTS environment variable (comma-separated; defaults to "instructions") and exposes start/stop/read access to them + */ +struct PerfData +{ +public: + PerfData() + { + std::unordered_map> global_map = vt::thePerfEventMap()->getEventMap(); + // std::unordered_map> global_map = { + // {"instructions", std::make_pair(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS)} + // }; + if (global_map.empty()) + { + // vtAbort("the PerfEventMap event map is empty; initialize vt::thePerfEventMap->getEventMap(...)"); + fmt::print("the PerfEventMap event map is empty; initialize vt::thePerfEventMap->getEventMap(...)\n"); + } else { + const char* env_p = getenv("VT_EVENTS"); + + // check if the environment variable is set + if (env_p == nullptr) { + vtWarn("Warning: Environment variable VT_EVENTS not set, defaulting to instructions for the perf event set.\n"); + event_names_.push_back("instructions"); + } + else { + std::string env_str(env_p); + + std::stringstream ss(env_str); + std::string item; + + while (std::getline(ss, item, ',')) + { + event_names_.push_back(item); + } + } + + for (const auto &event_name : event_names_) + { + if (global_map.find(event_name) == global_map.end()) + { + vtAbort("Event name isn't in known perf events map: " + event_name); + } + struct perf_event_attr pe; + memset(&pe, 0, sizeof(struct perf_event_attr)); + pe.type = global_map.at(event_name).first; + pe.size = sizeof(struct perf_event_attr); + pe.config = global_map.at(event_name).second; + + pe.disabled = 1; + pe.exclude_kernel = 1; + pe.exclude_hv = 1; + pe.inherit = 1; // Ensure event is inherited by 
threads + + if (event_name == "instructions") { // or event_name == "FP_ARITH_INST_RETIRED_SCALAR_DOUBLE") { + pe.pinned = 1; + } + // pe.sample_period = 1000000; + + int fd = perf_event_open(&pe, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); + if (fd == -1) + { + vtAbort("Error opening perf event: " + std::string(strerror(errno))); + } + fds.push_back(fd); + // event_names_[i] already pairs with fds[i]; appending to event_names_ here would invalidate the iterators of this loop + } + + } + } + + ~PerfData() + { + for (int fd : fds) + { + if (fd != -1) + { + close(fd); + } + } + } + + void start() + { + for (int fd : fds) + { + ioctl(fd, PERF_EVENT_IOC_RESET, 0); + ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); + } + } + + void stop() + { + for (int fd : fds) + { + ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); + } + } + + std::unordered_map getMeasurements() + { + std::unordered_map measurements; + for (size_t i = 0; i < fds.size(); ++i) + { + uint64_t count = 0; + if (read(fds[i], &count, sizeof(uint64_t)) != static_cast<ssize_t>(sizeof(uint64_t))) { count = 0; } // report 0 instead of an indeterminate value on a failed or short read + measurements[event_names_[i]] = count; + } + return measurements; + } + +private: + std::vector fds = {}; + std::vector event_names_ = {}; + static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); } +}; + +}} /* end namespace vt::ctx */ + +#endif /*INCLUDED_VT_CONTEXT_RUNNABLE_CONTEXT_LB_DATA_PERF_DATA_H*/ diff --git a/src/vt/metrics/example_events.h b/src/vt/metrics/example_events.h new file mode 100644 index 0000000000..879932c5ec --- /dev/null +++ b/src/vt/metrics/example_events.h @@ -0,0 +1,12 @@ +#include +#include +#include + +const std::unordered_map> example_event_map = { + {"cycles", std::make_pair(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES)}, + {"instructions", std::make_pair(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS)}, + {"cache_references", std::make_pair(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES)}, + {"cache_misses", std::make_pair(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES)}, + {"branch_instructions", 
std::make_pair(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS)}, + {"branch_misses", std::make_pair(PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES)} +}; diff --git a/src/vt/metrics/perf_event_map.h b/src/vt/metrics/perf_event_map.h new file mode 100644 index 0000000000..7e9dfa178d --- /dev/null +++ b/src/vt/metrics/perf_event_map.h @@ -0,0 +1,91 @@ +/* +//@HEADER +// ***************************************************************************** +// +// perf_event_map.h +// DARMA/vt => Virtual Transport +// +// Copyright 2019-2021 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#if !defined INCLUDED_VT_METRICS_PERF_MAP_H +#define INCLUDED_VT_METRICS_PERF_MAP_H + +#include "vt/config.h" +#include "vt/runtime/component/component_pack.h" +#include "example_events.h" + +namespace vt { namespace metrics { + +/** \file */ + +/** + * \struct PerfEventMap perf_event_map.h vt/metrics/perf_event_map.h + * + * \brief Used to obtain the association between string names of metrics and their corresponding perf event type and identifier + * + */ +struct PerfEventMap : runtime::component::Component { + /** + * \brief Gets the map of event names to their corresponding perf variables + * used. 
+ * + * \return a copy of the map from event name to its perf event type and identifier + */ + std::unordered_map> getEventMap() const { return event_map_; } + + void startup() override { event_map_ = example_event_map; } + + std::string name() override { return "PerfEventMap"; } + + template + void serialize(SerializerT& s) { + s | event_map_; + } + +private: + std::unordered_map> event_map_ = {}; +}; + +}} // end namespace vt::metrics + +namespace vt { + +extern metrics::PerfEventMap* thePerfEventMap(); + +} // end namespace vt + +#endif /*INCLUDED_VT_METRICS_PERF_MAP_H*/ diff --git a/src/vt/runnable/runnable.h b/src/vt/runnable/runnable.h index 50b0e5ff9b..d7cf02254f 100644 --- a/src/vt/runnable/runnable.h +++ b/src/vt/runnable/runnable.h @@ -343,6 +343,28 @@ struct RunnableNew { std::unordered_map getPAPIMetrics(); #endif +#if vt_check_enabled(perf) + /** + * \brief Start perf measurements for the running context (forwards to the LB context) + */ + void startPerfMeasurements() { contexts_.lb.startPerfMeasurements(); } + + /** + * \brief Stop perf measurements for the running context (forwards to the LB context) + * + * \note has to be called after startPerfMeasurements + * + */ + void stopPerfMeasurements() { contexts_.lb.stopPerfMeasurements(); } + + /** + * \brief Get the current perf measurements for the running context + * + * \return the map returned by the LB context's getPerfMeasurements + */ + std::unordered_map getPerfMeasurements() { return contexts_.lb.getPerfMeasurements(); } +#endif + #if vt_check_enabled(fcontext) /** * \brief Check if this runnable is complete or not diff --git a/src/vt/runtime/runtime.cc b/src/vt/runtime/runtime.cc index d87182cd37..629e0d127c 100644 --- a/src/vt/runtime/runtime.cc +++ b/src/vt/runtime/runtime.cc @@ -67,6 +67,7 @@ #include "vt/timetrigger/time_trigger_manager.h" #include "vt/phase/phase_manager.h" #include "vt/epoch/epoch_manip.h" +#include "vt/metrics/perf_event_map.h" #include "vt/configs/arguments/app_config.h" #include "vt/configs/arguments/args.h" @@ -912,6 +913,14 @@ void Runtime::initializeComponents() { >{} ); +# if vt_check_enabled(perf) + 
p_->registerComponent( + &thePerfEventMap, Deps< + ctx::Context // Everything depends on theContext + >{} + ); +# endif + p_->add(); p_->add(); p_->add(); @@ -941,6 +950,9 @@ void Runtime::initializeComponents() { p_->add(); p_->add(); p_->add(); +# if vt_check_enabled(perf) + p_->add(); +# endif if (addLBDataRestartReader) { p_->add(); diff --git a/src/vt/runtime/runtime.h b/src/vt/runtime/runtime.h index 00c6fed4e8..719550ce3d 100644 --- a/src/vt/runtime/runtime.h +++ b/src/vt/runtime/runtime.h @@ -55,6 +55,9 @@ #include "vt/trace/trace.h" #endif #include "vt/pmpi/pmpi_component.h" +#if vt_check_enabled(perf) +#include "vt/metrics/perf_event_map.h" +#endif #include #include @@ -432,6 +435,9 @@ struct Runtime { #if vt_check_enabled(mpi_access_guards) ComponentPtrType thePMPI = nullptr; #endif + #if vt_check_enabled(perf) + ComponentPtrType thePerfEventMap = nullptr; + #endif static bool volatile sig_user_1_; diff --git a/src/vt/runtime/runtime_get.cc b/src/vt/runtime/runtime_get.cc index 40610b387f..321379ea04 100644 --- a/src/vt/runtime/runtime_get.cc +++ b/src/vt/runtime/runtime_get.cc @@ -64,6 +64,7 @@ #include "vt/timetrigger/time_trigger_manager.h" #include "vt/phase/phase_manager.h" #include "vt/epoch/epoch_manip.h" +#include "vt/metrics/perf_event_map.h" #include @@ -123,6 +124,9 @@ trace::Trace* theTrace() { return curRT->theTrace; #if vt_check_enabled(mpi_access_guards) pmpi::PMPIComponent* thePMPI() { return curRT->thePMPI; } #endif +#if vt_check_enabled(perf) +metrics::PerfEventMap* thePerfEventMap() { return curRT->thePerfEventMap; } +#endif } /* end namespace vt */