Skip to content

Commit

Permalink
Create Utils for Adding Range and Marker (microsoft#4013)
Browse files Browse the repository at this point in the history
In this PR, we
  1. create some APIs for creating NVTX objects
  2. apply those APIs in pipeline-related operators and sequential executor.
As a result, we can explicitly see how a pipeline schedule is run by GPUs in 
Nvidia's visual profiler. Note that these APIs are Linux only due to Nvidia's
limited support.
  • Loading branch information
wschin authored May 25, 2020
1 parent aafe988 commit 24eda3d
Show file tree
Hide file tree
Showing 14 changed files with 461 additions and 9 deletions.
13 changes: 12 additions & 1 deletion cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,15 @@ option(onnxruntime_USE_TELEMETRY "Build with Telemetry" OFF)
option(onnxruntime_PREFER_SYSTEM_LIB "Experimental: Build with the preinstalled libraries in your system" OFF)

# training options
option(onnxruntime_ENABLE_NVTX_PROFILE "Enable NVTX profile." OFF)
option(onnxruntime_ENABLE_TRAINING "Enable training functionality." OFF)
option(onnxruntime_ENABLE_TRAINING_E2E_TESTS "Enable training end-to-end tests." OFF)
option(onnxruntime_USE_HOROVOD "Build with HOROVOD support" OFF)

# When the NVTX option is ON, expose it to the C++ sources as the
# ENABLE_NVTX_PROFILE preprocessor macro (checked with #ifdef in the sources).
if (onnxruntime_ENABLE_NVTX_PROFILE)
add_definitions(-DENABLE_NVTX_PROFILE=1)
endif()

set(protobuf_BUILD_TESTS OFF CACHE BOOL "Build protobuf tests" FORCE)
#nsync tests failed on Mac Build
set(NSYNC_ENABLE_TESTS OFF CACHE BOOL "Build protobuf tests" FORCE)
Expand Down Expand Up @@ -804,7 +809,13 @@ if (onnxruntime_USE_CUDA)
set(CMAKE_CUDA_STANDARD 11)
file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME)
set(ONNXRUNTIME_CUDA_LIBRARIES ${CUDA_LIBRARIES})
list(APPEND ONNXRUNTIME_CUDA_LIBRARIES cublas cudnn curand cufft)

# cublas, cudnn, curand and cufft are always appended; nvToolsExt is appended
# after them only when NVTX profiling is enabled.
list(APPEND ONNXRUNTIME_CUDA_LIBRARIES cublas cudnn curand cufft)
if (onnxruntime_ENABLE_NVTX_PROFILE)
  list(APPEND ONNXRUNTIME_CUDA_LIBRARIES nvToolsExt)
endif()

if (WIN32)
link_directories(${onnxruntime_CUDNN_HOME}/lib/x64)

Expand Down
5 changes: 5 additions & 0 deletions cmake/onnxruntime_util.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
file(GLOB_RECURSE onnxruntime_util_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/util/*.h"
"${ONNXRUNTIME_ROOT}/core/util/*.cc"
"${ONNXRUNTIME_ROOT}/core/profile/*.h"
"${ONNXRUNTIME_ROOT}/core/profile/*.cc"
)

source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_util_srcs})
Expand All @@ -16,6 +18,9 @@ if (MSVC AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
target_compile_options(onnxruntime_util PRIVATE "/wd4244")
endif()
target_include_directories(onnxruntime_util PRIVATE ${ONNXRUNTIME_ROOT} ${MKLML_INCLUDE_DIR} ${gemmlowp_src} PUBLIC ${eigen_INCLUDE_DIRS})
if (onnxruntime_USE_CUDA)
target_include_directories(onnxruntime_util PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
endif()
onnxruntime_add_include_to_target(onnxruntime_util onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf)
if(UNIX)
target_compile_options(onnxruntime_util PUBLIC "-Wno-error=comment")
Expand Down
46 changes: 46 additions & 0 deletions onnxruntime/core/framework/sequential_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@
#include "core/framework/utils.h"
#endif

#ifdef ENABLE_NVTX_PROFILE
// These headers are for profiling with Nvidia's visual profiler.
#include "core/profile/profile.h"
#include "core/profile/context.h"
#endif

// #define TRACE_EXECUTION

// Define this symbol to create Concurrency Visualizer markers.
Expand Down Expand Up @@ -165,6 +171,17 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
diagnostic::marker_series series(series_name);
#endif

#ifdef ENABLE_NVTX_PROFILE
auto& profile_context = profile::Context::GetInstance();
const auto tag = profile_context.GetThreadTagOrDefault(std::this_thread::get_id());
profile::NvtxRangeCreator forward_range(
"forward-" + tag,
profile::Color::White);
profile::NvtxRangeCreator backward_range(
"backward-" + tag,
profile::Color::Black);
#endif

for (const auto& node_exec_plan : exec_plan_vec) {
if (terminate_flag_) {
LOGS(logger, WARNING) << "Exiting due to terminate flag being set to true.";
Expand All @@ -184,6 +201,18 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
series.write_flag(node.Name().c_str());
#endif

#ifdef ENABLE_NVTX_PROFILE
if (node.Description() != "Backward pass" && !forward_range.IsBeginCalled()) {
// Start timing forward pass when encountering the first forward node.
forward_range.Begin();
} else if (node.Description() == "Backward pass" && !backward_range.IsBeginCalled()) {
// Start timing backward pass when encountering the first backward node.
// In the meanwhile, forward range ends.
forward_range.End();
backward_range.Begin();
}
#endif

auto p_op_kernel = session_state.GetKernel(node_index);

// if a kernel has been added in the session state, it better be NON-null.
Expand Down Expand Up @@ -372,6 +401,23 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
ORT_RETURN_IF_ERROR(ReleaseNodeMLValues(frame, seq_exec_plan, node_exec_plan, logger));
}

#ifdef ENABLE_NVTX_PROFILE
// Make sure both Begin and End have been called on the forward range object.
if (!forward_range.IsBeginCalled()) {
forward_range.Begin();
}
if (!forward_range.IsEndCalled()) {
forward_range.End();
}
// Make sure both Begin and End have been called on the backward range object.
if (!backward_range.IsBeginCalled()) {
backward_range.Begin();
}
if (!backward_range.IsEndCalled()) {
backward_range.End();
}
#endif

VLOGS(logger, 1) << "Fetching output.";
// ExecutionFrame::Finalize will update 'fetches' with the final output
ORT_RETURN_IF_ERROR(frame.GetOutputs(fetches));
Expand Down
52 changes: 52 additions & 0 deletions onnxruntime/core/profile/context.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <thread>
#include <mutex>
#include <string>
#include <unordered_map>

#ifdef ENABLE_NVTX_PROFILE

namespace onnxruntime {
namespace profile {

// Singleton that manages global NVTX profiling information.
// It stores a map from thread id to a human-readable tag; all accesses to the
// map are serialized by an internal mutex.
class Context {
 public:
  // Return the single instance, created on first use.
  static Context& GetInstance() {
    static Context instance_;
    return instance_;
  }

  // Return the tag for the specified thread.
  // If the thread has no tag, an empty string is returned and the map is left
  // untouched (a lookup must not insert an entry for every unknown thread).
  std::string GetThreadTagOrDefault(const std::thread::id& thread_id) {
    const std::lock_guard<std::mutex> lock(mtx_);
    const auto it = thread_tag_.find(thread_id);
    if (it == thread_tag_.end()) {
      return std::string();
    }
    return it->second;
  }

  // Set (or overwrite) the tag for the specified thread.
  void SetThreadTag(
      const std::thread::id& thread_id, const std::string& tag) {
    const std::lock_guard<std::mutex> lock(mtx_);
    thread_tag_[thread_id] = tag;
  }

 private:
  // Construction, destruction and copying are restricted so that the only
  // instance is the one owned by GetInstance().
  Context() = default;
  ~Context() = default;
  Context(const Context&) = delete;
  Context& operator=(const Context&) = delete;

  // Map from a thread's id to its human-readable tag.
  std::unordered_map<std::thread::id, std::string> thread_tag_;
  // Guards thread_tag_.
  std::mutex mtx_;
};

} // namespace profile
} // namespace onnxruntime

#endif
65 changes: 65 additions & 0 deletions onnxruntime/core/profile/profile.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#ifdef ENABLE_NVTX_PROFILE
#include "core/profile/profile.h"
#include "core/common/common.h"
#include <nvToolsExt.h>
#include <nvToolsExtCuda.h>

namespace onnxruntime {
namespace profile {

// Starts an NVTX range labelled with message_ and colored with color_.
// The id returned by NVTX is stored in range_id_ so EndImpl can end this range.
// This file is compiled only when ENABLE_NVTX_PROFILE is defined.
void NvtxRangeCreator::BeginImpl() {
  // Value-initialize the struct so that every field not assigned below is
  // zero instead of indeterminate stack data.
  nvtxEventAttributes_t eventAttrib{};
  eventAttrib.version = NVTX_VERSION;
  eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
  eventAttrib.colorType = NVTX_COLOR_ARGB;
  eventAttrib.color = static_cast<uint32_t>(color_);
  eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
  eventAttrib.message.ascii = message_.c_str();

  range_id_ = nvtxRangeStartEx(&eventAttrib);
}

// Ends the NVTX range whose id was stored in range_id_ by BeginImpl.
void NvtxRangeCreator::EndImpl() {
// This code is compiled only when ENABLE_NVTX_PROFILE is defined.
nvtxRangeEnd(range_id_);
}

// Pushes a nested NVTX range labelled with message_ and colored with color_.
// The matching pop is issued by EndImpl.
// This file is compiled only when ENABLE_NVTX_PROFILE is defined.
void NvtxNestedRangeCreator::BeginImpl() {
  // Value-initialize the struct so that every field not assigned below is
  // zero instead of indeterminate stack data.
  nvtxEventAttributes_t eventAttrib{};
  eventAttrib.version = NVTX_VERSION;
  eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
  eventAttrib.colorType = NVTX_COLOR_ARGB;
  eventAttrib.color = static_cast<uint32_t>(color_);
  eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
  eventAttrib.message.ascii = message_.c_str();

  nvtxRangePushEx(&eventAttrib);
}

// Closes the nested range; pairs with the nvtxRangePushEx call in BeginImpl.
void NvtxNestedRangeCreator::EndImpl() {
// This code is compiled only when ENABLE_NVTX_PROFILE is defined.
nvtxRangePop();
}

// Emits one NVTX marker labelled with message_ and colored with color_.
// This file is compiled only when ENABLE_NVTX_PROFILE is defined.
void NvtxMarkerCreator::Mark() {
  // Value-initialize the struct so that every field not assigned below is
  // zero instead of indeterminate stack data.
  nvtxEventAttributes_t eventAttrib{};
  eventAttrib.version = NVTX_VERSION;
  eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
  eventAttrib.colorType = NVTX_COLOR_ARGB;
  eventAttrib.color = static_cast<uint32_t>(color_);
  eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
  eventAttrib.message.ascii = message_.c_str();

  nvtxMarkEx(&eventAttrib);
}

} // namespace profile
} // namespace onnxruntime

#endif
140 changes: 140 additions & 0 deletions onnxruntime/core/profile/profile.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

// The classes in this file are thin wrappers around Nvidia's NVTX APIs,
// whose events can be viewed in Nvidia's visual profiler.
// They can be used to plot the time intervals of forward and backward passes.
// They can also be used to plot the time span of a specific operator.
// At the time of writing, Nvidia only supports this tool on Linux.
#ifdef ENABLE_NVTX_PROFILE

#pragma once

#include <cinttypes>
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>

#include "core/common/common.h"

namespace onnxruntime {
namespace profile {

// Predefined event colors, encoded as 32-bit ARGB values (0xAARRGGBB):
//   bits 31..24: alpha, bits 23..16: red, bits 15..8: green, bits 7..0: blue.
// Every channel has range [0, 255]. All colors below use alpha 0x00.
enum class Color : uint32_t {
  Black      = 0x00000000,  // R=0x00 G=0x00 B=0x00
  Red        = 0x00FF0000,  // R=0xFF G=0x00 B=0x00
  DarkGreen  = 0x00009900,  // R=0x00 G=0x99 B=0x00
  Green      = 0x0000FF00,  // R=0x00 G=0xFF B=0x00
  LightGreen = 0x00CCFFCC,  // R=0xCC G=0xFF B=0xCC
  Blue       = 0x000000FF,  // R=0x00 G=0x00 B=0xFF
  Amber      = 0x00FFBF00,  // R=0xFF G=0xBF B=0x00
  LightAmber = 0x00FFF2CC,  // R=0xFF G=0xF2 B=0xCC
  White      = 0x00FFFFFF,  // R=0xFF G=0xFF B=0xFF
  Cyan       = 0x0000FFFF,  // R=0x00 G=0xFF B=0xFF
  Magenta    = 0x00FF00FF   // R=0xFF G=0x00 B=0xFF
};

// Base class of the range wrappers.
// It enforces the calling protocol (Begin exactly once, then End exactly once)
// and delegates the actual work to BeginImpl/EndImpl of the derived class.
class RangeCreatorBase {
 public:
  // message: text shown on the event.
  // color: color of the event in ARGB space.
  RangeCreatorBase(const std::string& message, const Color color)
      : message_(message), color_(color),
        is_begin_called_(false), is_end_called_(false) {}

  // Virtual because this class is a polymorphic base; destroying a derived
  // object through a RangeCreatorBase pointer must be well-defined.
  //
  // Warn (on stderr) if Begin and End were not both called.
  // It's pointless if not all of them are called.
  virtual ~RangeCreatorBase() {
    if (!is_begin_called_) {
      std::cerr << "Begin must be called once." << std::endl;
    }
    if (!is_end_called_) {
      std::cerr << "End must be called once." << std::endl;
    }
  }

  // Mark the beginning of a range.
  // Fails the ORT_ENFORCE checks if Begin or End has already been called.
  void Begin() {
    ORT_ENFORCE(!is_begin_called_, "Begin cannot be called more than once.");
    ORT_ENFORCE(!is_end_called_, "Begin cannot be called after calling End.");
    BeginImpl();
    // Set the flag only after BeginImpl has returned.
    is_begin_called_ = true;
  }

  // Mark the end of a range.
  // Fails the ORT_ENFORCE checks if Begin has not been called yet or End has
  // already been called.
  void End() {
    ORT_ENFORCE(is_begin_called_, "End must be called after calling Begin.");
    ORT_ENFORCE(!is_end_called_, "End cannot be called more than once.");
    EndImpl();
    // Set the flag only after EndImpl has returned.
    is_end_called_ = true;
  }

  // True once Begin has completed.
  bool IsBeginCalled() const {
    return is_begin_called_;
  }

  // True once End has completed.
  bool IsEndCalled() const {
    return is_end_called_;
  }

  // Backend-specific implementation of Begin; invoked by Begin().
  virtual void BeginImpl() = 0;

  // Backend-specific implementation of End; invoked by End().
  virtual void EndImpl() = 0;

 protected:
  // Text on this event.
  const std::string message_;

  // Color of event in ARGB space.
  const Color color_;

  bool is_begin_called_;
  bool is_end_called_;
};

// Range whose end is matched to its start through the id stored in range_id_
// (see BeginImpl/EndImpl).
class NvtxRangeCreator final : public RangeCreatorBase {
 public:
  // message: text shown on the range. color: ARGB color of the range.
  NvtxRangeCreator(const std::string& message, const Color color)
      : RangeCreatorBase(message, color) {}

  void BeginImpl() override;
  void EndImpl() override;

 private:
  // It records the event ID created by BeginImpl.
  // EndImpl needs this value to end the right event.
  // Initialized to 0 so the member never holds an indeterminate value.
  uint64_t range_id_{0};
};

// Range wrapper whose BeginImpl/EndImpl carry no per-object range id
// (unlike NvtxRangeCreator, this class has no id member).
class NvtxNestedRangeCreator final : public RangeCreatorBase {
 public:
  // message: text shown on the range. color: ARGB color of the range.
  NvtxNestedRangeCreator(const std::string& message, const Color color)
      : RangeCreatorBase(message, color) {}

  void BeginImpl() override;
  void EndImpl() override;
};

// Holds the text and color of a marker; Mark() emits it.
class NvtxMarkerCreator final {
 public:
  // message: text shown on the marker. color: ARGB color of the marker.
  NvtxMarkerCreator(const std::string& message, const Color color)
      : message_(message), color_(color) {}

  // Emit the marker. May be called any number of times.
  void Mark();

 private:
  // Text on this marker.
  const std::string message_;

  // Color of this marker in ARGB space.
  const Color color_;
};

} // namespace profile
} // namespace onnxruntime

#endif
Loading

0 comments on commit 24eda3d

Please sign in to comment.