Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add View of Views debugging tool #267

Draft
wants to merge 9 commits into
base: develop
Choose a base branch
from
Draft
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ if(NOT WIN32)
add_subdirectory(common/kokkos-sampler)
endif()
add_subdirectory(debugging/kernel-logger)
add_subdirectory(debugging/vov-bug-finder)

# Profilers
if(NOT WIN32)
Expand Down
1 change: 1 addition & 0 deletions debugging/vov-bug-finder/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
kp_add_library(kp_view_of_views_bug_finder kp_view_of_views_bug_finder.cpp)
169 changes: 169 additions & 0 deletions debugging/vov-bug-finder/kp_view_of_views_bug_finder.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#include <kp_core.hpp>

#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <map>
#include <mutex>
#include <optional>
#include <string>

namespace {

bool verbose = false;
bool abort_on_error = true;

class {
uint64_t count_;
std::map<uint64_t, std::string> map_;

public:
std::mutex mutex;
uint64_t push(std::string s) {
auto it = map_.emplace_hint(map_.end(), count_, std::move(s));
assert(++it == map_.end());
return count_++;
}
void pop(uint64_t x) {
auto it = map_.find(x);
assert(it != map_.end());
map_.erase(it);
}
std::string const &top() {
assert(!map_.empty());
return map_.begin()->second;
}
bool is_empty() noexcept { return map_.empty(); }
} current;

bool ignore_fence(std::string_view s) {
return (s == "Kokkos::Impl::ViewValueFunctor: View init/destroy fence") ||
(s == "Kokkos::ThreadsInternal::fence: Unnamed Instance Fence");
}

std::optional<std::string> get_substr(std::string const &str,
std::string_view prefix,
std::string_view suffix) {
if (auto found = str.find(prefix); found != std::string::npos) {
found += prefix.length();
return str.substr(found, str.rfind(suffix) - found);
}
return std::nullopt;
}

} // namespace

extern "C" void kokkosp_request_tool_settings(
const uint32_t, Kokkos_Tools_ToolSettings *settings) {
settings->requires_global_fencing = false;
}

extern "C" void kokkosp_begin_parallel_for(char const *kernelName,
uint32_t deviceID,
uint64_t *kernelID) {
std::lock_guard lock(current.mutex);
if (!current.is_empty()) {
if (auto lbl =
get_substr(kernelName, "Kokkos::View::initialization [", "]")) {
std::cerr << "constructing view \"" << *lbl
<< "\" within a parallel region \"" << current.top() << "\"\n";
if (abort_on_error) {
std::abort();
}
}
}
*kernelID = current.push(kernelName);

if (verbose) {
std::cout << "begin kernel " << *kernelID << " " << kernelName
<< " on device " << deviceID << '\n';
}
}

extern "C" void kokkosp_end_parallel_for(uint64_t kernelID) {
std::lock_guard lock(current.mutex);
current.pop(kernelID);

if (verbose) {
std::cout << "end kernel " << kernelID << '\n';
}
}

extern "C" void kokkosp_begin_fence(char const *fenceName, uint32_t deviceID,
uint64_t *fenceID) {
std::lock_guard lock(current.mutex);
if (!current.is_empty() && !ignore_fence(fenceName)) {
if (auto lbl =
get_substr(current.top(), "Kokkos::View::destruction [", "]")) {
std::cerr << "view of views \"" << *lbl
<< "\" not properly cleared this fence labelled \"" << fenceName
<< "\" will hang\n";
if (abort_on_error) {
std::abort();
}
}
}
*fenceID = -1;

if (verbose) {
std::cout << "begin fence " << *fenceID << " " << fenceName << " on device "
<< deviceID << '\n';
}
}

extern "C" void kokkosp_end_fence(uint64_t fenceID) {
if (verbose) {
std::cout << "end fence " << fenceID << '\n';
}
}

extern "C" void kokkosp_allocate_data(SpaceHandle handle, const char *name,
void *ptr, uint64_t size) {
std::lock_guard lock(current.mutex);
if (!current.is_empty()) {
std::cerr << "allocating \"" << name << "\" within parallel region \""
<< current.top() << "\"\n";
if (abort_on_error) {
std::abort();
}
}

if (verbose) {
std::cout << "alloc (" << handle.name << ") " << name << " pointer " << ptr
<< "size " << size << '\n';
}
}

extern "C" void kokkosp_deallocate_data(SpaceHandle handle, const char *name,
void *ptr, uint64_t size) {
std::lock_guard lock(current.mutex);
if (!current.is_empty()) {
std::cerr << "deallocating \"" << name << "\" within parallel region \""
<< current.top() << "\"\n";
if (abort_on_error) {
std::abort();
}
}

if (verbose) {
std::cout << "dealloc (" << handle.name << ") " << name << " pointer "
<< ptr << "size " << size << '\n';
}
}
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,4 @@ target_link_libraries(test_common PUBLIC GTest::gtest GTest::gmock Kokkos::kokko

add_subdirectory(space-time-stack)
add_subdirectory(sampler)
add_subdirectory(vov-bug-finder)
5 changes: 5 additions & 0 deletions tests/vov-bug-finder/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
kp_add_executable_and_test(
TARGET_NAME test_vov_bug_finder
SOURCE_FILE test_view_of_views_bug_finder.cpp
KOKKOS_TOOLS_LIBS kp_view_of_views_bug_finder
)
101 changes: 101 additions & 0 deletions tests/vov-bug-finder/test_view_of_views_bug_finder.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#include "Kokkos_Core.hpp"
#include "gtest/gtest.h"

// TODO intialixe in main and split unit tests
dalg24 marked this conversation as resolved.
Show resolved Hide resolved
TEST(ViewOfViews, find_bugs) {
Kokkos::initialize();
{
ASSERT_NO_THROW(({
using V = Kokkos::View<int *>;
Kokkos::View<V **, Kokkos::HostSpace> vov("vov", 2, 3);
V a("a", 4);
V b("b", 5);
vov(0, 0) = a;
vov(0, 1) = a;
vov(1, 0) = b;

vov(0, 0) = V();
vov(0, 1) = V();
vov(1, 0) = V();
}));

ASSERT_NO_THROW(({
using V = Kokkos::View<int *>;
Kokkos::View<V **, Kokkos::HostSpace> vov(
Kokkos::view_alloc("vov", Kokkos::WithoutInitializing), 2, 3);
V a("a", 4);
V b("b", 5);
new (&vov(0, 0)) V(a);
new (&vov(0, 1)) V(a);
new (&vov(1, 0)) V(b);

vov(0, 0).~V();
vov(0, 1).~V();
// vov(1, 0).~V();
// ^ leaking "b" but not caught by the tool
}));

ASSERT_DEATH(({
using V = Kokkos::View<int *>;
Kokkos::View<V **, Kokkos::HostSpace> vov("vo]v", 2, 3);
// ^ included a closing square bracket in the label to try
// to trip the substring extraction
V a("a", 4);
V b("b", 5);
vov(0, 0) = a;
vov(0, 1) = a;
vov(1, 0) = b;
}),
"view of views \"vo]v\" not properly cleared");
Comment on lines +107 to +118
Copy link
Contributor

@maartenarnst maartenarnst Aug 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just noting that in my OpenMP build, I see this death test hang when I run with more than 1 omp thread. (Christian had noted that team tests sometimes hang, but I'm not sure if it's related.)


ASSERT_NO_THROW(({
using V = Kokkos::View<int *>;
Kokkos::View<V **, Kokkos::HostSpace> vov(
Kokkos::view_alloc("vov", Kokkos::WithoutInitializing), 2, 3);
V a("a", 4);
V b("b", 5);
Kokkos::parallel_for(
"Fine", Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>(0, 1),
KOKKOS_LAMBDA(int) {
new (&vov(0, 0)) V(a);
new (&vov(0, 1)) V(a);
new (&vov(1, 0)) V(b);
});
}));

ASSERT_DEATH(
({
using V = Kokkos::View<int *>;
Kokkos::View<V **, Kokkos::HostSpace> vov(
Kokkos::view_alloc("vov", Kokkos::WithoutInitializing), 2, 3);
V a("a", 4);
new (&vov(0, 0)) V(a);
new (&vov(0, 1)) V(a);
Kokkos::parallel_for(
"AllocatesInParallel]For",
Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>(0, 1),
KOKKOS_LAMBDA(int) {
V b("b", 5);
new (&vov(1, 0)) V(b);
});
}),
"allocating \"b\" within parallel region \"AllocatesInParallel]For\"");
}
Kokkos::finalize();
}
Loading