Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sanitizers Utility Library #63

Draft
wants to merge 4 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/tools/sanitizer-utility-library/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Build rules for the Kokkos sanitizer utility library.
CXX=g++
CXXFLAGS=-O3 -std=c++11 -g
SHARED_CXXFLAGS=-shared -fPIC
# dlopen/dlsym live in libdl on glibc releases older than 2.34.
LDLIBS=-ldl

# Neither target names a real file, so never let a stray file shadow them.
.PHONY: all clean

all: kp_sanitizer_utility.so

kp_reader: kp_reader.cpp kp_sanitizer_utility.so
	$(CXX) $(CXXFLAGS) -o kp_reader kp_reader.cpp

kp_sanitizer_utility.so: kp_kernel_timer.cpp kp_kernel_info.h
	$(CXX) $(SHARED_CXXFLAGS) $(CXXFLAGS) -o $@ kp_kernel_timer.cpp $(LDLIBS)

# -f keeps "make clean" from failing when nothing has been built yet.
clean:
	rm -f *.so kp_reader
196 changes: 196 additions & 0 deletions src/tools/sanitizer-utility-library/kp_kernel_info.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@

#ifndef _H_KOKKOSP_KERNEL_INFO
#define _H_KOKKOSP_KERNEL_INFO

#include <stdio.h>
#include <sys/time.h>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <new>
#include <string>
#if defined(HAVE_GCC_ABI_DEMANGLE)
#include <cxxabi.h>
#endif
// Returns the current wall-clock time reported by gettimeofday(), expressed
// in seconds with the microsecond part as the fraction.
//
// Declared inline because this definition lives in a header: without it,
// every translation unit that includes the header emits its own external
// definition and the link fails with a multiple-definition error.
inline double seconds() {
  struct timeval now;
  gettimeofday(&now, nullptr);

  return static_cast<double>(now.tv_sec) +
         static_cast<double>(now.tv_usec) * 1.0e-6;
}

// Kind of construct a performance record describes. The numeric values are
// written to and read back from record files as a 32-bit integer, so they
// must stay 0, 1, 2, 3 in this order.
enum KernelExecutionType {
  PARALLEL_FOR = 0,  // 0
  PARALLEL_REDUCE,   // 1
  PARALLEL_SCAN,     // 2
  REGION             // 3
};

class KernelPerformanceInfo {
public:
KernelPerformanceInfo(std::string kName, KernelExecutionType kernelType) :
kType(kernelType) {

kernelName = (char*) malloc(sizeof(char) * (kName.size() + 1));
strcpy(kernelName, kName.c_str());

callCount = 0;
time = 0;
}

~KernelPerformanceInfo() {
free(kernelName);
}

KernelExecutionType getKernelType() {
return kType;
}

void incrementCount() {
callCount++;
}

void addTime(double t) {
time += t;
timeSq += (t*t);
}

void addFromTimer() {
addTime(seconds() - startTime);

incrementCount();
}

void startTimer() {
startTime = seconds();
}

uint64_t getCallCount() {
return callCount;
}

double getTime() {
return time;
}

double getTimeSq() {
return timeSq;
}

char* getName() {
return kernelName;
}

void addCallCount(const uint64_t newCalls) {
callCount += newCalls;
}

bool readFromFile(FILE* input) {
uint32_t recordLen = 0;
uint32_t actual_read = fread(&recordLen, sizeof(recordLen), 1, input);
if(actual_read != 1) return false;

char* entry = (char*) malloc(recordLen);
fread(entry, recordLen, 1, input);

uint32_t nextIndex = 0;
uint32_t kernelNameLength;
copy((char*) &kernelNameLength, &entry[nextIndex], sizeof(kernelNameLength));
nextIndex += sizeof(kernelNameLength);

if(strlen(kernelName) > 0) {
free(kernelName);
}

kernelName = (char*) malloc( sizeof(char) * (kernelNameLength + 1));
copy(kernelName, &entry[nextIndex], kernelNameLength);
kernelName[kernelNameLength] = '\0';
#if defined(HAVE_GCC_ABI_DEMANGLE)
{
int status = -1;
char* demangledKernelName = abi::__cxa_demangle(kernelName, NULL, NULL, &status);
if (status==0) {
free(kernelName);
kernelName = demangledKernelName;
}
}
#endif // HAVE_GCC_ABI_DEMANGLE
nextIndex += kernelNameLength;

copy((char*) &callCount, &entry[nextIndex], sizeof(callCount));
nextIndex += sizeof(callCount);

copy((char*) &time, &entry[nextIndex], sizeof(time));
nextIndex += sizeof(time);

copy((char*) &timeSq, &entry[nextIndex], sizeof(timeSq));
nextIndex += sizeof(timeSq);

uint32_t kernelT = 0;
copy((char*) &kernelT, &entry[nextIndex], sizeof(kernelT));
nextIndex += sizeof(kernelT);

if(kernelT == 0) {
kType = PARALLEL_FOR;
} else if(kernelT == 1) {
kType = PARALLEL_REDUCE;
} else if(kernelT == 2) {
kType = PARALLEL_SCAN;
} else if(kernelT == 3) {
kType = REGION;
}

free(entry);
return true;
}

void writeToFile(FILE* output) {
const uint32_t kernelNameLen = (uint32_t) strlen(kernelName);

const uint32_t recordLen =
sizeof(uint32_t) +
sizeof(char) * kernelNameLen +
sizeof(uint64_t) +
sizeof(double) +
sizeof(double) +
sizeof(uint32_t);

uint32_t nextIndex = 0;
char* entry = (char*) malloc(recordLen);

copy(&entry[nextIndex], (char*) &kernelNameLen, sizeof(kernelNameLen));
nextIndex += sizeof(kernelNameLen);

copy(&entry[nextIndex], kernelName, kernelNameLen);
nextIndex += kernelNameLen;

copy(&entry[nextIndex], (char*) &callCount, sizeof(callCount));
nextIndex += sizeof(callCount);

copy(&entry[nextIndex], (char*) &time, sizeof(time));
nextIndex += sizeof(time);

copy(&entry[nextIndex], (char*) &timeSq, sizeof(timeSq));
nextIndex += sizeof(timeSq);

uint32_t kernelTypeOutput = (uint32_t) kType;
copy(&entry[nextIndex], (char*) &kernelTypeOutput, sizeof(kernelTypeOutput));
nextIndex += sizeof(kernelTypeOutput);

fwrite(&recordLen, sizeof(uint32_t), 1, output);
fwrite(entry, recordLen, 1, output);
free(entry);
}

private:
void copy(char* dest, const char* src, uint32_t len) {
for(uint32_t i = 0; i < len; i++) {
dest[i] = src[i];
}
}

char* kernelName;
uint64_t callCount;
double time;
double timeSq;
double startTime;
KernelExecutionType kType;
};

#endif
175 changes: 175 additions & 0 deletions src/tools/sanitizer-utility-library/kp_kernel_timer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
#include <iostream>
#include <stdio.h>
#include <inttypes.h>
#include <execinfo.h>
#include <cstdlib>
#include <cstring>
#include <map>
#include <stack>
#include <set>
#include <vector>
#include <algorithm>
#include <functional>
#include <string>
#include <sys/time.h>
#include <cxxabi.h>
#include <unistd.h>
#include "kp_kernel_info.h"
#include "kp_memory_events.hpp"
#include <cstdio>

#include <dlfcn.h>

// Description of one tracked allocation. Instances are built with aggregate
// initialization, so the field order below is part of the interface.
struct NamedPointer {
// Address of the allocation, stored as an integer.
std::uintptr_t ptr;
// Label supplied with the allocation; the pointer is stored as-is, not copied.
const char* name;
// Size passed along with the allocation (presumably in bytes -- confirm).
std::uint64_t size;
// Memory space the allocation was reported for.
const SpaceHandle space;
};

namespace std {
// Orders NamedPointer entries by descending address.
//
// The comparison must be strict (a < a is false) for std::set to work: with
// a non-strict ">=" two entries with the same address are never considered
// equivalent, so erase()/find() match nothing and duplicates pile up.
template<>
struct less<NamedPointer> {
  bool operator()(const NamedPointer& n1, const NamedPointer& n2) const {
    return n1.ptr > n2.ptr;
  }
};
}

// Names of the kernels and regions currently open, outermost first.
std::vector<std::string> kokkos_stack;

// Every allocation currently tracked, regardless of memory space.
std::set<NamedPointer> tracked_pointers;

// The same allocations grouped by the name of their memory space.
std::map<std::string, std::set<NamedPointer>> space_map;

// Signature shared by the poison and unpoison entry points: (address, size).
typedef void (*poisonFunctionType)(void*, std::size_t);

// Resolved in kokkosp_init_library(); null until then.
poisonFunctionType poisonFunction = nullptr;
poisonFunctionType unpoisonFunction = nullptr;

// Marks every allocation tracked for the memory space spaceName as poisoned.
//
// Always logs the request to stdout. Does nothing further when the poison
// entry point was not resolved, instead of calling through a null pointer.
void poisonSpace(std::string spaceName){
  std::cout << "Poisoning "<<spaceName << std::endl;
  if (poisonFunction == nullptr) {
    return;
  }
  // Iterate by reference: the elements only need to be read.
  for (const auto& named_pointer : space_map[spaceName]) {
    poisonFunction(reinterpret_cast<void*>(named_pointer.ptr), named_pointer.size);
  }
}

// Removes the poison mark from every allocation tracked for the memory space
// spaceName.
//
// Always logs the request to stdout. Does nothing further when the unpoison
// entry point was not resolved, instead of calling through a null pointer.
void unpoisonSpace(std::string spaceName){
  std::cout << "Unpoisoning "<<spaceName << std::endl;
  if (unpoisonFunction == nullptr) {
    return;
  }
  // Iterate by reference: the elements only need to be read.
  for (const auto& named_pointer : space_map[spaceName]) {
    unpoisonFunction(reinterpret_cast<void*>(named_pointer.ptr), named_pointer.size);
  }
}

extern "C" void kokkosp_init_library(const int loadSeq,
const uint64_t interfaceVer,
const uint32_t devInfoCount,
void* deviceInfo) {

void* liveProgram = dlopen(nullptr, RTLD_NOW | RTLD_GLOBAL);

auto poisonFunctionHandle = dlsym(liveProgram, "__asan_poison_memory_region");
auto unpoisonFunctionHandle = dlsym(liveProgram, "__asan_unpoison_memory_region");

poisonFunction = *((poisonFunctionType*)&poisonFunctionHandle);
unpoisonFunction = *((poisonFunctionType*)&unpoisonFunctionHandle);

space_map["Host"] = std::set<NamedPointer>();
space_map["HBW"] = std::set<NamedPointer>();
poisonSpace("Host");
poisonSpace("HBW");

}

// Tool finalization hook (presumably called by the Kokkos profiling
// interface at shutdown -- confirm). Currently performs no work.
extern "C" void kokkosp_finalize_library() {
}



// Hook run when a parallel_for starts.
//
// Logs the device id, pushes the kernel name on kokkos_stack, poisons the
// "HBW" space for device 0 or the "Host" space for device 1, and hands the
// device id back through kID.
extern "C" void kokkosp_begin_parallel_for(const char* name, const uint32_t devID, uint64_t* kID) {
  std::cout << "Running kernel on device "<<devID<<std::endl;
  kokkos_stack.emplace_back(name);

  switch (devID) {
    case 0:
      poisonSpace("HBW");
      break;
    case 1:
      poisonSpace("Host");
      break;
    default:
      break;
  }

  *kID = devID;
}

// Hook run when a parallel_for ends.
//
// kID carries the device id stored by kokkosp_begin_parallel_for(). Unpoisons
// the "HBW" space for device 0 or the "Host" space for device 1, then pops
// the kernel name from kokkos_stack.
extern "C" void kokkosp_end_parallel_for(const uint64_t kID) {
  const uint64_t devID = kID;
  if (devID == 0) {
    unpoisonSpace("HBW");
  }
  else if (devID == 1) {
    unpoisonSpace("Host");
  }
  // pop_back() on an empty vector is undefined; tolerate an unmatched end.
  if (!kokkos_stack.empty()) {
    kokkos_stack.pop_back();
  }
}

// Hook run when a parallel_scan starts: records the kernel name on
// kokkos_stack. devID and kID are not used.
extern "C" void kokkosp_begin_parallel_scan(const char* name, const uint32_t devID, uint64_t* kID) {
  kokkos_stack.emplace_back(name);
}

// Hook run when a parallel_scan ends: drops the innermost name from
// kokkos_stack. kID is not used.
extern "C" void kokkosp_end_parallel_scan(const uint64_t kID) {
  // pop_back() on an empty vector is undefined; tolerate an unmatched end.
  if (!kokkos_stack.empty()) {
    kokkos_stack.pop_back();
  }
}

// Hook run when a parallel_reduce starts: records the kernel name on
// kokkos_stack. devID and kID are not used.
extern "C" void kokkosp_begin_parallel_reduce(const char* name, const uint32_t devID, uint64_t* kID) {
  kokkos_stack.emplace_back(name);
}

// Hook run when a parallel_reduce ends: drops the innermost name from
// kokkos_stack. kID is not used.
extern "C" void kokkosp_end_parallel_reduce(const uint64_t kID) {
  // pop_back() on an empty vector is undefined; tolerate an unmatched end.
  if (!kokkos_stack.empty()) {
    kokkos_stack.pop_back();
  }
}

// Hook run when a profiling region is opened: records the region name on
// kokkos_stack.
extern "C" void kokkosp_push_profile_region(char* regionName) {
  kokkos_stack.emplace_back(regionName);
}

// Hook run when a profiling region is closed: drops the innermost name from
// kokkos_stack.
extern "C" void kokkosp_pop_profile_region() {
  // pop_back() on an empty vector is undefined; tolerate a pop without a
  // matching push.
  if (!kokkos_stack.empty()) {
    kokkos_stack.pop_back();
  }
}

// Hook run when an allocation is released: stops tracking the allocation
// that starts at ptr_raw, both globally and in the set of its memory space.
// label and size are not used.
extern "C" void kokkosp_deallocate_data(const SpaceHandle space, const char* label, const void* const ptr_raw, const uint64_t size) {
  const auto ptr = reinterpret_cast<std::uintptr_t>(ptr_raw);
  // Only the address takes part in the set ordering; the other fields can
  // stay value-initialized.
  const auto key = NamedPointer{ptr};
  tracked_pointers.erase(key);

  const std::string space_name_as_string = space.name;
  const auto iter = space_map.find(space_name_as_string);
  // Remove just this allocation. The previous code replaced the whole set
  // with an empty one whenever the space was already known, forgetting every
  // other live allocation in that space.
  if (iter != space_map.end()) {
    iter->second.erase(key);
  }
}

// Hook run when an allocation is made: starts tracking the allocation at
// ptr_raw, both globally and in the set of its memory space.
//
// NOTE(review): the label pointer is stored without copying the text, so it
// must outlive the tracking entry -- confirm the caller guarantees that.
extern "C" void kokkosp_allocate_data(const SpaceHandle space, const char* label, const void* const ptr_raw, const uint64_t size) {
  const auto ptr = reinterpret_cast<std::uintptr_t>(ptr_raw);
  const auto key = NamedPointer { ptr, label, size, space };
  tracked_pointers.insert(key);

  const std::string space_name_as_string = space.name;
  // operator[] creates the set on first use and otherwise returns the
  // existing one. The previous code reset an existing set to empty before
  // inserting, so only the newest allocation per space stayed tracked.
  space_map[space_name_as_string].insert(key);
}

// Returns the label of the tracked allocation with the highest start address
// that is not above ptr_raw, or nullptr when no such allocation is tracked.
// The allocation's size is not consulted, so ptr_raw may lie past its end.
extern "C" const char* parallel_runtime_get_pointer_name(const void* ptr_raw){
  const auto ptr = reinterpret_cast<std::uintptr_t>(ptr_raw);

  // tracked_pointers is sorted by descending address, so the lower bound is
  // the first entry that does not start above ptr.
  auto nearest = tracked_pointers.lower_bound(NamedPointer{ptr, ""});

  // Also accept the entry right before it when it starts exactly at ptr, so
  // the result does not depend on whether the set's comparator is strict.
  if (nearest != tracked_pointers.begin()) {
    auto before = nearest;
    --before;
    if (before->ptr == ptr) {
      nearest = before;
    }
  }

  // Dereferencing end() is undefined; report "unknown" instead.
  if (nearest == tracked_pointers.end()) {
    return nullptr;
  }
  return nearest->name;
}

// Returns a malloc()-allocated, nullptr-terminated array with the names on
// kokkos_stack, outermost first, or nullptr if the array cannot be allocated.
//
// The array itself is never freed here (presumably the caller releases it
// with free() -- confirm). The entries point into the strings held by
// kokkos_stack and become invalid once that stack is modified.
extern "C" const char** parallel_runtime_get_callstack() {
  const std::size_t depth = kokkos_stack.size();

  const char** stack =
      static_cast<const char**>(malloc(sizeof(const char*) * (depth + 1)));
  if (stack == nullptr) {
    return nullptr;
  }

  // std::size_t index avoids the signed/unsigned comparison of an int loop.
  for (std::size_t i = 0; i < depth; ++i) {
    stack[i] = kokkos_stack[i].c_str();
  }
  stack[depth] = nullptr;
  return stack;
}
Loading