Skip to content

Commit

Permalink
async-profiler#1044: Fall back to ctimer for CPU profiling when perf_…
Browse files Browse the repository at this point in the history
…events are unavailable
  • Loading branch information
apangin committed Nov 4, 2024
1 parent adecac7 commit 6c32ce9
Show file tree
Hide file tree
Showing 16 changed files with 111 additions and 53 deletions.
4 changes: 4 additions & 0 deletions src/allocTracer.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ class AllocTracer : public Engine {
uintptr_t total_size, uintptr_t instance_size);

public:
// Engine type identifier for AllocTracer.
const char* type() { return "alloc_tracer"; }

// Title string for the AllocTracer engine.
const char* title() { return "Allocation profile"; }
Expand Down
6 changes: 1 addition & 5 deletions src/arguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ static const Multiplier UNIVERSAL[] = {{'n', 1}, {'u', 1000}, {'m', 1000000}, {'
// cstack=MODE - how to collect C stack frames in addition to Java stack
// MODE is 'fp', 'dwarf', 'lbr', 'vm' or 'no'
// clock=SOURCE - clock source for JFR timestamps: 'tsc' or 'monotonic'
// allkernel - include only kernel-mode events
// alluser - include only user-mode events
// fdtransfer - use fdtransfer to pass fds to the profiler
// simple - simple class names instead of FQN
Expand Down Expand Up @@ -339,11 +338,8 @@ Error Arguments::parse(const char* args) {
CASE("nobatch")
_nobatch = true;

CASE("allkernel")
_ring = RING_KERNEL;

CASE("alluser")
_ring = RING_USER;
_alluser = true;

CASE("cstack")
if (value != NULL) {
Expand Down
10 changes: 2 additions & 8 deletions src/arguments.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,6 @@ enum SHORT_ENUM Counter {
COUNTER_TOTAL
};

// Selects which privilege ring is sampled.
// PerfEvents::createForThread() sets attr.exclude_kernel for RING_USER and
// attr.exclude_user for RING_KERNEL; RING_ANY sets neither flag there.
// Enumerator order is significant: the value is used as an index into the
// SETTING_RING table in flightRecorder.cpp.
enum SHORT_ENUM Ring {
RING_ANY,
RING_KERNEL,
RING_USER
};

enum Style {
STYLE_SIMPLE = 0x1,
STYLE_DOTTED = 0x2,
Expand Down Expand Up @@ -159,7 +153,6 @@ class Arguments {
public:
Action _action;
Counter _counter;
Ring _ring;
const char* _event;
int _timeout;
long _interval;
Expand All @@ -183,6 +176,7 @@ class Arguments {
bool _sched;
bool _live;
bool _nobatch;
bool _alluser;
bool _fdtransfer;
const char* _fdtransfer_path;
int _style;
Expand All @@ -209,7 +203,6 @@ class Arguments {
_shared(false),
_action(ACTION_NONE),
_counter(COUNTER_SAMPLES),
_ring(RING_ANY),
_event(NULL),
_timeout(0),
_interval(0),
Expand All @@ -233,6 +226,7 @@ class Arguments {
_sched(false),
_live(false),
_nobatch(false),
_alluser(false),
_fdtransfer(false),
_fdtransfer_path(NULL),
_style(0),
Expand Down
4 changes: 4 additions & 0 deletions src/ctimer.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ class CTimer : public CpuEngine {
void destroyForThread(int tid);

public:
// Engine type identifier for CTimer.
const char* type() { return "ctimer"; }

Error check(Arguments& args);
Error start(Arguments& args);
void stop();
Expand Down
4 changes: 4 additions & 0 deletions src/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ class Engine {
}

public:
// Default engine type identifier; derived engines such as AllocTracer and
// Instrument provide their own value.
virtual const char* type() { return "noop"; }

// Default title string; derived engines such as AllocTracer and Instrument
// provide their own value.
virtual const char* title() { return "Flame Graph"; }
Expand Down
4 changes: 2 additions & 2 deletions src/flightRecorder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ static jmethodID _start_method;
static jmethodID _stop_method;
static jmethodID _box_method;

// String values for recording settings. Recording::writeSettings() indexes each
// table with the matching Arguments field (args._ring, args._cstack, args._clock),
// so the entry order must follow the corresponding enum. Index 0 is NULL in every table.
static const char* const SETTING_RING[] = {NULL, "kernel", "user"};
static const char* const SETTING_CSTACK[] = {NULL, "no", "fp", "dwarf", "lbr", "vm"};
static const char* const SETTING_CLOCK[] = {NULL, "tsc", "monotonic"};

Expand Down Expand Up @@ -804,7 +803,7 @@ class Recording {

void writeSettings(Buffer* buf, Arguments& args) {
writeStringSetting(buf, T_ACTIVE_RECORDING, "version", PROFILER_VERSION);
writeStringSetting(buf, T_ACTIVE_RECORDING, "ring", SETTING_RING[args._ring]);
writeStringSetting(buf, T_ACTIVE_RECORDING, "engine", Profiler::instance()->_engine->type());
writeStringSetting(buf, T_ACTIVE_RECORDING, "cstack", SETTING_CSTACK[args._cstack]);
writeStringSetting(buf, T_ACTIVE_RECORDING, "clock", SETTING_CLOCK[args._clock]);
writeStringSetting(buf, T_ACTIVE_RECORDING, "event", args._event);
Expand All @@ -824,6 +823,7 @@ class Recording {
writeBoolSetting(buf, T_EXECUTION_SAMPLE, "enabled", args._event != NULL);
if (args._event != NULL) {
writeIntSetting(buf, T_EXECUTION_SAMPLE, "interval", args._interval);
writeBoolSetting(buf, T_EXECUTION_SAMPLE, "alluser", args._alluser);
}
if (args._wall >= 0) {
writeIntSetting(buf, T_EXECUTION_SAMPLE, "wall", args._wall);
Expand Down
4 changes: 4 additions & 0 deletions src/instrument.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ class Instrument : public Engine {
static volatile bool _running;

public:
// Engine type identifier for Instrument.
const char* type() { return "instrument"; }

// Title string for the Instrument engine.
const char* title() { return "Java method profile"; }
Expand Down
4 changes: 4 additions & 0 deletions src/itimer.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@

class ITimer : public CpuEngine {
public:
// Engine type identifier for ITimer.
const char* type() { return "itimer"; }

Error check(Arguments& args);
Error start(Arguments& args);
void stop();
Expand Down
4 changes: 4 additions & 0 deletions src/j9ObjectSampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@

class J9ObjectSampler : public ObjectSampler {
public:
// Engine type identifier for J9ObjectSampler (distinct from the
// "object_sampler" value of its ObjectSampler base).
const char* type() { return "j9_object_sampler"; }

Error check(Arguments& args);
Error start(Arguments& args);
void stop();
Expand Down
4 changes: 4 additions & 0 deletions src/j9WallClock.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ class J9WallClock : public Engine {
void timerLoop();

public:
// Engine type identifier for J9WallClock.
const char* type() { return "j9_wall"; }

// Title string for the J9WallClock engine.
const char* title() { return "Wall clock profile"; }
Expand Down
4 changes: 4 additions & 0 deletions src/lockTracer.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ class LockTracer : public Engine {
const char* lock_name, jobject lock, jlong timeout);

public:
// Engine type identifier for LockTracer.
const char* type() { return "lock_tracer"; }

// Title string for the LockTracer engine.
const char* title() { return "Lock profile"; }
Expand Down
4 changes: 4 additions & 0 deletions src/objectSampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ class ObjectSampler : public Engine {
jobject object, jclass object_klass, jlong size);

public:
// Engine type identifier for ObjectSampler.
const char* type() { return "object_sampler"; }

// Title string for the ObjectSampler engine.
const char* title() { return "Allocation profile"; }
Expand Down
8 changes: 6 additions & 2 deletions src/perfEvents.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ class PerfEvents : public CpuEngine {
static int _max_events;
static PerfEvent* _events;
static PerfEventType* _event_type;
static Ring _ring;
static bool _use_mmap_page;
static bool _alluser;
static bool _kernel_stack;

static u64 readCounter(siginfo_t* siginfo, void* ucontext);
static void signalHandler(int signo, siginfo_t* siginfo, void* ucontext);
Expand All @@ -35,6 +35,10 @@ class PerfEvents : public CpuEngine {
Error start(Arguments& args);
void stop();

// Engine type identifier for PerfEvents.
const char* type() { return "perf"; }

const char* title();
const char* units();

Expand Down
77 changes: 46 additions & 31 deletions src/perfEvents_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ struct PerfEventType {
int counter_arg;

enum {
IDX_CPU = 0,
IDX_PREDEFINED = 12,
IDX_RAW,
IDX_PMU,
Expand Down Expand Up @@ -355,6 +356,11 @@ struct PerfEventType {
}

static PerfEventType* forName(const char* name) {
// "cpu" is an alias for "cpu-clock"
if (strcmp(name, EVENT_CPU) == 0) {
return &AVAILABLE_EVENTS[IDX_CPU];
}

// Look through the table of predefined perf events
for (int i = 0; i <= IDX_PREDEFINED; i++) {
if (strcmp(name, AVAILABLE_EVENTS[i].name) == 0) {
Expand Down Expand Up @@ -429,7 +435,7 @@ struct PerfEventType {
#endif

PerfEventType PerfEventType::AVAILABLE_EVENTS[] = {
{"cpu", DEFAULT_INTERVAL, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK},
{"cpu-clock", DEFAULT_INTERVAL, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK},
{"page-faults", 1, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS},
{"context-switches", 2, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES},

Expand All @@ -445,6 +451,8 @@ PerfEventType PerfEventType::AVAILABLE_EVENTS[] = {
{"LLC-load-misses", 1000, PERF_TYPE_HW_CACHE, LOAD_MISS(PERF_COUNT_HW_CACHE_LL)},
{"dTLB-load-misses", 1000, PERF_TYPE_HW_CACHE, LOAD_MISS(PERF_COUNT_HW_CACHE_DTLB)},

/* End of IDX_PREDEFINED events */

{"rNNN", 1000, PERF_TYPE_RAW, 0}, /* IDX_RAW */
{"pmu/event-descriptor/", 1000, PERF_TYPE_RAW, 0}, /* IDX_PMU */

Expand Down Expand Up @@ -510,8 +518,8 @@ class PerfEvent : public SpinLock {
// Out-of-class definitions of the PerfEvents static data members.

// Bound that createForThread() compares the thread id against.
int PerfEvents::_max_events = 0;
// Per-thread event slots, indexed by tid in createForThread().
PerfEvent* PerfEvents::_events = NULL;
// Currently selected event type; title() and units() treat NULL as the CPU profile.
PerfEventType* PerfEvents::_event_type = NULL;
// Copied from args._ring in start(); RING_USER / RING_KERNEL select
// attr.exclude_kernel / attr.exclude_user in createForThread().
Ring PerfEvents::_ring;
// Computed in start(); when true, createForThread() mmaps the perf event fd.
bool PerfEvents::_use_mmap_page;
// Copied from args._alluser in start(); when true, createForThread() sets attr.exclude_kernel.
bool PerfEvents::_alluser;
// Set in start() to !_alluser && _cstack != CSTACK_NO, then cleared if kernel symbols
// are unavailable; when false, createForThread() sets attr.exclude_callchain_kernel.
bool PerfEvents::_kernel_stack;

int PerfEvents::createForThread(int tid) {
if (tid >= _max_events) {
Expand Down Expand Up @@ -548,10 +556,12 @@ int PerfEvents::createForThread(int tid) {
attr.disabled = 1;
attr.wakeup_events = 1;

if (_ring == RING_USER) {
if (_alluser) {
attr.exclude_kernel = 1;
} else if (_ring == RING_KERNEL) {
attr.exclude_user = 1;
}

if (!_kernel_stack) {
attr.exclude_callchain_kernel = 1;
}

if (_cstack >= CSTACK_FP) {
Expand Down Expand Up @@ -586,10 +596,13 @@ int PerfEvents::createForThread(int tid) {
return err;
}

void* page = _use_mmap_page ? mmap(NULL, 2 * OS::page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) : NULL;
if (page == MAP_FAILED) {
Log::warn("perf_event mmap failed: %s", strerror(errno));
page = NULL;
void* page = NULL;
if (_kernel_stack || _cstack == CSTACK_DEFAULT || _cstack == CSTACK_LBR) {
page = mmap(NULL, 2 * OS::page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (page == MAP_FAILED) {
Log::warn("perf_event mmap failed: %s", strerror(errno));
page = NULL;
}
}

_events[tid].reset();
Expand Down Expand Up @@ -693,7 +706,7 @@ void PerfEvents::signalHandlerJ9(int signo, siginfo_t* siginfo, void* ucontext)
}

const char* PerfEvents::title() {
if (_event_type == NULL || _event_type->name == EVENT_CPU) {
if (_event_type == NULL || strcmp(_event_type->name, "cpu-clock") == 0) {
return "CPU profile";
} else if (_event_type->type == PERF_TYPE_SOFTWARE || _event_type->type == PERF_TYPE_HARDWARE || _event_type->type == PERF_TYPE_HW_CACHE) {
return _event_type->name;
Expand All @@ -703,7 +716,7 @@ const char* PerfEvents::title() {
}

// Returns the unit label for sample values: "ns" when no event type is selected
// or the selected event is the CPU clock, "total" for every other event.
const char* PerfEvents::units() {
    if (_event_type == NULL) {
        return "ns";
    }

    // Compare the event name by content rather than by pointer identity: a pointer
    // comparison only matches when the table entry happens to share storage with
    // EVENT_CPU. Both the "cpu" alias (EVENT_CPU) and the canonical "cpu-clock"
    // name denote the CPU clock event.
    const char* name = _event_type->name;
    bool cpu_clock = strcmp(name, EVENT_CPU) == 0 || strcmp(name, "cpu-clock") == 0;
    return cpu_clock ? "ns" : "total";
}

Error PerfEvents::check(Arguments& args) {
Expand Down Expand Up @@ -734,17 +747,8 @@ Error PerfEvents::check(Arguments& args) {
attr.sample_type = PERF_SAMPLE_CALLCHAIN;
attr.disabled = 1;

if (args._ring == RING_USER) {
if (args._alluser) {
attr.exclude_kernel = 1;
} else if (args._ring == RING_KERNEL) {
attr.exclude_user = 1;
} else if (!Symbols::haveKernelSymbols()) {
Profiler::instance()->updateSymbols(true);
attr.exclude_kernel = Symbols::haveKernelSymbols() ? 0 : 1;
}

if (args._cstack >= CSTACK_FP) {
attr.exclude_callchain_user = 1;
}

#ifdef PERF_ATTR_SIZE_VER5
Expand Down Expand Up @@ -783,14 +787,14 @@ Error PerfEvents::start(Arguments& args) {
_cstack = args._cstack;
_signal = args._signal == 0 ? OS::getProfilingSignal(0) : args._signal & 0xff;

_ring = args._ring;
if (_ring != RING_USER && !Symbols::haveKernelSymbols()) {
_alluser = args._alluser;
_kernel_stack = !_alluser && _cstack != CSTACK_NO;
if (_kernel_stack && !Symbols::haveKernelSymbols()) {
Log::warn("Kernel symbols are unavailable due to restrictions. Try\n"
" sysctl kernel.perf_event_paranoid=1\n"
" sysctl kernel.kptr_restrict=0");
_ring = RING_USER;
_kernel_stack = false;
}
_use_mmap_page = _cstack != CSTACK_NO && (_ring != RING_USER || _cstack == CSTACK_DEFAULT || _cstack == CSTACK_LBR);

adjustFDLimit();

Expand Down Expand Up @@ -820,7 +824,7 @@ Error PerfEvents::start(Arguments& args) {
if (err) {
stop();
if (err == EACCES || err == EPERM) {
return Error("No access to perf events. Try --fdtransfer or --all-user option or 'sysctl kernel.perf_event_paranoid=1'");
return Error("Perf events unavailable. Try --fdtransfer or --all-user option or 'sysctl kernel.perf_event_paranoid=1'");
} else if (isResourceLimit(err)) {
return Error("Perf events resource limit. Check 'ulimit -n'");
} else {
Expand Down Expand Up @@ -938,10 +942,21 @@ void PerfEvents::resetBuffer(int tid) {
}

// Reports whether perf_events can actually be used by the current process.
//
// The mere existence of /proc/sys/kernel/perf_event_paranoid is not sufficient
// evidence, so this probes the syscall directly: it opens a disabled software
// cpu-clock counter for the calling thread (pid = 0) on any CPU (cpu = -1),
// with no group and no flags, and closes it again right away.
// Returns true if the probe descriptor could be opened, false otherwise.
bool PerfEvents::supported() {
    struct perf_event_attr attr = {0};
    attr.size = sizeof(attr);
    attr.type = PERF_TYPE_SOFTWARE;
    attr.config = PERF_COUNT_SW_CPU_CLOCK;
    attr.sample_period = 1000000000;
    attr.sample_type = PERF_SAMPLE_CALLCHAIN;
    // The counter is created disabled: only the ability to open it matters here
    attr.disabled = 1;

    int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    if (fd == -1) {
        return false;
    }

    close(fd);
    return true;
}

const char* PerfEvents::getEventName(int event_id) {
Expand Down
Loading

0 comments on commit 6c32ce9

Please sign in to comment.