From 68c2584330aa40ae94305b878b9f7c4cab1f4d12 Mon Sep 17 00:00:00 2001
From: Gal Horowitz
Date: Wed, 1 Jun 2022 00:09:45 +0300
Subject: [PATCH] Prime+Scope implementation, including a "PrimeTime" implementation for finding prime patterns

---
NOTE(review): the header names on the bare "#include" lines below are
missing from this copy of the patch and have to be restored before it
can be built.

 demo/Makefile.in     |   2 +
 demo/PS-prime-time.c |  34 +++
 demo/PS.c            | 129 ++++++++++++
 mastik/ps.h          |  58 +++++
 mastik/pt.h          |  27 +++
 src/Makefile.in      |   2 +
 src/ps.c             | 269 +++++++++++++++++++++++++
 src/pt.c             | 455 +++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 976 insertions(+)
 create mode 100644 demo/PS-prime-time.c
 create mode 100644 demo/PS.c
 create mode 100644 mastik/ps.h
 create mode 100644 mastik/pt.h
 create mode 100644 src/ps.c
 create mode 100644 src/pt.c

diff --git a/demo/Makefile.in b/demo/Makefile.in
index 7f53665..5b6fcb4 100644
--- a/demo/Makefile.in
+++ b/demo/Makefile.in
@@ -14,6 +14,8 @@ FILES= \
 	L3-capture.c \
 	L3-capturecount.c \
 	L3-scan.c \
+	PS.c \
+	PS-prime-time.c \
 	L2-capture.c \
 	L2-rattle.c \
 	L2-sequence.c \
diff --git a/demo/PS-prime-time.c b/demo/PS-prime-time.c
new file mode 100644
index 0000000..b693be0
--- /dev/null
+++ b/demo/PS-prime-time.c
@@ -0,0 +1,34 @@
+#include
+#include
+
+#include
+#include
+#include
+
+// PrimeTime demo: searches for prime patterns and prints every result
+// with its measured EVCr and cycle count.
+int main() {
+  pt_t patterns = pt_prepare(NULL, NULL);
+  if (patterns == NULL) {
+    fprintf(stderr, "pt_prepare failed\n");
+    return 1;
+  }
+
+  pt_results_t results = generate_prime_patterns(patterns);
+  if (results == NULL) {
+    fprintf(stderr, "generate_prime_patterns failed\n");
+    pt_release(patterns);
+    return 1;
+  }
+
+  printf("EVCr | Cycles | Pattern\n");
+  for(int i = 0; i < PT_RESULTS_COUNT; i++){
+    printf("%7.3f%% | %6d | ", results->evcrs[i]*100, results->cycles[i]);
+    dump_ppattern(results->patterns[i]);
+    printf("\n");
+  }
+
+  free(results);
+  pt_release(patterns);
+  return 0;
+}
\ No newline at end of file
diff --git a/demo/PS.c b/demo/PS.c
new file mode 100644
index 0000000..2d16d4e
--- /dev/null
+++ b/demo/PS.c
@@ -0,0 +1,129 @@
+#define _GNU_SOURCE
+#include
+#undef _GNU_SOURCE
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+// Restricts the calling process to run on the given CPU core.
+void pin_to_core(int core) {
+  cpu_set_t cpu_set;
+  CPU_ZERO(&cpu_set);
+  CPU_SET(core, &cpu_set);
+  sched_setaffinity(0, sizeof(cpu_set_t), &cpu_set);
+}
+
+// Handshake between parent and child through the shared mapping below:
+// the parent sets STATE_SHOULD_ACCESS, the child accesses the target and
+// sets the state back to STATE_INIT.
+enum SHARED_STATE {
+  STATE_INIT,
+  STATE_SHOULD_ACCESS,
+};
+volatile enum SHARED_STATE* shared_state;
+
+// Forks a child (pinned to core 3) that accesses `target` every time the
+// shared state is switched to STATE_SHOULD_ACCESS. Returns the child's pid.
+pid_t setup_child(void* target) {
+  shared_state = mmap(NULL, sizeof(enum SHARED_STATE), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+  assert(shared_state != MAP_FAILED);
+  *shared_state = STATE_INIT;
+
+  pid_t child_pid = fork();
+  // A pid of -1 must never reach kill(): it would signal every process
+  // we are allowed to signal.
+  assert(child_pid >= 0);
+  if(child_pid == 0) {
+    prctl(PR_SET_PDEATHSIG, SIGKILL);
+    pin_to_core(3);
+    while(1) {
+      while(*shared_state == STATE_INIT) {}
+      mfence();
+      memaccesstime(target);
+      mfence();
+      *shared_state = STATE_INIT;
+    }
+  }
+
+  return child_pid;
+}
+
+// Kills and reaps the child, then unmaps the shared state.
+void cleanup_child(pid_t pid) {
+  if(pid > 0) {
+    kill(pid, SIGKILL);
+    waitpid(pid, 0, 0);
+  }
+  munmap((void*)shared_state, sizeof(enum SHARED_STATE));
+}
+
+#define TARGET_LINE 31
+#define TRIALS 100
+int main() {
+  // Intel Core i5-8250U: R4_S4_P0S123S1231S23 [99.98% Success rate, 1625 cycles]
+  uint8_t _pat[13] = {0, PATTERN_TARGET, 1, 2, 3, PATTERN_TARGET, 1, 2, 3, 1, PATTERN_TARGET, 2, 3};
+  ppattern_t best_pat = construct_pattern(4, 4, _pat, sizeof(_pat));
+
+  mm_t mm = mm_prepare(NULL, NULL, NULL);
+  assert(mm);
+  ps_t ps = ps_prepare(best_pat, NULL, mm);
+  assert(ps);
+
+  void* target = mm_requestline(mm, L3, TARGET_LINE);
+  // NOTE: We are somewhat abusing this for demonstration purposes -
+  // we are relying on the fact that Linux uses copy-on-write for
+  // MAP_PRIVATE mmap-allocations: This line comes from the
+  // mm->memory buffer, which is MAP_PRIVATE and so in theory it
+  // is not shared with the forked process, but practically it is.
+
+  ps_monitor(ps, TARGET_LINE);
+
+  pid_t child_pid = setup_child(target);
+  pin_to_core(2);
+
+  int success = 0;
+  for(int i = 0; i < TRIALS; i++) {
+    ps_prime(ps);
+
+    uint16_t t_res;
+
+    // After priming, the scope line must be fast twice in a row;
+    // otherwise this trial is discarded.
+    ps_scope(ps, &t_res);
+    if(t_res > L3_THRESHOLD)
+      continue;
+
+    ps_scope(ps, &t_res);
+    if(t_res > L3_THRESHOLD)
+      continue;
+
+    // Ask the child to access the target and wait until it has done so.
+    *shared_state = STATE_SHOULD_ACCESS;
+    mfence();
+    while(*shared_state == STATE_SHOULD_ACCESS) {}
+    mfence();
+
+    // The trial succeeds only if the scope line is now slow.
+    ps_scope(ps, &t_res);
+    if(t_res < L3_THRESHOLD)
+      continue;
+
+    success += 1;
+  }
+
+  printf("%d/%d Successful\n", success, TRIALS);
+
+  ps_release(ps);
+  cleanup_child(child_pid);
+  mm_release(mm);
+
+  return 0;
+}
\ No newline at end of file
diff --git a/mastik/ps.h b/mastik/ps.h
new file mode 100644
index 0000000..eb7bb26
--- /dev/null
+++ b/mastik/ps.h
@@ -0,0 +1,58 @@
+#ifndef __PS_H__
+#define __PS_H__ 1
+
+#include
+#include
+
+// Maximum number of entries in a prime pattern
+#define PATTERN_CAPACITY 20
+// Pattern entry that denotes an access to the target
+#define PATTERN_TARGET 0xFF
+struct prime_pattern {
+  // Number of times the whole pattern sweep is repeated
+  uint8_t repeat;
+  // Step by which i advances between applications of the pattern
+  uint8_t stride;
+  // The largest offset from i
+  uint8_t width;
+  // Number of valid entries in pattern[]
+  uint8_t length;
+  // 0xFF denotes the target, other values denote offset from i
+  uint8_t pattern[PATTERN_CAPACITY];
+};
+typedef struct prime_pattern ppattern_t;
+// Builds a pattern from `length` entries (at most PATTERN_CAPACITY);
+// the width is derived from the largest offset in `pattern`.
+ppattern_t construct_pattern(uint8_t repeat, uint8_t stride, uint8_t* pattern, uint8_t length);
+// Prints the pattern as R<repeat>_S<stride>_P<entries>, with 'S' for the target
+void dump_ppattern(ppattern_t pattern);
+// Performs the memory accesses described by `pattern` on `evset`;
+// element 0 of `evset` is the target.
+void access_pattern(vlist_t evset, int associativity, ppattern_t pattern);
+
+typedef struct ps *ps_t;
+// Returns NULL on failure. If `mm` is NULL a private memory manager is
+// created and released together with the handle.
+ps_t ps_prepare(ppattern_t prime_pattern, l3info_t l3info, mm_t mm);
+
+// Both return 1 on success and 0 if `line` is out of range or already in
+// the requested state
+int ps_monitor(ps_t ps, int line);
+int ps_unmonitor(ps_t ps, int line);
+void ps_unmonitorall(ps_t ps);
+// Copies up to `nlines` monitored lines into `lines` and returns the
+// total number of monitored lines
+int ps_getmonitoredset(ps_t ps, int *lines, int nlines);
+
+// Applies the prime pattern to every monitored line
+void ps_prime(ps_t ps);
+// Stores one access time per monitored line in `results`, saturated at
+// UINT16_MAX
+void ps_scope(ps_t ps, uint16_t* results);
+// Returns the first line that is accessed, or (-1) if no monitored line
+// was accessed before running through all iterations
+int ps_prime_and_scope(ps_t ps, int iterations);
+
+void ps_release(ps_t ps);
+
+#endif // __PS_H__
diff --git a/mastik/pt.h b/mastik/pt.h
new file mode 100644
index 0000000..a566d4e
--- /dev/null
+++ b/mastik/pt.h
@@ -0,0 +1,27 @@
+#ifndef __PRIME_TIME_H__
+#define __PRIME_TIME_H__ 1
+
+#include
+
+typedef struct prime_time* pt_t;
+
+// Number of patterns returned by generate_prime_patterns()
+#define PT_RESULTS_COUNT 100
+struct pt_results {
+  ppattern_t patterns[PT_RESULTS_COUNT];
+  // Measured EVCr of each pattern, as a fraction in [0, 1]
+  float evcrs[PT_RESULTS_COUNT];
+  // Average cycle count of one application of each pattern
+  int cycles[PT_RESULTS_COUNT];
+};
+typedef struct pt_results* pt_results_t;
+
+// Returns NULL on failure. If `mm` is NULL a private memory manager is
+// created and released together with the handle.
+pt_t pt_prepare(l3info_t l3info, mm_t mm);
+void pt_release(pt_t pt);
+
+// Results have to be free()-ed
+pt_results_t generate_prime_patterns(pt_t patterns);
+
+#endif // __PRIME_TIME_H__
diff --git a/src/Makefile.in b/src/Makefile.in
index 578509c..01fab01 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -10,6 +10,8 @@ LIBSRCS= \
 	lx.c \
 	mm.c \
 	pda.c \
+	ps.c \
+	pt.c \
 	util.c \
 	symbol.c \
 	synctrace.c \
diff --git a/src/ps.c b/src/ps.c
new file mode 100644
index 0000000..d0480ed
--- /dev/null
+++ b/src/ps.c
@@ -0,0 +1,269 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "vlist.h"
+#include "mm-impl.h"
+#include "mastik/impl.h"
+
+/*
+ * Builds a prime pattern from the first `length` entries of `pattern`.
+ *
+ * Every entry is either PATTERN_TARGET or an offset; the largest offset is
+ * stored as the pattern's width. `length` must not exceed PATTERN_CAPACITY.
+ * Unused pattern slots are zeroed.
+ */
+ppattern_t construct_pattern(uint8_t repeat, uint8_t stride, uint8_t* pattern, uint8_t length) {
+  assert(length <= PATTERN_CAPACITY);
+
+  ppattern_t pat = {0};
+  pat.length = length;
+  pat.repeat = repeat;
+  pat.stride = stride;
+
+  for(int i = 0; i < length; i++) {
+    pat.pattern[i] = pattern[i];
+
+    if(pattern[i] != PATTERN_TARGET && pattern[i] > pat.width) {
+      pat.width = pattern[i];
+    }
+  }
+
+  return pat;
+}
+
+// Prints the pattern as R<repeat>_S<stride>_P<entries>; target entries are
+// printed as 'S', offsets as decimal numbers. No newline is emitted.
+void dump_ppattern(ppattern_t pat) {
+  printf("R%d_S%d_P", pat.repeat, pat.stride);
+  for(int i = 0; i < pat.length; i++) {
+    if(pat.pattern[i] == PATTERN_TARGET) {
+      printf("S");
+    } else {
+      printf("%d", pat.pattern[i]);
+    }
+  }
+}
+
+/*
+ * Performs the accesses described by `pattern` on `evset`.
+ *
+ * The pattern is applied `repeat` times; each time the base index i runs
+ * from 0 up to (assoc - width) in steps of `stride`. PATTERN_TARGET entries
+ * access element 0 of `evset`, other entries access element i + offset.
+ */
+void access_pattern(vlist_t evset, int assoc, ppattern_t pattern) {
+  // A stride of zero would never advance i, so treat it as one.
+  int step = pattern.stride > 0 ? pattern.stride : 1;
+
+  for(int r = 0; r < pattern.repeat; r++) {
+    for(int i = 0; i < assoc - pattern.width; i += step) {
+      for(int j = 0; j < pattern.length; j++) {
+        int a = pattern.pattern[j];
+        if(a == PATTERN_TARGET) {
+          memaccess(vl_get(evset, 0));
+        } else {
+          assert(a <= pattern.width);
+          memaccess(vl_get(evset, i + a));
+        }
+      }
+    }
+  }
+}
+
+
+struct ps {
+  ppattern_t prime_pattern;
+
+  vlist_t *monitored_evsets;
+  int nmonitored;
+  int *monitoredset;
+  uint32_t *monitoredbitmap;
+
+  size_t totalsets;
+  int ngroups;
+  int groupsize;
+  vlist_t *groups;
+
+  struct l3info l3info;
+
+  mm_t mm;
+  uint8_t internalmm;
+};
+
+/*
+ * Allocates a Prime+Scope handle that primes with `prime_pattern`.
+ *
+ * `l3info` may be NULL. If `mm` is NULL a memory manager is created here
+ * and released again by ps_release(). Returns NULL on failure.
+ */
+ps_t ps_prepare(ppattern_t prime_pattern, l3info_t l3info, mm_t mm) {
+  // Setup
+  ps_t ps = (ps_t)calloc(1, sizeof(struct ps));
+  if (ps == NULL)
+    return NULL;
+
+  ps->prime_pattern = prime_pattern;
+
+  if (l3info != NULL)
+    bcopy(l3info, &ps->l3info, sizeof(struct l3info));
+  fillL3Info(&ps->l3info);
+
+  ps->mm = mm;
+  if (ps->mm == NULL) {
+    ps->mm = mm_prepare(NULL, NULL, (lxinfo_t)l3info);
+    ps->internalmm = 1;
+  }
+
+  if (ps->mm == NULL || !mm_initialisel3(ps->mm))
+    goto fail;
+
+  ps->ngroups = ps->mm->l3ngroups;
+  ps->groupsize = ps->mm->l3groupsize;
+
+  // Allocate monitored set info
+  ps->totalsets = ps->ngroups * ps->groupsize;
+  ps->monitoredbitmap = (uint32_t *)calloc((ps->totalsets/32) + 1, sizeof(uint32_t));
+  ps->monitoredset = (int *)malloc(ps->totalsets * sizeof(int));
+  ps->monitored_evsets = (vlist_t *)malloc(ps->totalsets * sizeof(vlist_t));
+  if (ps->monitoredbitmap == NULL || ps->monitoredset == NULL || ps->monitored_evsets == NULL)
+    goto fail;
+  ps->nmonitored = 0;
+
+  return ps;
+
+fail:
+  // Undo everything acquired so far; free(NULL) is a no-op.
+  free(ps->monitoredbitmap);
+  free(ps->monitoredset);
+  free(ps->monitored_evsets);
+  if (ps->internalmm && ps->mm != NULL)
+    mm_release(ps->mm);
+  free(ps);
+  return NULL;
+}
+
+
+// Starts monitoring `line`. Returns 1 on success, 0 if `line` is out of
+// range or already monitored.
+int ps_monitor(ps_t ps, int line) {
+  if(line < 0 || line >= ps->totalsets)
+    return 0;
+  if (IS_MONITORED(ps->monitoredbitmap, line))
+    return 0;
+
+  int associativity = ps->l3info.associativity;
+
+  vlist_t vl = vl_new();
+  _mm_requestlines(ps->mm, L3, line, associativity, vl);
+
+  ps->monitoredset[ps->nmonitored] = line;
+  ps->monitored_evsets[ps->nmonitored] = vl;
+  SET_MONITORED(ps->monitoredbitmap, line);
+  ps->nmonitored++;
+  return 1;
+}
+
+// Stops monitoring `line`, passing its list to _mm_returnlines().
+// Returns 1 on success, 0 if `line` is out of range or not monitored.
+int ps_unmonitor(ps_t ps, int line) {
+  if (line < 0 || line >= ps->totalsets)
+    return 0;
+  if (!IS_MONITORED(ps->monitoredbitmap, line))
+    return 0;
+  UNSET_MONITORED(ps->monitoredbitmap, line);
+  for (int i = 0; i < ps->nmonitored; i++)
+    if (ps->monitoredset[i] == line) {
+      --ps->nmonitored;
+      ps->monitoredset[i] = ps->monitoredset[ps->nmonitored];
+
+      _mm_returnlines(ps->mm, ps->monitored_evsets[i]);
+      vl_free(ps->monitored_evsets[i]);
+
+      // Fill the hole with the last entry to keep the arrays dense
+      ps->monitored_evsets[i] = ps->monitored_evsets[ps->nmonitored];
+
+      break;
+    }
+  return 1;
+}
+
+// Stops monitoring all lines.
+void ps_unmonitorall(ps_t ps) {
+  // The bitmap holds totalsets/32 + 1 words (see ps_prepare); clear all
+  // of them so that no stale bit survives in the last word.
+  for (size_t i = 0; i <= ps->totalsets / 32; i++)
+    ps->monitoredbitmap[i] = 0;
+  for (int i = 0; i < ps->nmonitored; i++) {
+    _mm_returnlines(ps->mm, ps->monitored_evsets[i]);
+    vl_free(ps->monitored_evsets[i]);
+  }
+  ps->nmonitored = 0;
+}
+
+// Copies up to `nlines` monitored lines into `lines`. Always returns the
+// total number of monitored lines.
+int ps_getmonitoredset(ps_t ps, int *lines, int nlines) {
+  if (lines == NULL || nlines <= 0)
+    return ps->nmonitored;
+  if (nlines > ps->nmonitored)
+    nlines = ps->nmonitored;
+  bcopy(ps->monitoredset, lines, nlines * sizeof(int));
+  return ps->nmonitored;
+}
+
+// Applies the prime pattern to the eviction set of every monitored line.
+void ps_prime(ps_t ps) {
+  for (int i = 0; i < ps->nmonitored; i++) {
+    access_pattern(ps->monitored_evsets[i], ps->l3info.associativity, ps->prime_pattern);
+  }
+}
+
+// Times one access to element 0 of every monitored eviction set.
+// `results` must have room for one entry per monitored line; times are
+// saturated at UINT16_MAX.
+void ps_scope(ps_t ps, uint16_t* results) {
+  for (int i = 0; i < ps->nmonitored; i++) {
+    int t = memaccesstime(vl_get(ps->monitored_evsets[i], 0));
+    results[i] = t > UINT16_MAX ? UINT16_MAX : t;
+  }
+}
+
+// Primes once, then scopes all monitored lines up to `iterations` times.
+// Returns the first monitored line whose access time exceeds
+// L3_THRESHOLD, or -1 if none did.
+int ps_prime_and_scope(ps_t ps, int iterations) {
+  ps_prime(ps);
+
+  for(int it = 0; it < iterations; it++) {
+    for (int i = 0; i < ps->nmonitored; i++) {
+      int t = memaccesstime(vl_get(ps->monitored_evsets[i], 0));
+      if(t > L3_THRESHOLD) {
+        return ps->monitoredset[i];
+      }
+    }
+  }
+
+  return -1;
+}
+
+// Releases the handle and everything it owns. Accepts NULL.
+void ps_release(ps_t ps) {
+  if (ps == NULL)
+    return;
+  free(ps->monitoredbitmap);
+  free(ps->monitoredset);
+  for(int i = 0; i < ps->nmonitored; i++) {
+    _mm_returnlines(ps->mm, ps->monitored_evsets[i]);
+    vl_free(ps->monitored_evsets[i]);
+  }
+  free(ps->monitored_evsets);
+  if (ps->internalmm)
+    mm_release(ps->mm);
+  bzero(ps, sizeof(struct ps));
+  free(ps);
+}
diff --git a/src/pt.c b/src/pt.c
new file mode 100644
index 0000000..5d329a7
--- /dev/null
+++ b/src/pt.c
@@ -0,0 +1,455 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include "vlist.h"
+#include "mm-impl.h"
+#include "mastik/impl.h"
+
+#define DEBUG 1
+
+#define PATTERN_LIST_CAPACITY 50000
+struct prime_time {
+  ppattern_t* patterns;
+  int pattern_count;
+
+  vlist_t evset;
+
+  struct l3info l3info;
+  mm_t mm;
+  uint8_t internalmm;
+};
+
+/*
+ * Allocates a PrimeTime handle with an empty candidate list and an
+ * eviction set for L3 line 31.
+ *
+ * `l3info` may be NULL. If `mm` is NULL a memory manager is created here
+ * and released again by pt_release(). Returns NULL on failure.
+ */
+pt_t pt_prepare(l3info_t l3info, mm_t mm) {
+  // Setup
+  pt_t pt = (pt_t) calloc(1, sizeof(struct prime_time));
+  if (pt == NULL)
+    return NULL;
+
+  pt->patterns = (ppattern_t *)malloc(PATTERN_LIST_CAPACITY * sizeof(ppattern_t));
+  if (pt->patterns == NULL)
+    goto fail;
+  pt->pattern_count = 0;
+
+  if (l3info != NULL)
+    bcopy(l3info, &pt->l3info, sizeof(struct l3info));
+  fillL3Info(&pt->l3info);
+
+  pt->mm = mm;
+  if (pt->mm == NULL) {
+    pt->mm = mm_prepare(NULL, NULL, (lxinfo_t)l3info);
+    pt->internalmm = 1;
+  }
+
+  if (pt->mm == NULL || !mm_initialisel3(pt->mm))
+    goto fail;
+
+  pt->evset = vl_new();
+  _mm_requestlines(pt->mm, L3, 31, pt->l3info.associativity, pt->evset);
+
+  return pt;
+
+fail:
+  // Undo everything acquired so far; free(NULL) is a no-op.
+  free(pt->patterns);
+  if (pt->internalmm && pt->mm != NULL)
+    mm_release(pt->mm);
+  free(pt);
+  return NULL;
+}
+
+// Releases the handle and everything it owns. Accepts NULL.
+void pt_release(pt_t pt) {
+  if (pt == NULL)
+    return;
+  free(pt->patterns);
+  _mm_returnlines(pt->mm, pt->evset);
+  vl_free(pt->evset);
+  if (pt->internalmm)
+    mm_release(pt->mm);
+  bzero(pt, sizeof(struct prime_time));
+  free(pt);
+}
+
+// Appends `pattern` to the candidate list (capacity checked with assert).
+void patterns_push(pt_t patterns, ppattern_t pattern) {
+  assert(patterns->pattern_count < PATTERN_LIST_CAPACITY);
+  patterns->patterns[patterns->pattern_count++] = pattern;
+}
+
+/*
+ * Measures the EVCr of `pattern` over `test_count` trials.
+ *
+ * A trial counts when, after applying the pattern, element 0 of the
+ * eviction set is fast, and slow again after one access to element
+ * `associativity`. The fraction of such trials is stored in *evcr_result.
+ */
+void test_pattern(pt_t patterns, ppattern_t pattern, int test_count, float* evcr_result) {
+  int evc_count = 0;
+  vlist_t evset = patterns->evset;
+  int associativity = patterns->l3info.associativity;
+
+  for(int t = 0; t < test_count; t++) {
+    access_pattern(evset, associativity, pattern);
+
+    uint32_t t_l1 = memaccesstime(vl_get(evset, 0));
+
+    // NOTE(review): pt_prepare() requests `associativity` lines, so this
+    // index looks one past the requested count - confirm that
+    // _mm_requestlines() really provides an element here.
+    memaccess(vl_get(evset, associativity));
+    uint32_t t_evict = memaccesstime(vl_get(evset, 0));
+
+    if(t_l1 < L3_THRESHOLD && t_evict > L3_THRESHOLD)
+      evc_count++;
+  }
+  *evcr_result = (float)evc_count/test_count;
+}
+
+// Runs test_pattern() on every candidate; evcr_results[i] receives the
+// result for candidate i.
+void test_patterns(pt_t patterns, int test_count, float* evcr_results) {
+  for(int i = 0; i < patterns->pattern_count; i++) {
+    test_pattern(patterns, patterns->patterns[i], test_count, evcr_results + i);
+#ifdef DEBUG
+    if(i%500 == 0 || (i%50 == 0 && test_count > 10000)) {
+      printf("test_patterns %d/%d\n", i, patterns->pattern_count);
+    }
+#endif // DEBUG
+  }
+}
+
+// Stores in *time_result the average number of cycles (rdtscp64) that one
+// application of `pattern` takes, measured over `test_count` applications.
+void time_pattern(pt_t patterns, ppattern_t pattern, int test_count, int* time_result) {
+  uint64_t t_start = rdtscp64();
+  for(int t = 0; t < test_count; t++) {
+    access_pattern(patterns->evset, patterns->l3info.associativity, pattern);
+  }
+  uint64_t t_end = rdtscp64();
+  // The total must fit in an int
+  assert((uint64_t)(int)(t_end - t_start) == t_end - t_start);
+  *time_result = (int)(t_end - t_start) / test_count;
+}
+
+// Runs time_pattern() on every candidate.
+void time_patterns(pt_t patterns, int test_count, int* time_results) {
+  for(int i = 0; i < patterns->pattern_count; i++) {
+    time_pattern(patterns, patterns->patterns[i], test_count, time_results + i);
+  }
+}
+
+// Replaces the candidate list with the candidates at indices[0..count-1],
+// in that order. `count` is clamped to the current number of candidates.
+void filter_patterns(pt_t patterns, int* indices, int count) {
+  // `indices` has only pattern_count entries; never read past them.
+  if(count > patterns->pattern_count)
+    count = patterns->pattern_count;
+
+  ppattern_t* filtered = (ppattern_t*) malloc(sizeof(ppattern_t) * count);
+  assert(filtered != NULL || count == 0);
+  for(int i = 0; i < count; i++) {
+    filtered[i] = patterns->patterns[indices[i]];
+  }
+  for(int i = 0; i < count; i++) {
+    patterns->patterns[i] = filtered[i];
+  }
+  free(filtered);
+  patterns->pattern_count = count;
+}
+
+// Scratch pointers for the qsort() comparators below, which sort arrays
+// of indices into these result arrays.
+float* evcr_results;
+// Orders indices by descending EVCr
+int evcr_compare(const void* a, const void* b) {
+  int a_idx = *(int*)a;
+  int b_idx = *(int*)b;
+  float a_evcr = evcr_results[a_idx];
+  float b_evcr = evcr_results[b_idx];
+  if(a_evcr > b_evcr) return -1;
+  if(a_evcr < b_evcr) return 1;
+  return 0;
+}
+int* time_results;
+// Orders indices by ascending time
+int time_compare(const void* a, const void* b) {
+  int a_idx = *(int*)a;
+  int b_idx = *(int*)b;
+  int a_time = time_results[a_idx];
+  int b_time = time_results[b_idx];
+  // Compare instead of subtracting, which could overflow
+  return (a_time > b_time) - (a_time < b_time);
+}
+
+// Measures the EVCr of every candidate (test_count trials each) and keeps
+// the filter_count candidates with the highest EVCr.
+void filter_evcr(pt_t patterns, int test_count, int filter_count) {
+  evcr_results = malloc(sizeof(float)*patterns->pattern_count);
+  assert(evcr_results != NULL);
+  test_patterns(patterns, test_count, evcr_results);
+
+  int* indices = malloc(sizeof(int)*patterns->pattern_count);
+  assert(indices != NULL);
+  for(int i = 0; i < patterns->pattern_count; i++) {
+    indices[i] = i;
+  }
+  qsort(indices, patterns->pattern_count, sizeof(int), evcr_compare);
+
+#ifdef DEBUG
+  // The summary reads the top three entries
+  if(patterns->pattern_count >= 3)
+    printf("Top EVCrs: %.3f%% %.3f%% %.3f%% (%.3f%%)\n", 100*evcr_results[indices[0]], 100*evcr_results[indices[1]], 100*evcr_results[indices[2]], 100*evcr_results[indices[patterns->pattern_count - 1]]);
+#endif // DEBUG
+
+  free(evcr_results);
+
+  filter_patterns(patterns, indices, filter_count);
+  free(indices);
+}
+
+// Times every candidate (test_count applications each) and keeps the
+// filter_count fastest candidates.
+void filter_time(pt_t patterns, int test_count, int filter_count) {
+  time_results = malloc(sizeof(int)*patterns->pattern_count);
+  assert(time_results != NULL);
+  time_patterns(patterns, test_count, time_results);
+
+#ifdef DEBUG
+  if(patterns->pattern_count >= 3)
+    printf("Top EVCrs times: %d %d %d (%d)\n", time_results[0], time_results[1], time_results[2], time_results[patterns->pattern_count - 1]);
+#endif // DEBUG
+
+  int* indices = malloc(sizeof(int)*patterns->pattern_count);
+  assert(indices != NULL);
+  for(int i = 0; i < patterns->pattern_count; i++) {
+    indices[i] = i;
+  }
+  qsort(indices, patterns->pattern_count, sizeof(int), time_compare);
+#ifdef DEBUG
+  if(patterns->pattern_count >= 3)
+    printf("Debug times: %d %d %d (%d)\n", time_results[indices[0]], time_results[indices[1]], time_results[indices[2]], time_results[indices[patterns->pattern_count - 1]]);
+#endif // DEBUG
+  free(time_results);
+
+  filter_patterns(patterns, indices, filter_count);
+  free(indices);
+}
+
+// For every current candidate, appends a mutated copy in which a random
+// sub-sequence is duplicated directly after itself. With `less` set, each
+// candidate is skipped with probability 1/2. Candidates whose mutation
+// would exceed PATTERN_CAPACITY are skipped.
+void mutate_repeat_subpatterns(pt_t patterns, bool less) {
+  int cur_count = patterns->pattern_count;
+  for(int i = 0; i < cur_count; i++) {
+    if(less && (random() % 2) == 0)
+      continue;
+    ppattern_t pat = patterns->patterns[i];
+    // An empty pattern has nothing to repeat
+    if(pat.length == 0)
+      continue;
+    int start = random() % pat.length;
+    int sub_len = 1 + random() % (pat.length - start);
+
+    // Check before writing: the copies below would otherwise run past the
+    // end of mut.pattern.
+    if(pat.length + sub_len > PATTERN_CAPACITY)
+      continue;
+
+    ppattern_t mut = pat;
+    for(int k = 0; k < sub_len; k++) {
+      mut.pattern[start + sub_len + k] = pat.pattern[start + k];
+    }
+    for(int k = 0; k < pat.length - sub_len - start; k++){
+      mut.pattern[start + sub_len + sub_len + k] = pat.pattern[start + sub_len + k];
+    }
+    mut.length += sub_len;
+
+    patterns_push(patterns, mut);
+  }
+}
+
+// Shuffles arr[0..size-1] in place (Fisher-Yates, using rand()).
+void shuffle(uint8_t* arr, int size) {
+  if(size > 1) {
+    for(int i = size - 1; i > 0; i--) {
+      int j = rand() % (i + 1);
+      uint8_t temp = arr[j];
+      arr[j] = arr[i];
+      arr[i] = temp;
+    }
+  }
+}
+
+// For every current candidate, appends shuffled copies: none for length 1,
+// one for length 2, four for length 3 and `permutes` otherwise. With
+// `less` set, each candidate is skipped with probability 1/2.
+void mutate_permute_order(pt_t patterns, int permutes, bool less) {
+  int cur_count = patterns->pattern_count;
+  for(int i = 0; i < cur_count; i++) {
+    if(less && (random() % 2) == 0)
+      continue;
+
+    int pat_len = patterns->patterns[i].length;
+    int num_perms = 1;
+    if(pat_len == 1) {
+      continue;
+    } else if(pat_len == 2) {
+      num_perms = 1;
+    } else if(pat_len == 3) {
+      num_perms = 4;
+    } else {
+      num_perms = permutes;
+    }
+
+    for(int j = 0; j < num_perms; j++){
+      ppattern_t pat = patterns->patterns[i];
+      shuffle(pat.pattern, pat_len);
+      patterns_push(patterns, pat);
+    }
+  }
+}
+
+// For every current candidate, appends up to four copies with one to
+// three PATTERN_TARGET entries inserted at random positions. With `less`
+// set, each candidate is skipped with probability 1/2. Insertion stops
+// once a pattern holds PATTERN_CAPACITY entries.
+void mutate_interleave_target(pt_t patterns, bool less) {
+  int cur_count = patterns->pattern_count;
+  for(int i = 0; i < cur_count; i++) {
+    if(less && (random() % 2) == 0)
+      continue;
+
+    int pat_len = patterns->patterns[i].length;
+    int max_iter = 4;
+    if(pat_len == 1) {
+      max_iter = 1;
+    } else if(pat_len == 2) {
+      max_iter = 2;
+    }
+    for(int j = 0; j < max_iter; j++) {
+      int num_target = 1;
+      if(pat_len > 1) {
+        if(random() % 2 == 0)
+          num_target++;
+        if(pat_len > 3) {
+          if(random() % 8 == 0)
+            num_target++;
+        }
+      }
+
+      ppattern_t orig = patterns->patterns[i];
+      ppattern_t pat = orig;
+      for(int k = 0; k < num_target; k++) {
+        // No room left for another entry
+        if(pat.length >= PATTERN_CAPACITY)
+          break;
+        int idx = random() % (pat.length + 1);
+        for(int a = idx; a < pat.length; a++) {
+          pat.pattern[a+1] = orig.pattern[a];
+        }
+        pat.pattern[idx] = PATTERN_TARGET;
+        pat.length++;
+
+        orig = pat;
+      }
+      // Only keep the mutation if a target was actually inserted
+      if(pat.length > pat_len)
+        patterns_push(patterns, pat);
+    }
+  }
+}
+
+// Copies the PT_RESULTS_COUNT remaining candidates into a freshly
+// malloc()-ed result and measures their final EVCr and cycle counts.
+// Returns NULL if the allocation fails.
+pt_results_t produce_results(pt_t patterns) {
+  assert(patterns->pattern_count == PT_RESULTS_COUNT);
+
+  pt_results_t results = (pt_results_t)malloc(sizeof(struct pt_results));
+  if(results == NULL)
+    return NULL;
+  memcpy(results->patterns, patterns->patterns, PT_RESULTS_COUNT * sizeof(ppattern_t));
+
+  test_patterns(patterns, 1000000, results->evcrs);
+  time_patterns(patterns, 100, results->cycles);
+
+  return results;
+}
+
+// "PrimeTime"
+pt_results_t generate_prime_patterns(pt_t patterns) {
+  // 1. Generate initial patterns
+  ppattern_t template = {0};
+  for(int i = 0; i < 16; i++){
+    template.pattern[i] = i;
+  }
+  for(int r = 1; r <= 8; r++) {
+    template.repeat = r;
+    for(int s = 1; s <= 4; s++) {
+      template.stride = s;
+      for(int w = 0; w < 4; w++){
+        template.width = w;
+        template.length = w+1;
+        patterns_push(patterns, template);
+      }
+    }
+  }
+
+#ifdef DEBUG
+  printf("%d initial patterns\n", patterns->pattern_count);
+#endif // DEBUG
+
+  // 2. Mutation:
+  // - Repeated access to (sub-)patterns
+  // - Permutation of access orders
+  // - Interleaving of target accesses
+  mutate_repeat_subpatterns(patterns, false);
+  mutate_permute_order(patterns, 16, false);
+  mutate_interleave_target(patterns, false);
+
+#ifdef DEBUG
+  printf("%d initial mutated patterns\n", patterns->pattern_count);
+#endif // DEBUG
+
+  // 3. Measurements: Test 10'000 times
+  // - Filter to 150 highest EVCr
+  // - Filter to 100 fastest
+  filter_evcr(patterns, 10000, 150);
+  filter_time(patterns, 20, 100);
+
+#ifdef DEBUG
+  printf("Filtered to top %d patterns\n", patterns->pattern_count);
+#endif // DEBUG
+
+  // 4. Further mutations of candidates
+  mutate_repeat_subpatterns(patterns, true);
+  mutate_permute_order(patterns, 4, true);
+  mutate_interleave_target(patterns, true);
+
+#ifdef DEBUG
+  printf("%d further mutated patterns\n", patterns->pattern_count);
+#endif // DEBUG
+
+  // 5. Measurements: Test 100'000 times
+  // - Filter to 150 Highest EVCr
+  // - Filter to 100 fastest
+  filter_evcr(patterns, 100000, 150);
+  filter_time(patterns, 100, PT_RESULTS_COUNT);
+
+  return produce_results(patterns);
+}
\ No newline at end of file