Skip to content

Commit

Permalink
bench fft: bring C++ code closer to Rust and use -fno-plt
Browse files Browse the repository at this point in the history
Note that -mtune=cascadelake only affects tuning (e.g. instruction
timing); it does not change the target architecture or enabled features.
  • Loading branch information
jedbrown committed Nov 15, 2024
1 parent 88feab6 commit 31fd712
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 13 deletions.
4 changes: 2 additions & 2 deletions enzyme/benchmarks/ReverseMode/fft/Makefile.make
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ $(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a: src/lib.rs Cargo.toml
cargo +enzyme rustc --release --lib --crate-type=staticlib

fft.o: fft.cpp $(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a
clang++ $(LOADCLANG) $(BENCH) -DCPP=1 -O3 -fno-math-errno $^ $(BENCHLINK) -lm -o $@
clang++ $(LOADCLANG) $(BENCH) -DCPP=1 -O3 -fno-math-errno -fno-plt -mtune=cascadelake -g $^ $(BENCHLINK) -lm -o $@

fftr.o: fft.cpp $(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a
clang++ $(LOADCLANG) $(BENCH) -O3 -fno-math-errno $^ $(BENCHLINK) -lm -o $@
clang++ $(LOADCLANG) $(BENCH) -O3 -fno-math-errno -fno-plt -mtune=cascadelake -g $^ $(BENCHLINK) -lm -o $@

results.json: fft.o fftr.o
numactl -C 1 ./fft.o 1048576 | tee results.json
Expand Down
25 changes: 14 additions & 11 deletions enzyme/benchmarks/ReverseMode/fft/fft.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ inline void swap(double *a, double *b) {
*b = temp;
}

static void recursiveApply(double *data, size_t N, int iSign) {
static void recursiveApply(double *__restrict data, size_t N, int iSign) {
if (N == 1)
return;
recursiveApply(data, N / 2, iSign);
Expand All @@ -39,17 +39,20 @@ static void recursiveApply(double *data, size_t N, int iSign) {
double wr = 1.0;
double wi = 0.0;

for (size_t ii = 0; ii < N / 2; ii++) {
size_t i = 2 * ii;
for (size_t i = 0; i < N; i += 2) {
size_t iN = i + N;

double tempr = data[iN] * wr - data[iN + 1] * wi;
double tempi = data[iN] * wi + data[iN + 1] * wr;

data[iN] = data[i] - tempr;
data[iN + 1] = data[i + 1] - tempi;
data[i] += tempr;
data[i + 1] += tempi;
double *__restrict ay = &data[i + 1];
double *__restrict ax = &data[i];
double *__restrict by = &data[iN + 1];
double *__restrict bx = &data[iN];

double tempr = *bx * wr - *by * wi;
double tempi = *bx * wi + *by * wr;

*bx = *ax - tempr;
*by = *ay - tempi;
*ax += tempr;
*ay += tempi;

wtemp = wr;
wr = wr * (wpr + 1.) - wi * wpi;
Expand Down

0 comments on commit 31fd712

Please sign in to comment.