Commit

Improve accuracy of throughput benchmark and log scale chart output (#…
ogxd authored Nov 11, 2024
1 parent 08258ed commit 5de7fbd
Showing 3 changed files with 27 additions and 32 deletions.
2 changes: 1 addition & 1 deletion benches/hashset.rs
@@ -74,7 +74,7 @@ fn iterate<T, B>(b: &mut criterion::Bencher<'_>, value: &T, set: &mut HashSet<T,
set.insert(T::default());
b.iter(|| {
// We intentionally check on a value that is not present, otherwise there will be an
-// additional equality check perform, diluting the hashing time and biasing the benchmark.
+// additional equality check performed, diluting the hashing time and biasing the benchmark.
set.contains(criterion::black_box(value))
});
}
51 changes: 23 additions & 28 deletions benches/throughput/main.rs
@@ -2,8 +2,8 @@ mod result_processor;

use result_processor::*;

-use std::hash::Hasher;
use std::hint::black_box;
+use std::hash::Hasher;
use std::time::{Instant, Duration};
use std::alloc::{alloc, dealloc, Layout};
use std::slice;
@@ -12,9 +12,8 @@ use rand::Rng;

use gxhash::*;

-const ITERATIONS: u32 = 1000;
+const ITERATIONS: usize = 1000;
const MAX_RUN_DURATION: Duration = Duration::from_millis(1000);
-const FORCE_NO_INLINING: bool = false;

fn main() {
let mut rng = rand::thread_rng();
@@ -49,7 +48,7 @@ fn main() {
});

// AHash
-let ahash_hasher = ahash::RandomState::with_seeds(0, 0, 0, 0);
+let ahash_hasher = ahash::RandomState::with_seed(42);
benchmark(processor.as_mut(), slice, "AHash", |data: &[u8], _: i32| -> u64 {
ahash_hasher.hash_one(data)
});
@@ -91,7 +90,7 @@ fn main() {
}

fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str, delegate: F)
-where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize>
+where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize> + Clone + Copy
{
processor.on_start(name);
for i in 2.. {
@@ -101,22 +100,20 @@ fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str,
}

// Warmup
-black_box(time(ITERATIONS, &|| delegate(&data[..len], S::default())));
+time::<_, _, ITERATIONS>(&delegate, &data[..len], S::default());

let mut durations_s = vec![];
let now = Instant::now();
while now.elapsed() < MAX_RUN_DURATION {
// Make seed unpredictable to prevent optimizations
-let seed = S::try_from(now.elapsed().as_nanos())
-    .unwrap_or_else(|_| panic!("Something went horribly wrong!"));
+let seed = S::try_from(now.elapsed().as_nanos()).unwrap_or_else(|_| panic!());
// Offset slice by an unpredictable amount to prevent optimization (pre caching)
// and make the benchmark use both aligned and unaligned data
-let start = S::try_into(seed)
-    .unwrap_or_else(|_| panic!("Something went horribly wrong!")) & 0xFF;
+let start = S::try_into(seed).unwrap_or_else(|_| panic!()) & 0xFF;
let end = start + len;
let slice = &data[start..end];
// Execute method for a new iterations
-let duration = time(ITERATIONS, &|| delegate(slice, S::default()));
+let duration = time::<_, _, ITERATIONS>(&delegate, slice, seed);
durations_s.push(duration.as_secs_f64());
}
let average_duration_s = calculate_average_without_outliers(&mut durations_s);
@@ -127,30 +124,28 @@
processor.on_end();
}

-#[inline(never)]
-fn time<F>(iterations: u32, delegate: &F) -> Duration
-where F: Fn() -> u64
+fn time<F, S, const N: usize>(delegate: F, slice: &[u8], seed: S) -> Duration
+where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize> + Clone + Copy
{
-let now = Instant::now();
-// Bench the same way to what is done in criterion.rs
+// Time measurement similar to what is done in criterion.rs
// https://github.com/bheisler/criterion.rs/blob/e1a8c9ab2104fbf2d15f700d0038b2675054a2c8/src/bencher.rs#L87
-for _ in 0..iterations {
-    if FORCE_NO_INLINING {
-        black_box(execute_noinlining(delegate));
-    } else {
-        black_box(delegate());
-    }
-}
+let now = Instant::now();
+iter::<F, S, N>(delegate, slice, seed);
now.elapsed()
}

-// Some algorithm are more likely to be inlined than others.
-// This puts then all at the same level. But is it fair?
+// The content might be inlined, but the function itself should not be inlined
+// This favors benchmarked methods with small byte code size, which is more realistic
#[inline(never)]
-fn execute_noinlining<F>(delegate: &F) -> u64
-where F: Fn() -> u64
+fn iter<F, S, const N: usize>(delegate: F, slice: &[u8], seed: S)
+where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize> + Clone + Copy
{
-delegate()
+for _ in 0..N {
+    // Black box the result to prevent the compiler from optimizing the operation away
+    // Black box the slice to prevent the compiler to assume the slice is constant
+    // We don't black box the seed because it's likely to be constant in most real-world usage scenarios
+    black_box(delegate(black_box(slice), seed));
+}
}

// Outliers are inevitable, especially on a low number of iterations
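Read together, the hunks above drop the old FORCE_NO_INLINING switch in favour of a const-generic iteration count and a #[inline(never)] inner loop. A minimal, self-contained sketch of that shape (not the repository's exact code: the generic seed type S is fixed to u64 here and the delegate is a placeholder hasher):

use std::hint::black_box;
use std::time::{Duration, Instant};

const ITERATIONS: usize = 1000;

// Outer function: only starts the clock and reads it back.
fn time<F, const N: usize>(delegate: F, slice: &[u8], seed: u64) -> Duration
where F: Fn(&[u8], u64) -> u64
{
    let now = Instant::now();
    iter::<F, N>(delegate, slice, seed);
    now.elapsed()
}

// The loop body may be inlined, but the loop itself never is, so every
// benchmarked hasher pays the same call overhead regardless of its code size.
#[inline(never)]
fn iter<F, const N: usize>(delegate: F, slice: &[u8], seed: u64)
where F: Fn(&[u8], u64) -> u64
{
    for _ in 0..N {
        // black_box the slice (input not assumed constant) and the result
        // (work not optimized away); the seed is left as-is, since seeds are
        // usually constants in real-world usage.
        black_box(delegate(black_box(slice), seed));
    }
}

fn main() {
    // Placeholder delegate standing in for gxhash/ahash/etc.
    let fake_hash = |data: &[u8], seed: u64| {
        data.iter().fold(seed, |h, &b| h.wrapping_mul(31).wrapping_add(b as u64))
    };
    let data = vec![0xABu8; 1024];
    println!("{:?}", time::<_, ITERATIONS>(fake_hash, &data, 42));
}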
6 changes: 3 additions & 3 deletions benches/throughput/result_processor.rs
@@ -132,7 +132,7 @@ impl ResultProcessor for OutputPlot {
let x_min = self.series.iter().next().unwrap().1.iter().map(|(x, _)| *x as u32).min().unwrap();
let x_max = self.series.iter().next().unwrap().1.iter().map(|(x, _)| *x as u32).max().unwrap();

-let y_min = 0u32;
+let y_min = self.series.iter().flat_map(|inner_map| inner_map.1.iter()).map(|(_, y)| (0.95 * *y) as u32).min().unwrap();
let y_max = self.series.iter().flat_map(|inner_map| inner_map.1.iter()).map(|(_, y)| (1.05 * *y) as u32).max().unwrap();

let mut chart = ChartBuilder::on(&canvas)
@@ -144,8 +144,8 @@
(x_min..x_max)
.log_scale()
.with_key_points(self.series.iter().next().unwrap().1.iter().map(|(x, _)| *x as u32).collect::<Vec<u32>>()),
-y_min..y_max
-//.log_scale(),
+(y_min..y_max)
+.log_scale(),
).unwrap();

chart
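The change above pads the y range by 5% on each side and switches the y axis to a log scale to match the already-log-scaled x axis. For reference, a standalone plotters sketch of a log-log chart in this style (made-up data points and output file name, not the benchmark's actual OutputPlot code):

use plotters::prelude::*;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let canvas = SVGBackend::new("throughput.svg", (800, 600)).into_drawing_area();
    canvas.fill(&WHITE)?;

    // Hypothetical (input length, throughput) points; the real data comes from the benchmark runs.
    let series: Vec<(u32, u32)> = vec![(4, 800), (64, 4_000), (1_024, 9_000), (16_384, 12_000)];

    let x_min = 4u32;
    let x_max = 16_384u32;
    // Pad the y range slightly, as the diff does with the 0.95/1.05 factors.
    let y_min = (0.95 * 800.0) as u32;
    let y_max = (1.05 * 12_000.0) as u32;

    let mut chart = ChartBuilder::on(&canvas)
        .margin(10)
        .x_label_area_size(40)
        .y_label_area_size(60)
        // Log scale on both axes: input sizes span orders of magnitude, and with a
        // linear y axis the slower hashers would be squashed against the x axis.
        .build_cartesian_2d((x_min..x_max).log_scale(), (y_min..y_max).log_scale())?;

    chart.configure_mesh().draw()?;
    chart.draw_series(LineSeries::new(series, &RED))?;

    canvas.present()?;
    Ok(())
}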