Skip to content

Commit

Permalink
Much faster multithreaded approach
Browse files Browse the repository at this point in the history
  • Loading branch information
maneatingape committed Sep 8, 2024
1 parent 0b32d16 commit b7add2b
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 68 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ Performance is reasonable even on older hardware, for example a 2011 MacBook Pro
| 14 | [One-Time Pad](https://adventofcode.com/2016/day/14) | [Source](src/year2016/day14.rs) | 77000 |
| 15 | [Timing is Everything](https://adventofcode.com/2016/day/15) | [Source](src/year2016/day15.rs) | 1 |
| 16 | [Dragon Checksum](https://adventofcode.com/2016/day/16) | [Source](src/year2016/day16.rs) | 1 |
| 17 | [Two Steps Forward](https://adventofcode.com/2016/day/17) | [Source](src/year2016/day17.rs) | 14948 |
| 17 | [Two Steps Forward](https://adventofcode.com/2016/day/17) | [Source](src/year2016/day17.rs) | 3854 |
| 18 | [Like a Rogue](https://adventofcode.com/2016/day/18) | [Source](src/year2016/day18.rs) | 728 |
| 19 | [An Elephant Named Joseph](https://adventofcode.com/2016/day/19) | [Source](src/year2016/day19.rs) | 1 |
| 20 | [Firewall Rules](https://adventofcode.com/2016/day/20) | [Source](src/year2016/day20.rs) | 21 |
Expand Down
198 changes: 133 additions & 65 deletions src/year2016/day17.rs
Original file line number Diff line number Diff line change
@@ -1,89 +1,157 @@
//! # Two Steps Forward
//!
//! Brute force search over every possible path. As we need all paths (not just the shortest)
//! a DFS is faster than a BFS as we can reuse the same buffer to store the path so far
//! only adding or removing the last step.
//! Brute force search over every possible path. Work is parallelized over multiple threads.
//! Keeping each thread busy and spreading the work as evenly as possible is quite tricky. Some
//! paths can dead-end quickly while others can take the majority of exploration time.
//!
//! To solve this we implement a very simple version of
//! [work stealing](https://en.wikipedia.org/wiki/Work_stealing). Threads process paths locally,
//! stopping every now and then to return paths to a global queue. This allows other threads that
//! have run out of work to pick up new paths to process.
//!
//! The approach from "Waiting: Parking and Condition Variables" in the excellent book
//! [Rust Atomics and Locks](https://marabos.nl/atomics/) prevents idle threads from busy
//! looping on the mutex.
use crate::util::md5::*;
use crate::util::thread::*;
use std::sync::{Condvar, Mutex};

pub struct State {
path: Vec<u8>,
prefix: usize,
size: usize,
type Input = (String, usize);
type Item = (u8, u8, usize, Vec<u8>);

/// Work queue plus the best results seen so far. The same shape is used for the global
/// state kept behind the mutex and for each worker's private working copy.
struct State {
/// Paths still waiting to be explored. Used as a stack: items are pushed and popped at the end.
todo: Vec<Item>,
/// Shortest path found so far. Empty until some path reaches the bottom right room.
min: String,
/// Length of the longest path found so far.
max: usize,
}

pub fn parse(input: &str) -> State {
let bytes = input.trim().as_bytes();
let prefix = bytes.len();
let size = bytes.len();
let min = String::new();
let max = 0;
/// Data that is only accessed while holding the mutex.
struct Exclusive {
/// Global work queue together with the merged shortest and longest results.
global: State,
/// Number of work items handed to workers whose results have not been merged back yet.
/// An empty queue combined with a count of zero means the search is finished.
inflight: usize,
}

let mut path = vec![0; 1024];
path[..size].copy_from_slice(bytes);
/// State passed by reference to every worker thread.
struct Shared {
/// Length in bytes of the trimmed puzzle input. Path steps are stored directly after it.
prefix: usize,
/// Guards the global queue, the merged results and the in-flight counter.
mutex: Mutex<Exclusive>,
/// Idle workers wait on this. It is notified each time a worker returns items to the queue.
not_empty: Condvar,
}

pub fn parse(input: &str) -> Input {
// Initial starting position is the top left corner.
let input = input.trim().as_bytes();
let prefix = input.len();
let start = (0, 0, prefix, extend(input, prefix, 0));

// State shared between threads.
let global = State { todo: vec![start], min: String::new(), max: 0 };
let exclusive = Exclusive { global, inflight: 0 };
let shared = Shared { prefix, mutex: Mutex::new(exclusive), not_empty: Condvar::new() };

let mut state = State { path, prefix, size, min, max };
explore(&mut state, 0, 0);
state
// Search paths in parallel.
spawn(|| worker(&shared));

let global = shared.mutex.into_inner().unwrap().global;
(global.min, global.max)
}

pub fn part1(input: &State) -> &str {
&input.min
pub fn part1(input: &Input) -> &str {
&input.0
}

pub fn part2(input: &State) -> usize {
input.max
pub fn part2(input: &Input) -> usize {
input.1
}

fn explore(state: &mut State, x: u32, y: u32) {
// If we've reached the end then don't go any further.
if x == 3 && y == 3 {
let adjusted = state.size - state.prefix;
/// Process local work items, stopping every now and then to redistribute items back to global pool.
/// This prevents threads idling or hotspotting.
fn worker(shared: &Shared) {
let mut local = State { todo: Vec::new(), min: String::new(), max: 0 };

loop {
let mut exclusive = shared.mutex.lock().unwrap();
let item = loop {
// Pick up available work.
if let Some(item) = exclusive.global.todo.pop() {
exclusive.inflight += 1;
break item;
}
// If no work available and no other thread is doing anything, then we're done.
if exclusive.inflight == 0 {
return;
}
// Put thread to sleep until another thread notifies us that work is available.
// This avoids busy looping on the mutex.
exclusive = shared.not_empty.wait(exclusive).unwrap();
};

if state.min.is_empty() || adjusted < state.min.len() {
let steps = state.path[state.prefix..state.size].to_vec();
state.min = String::from_utf8(steps).unwrap();
// Drop mutex to release lock and allow other threads access.
drop(exclusive);

// Process local work items.
local.todo.push(item);
explore(shared, &mut local);

// Redistribute local work items back to the global queue. Update min and max paths.
let mut exclusive = shared.mutex.lock().unwrap();
let global = &mut exclusive.global;

global.todo.append(&mut local.todo);
if global.min.is_empty() || local.min.len() < global.min.len() {
global.min = local.min.clone();
}
state.max = state.max.max(adjusted);
global.max = global.max.max(local.max);

return;
// Mark ourselves as idle then notify all other threads that there is new work available.
exclusive.inflight -= 1;
shared.not_empty.notify_all();
}
}

// Round size up to next multiple of 64 bytes for md5 algorithm.
let current = state.size;
let padded = buffer_size(current);
let (result, ..) = hash(&mut state.path[..padded], current);

// Remove MD5 padding.
state.path[padded - 8] = 0;
state.path[padded - 7] = 0;
state.path[padded - 6] = 0;
state.path[padded - 5] = 0;
state.path[padded - 4] = 0;
state.path[padded - 3] = 0;
state.path[padded - 2] = 0;
state.path[padded - 1] = 0;

state.size += 1;

if y > 0 && ((result >> 28) & 0xf) > 0xa {
state.path[current] = b'U';
explore(state, x, y - 1);
}
if y < 3 && ((result >> 24) & 0xf) > 0xa {
state.path[current] = b'D';
explore(state, x, y + 1);
}
if x > 0 && ((result >> 20) & 0xf) > 0xa {
state.path[current] = b'L';
explore(state, x - 1, y);
}
if x < 3 && ((result >> 16) & 0xf) > 0xa {
state.path[current] = b'R';
explore(state, x + 1, y);
/// Drains up to 100 items from the local stack, stopping early once it is empty.
/// The batch size of 100 was picked empirically as the value giving the lowest total runtime.
///
/// A smaller batch makes threads spend more time locking the mutex and syncing global state.
/// A larger batch lets a few threads hoard paths while the others sit idle.
fn explore(shared: &Shared, local: &mut State) {
    for _ in 0..100 {
        let Some((x, y, size, mut path)) = local.todo.pop() else { break };

        // Reaching the bottom right room ends this path: record it and move on.
        if x == 3 && y == 3 {
            let steps = size - shared.prefix;
            if local.min.is_empty() || steps < local.min.len() {
                // Keep only the moves, dropping the salt in front and the padding behind.
                let moves = path[shared.prefix..size].to_vec();
                local.min = String::from_utf8(moves).unwrap();
            }
            local.max = local.max.max(steps);
            continue;
        }

        // Otherwise hash the path so far to find out which doors are open.
        let (digest, ..) = hash(&mut path, size);
        // A door is open when its hex digit of the hash is `b` or above.
        let open = |shift: u32| ((digest >> shift) & 0xf) > 0xa;
        // Queue the neighboring room with the path extended by one step.
        let mut visit = |nx: u8, ny: u8, step: u8| {
            local.todo.push((nx, ny, size + 1, extend(&path, size, step)));
        };

        // The order up, down, left, right follows the first four hex digits of the hash.
        if y > 0 && open(28) {
            visit(x, y - 1, b'U');
        }
        if y < 3 && open(24) {
            visit(x, y + 1, b'D');
        }
        if x > 0 && open(20) {
            visit(x - 1, y, b'L');
        }
        if x < 3 && open(16) {
            visit(x + 1, y, b'R');
        }
    }
}

state.size = current;
state.path[current] = 0;
/// Builds the buffer for a path that is one step longer than `src`.
///
/// The first `size` bytes of `src` are copied, `b` is stored right after them and the rest of
/// the buffer is zeroed. The buffer length is `buffer_size(size + 1)` so that there is room
/// for the MD5 padding.
fn extend(src: &[u8], size: usize, b: u8) -> Vec<u8> {
    let mut next = vec![0; buffer_size(size + 1)];
    // Split at the end of the existing path: `head` receives the copy, `tail` starts
    // with the slot for the new step.
    let (head, tail) = next.split_at_mut(size);
    head.copy_from_slice(&src[..size]);
    tail[0] = b;
    next
}
10 changes: 8 additions & 2 deletions tests/year2016/day17_test.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
use aoc::year2016::day17::*;

const EXAMPLE: &str = "ihgpwlah";

#[test]
fn part1_test() {
// Part one is the shortest path found for the EXAMPLE input.
let input = parse(EXAMPLE);
assert_eq!(part1(&input), "DDRRRD");
}

#[test]
fn part2_test() {
// Part two is the length of the longest path found for the EXAMPLE input.
let input = parse(EXAMPLE);
assert_eq!(part2(&input), 370);
}

0 comments on commit b7add2b

Please sign in to comment.