diff --git a/README.md b/README.md index b2f6d14..1e480c1 100644 --- a/README.md +++ b/README.md @@ -318,7 +318,7 @@ Performance is reasonable even on older hardware, for example a 2011 MacBook Pro | 14 | [One-Time Pad](https://adventofcode.com/2016/day/14) | [Source](src/year2016/day14.rs) | 77000 | | 15 | [Timing is Everything](https://adventofcode.com/2016/day/15) | [Source](src/year2016/day15.rs) | 1 | | 16 | [Dragon Checksum](https://adventofcode.com/2016/day/16) | [Source](src/year2016/day16.rs) | 1 | -| 17 | [Two Steps Forward](https://adventofcode.com/2016/day/17) | [Source](src/year2016/day17.rs) | 14948 | +| 17 | [Two Steps Forward](https://adventofcode.com/2016/day/17) | [Source](src/year2016/day17.rs) | 3854 | | 18 | [Like a Rogue](https://adventofcode.com/2016/day/18) | [Source](src/year2016/day18.rs) | 728 | | 19 | [An Elephant Named Joseph](https://adventofcode.com/2016/day/19) | [Source](src/year2016/day19.rs) | 1 | | 20 | [Firewall Rules](https://adventofcode.com/2016/day/20) | [Source](src/year2016/day20.rs) | 21 | diff --git a/src/year2016/day17.rs b/src/year2016/day17.rs index ce18304..708679e 100644 --- a/src/year2016/day17.rs +++ b/src/year2016/day17.rs @@ -1,89 +1,157 @@ //! # Two Steps Forward //! -//! Brute force search over every possible path. As we need all paths (not just the shortest) -//! a DFS is faster than a BFS as we can reuse the same buffer to store the path so far -//! only adding or removing the last step. +//! Brute force search over every possible path. Work is parallelized over multiple threads. +//! Keeping each thread busy and spreading the work as evenly as possible is quite tricky. Some +//! paths can dead-end quickly while others can take the majority of exploration time. +//! +//! To solve this we implement a very simple version of +//! [work stealing](https://en.wikipedia.org/wiki/Work_stealing). Threads process paths locally, +//! stopping every now and then to return paths to a global queue. 
This allows other threads that +//! have run out of work to pick up new paths to process. +//! +//! The approach from "Waiting: Parking and Condition Variables" in the excellent book +//! [Rust Atomics and Locks](https://marabos.nl/atomics/) prevents idle threads from busy +//! looping on the mutex. use crate::util::md5::*; +use crate::util::thread::*; +use std::sync::{Condvar, Mutex}; -pub struct State { - path: Vec, - prefix: usize, - size: usize, +type Input = (String, usize); +type Item = (u8, u8, usize, Vec); + +struct State { + todo: Vec, min: String, max: usize, } -pub fn parse(input: &str) -> State { - let bytes = input.trim().as_bytes(); - let prefix = bytes.len(); - let size = bytes.len(); - let min = String::new(); - let max = 0; +struct Exclusive { + global: State, + inflight: usize, +} - let mut path = vec![0; 1024]; - path[..size].copy_from_slice(bytes); +struct Shared { + prefix: usize, + mutex: Mutex, + not_empty: Condvar, +} + +pub fn parse(input: &str) -> Input { + // Initial starting position is the top left corner. + let input = input.trim().as_bytes(); + let prefix = input.len(); + let start = (0, 0, prefix, extend(input, prefix, 0)); + + // State shared between threads. + let global = State { todo: vec![start], min: String::new(), max: 0 }; + let exclusive = Exclusive { global, inflight: 0 }; + let shared = Shared { prefix, mutex: Mutex::new(exclusive), not_empty: Condvar::new() }; - let mut state = State { path, prefix, size, min, max }; - explore(&mut state, 0, 0); - state + // Search paths in parallel. + spawn(|| worker(&shared)); + + let global = shared.mutex.into_inner().unwrap().global; + (global.min, global.max) } -pub fn part1(input: &State) -> &str { - &input.min +pub fn part1(input: &Input) -> &str { + &input.0 } -pub fn part2(input: &State) -> usize { - input.max +pub fn part2(input: &Input) -> usize { + input.1 } -fn explore(state: &mut State, x: u32, y: u32) { - // If we've reached the end then don't go any further. 
- if x == 3 && y == 3 { - let adjusted = state.size - state.prefix; +/// Process local work items, stopping every now and then to redistribute items back to global pool. +/// This prevents threads idling or hotspotting. +fn worker(shared: &Shared) { + let mut local = State { todo: Vec::new(), min: String::new(), max: 0 }; + + loop { + let mut exclusive = shared.mutex.lock().unwrap(); + let item = loop { + // Pick up available work. + if let Some(item) = exclusive.global.todo.pop() { + exclusive.inflight += 1; + break item; + } + // If no work available and no other thread is doing anything, then we're done. + if exclusive.inflight == 0 { + return; + } + // Put thread to sleep until another thread notifies us that work is available. + // This avoids busy looping on the mutex. + exclusive = shared.not_empty.wait(exclusive).unwrap(); + }; - if state.min.is_empty() || adjusted < state.min.len() { - let steps = state.path[state.prefix..state.size].to_vec(); - state.min = String::from_utf8(steps).unwrap(); + // Drop mutex to release lock and allow other threads access. + drop(exclusive); + + // Process local work items. + local.todo.push(item); + explore(shared, &mut local); + + // Requeue leftover items globally. Only merge min if this thread found a path (non-empty). + let mut exclusive = shared.mutex.lock().unwrap(); + let global = &mut exclusive.global; + + global.todo.append(&mut local.todo); + if !local.min.is_empty() && (global.min.is_empty() || local.min.len() < global.min.len()) { + global.min = local.min.clone(); } - state.max = state.max.max(adjusted); + global.max = global.max.max(local.max); - return; + // Mark ourselves as idle then notify all other threads that there is new work available. + exclusive.inflight -= 1; + shared.not_empty.notify_all(); } +} - // Round size up to next multiple of 64 bytes for md5 algorithm. - let current = state.size; - let padded = buffer_size(current); - let (result, ..) = hash(&mut state.path[..padded], current); - - // Remove MD5 padding. 
- state.path[padded - 8] = 0; - state.path[padded - 7] = 0; - state.path[padded - 6] = 0; - state.path[padded - 5] = 0; - state.path[padded - 4] = 0; - state.path[padded - 3] = 0; - state.path[padded - 2] = 0; - state.path[padded - 1] = 0; - - state.size += 1; - - if y > 0 && ((result >> 28) & 0xf) > 0xa { - state.path[current] = b'U'; - explore(state, x, y - 1); - } - if y < 3 && ((result >> 24) & 0xf) > 0xa { - state.path[current] = b'D'; - explore(state, x, y + 1); - } - if x > 0 && ((result >> 20) & 0xf) > 0xa { - state.path[current] = b'L'; - explore(state, x - 1, y); - } - if x < 3 && ((result >> 16) & 0xf) > 0xa { - state.path[current] = b'R'; - explore(state, x + 1, y); +/// Explore at most 100 paths, stopping sooner if we run out. +/// 100 is chosen empirically as the amount that results in the least total time taken. +/// +/// Too low and threads waste time locking the mutex, reading and writing global state. +/// Too high and some threads are starved with no paths, while other threads do all the work. +fn explore(shared: &Shared, local: &mut State) { + for _ in 0..100 { + let Some((x, y, size, mut path)) = local.todo.pop() else { break }; + + if x == 3 && y == 3 { + // Stop if we've reached the bottom right room. + let adjusted = size - shared.prefix; + if local.min.is_empty() || adjusted < local.min.len() { + // Keep only the steps taken: bytes from `shared.prefix` up to `size`. + let middle = path[shared.prefix..size].to_vec(); + local.min = String::from_utf8(middle).unwrap(); + } + local.max = local.max.max(adjusted); + } else { + // Otherwise hash the path; a nibble above 0xa queues the matching U/D/L/R step. + let (result, ..) 
= hash(&mut path, size); + + if y > 0 && ((result >> 28) & 0xf) > 0xa { + local.todo.push((x, y - 1, size + 1, extend(&path, size, b'U'))); + } + if y < 3 && ((result >> 24) & 0xf) > 0xa { + local.todo.push((x, y + 1, size + 1, extend(&path, size, b'D'))); + } + if x > 0 && ((result >> 20) & 0xf) > 0xa { + local.todo.push((x - 1, y, size + 1, extend(&path, size, b'L'))); + } + if x < 3 && ((result >> 16) & 0xf) > 0xa { + local.todo.push((x + 1, y, size + 1, extend(&path, size, b'R'))); + } + } } +} - state.size = current; - state.path[current] = 0; +/// Convenience function to generate a new path: the first `size` bytes of `src` followed by `b`. +fn extend(src: &[u8], size: usize, b: u8) -> Vec { + // Leave room for MD5 padding. + let padded = buffer_size(size + 1); + let mut next = vec![0; padded]; + // Copy existing path and next step. + next[0..size].copy_from_slice(&src[0..size]); + next[size] = b; + next } diff --git a/tests/year2016/day17_test.rs b/tests/year2016/day17_test.rs index 3ee4e0a..a4c7f66 100644 --- a/tests/year2016/day17_test.rs +++ b/tests/year2016/day17_test.rs @@ -1,9 +1,15 @@ +use aoc::year2016::day17::*; + +const EXAMPLE: &str = "ihgpwlah"; + #[test] fn part1_test() { - // No example data + let input = parse(EXAMPLE); + assert_eq!(part1(&input), "DDRRRD"); } #[test] fn part2_test() { - // No example data + let input = parse(EXAMPLE); + assert_eq!(part2(&input), 370); }