diff --git a/Cargo.lock b/Cargo.lock index f831396010..df5500598e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -245,6 +245,7 @@ dependencies = [ "glob", "hashbrown 0.12.3", "humansize", + "id-arena", "itertools", "lazy_static", "libc", @@ -383,6 +384,12 @@ dependencies = [ "quick-error", ] +[[package]] +name = "id-arena" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005" + [[package]] name = "indexmap" version = "1.7.0" diff --git a/Cargo.toml b/Cargo.toml index 9eece16f10..b650be6545 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,6 +66,7 @@ hashbrown = "0.12.3" humansize = "2.1.3" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +id-arena = "2.2.1" [dev-dependencies] # assert_cmd 2.0.6 requires rust 1.60 diff --git a/src/diff/dijkstra.rs b/src/diff/dijkstra.rs index 6e311da60c..029c284657 100644 --- a/src/diff/dijkstra.rs +++ b/src/diff/dijkstra.rs @@ -5,11 +5,11 @@ use std::{cmp::Reverse, env}; use crate::{ diff::changes::ChangeMap, - diff::graph::{populate_change_map, set_neighbours, Edge, Vertex}, + diff::graph::{populate_change_map, set_neighbours, Edge, Vertex, VertexId}, hash::DftHashMap, parse::syntax::Syntax, }; -use bumpalo::Bump; +use id_arena::Arena; use itertools::Itertools; use radix_heap::RadixHeapMap; @@ -17,32 +17,36 @@ use radix_heap::RadixHeapMap; pub struct ExceededGraphLimit {} /// Return the shortest route from `start` to the end vertex. -fn shortest_vertex_path<'s, 'b>( - start: &'b Vertex<'s, 'b>, - vertex_arena: &'b Bump, +fn shortest_vertex_path<'s>( + start: VertexId<'s>, + vertex_arena: &mut Arena>, size_hint: usize, graph_limit: usize, -) -> Result>, ExceededGraphLimit> { +) -> Result>, ExceededGraphLimit> { // We want to visit nodes with the shortest distance first, but // RadixHeapMap is a max-heap. Ensure nodes are wrapped with // Reverse to flip comparisons. - let mut heap: RadixHeapMap, &'b Vertex<'s, 'b>> = RadixHeapMap::new(); + let mut heap: RadixHeapMap, VertexId<'s>> = RadixHeapMap::new(); heap.push(Reverse(0), start); let mut seen = DftHashMap::default(); seen.reserve(size_hint); - let end: &'b Vertex<'s, 'b> = loop { + let end: VertexId<'s> = loop { match heap.pop() { - Some((Reverse(distance), current)) => { - if current.is_end() { - break current; + Some((Reverse(distance), current_id)) => { + { + if (vertex_arena[current_id]).is_end() { + break current_id; + } } - set_neighbours(current, vertex_arena, &mut seen); + set_neighbours(current_id, vertex_arena, &mut seen); + let current = &vertex_arena[current_id]; for neighbour in current.neighbours.borrow().as_ref().unwrap() { - let (edge, next) = neighbour; + let (edge, next_id) = neighbour; + let next = &vertex_arena[*next_id]; let distance_to_next = distance + edge.cost(); let found_shorter_route = match next.predecessor.get() { @@ -51,15 +55,16 @@ fn shortest_vertex_path<'s, 'b>( }; if found_shorter_route { - next.predecessor.replace(Some((distance_to_next, current))); - heap.push(Reverse(distance_to_next), next); + next.predecessor + .replace(Some((distance_to_next, current_id))); + heap.push(Reverse(distance_to_next), *next_id); } } if seen.len() > graph_limit { info!( - "Reached graph limit, arena consumed {} bytes", - vertex_arena.allocated_bytes(), + "Reached graph limit, arena allocated {} items", + vertex_arena.len(), ); return Err(ExceededGraphLimit {}); } @@ -69,38 +74,39 @@ fn shortest_vertex_path<'s, 'b>( }; info!( - "Saw {} vertices (a Vertex is {} bytes), arena consumed {} bytes, with {} vertices left on heap.", + "Saw {} vertices (a Vertex is {} bytes), arena allocated {} items, with {} vertices left on heap.", seen.len(), std::mem::size_of::(), - vertex_arena.allocated_bytes(), + vertex_arena.len(), heap.len(), ); let mut current = Some((0, end)); - let mut vertex_route: Vec<&'b Vertex<'s, 'b>> = vec![]; - while let Some((_, node)) = current { - vertex_route.push(node); - current = node.predecessor.get(); + let mut vertex_route: Vec> = vec![]; + while let Some((_, node_id)) = current { + vertex_route.push(node_id); + current = vertex_arena[node_id].predecessor.get(); } vertex_route.reverse(); Ok(vertex_route) } -fn shortest_path_with_edges<'s, 'b>( - route: &[&'b Vertex<'s, 'b>], -) -> Vec<(Edge, &'b Vertex<'s, 'b>)> { +fn shortest_path_with_edges<'s>( + route: &[VertexId<'s>], + vertex_arena: &Arena>, +) -> Vec<(Edge, VertexId<'s>)> { let mut prev = route.first().expect("Expected non-empty route"); let mut cost = 0; let mut res = vec![]; - for vertex in route.iter().skip(1) { - let edge = edge_between(prev, vertex); + for vertex_id in route.iter().skip(1) { + let edge = edge_between(*prev, *vertex_id, vertex_arena); res.push((edge, *prev)); cost += edge.cost(); - prev = vertex; + prev = vertex_id; } debug!("Found a path of {} with cost {}.", route.len(), cost); @@ -111,27 +117,34 @@ fn shortest_path_with_edges<'s, 'b>( /// /// The vec returned does not return the very last vertex. This is /// necessary because a route of N vertices only has N-1 edges. -fn shortest_path<'s, 'b>( - start: Vertex<'s, 'b>, - vertex_arena: &'b Bump, +fn shortest_path<'s>( + start: Vertex<'s>, + vertex_arena: &mut Arena>, size_hint: usize, graph_limit: usize, -) -> Result)>, ExceededGraphLimit> { - let start: &'b Vertex<'s, 'b> = vertex_arena.alloc(start); +) -> Result)>, ExceededGraphLimit> { + let start: VertexId<'s> = vertex_arena.alloc(start); let vertex_path = shortest_vertex_path(start, vertex_arena, size_hint, graph_limit)?; - Ok(shortest_path_with_edges(&vertex_path)) + Ok(shortest_path_with_edges(&vertex_path, vertex_arena)) } -fn edge_between<'s, 'b>(before: &Vertex<'s, 'b>, after: &Vertex<'s, 'b>) -> Edge { - assert_ne!(before, after); +fn edge_between<'s>( + before_id: VertexId<'s>, + after_id: VertexId<'s>, + vertex_arena: &Arena>, +) -> Edge { + assert_ne!(before_id, after_id); + let before = &vertex_arena[before_id]; + let after = &vertex_arena[after_id]; let mut shortest_edge: Option = None; if let Some(neighbours) = &*before.neighbours.borrow() { for neighbour in neighbours { - let (edge, next) = *neighbour; + let (edge, next_id) = *neighbour; + // If there are multiple edges that can take us to `next`, // prefer the shortest. - if *next == *after { + if next_id == after_id { let is_shorter = match shortest_edge { Some(prev_edge) => edge.cost() < prev_edge.cost(), None => true, @@ -213,9 +226,9 @@ pub fn mark_syntax<'a>( let size_hint = std::cmp::min(lhs_node_count * rhs_node_count, graph_limit); let start = Vertex::new(lhs_syntax, rhs_syntax); - let vertex_arena = Bump::new(); + let mut vertex_arena = Arena::new(); - let route = shortest_path(start, &vertex_arena, size_hint, graph_limit)?; + let route = shortest_path(start, &mut vertex_arena, size_hint, graph_limit)?; let print_length = if env::var("DFT_VERBOSE").is_ok() { 50 @@ -227,7 +240,8 @@ pub fn mark_syntax<'a>( print_length, route .iter() - .map(|(edge, v)| { + .map(|(edge, v_id)| { + let v = &vertex_arena[*v_id]; format!( "{:20} {:20} --- {:3} {:?}", v.lhs_syntax @@ -242,7 +256,7 @@ pub fn mark_syntax<'a>( .collect_vec() ); - populate_change_map(&route, change_map); + populate_change_map(&route, &vertex_arena, change_map); Ok(()) } @@ -258,7 +272,7 @@ mod tests { }; use itertools::Itertools; - use typed_arena::Arena; + use typed_arena::Arena as TypedArena; fn pos_helper(line: u32) -> Vec { vec![SingleLineSpan { @@ -270,7 +284,7 @@ mod tests { #[test] fn identical_atoms() { - let arena = Arena::new(); + let arena = TypedArena::new(); let lhs = Syntax::new_atom(&arena, pos_helper(0), "foo", AtomKind::Normal); // Same content as LHS. @@ -278,8 +292,8 @@ mod tests { init_all_info(&[lhs], &[rhs]); let start = Vertex::new(Some(lhs), Some(rhs)); - let vertex_arena = Bump::new(); - let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); + let mut vertex_arena = Arena::::new(); + let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); assert_eq!( @@ -293,7 +307,7 @@ mod tests { #[test] fn extra_atom_lhs() { - let arena = Arena::new(); + let arena = TypedArena::new(); let lhs = vec![Syntax::new_list( &arena, @@ -320,8 +334,8 @@ mod tests { init_all_info(&lhs, &rhs); let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied()); - let vertex_arena = Bump::new(); - let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); + let mut vertex_arena = Arena::new(); + let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); assert_eq!( @@ -337,7 +351,7 @@ mod tests { #[test] fn repeated_atoms() { - let arena = Arena::new(); + let arena = TypedArena::new(); let lhs = vec![Syntax::new_list( &arena, @@ -362,8 +376,8 @@ mod tests { init_all_info(&lhs, &rhs); let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied()); - let vertex_arena = Bump::new(); - let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); + let mut vertex_arena = Arena::new(); + let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); assert_eq!( @@ -380,7 +394,7 @@ mod tests { #[test] fn atom_after_empty_list() { - let arena = Arena::new(); + let arena = TypedArena::new(); let lhs = vec![Syntax::new_list( &arena, @@ -408,8 +422,8 @@ mod tests { init_all_info(&lhs, &rhs); let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied()); - let vertex_arena = Bump::new(); - let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); + let mut vertex_arena = Arena::new(); + let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); assert_eq!( @@ -431,7 +445,7 @@ mod tests { #[test] fn replace_similar_comment() { - let arena = Arena::new(); + let arena = TypedArena::new(); let lhs = vec![Syntax::new_atom( &arena, @@ -449,8 +463,8 @@ mod tests { init_all_info(&lhs, &rhs); let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied()); - let vertex_arena = Bump::new(); - let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); + let mut vertex_arena = Arena::new(); + let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); assert_eq!( @@ -463,7 +477,7 @@ mod tests { #[test] fn replace_very_different_comment() { - let arena = Arena::new(); + let arena = TypedArena::new(); let lhs = vec![Syntax::new_atom( &arena, @@ -481,8 +495,8 @@ mod tests { init_all_info(&lhs, &rhs); let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied()); - let vertex_arena = Bump::new(); - let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); + let mut vertex_arena = Arena::new(); + let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); assert_eq!( @@ -495,7 +509,7 @@ mod tests { #[test] fn replace_comment_prefer_most_similar() { - let arena = Arena::new(); + let arena = TypedArena::new(); let lhs = vec![ Syntax::new_atom( @@ -521,8 +535,8 @@ mod tests { init_all_info(&lhs, &rhs); let start = Vertex::new(lhs.get(0).copied(), rhs.get(0).copied()); - let vertex_arena = Bump::new(); - let route = shortest_path(start, &vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); + let mut vertex_arena = Arena::new(); + let route = shortest_path(start, &mut vertex_arena, 0, DEFAULT_GRAPH_LIMIT).unwrap(); let actions = route.iter().map(|(action, _)| *action).collect_vec(); assert_eq!( @@ -538,7 +552,7 @@ mod tests { #[test] fn mark_syntax_equal_atoms() { - let arena = Arena::new(); + let arena = TypedArena::new(); let lhs = Syntax::new_atom(&arena, pos_helper(1), "foo", AtomKind::Normal); let rhs = Syntax::new_atom(&arena, pos_helper(1), "foo", AtomKind::Normal); init_all_info(&[lhs], &[rhs]); @@ -552,7 +566,7 @@ mod tests { #[test] fn mark_syntax_different_atoms() { - let arena = Arena::new(); + let arena = TypedArena::new(); let lhs = Syntax::new_atom(&arena, pos_helper(1), "foo", AtomKind::Normal); let rhs = Syntax::new_atom(&arena, pos_helper(1), "bar", AtomKind::Normal); init_all_info(&[lhs], &[rhs]); diff --git a/src/diff/graph.rs b/src/diff/graph.rs index f9b0ddbcb3..0094ce8332 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -1,7 +1,7 @@ //! A graph representation for computing tree diffs. -use bumpalo::Bump; use hashbrown::hash_map::RawEntryMut; +use id_arena::{Arena, Id}; use std::{ cell::{Cell, RefCell}, cmp::min, @@ -49,9 +49,9 @@ use Edge::*; /// ^ ^ /// ``` #[derive(Debug, Clone)] -pub struct Vertex<'s, 'b> { - pub neighbours: RefCell)>>>, - pub predecessor: Cell)>>, +pub struct Vertex<'s> { + pub neighbours: RefCell)>>>, + pub predecessor: Cell)>>, // TODO: experiment with storing SyntaxId only, and have a HashMap // from SyntaxId to &Syntax. pub lhs_syntax: Option<&'s Syntax<'s>>, @@ -61,7 +61,9 @@ pub struct Vertex<'s, 'b> { rhs_parent_id: Option, } -impl<'s, 'b> PartialEq for Vertex<'s, 'b> { +pub type VertexId<'s> = Id>; + +impl<'s> PartialEq for Vertex<'s> { fn eq(&self, other: &Self) -> bool { // Strictly speaking, we should compare the whole // EnteredDelimiter stack, not just the immediate @@ -99,9 +101,9 @@ impl<'s, 'b> PartialEq for Vertex<'s, 'b> { b0 && b1 && b2 } } -impl<'s, 'b> Eq for Vertex<'s, 'b> {} +impl<'s> Eq for Vertex<'s> {} -impl<'s, 'b> Hash for Vertex<'s, 'b> { +impl<'s> Hash for Vertex<'s> { fn hash(&self, state: &mut H) { self.lhs_syntax.map(|node| node.id()).hash(state); self.rhs_syntax.map(|node| node.id()).hash(state); @@ -247,7 +249,7 @@ fn push_rhs_delimiter<'s>( } } -impl<'s, 'b> Vertex<'s, 'b> { +impl<'s> Vertex<'s> { pub fn is_end(&self) -> bool { self.lhs_syntax.is_none() && self.rhs_syntax.is_none() && self.parents.is_empty() } @@ -344,11 +346,11 @@ impl Edge { } } -fn allocate_if_new<'s, 'b>( - v: Vertex<'s, 'b>, - alloc: &'b Bump, - seen: &mut DftHashMap<&Vertex<'s, 'b>, Vec<&'b Vertex<'s, 'b>>>, -) -> &'b Vertex<'s, 'b> { +fn allocate_if_new<'a, 's>( + v: Vertex<'s>, + alloc: &'a mut Arena>, + seen: &mut DftHashMap<&'a Vertex<'s>, Vec>>, +) -> VertexId<'s> { // We use the entry API so that we only need to do a single lookup // for access and insert. match seen.raw_entry_mut().from_key(&v) { @@ -359,15 +361,16 @@ fn allocate_if_new<'s, 'b>( // nestings for each syntax node pair. if let Some(allocated) = existing.last() { if existing.len() >= 2 { - return allocated; + return *allocated; } } // If we have seen exactly this graph node before, even // considering parenthesis matching, return it. - for existing_node in existing.iter() { + for existing_node_id in existing.iter() { + let existing_node = &alloc[*existing_node_id]; if existing_node.parents == v.parents { - return existing_node; + return *existing_node_id; } } @@ -378,18 +381,18 @@ fn allocate_if_new<'s, 'b>( allocated } RawEntryMut::Vacant(vacant) => { - let allocated = alloc.alloc(v); + let allocated_id = alloc.alloc(v); // We know that this vec will never have more than 2 // nodes, and this code is very hot, so reserve. // // We still use a vec to enable experiments with the value // of how many possible parenthesis nestings to explore. - let mut existing: Vec<&'b Vertex<'s, 'b>> = Vec::with_capacity(2); - existing.push(allocated); + let mut existing: Vec> = Vec::with_capacity(2); + existing.push(allocated_id); - vacant.insert(allocated, existing); - allocated + vacant.insert(&alloc[allocated_id], existing); + allocated_id } } } @@ -474,20 +477,22 @@ fn pop_all_parents<'s>( /// Compute the neighbours of `v` if we haven't previously done so, /// and write them to the .neighbours cell inside `v`. -pub fn set_neighbours<'s, 'b>( - v: &Vertex<'s, 'b>, - alloc: &'b Bump, - seen: &mut DftHashMap<&Vertex<'s, 'b>, Vec<&'b Vertex<'s, 'b>>>, +pub fn set_neighbours<'a, 's>( + v_id: VertexId<'s>, + alloc: &'a mut Arena>, + seen: &mut DftHashMap<&'a Vertex<'s>, Vec>>, ) { + let v = alloc.get(v_id).unwrap(); if v.neighbours.borrow().is_some() { return; } // There are only seven pushes in this function, so that's sufficient. - let mut res: Vec<(Edge, &Vertex)> = Vec::with_capacity(7); + let mut res: Vec<(Edge, VertexId<'s>)> = Vec::with_capacity(7); if let (Some(lhs_syntax), Some(rhs_syntax)) = (&v.lhs_syntax, &v.rhs_syntax) { if lhs_syntax == rhs_syntax { + let v = &alloc[v_id]; let depth_difference = (lhs_syntax.num_ancestors() as i32 - rhs_syntax.num_ancestors() as i32) .unsigned_abs(); @@ -769,11 +774,13 @@ pub fn set_neighbours<'s, 'b>( v.neighbours.replace(Some(res)); } -pub fn populate_change_map<'s, 'b>( - route: &[(Edge, &'b Vertex<'s, 'b>)], +pub fn populate_change_map<'s>( + route: &[(Edge, VertexId<'s>)], + vertex_arena: &Arena>, change_map: &mut ChangeMap<'s>, ) { - for (e, v) in route { + for (e, v_id) in route { + let v = &vertex_arena[*v_id]; match e { UnchangedNode { .. } => { // No change on this node or its children.