From d840c5ade882ccbafa318d061948df0962aff166 Mon Sep 17 00:00:00 2001 From: zazedd Date: Fri, 13 Sep 2024 16:07:53 +0100 Subject: [PATCH 1/3] Speed up `Store.last_modified` by comparing hashes instead of file contents --- src/irmin/store.ml | 10 +++++----- src/irmin/store_intf.ml | 7 +++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/irmin/store.ml b/src/irmin/store.ml index b73edf07bd..0fdff8565b 100644 --- a/src/irmin/store.ml +++ b/src/irmin/store.ml @@ -1153,9 +1153,9 @@ module Make (B : Backend.S) = struct let current, current_depth = Heap.pop_minimum heap in let parents = Commit.parents current in let tree = Commit.tree current in - let* current_value = Tree.find tree key in + let* current_tree = Tree.find_tree tree key in if List.length parents = 0 then - if current_value <> None then Lwt.return (current :: acc) + if current_tree <> None then Lwt.return (current :: acc) else Lwt.return acc else let max_depth = @@ -1173,9 +1173,9 @@ module Make (B : Backend.S) = struct Heap.add heap (commit, current_depth + 1) in let tree = Commit.tree commit in - let+ e = Tree.find tree key in - match (e, current_value) with - | Some x, Some y -> not (equal_contents x y) + let+ e = Tree.find_tree tree key in + match (e, current_tree) with + | Some x, Some y -> Tree.hash x <> Tree.hash y | Some _, None -> true | None, Some _ -> true | _, _ -> false) diff --git a/src/irmin/store_intf.ml b/src/irmin/store_intf.ml index 09ba421064..a458da15e6 100644 --- a/src/irmin/store_intf.ml +++ b/src/irmin/store_intf.ml @@ -1006,10 +1006,9 @@ module type S_generic_key = sig head is not set) and stopping at [min] if specified. *) val last_modified : ?depth:int -> ?n:int -> t -> path -> commit list Lwt.t - (** [last_modified ?number c k] is the list of the last [number] commits that - modified [path], in ascending order of date. [depth] is the maximum depth - to be explored in the commit graph, if any. Default value for [number] is - 1. 
*) + (** [last_modified ?n c k] is the list of the last [n] commits that modified + [path], in ascending order of date. [depth] is the maximum depth to be + explored in the commit graph, if any. Default value for [n] is 1. *) (** Manipulate branches. *) module Branch : sig From 4d9deeab1987fd6ccb28399c7085382981d65702 Mon Sep 17 00:00:00 2001 From: ArthurW Date: Mon, 30 Sep 2024 10:30:08 +0200 Subject: [PATCH 2/3] Store.last_modified: use equal_hash, call find_tree less --- src/irmin/store.ml | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/irmin/store.ml b/src/irmin/store.ml index 0fdff8565b..adc3bb4392 100644 --- a/src/irmin/store.ml +++ b/src/irmin/store.ml @@ -1129,13 +1129,11 @@ module Make (B : Backend.S) = struct g module Heap = Binary_heap.Make (struct - type t = commit * int + type t = commit * int * tree option - let compare c1 c2 = + let compare (c1, _, _) (c2, _, _) = (* [bheap] operates on miminums, we need to invert the comparison. 
*) - -Int64.compare - (Info.date (Commit.info (fst c1))) - (Info.date (Commit.info (fst c2))) + Int64.compare (Info.date (Commit.info c2)) (Info.date (Commit.info c1)) end) let last_modified ?depth ?(n = 1) t key = @@ -1145,15 +1143,14 @@ module Make (B : Backend.S) = struct depth n pp_path key]; let repo = repo t in let* commit = Head.get t in - let heap = Heap.create ~dummy:(commit, 0) 0 in - let () = Heap.add heap (commit, 0) in + let* commit_tree = Tree.find_tree (Commit.tree commit) key in + let heap = Heap.create ~dummy:(commit, 0, commit_tree) 0 in + let () = Heap.add heap (commit, 0, commit_tree) in let rec search acc = if Heap.is_empty heap || List.length acc = n then Lwt.return acc else - let current, current_depth = Heap.pop_minimum heap in + let current, current_depth, current_tree = Heap.pop_minimum heap in let parents = Commit.parents current in - let tree = Commit.tree current in - let* current_tree = Tree.find_tree tree key in if List.length parents = 0 then if current_tree <> None then Lwt.return (current :: acc) else Lwt.return acc @@ -1168,14 +1165,14 @@ module Make (B : Backend.S) = struct (fun hash -> Commit.of_key repo hash >>= function | Some commit -> ( + let+ e = Tree.find_tree (Commit.tree commit) key in let () = if not max_depth then - Heap.add heap (commit, current_depth + 1) + Heap.add heap (commit, current_depth + 1, e) in - let tree = Commit.tree commit in - let+ e = Tree.find_tree tree key in match (e, current_tree) with - | Some x, Some y -> Tree.hash x <> Tree.hash y + | Some x, Some y -> + not (equal_hash (Tree.hash x) (Tree.hash y)) | Some _, None -> true | None, Some _ -> true | _, _ -> false) From e61a347dc8b754f5c767cb216407c6dc65edf486 Mon Sep 17 00:00:00 2001 From: ArthurW Date: Mon, 30 Sep 2024 14:52:10 +0200 Subject: [PATCH 3/3] Store.last_modified: deduplicate forked commits --- src/irmin/store.ml | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/irmin/store.ml 
b/src/irmin/store.ml index adc3bb4392..6d98b8be4f 100644 --- a/src/irmin/store.ml +++ b/src/irmin/store.ml @@ -1131,9 +1131,17 @@ module Make (B : Backend.S) = struct module Heap = Binary_heap.Make (struct type t = commit * int * tree option - let compare (c1, _, _) (c2, _, _) = + let compare (c1, d1, _) (c2, d2, _) = (* [bheap] operates on miminums, we need to invert the comparison. *) - Int64.compare (Info.date (Commit.info c2)) (Info.date (Commit.info c1)) + match + Int64.compare (Info.date (Commit.info c2)) (Info.date (Commit.info c1)) + with + | 0 -> ( + (* if the same commit was inserted multiple times, group them together to deduplicate *) + match compare_hash (Commit.hash c1) (Commit.hash c2) with + | 0 -> Int.compare d1 d2 (* smallest depth first *) + | c -> c) + | c -> c end) let last_modified ?depth ?(n = 1) t key = @@ -1146,10 +1154,22 @@ module Make (B : Backend.S) = struct let* commit_tree = Tree.find_tree (Commit.tree commit) key in let heap = Heap.create ~dummy:(commit, 0, commit_tree) 0 in let () = Heap.add heap (commit, 0, commit_tree) in + let pop_minimum () = + let ((current, _, _) as elt) = Heap.pop_minimum heap in + let rec remove_duplicates () = + match Heap.minimum heap with + | duplicate, _, _ when Commit.equal current duplicate -> + Heap.remove heap; + remove_duplicates () + | _ | (exception Binary_heap.Empty) -> () + in + remove_duplicates (); + elt + in let rec search acc = if Heap.is_empty heap || List.length acc = n then Lwt.return acc else - let current, current_depth, current_tree = Heap.pop_minimum heap in + let current, current_depth, current_tree = pop_minimum () in let parents = Commit.parents current in if List.length parents = 0 then if current_tree <> None then Lwt.return (current :: acc)