Skip to content

Commit

Permalink
fixup! irmin-pack: update GC for chunked suffix
Browse files Browse the repository at this point in the history
  • Loading branch information
metanivek committed Nov 1, 2022
1 parent ad270b8 commit 6313cfc
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 22 deletions.
14 changes: 11 additions & 3 deletions src/irmin-pack/unix/gc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -108,18 +108,24 @@ module Make (Args : Gc_args.S) = struct
latest_gc_target_offset;
}

let swap_and_purge t suffix =
let swap_and_purge t removable_chunk_num suffix =
let open Result_syntax in
let { generation; latest_gc_target_offset; _ } = t in
let Worker.
{
start_offset = suffix_start_offset;
chunk_start_idx;
chunk_num;
dead_bytes = suffix_dead_bytes;
} =
suffix
in
(* Calculate chunk num in main process since more chunks could have been
added while GC was running. GC process only tells us how many chunks are
to be removed. *)
let suffix = Fm.suffix t.fm in
let chunk_num = Fm.Suffix.chunk_num suffix - removable_chunk_num in
assert (chunk_num >= 1);

let* () =
Fm.swap t.fm ~generation ~suffix_start_offset ~chunk_start_idx ~chunk_num
~suffix_dead_bytes ~latest_gc_target_offset
Expand Down Expand Up @@ -232,7 +238,9 @@ module Make (Args : Gc_args.S) = struct
Gc_stats.Main.finish_current_step partial_stats
"swap and purge"
in
let* () = swap_and_purge t suffix in
let* () =
swap_and_purge t (List.length removable_chunk_idxs) suffix
in
let partial_stats =
Gc_stats.Main.finish_current_step partial_stats "unlink"
in
Expand Down
32 changes: 14 additions & 18 deletions src/irmin-pack/unix/gc_worker.ml
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ module Make (Args : Gc_args.S) = struct
type new_suffix = {
start_offset : int63;
chunk_start_idx : int;
chunk_num : int;
dead_bytes : int63;
}
[@@deriving irmin]
Expand Down Expand Up @@ -289,7 +288,7 @@ module Make (Args : Gc_args.S) = struct
let open struct
type chunk = { idx : int; end_suffix_off : int63 }
end in
let chunks_gced =
let removable_chunks =
match Fm.Suffix.chunk_num suffix with
| 1 -> [] (* We never remove a single chunk. *)
| _ ->
Expand All @@ -311,37 +310,34 @@ module Make (Args : Gc_args.S) = struct
in
(* Step 6.2. Calculate the new chunk starting idx. *)
let chunk_start_idx =
match chunks_gced with
match removable_chunks with
| [] -> Fm.Suffix.start_idx suffix
| chunks ->
(* [chunks] is in descending order, so the new starting idx
is the hd + 1. *)
let { idx; _ } = chunks |> List.hd in
succ idx
| last_removed_chunk :: _ -> succ last_removed_chunk.idx
in
(* Step 6.3. Calculate new dead bytes at the beginning of the suffix. *)
let suffix_dead_bytes =
match chunks_gced with
match removable_chunks with
(* If no chunks are GCed, the dead bytes are equivalent to the physical
offset of new suffix offset. *)
| [] -> Dispatcher.soff_of_offset dispatcher new_suffix_start_offset
(* Otherwise, it is the difference between the last chunk removed's end offset
and the new start offset. *)
| _ ->
Int63.Syntax.(
new_suffix_start_offset - (List.hd chunks_gced).end_suffix_off)
| last_removed_chunk :: _ ->
let removed_end_offset =
last_removed_chunk.end_suffix_off
|> Dispatcher.offset_of_soff dispatcher
in
Int63.Syntax.(new_suffix_start_offset - removed_end_offset)
in
(* Step 6.4. Calculate the new number of chunks. *)
let chunk_num = Fm.Suffix.chunk_num suffix - List.length chunks_gced in
(* Step 6.5. Assertions and record construction. *)
(* Step 6.4. Assertions and record construction. *)
assert (Int63.Syntax.(suffix_dead_bytes >= Int63.zero));
assert (chunk_num >= 1);
let removable_chunk_idxs = chunks_gced |> List.map (fun c -> c.idx) in
let removable_chunk_idxs =
removable_chunks |> List.map (fun c -> c.idx)
in
( {
start_offset = new_suffix_start_offset;
dead_bytes = suffix_dead_bytes;
chunk_start_idx;
chunk_num;
},
removable_chunk_idxs )
in
Expand Down
1 change: 0 additions & 1 deletion src/irmin-pack/unix/gc_worker.mli
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ module Make (Args : Gc_args.S) : sig
type new_suffix = {
start_offset : int63;
chunk_start_idx : int;
chunk_num : int;
dead_bytes : int63;
}
[@@deriving irmin]
Expand Down
33 changes: 33 additions & 0 deletions test/irmin-pack/test_gc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1024,6 +1024,38 @@ module Split = struct
let* () = check_4 t c4 in
S.Repo.close t.repo

let multi_split_and_gc () =
(* This test primarily checks that dead byte calculation
happens correctly by testing GCs on chunks past the first
one. When the calculation is incorrect, exceptions are thrown
when attempting to lookup keys in the store. *)
let* t = init () in
let* t, c1 = commit_1 t in
let () = S.split t.repo in

let* t = checkout_exn t c1 in
let* t, c2 = commit_2 t in

let () = S.split t.repo in
let* () = start_gc t c1 in
let* () = finalise_gc t in

let* t = checkout_exn t c2 in
let* t, c3 = commit_3 t in

let () = S.split t.repo in
let* () = start_gc t c2 in
let* () = finalise_gc t in

let* t = checkout_exn t c3 in
let* t, c4 = commit_4 t in

let* () = check_not_found t c1 "removed c1" in
let* () = check_2 t c2 in
let* () = check_3 t c3 in
let* () = check_4 t c4 in
S.Repo.close t.repo

let split_and_gc () =
let* t = init () in
let* t, c1 = commit_1 t in
Expand Down Expand Up @@ -1068,6 +1100,7 @@ module Split = struct
tc "Test split and close" close_and_split;
tc "Test two gc followed by split" two_gc_then_split;
tc "Test split and GC" split_and_gc;
tc "Test multi split and GC" multi_split_and_gc;
tc "Test another split and GC" another_split_and_gc;
tc "Test split during GC" split_during_gc;
]
Expand Down

0 comments on commit 6313cfc

Please sign in to comment.