diff --git a/CHANGES.md b/CHANGES.md index 520052d906..053f097513 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -43,6 +43,7 @@ - Unhandled exceptions in GC worker process are now reported as a failure (#2163, @metanivek) - Fix the silent mode for the integrity checks. (#2179, @icristescu) + - Fix file descriptor leak caused by `mmap`. (#2232, @art-w) ## 3.6.1 (2023-03-15) diff --git a/src/irmin-pack/unix/control_file.ml b/src/irmin-pack/unix/control_file.ml index 47ded57013..289712505d 100644 --- a/src/irmin-pack/unix/control_file.ml +++ b/src/irmin-pack/unix/control_file.ml @@ -180,6 +180,7 @@ module Serde = struct generation = x.generation; latest_gc_target_offset = x.suffix_start_offset; suffix_dead_bytes = Int63.zero; + mapping_end_poff = None; } | T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13 | T14 | T15 -> @@ -200,6 +201,25 @@ module Serde = struct volume_num = 0; } + let upgrade_status_from_v4 = function + | Payload.Upper.V4.From_v1_v2_post_upgrade x -> + Latest.From_v1_v2_post_upgrade x + | No_gc_yet -> No_gc_yet + | Used_non_minimal_indexing_strategy -> Used_non_minimal_indexing_strategy + | Gced x -> + Gced + { + suffix_start_offset = x.suffix_start_offset; + generation = x.generation; + latest_gc_target_offset = x.latest_gc_target_offset; + suffix_dead_bytes = x.suffix_dead_bytes; + mapping_end_poff = None; + } + | T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13 | T14 + | T15 -> + (* Unreachable *) + assert false + let upgrade_from_v4 (pl : Payload.Upper.V4.t) : payload = { dict_end_poff = pl.dict_end_poff; @@ -207,7 +227,7 @@ module Serde = struct checksum = Int63.zero; chunk_start_idx = pl.chunk_start_idx; chunk_num = pl.chunk_num; - status = pl.status; + status = upgrade_status_from_v4 pl.status; upgraded_from = Some (Version.to_int `V4); volume_num = 0; } diff --git a/src/irmin-pack/unix/control_file_intf.ml b/src/irmin-pack/unix/control_file_intf.ml index 98642292c0..7ce02130e2 100644 --- a/src/irmin-pack/unix/control_file_intf.ml +++ b/src/irmin-pack/unix/control_file_intf.ml @@ -193,15 +193,16 @@ module Payload = struct end module V5 = struct - type gced = V4.gced = { + type gced = { suffix_start_offset : int63; generation : int; latest_gc_target_offset : int63; suffix_dead_bytes : int63; + mapping_end_poff : int63 option; } [@@deriving irmin] - type status = V4.status = + type status = | From_v1_v2_post_upgrade of V3.from_v1_v2_post_upgrade | No_gc_yet | Used_non_minimal_indexing_strategy @@ -239,6 +240,8 @@ module Payload = struct New fields - [volume_num] stores the number of volumes in the lower layer. + - [mapping_end_poff] stores the mapping file size (optional if missing + or unknown after a migration from V4). Changed fields diff --git a/src/irmin-pack/unix/file_manager.ml b/src/irmin-pack/unix/file_manager.ml index 6777ae94f4..6c4fdc786f 100644 --- a/src/irmin-pack/unix/file_manager.ml +++ b/src/irmin-pack/unix/file_manager.ml @@ -77,15 +77,13 @@ struct let register_suffix_consumer t ~after_flush = t.suffix_consumers <- { after_flush } :: t.suffix_consumers - let generation = function - | Payload.From_v1_v2_post_upgrade _ | Used_non_minimal_indexing_strategy - | No_gc_yet -> - 0 - | T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13 | T14 - | T15 -> - (* Unreachable *) - assert false - | Gced x -> x.generation + let get_gced = function Payload.Gced x -> Some x | _ -> None + + let generation payload = + match get_gced payload with Some x -> x.generation | None -> 0 + + let mapping_size payload = + match get_gced payload with Some x -> x.mapping_end_poff | None -> None let notify_reload_consumers consumers = List.fold_left @@ -215,20 +213,24 @@ struct module Layout = Irmin_pack.Layout.V5 - let open_prefix ~root ~generation = + let open_prefix ~root ~generation ~mapping_size = let open Result_syntax in if generation = 0 then Ok None else let mapping = Layout.mapping ~generation ~root in let data = Layout.prefix ~root ~generation in - let* mapping_size = Io.size_of_path mapping in + let* mapping_size = + match mapping_size with + | Some size -> Ok size + | None -> Io.size_of_path mapping + in let mapping_size = Int63.to_int mapping_size in let+ prefix = Sparse.open_ro ~mapping_size ~mapping ~data in Some prefix - let reopen_prefix t ~generation = + let reopen_prefix t ~generation ~mapping_size = let open Result_syntax in - let* some_prefix = open_prefix ~root:t.root ~generation in + let* some_prefix = open_prefix ~root:t.root ~generation ~mapping_size in match some_prefix with | None -> Ok () | Some _ -> @@ -312,12 +314,12 @@ struct let use_fsync = Irmin_pack.Conf.use_fsync config in let indexing_strategy = Conf.indexing_strategy config in let pl : Payload.t = Control.payload control in - let generation = + let generation, mapping_size = match pl.status with | From_v1_v2_post_upgrade _ | No_gc_yet | Used_non_minimal_indexing_strategy -> - 0 - | Gced x -> x.generation + (0, None) + | Gced x -> (x.generation, x.mapping_end_poff) | T1 | T2 | T3 | T4 | T5 | T6 | T7 | T8 | T9 | T10 | T11 | T12 | T13 | T14 | T15 -> assert false @@ -350,7 +352,7 @@ struct let cb _ = suffix_requires_a_flush_exn (get_instance ()) in make_suffix ~auto_flush_threshold ~auto_flush_procedure:(`External cb) in - let* prefix = open_prefix ~root ~generation in + let* prefix = open_prefix ~root ~generation ~mapping_size in let* dict = let path = Layout.dict ~root in let auto_flush_threshold = @@ -437,7 +439,10 @@ struct in (* Step 3.2. Potentially reload prefix *) let* () = - if gen0 = gen1 then Ok () else reopen_prefix t ~generation:gen1 + if gen0 = gen1 then Ok () + else + reopen_prefix t ~generation:gen1 + ~mapping_size:(mapping_size pl1.status) in (* Step 3.3. Potentially reload lower *) if gen0 = gen1 && pl0.volume_num = pl1.volume_num then Ok () @@ -577,6 +582,7 @@ struct generation; latest_gc_target_offset = Int63.zero; suffix_dead_bytes = Int63.zero; + mapping_end_poff = Some Int63.zero; }; } in @@ -768,7 +774,9 @@ struct Suffix.open_ro ~root ~appendable_chunk_poff ~start_idx ~chunk_num ~dead_header_size in - let* prefix = open_prefix ~root ~generation in + let* prefix = + open_prefix ~root ~generation ~mapping_size:(mapping_size status) + in let* dict = let path = Layout.dict ~root in Dict.open_ro ~path ~end_poff:dict_end_poff ~dead_header_size @@ -833,8 +841,8 @@ struct | `Unknown_major_pack_version _ ) as e -> e) - let swap t ~generation ~suffix_start_offset ~chunk_start_idx ~chunk_num - ~suffix_dead_bytes ~latest_gc_target_offset ~volume = + let swap t ~generation ~mapping_size ~suffix_start_offset ~chunk_start_idx + ~chunk_num ~suffix_dead_bytes ~latest_gc_target_offset ~volume = let open Result_syntax in [%log.debug "Gc in main: swap gen %d; suffix start %a; chunk start idx %d; chunk num \ @@ -845,7 +853,8 @@ struct let pl = Control.payload t.control in (* Step 1. Reopen files *) - let* () = reopen_prefix t ~generation in + let mapping_size = Some mapping_size in + let* () = reopen_prefix t ~generation ~mapping_size in let* () = reopen_suffix t ~chunk_start_idx ~chunk_num ~appendable_chunk_poff:pl.appendable_chunk_poff @@ -870,6 +879,7 @@ struct generation; latest_gc_target_offset; suffix_dead_bytes; + mapping_end_poff = mapping_size; } in @@ -975,8 +985,7 @@ struct let lower = t.lower in cleanup ~root ~generation ~chunk_start_idx ~chunk_num ~lower - let create_one_commit_store t config ~generation ~latest_gc_target_offset - ~suffix_start_offset commit_key = + let create_one_commit_store t config gced commit_key = let open Result_syntax in let src_root = t.root in let dst_root = Irmin_pack.Conf.root config in @@ -992,15 +1001,7 @@ struct in let* () = Suffix.close suffix in (* Step 3. Create the control file and close it. *) - let status = - Payload.Gced - { - suffix_start_offset; - generation; - latest_gc_target_offset; - suffix_dead_bytes = Int63.zero; - } - in + let status = Payload.Gced gced in let dict_end_poff = Io.size_of_path dst_dict |> Errs.raise_if_error in let pl = { diff --git a/src/irmin-pack/unix/file_manager_intf.ml b/src/irmin-pack/unix/file_manager_intf.ml index d29705ae9f..a8f2700703 100644 --- a/src/irmin-pack/unix/file_manager_intf.ml +++ b/src/irmin-pack/unix/file_manager_intf.ml @@ -263,6 +263,7 @@ module type S = sig val swap : t -> generation:int -> + mapping_size:int63 -> suffix_start_offset:int63 -> chunk_start_idx:int -> chunk_num:int -> @@ -292,9 +293,7 @@ module type S = sig val create_one_commit_store : t -> Irmin.Backend.Conf.t -> - generation:int -> - latest_gc_target_offset:int63 -> - suffix_start_offset:int63 -> + Control_file.Payload.Upper.Latest.gced -> Index.key Pack_key.t -> (unit, [> open_rw_error | close_error ]) result (** [create_one_commit_store t conf generation new_store_root key] is called diff --git a/src/irmin-pack/unix/gc.ml b/src/irmin-pack/unix/gc.ml index 4854123799..22e2cd8beb 100644 --- a/src/irmin-pack/unix/gc.ml +++ b/src/irmin-pack/unix/gc.ml @@ -282,9 +282,14 @@ module Make (Args : Gc_args.S) = struct match (status, gc_output) with | `Success, Ok gc_results -> Lwt.return - ( t.latest_gc_target_offset, - t.new_suffix_start_offset, - gc_results.mapping_size ) + { + Control_file_intf.Payload.Upper.Latest.generation = + Fm.generation t.fm + 1; + latest_gc_target_offset = t.latest_gc_target_offset; + suffix_start_offset = t.new_suffix_start_offset; + suffix_dead_bytes = Int63.zero; + mapping_end_poff = Some gc_results.mapping_size; + } | _ -> let r = gc_errors status gc_output |> Errs.raise_if_error in Lwt.return r diff --git a/src/irmin-pack/unix/gc.mli b/src/irmin-pack/unix/gc.mli index 2a47642eb5..d35177f335 100644 --- a/src/irmin-pack/unix/gc.mli +++ b/src/irmin-pack/unix/gc.mli @@ -54,11 +54,10 @@ module Make (Args : Gc_args.S) : sig val cancel : t -> bool - val finalise_without_swap : t -> (int63 * int63) Lwt.t + val finalise_without_swap : + t -> Control_file_intf.Payload.Upper.Latest.gced Lwt.t (** Waits for the current gc to finish and returns immediately without swapping the files and doing the other finalisation steps from [finalise]. - - It returns the [latest_gc_target_offset] and the - [new_suffix_start_offset]. *) + Returns the [gced] status to create a fresh control file for the snapshot. *) end with module Args = Args diff --git a/src/irmin-pack/unix/gc_worker.ml b/src/irmin-pack/unix/gc_worker.ml index 4315eda120..7b8521959e 100644 --- a/src/irmin-pack/unix/gc_worker.ml +++ b/src/irmin-pack/unix/gc_worker.ml @@ -282,7 +282,7 @@ module Make (Args : Gc_args.S) = struct Live.to_list live_entries in - let () = + let mapping_size = (* Step 4. Create the new prefix. *) stats := Gc_stats.Worker.finish_current_step !stats "prefix: start"; let mapping = @@ -294,9 +294,9 @@ module Make (Args : Gc_args.S) = struct (* Step 5. Transfer to the new prefix, flush and close. *) [%log.debug "GC: transfering to the new prefix"]; stats := Gc_stats.Worker.finish_current_step !stats "prefix: transfer"; - Errors.finalise_exn (fun outcome -> + Errors.finalise_exn (fun _ -> Sparse.Ao.flush prefix - >>= (fun _ -> Sparse.Ao.close prefix >>= fun _ -> Ok outcome) + >>= (fun _ -> Sparse.Ao.close prefix) |> Errs.log_if_error "GC: Close prefix after data copy") @@ fun () -> (* Step 5.1. Transfer all. *) @@ -308,24 +308,27 @@ module Make (Args : Gc_args.S) = struct live_entries; Int63.to_int (Sparse.Ao.mapping_size prefix) in - (* Step 5.2. Update the parent commits to be dangling: reopen the new - prefix, this time in write-only as we have to - modify data inside the file. *) - stats := - Gc_stats.Worker.finish_current_step !stats - "prefix: rewrite commit parents"; - let prefix = - Sparse.Wo.open_wo ~mapping_size ~mapping ~data |> Errs.raise_if_error + let () = + (* Step 5.2. Update the parent commits to be dangling: reopen the new + prefix, this time in write-only as we have to + modify data inside the file. *) + stats := + Gc_stats.Worker.finish_current_step !stats + "prefix: rewrite commit parents"; + let prefix = + Sparse.Wo.open_wo ~mapping_size ~mapping ~data |> Errs.raise_if_error + in + Errors.finalise_exn (fun _outcome -> + Sparse.Wo.fsync prefix + >>= (fun _ -> Sparse.Wo.close prefix) + |> Errs.log_if_error "GC: Close prefix after parent rewrite") + @@ fun () -> + let write_exn = Sparse.Wo.write_exn prefix in + List.iter + (fun key -> transfer_parent_commit_exn ~write_exn key) + (Commit_value.parents commit) in - Errors.finalise_exn (fun _outcome -> - Sparse.Wo.fsync prefix - >>= (fun _ -> Sparse.Wo.close prefix) - |> Errs.log_if_error "GC: Close prefix after parent rewrite") - @@ fun () -> - let write_exn = Sparse.Wo.write_exn prefix in - List.iter - (fun key -> transfer_parent_commit_exn ~write_exn key) - (Commit_value.parents commit) + Int63.of_int mapping_size in let () = report_new_file_sizes ~root ~generation stats |> ignore in diff --git a/src/irmin-pack/unix/store.ml b/src/irmin-pack/unix/store.ml index 56c5bc857f..73706e54c9 100644 --- a/src/irmin-pack/unix/store.ml +++ b/src/irmin-pack/unix/store.ml @@ -357,16 +357,14 @@ module Maker (Config : Conf.S) = struct let () = if not launched then Errs.raise_error `Forbidden_during_gc in - let* latest_gc_target_offset, suffix_start_offset = + let* gced = match t.running_gc with | None -> assert false | Some { gc; _ } -> Gc.finalise_without_swap gc in - let generation = File_manager.generation t.fm + 1 in let config = Irmin.Backend.Conf.add t.config Conf.Key.root path in let () = - File_manager.create_one_commit_store t.fm config ~generation - ~latest_gc_target_offset ~suffix_start_offset commit_key + File_manager.create_one_commit_store t.fm config gced commit_key |> Errs.raise_if_error in let branch_path = Irmin_pack.Layout.V4.branch ~root:path in