From c5fcfa9935c1ba95647ac56cc3531abdd999ab38 Mon Sep 17 00:00:00 2001 From: Emile Trotignon Date: Tue, 22 Oct 2024 15:34:44 +0200 Subject: [PATCH] Add unit tests to suffix tree and discutable bugfixes --- index/index.ml | 4 ++ index/index.mli | 4 ++ index/suffix_tree.ml | 2 +- query/priority_queue.ml | 7 +- query/test/dune | 2 +- query/test/test.ml | 7 +- query/test/test_suffix_tree.ml | 127 +++++++++++++++++++++++++++++++++ 7 files changed, 148 insertions(+), 5 deletions(-) create mode 100644 query/test/test_suffix_tree.ml diff --git a/index/index.ml b/index/index.ml index 10c017b6..1ef0dccb 100644 --- a/index/index.ml +++ b/index/index.ml @@ -122,3 +122,7 @@ let term = $ index_name $ type_search $ favoured_prefixes) + +module Private = struct + module Suffix_tree = Suffix_tree +end diff --git a/index/index.mli b/index/index.mli index fae8900e..06a369a6 100644 --- a/index/index.mli +++ b/index/index.mli @@ -1 +1,5 @@ val term : (Db_store.db_format -> string -> unit) Cmdliner.Term.t + +module Private : sig + module Suffix_tree = Suffix_tree +end diff --git a/index/suffix_tree.ml b/index/suffix_tree.ml index 7e96c8d6..7dadb47e 100644 --- a/index/suffix_tree.ml +++ b/index/suffix_tree.ml @@ -389,7 +389,7 @@ let rec export ~cache ~cache_term ~summarize ~is_root node = then min_child, min_child :: terminals else min_terminal, terminals in - assert (min_child == Seen.min_elt seen) ; + assert (Entry.equal min_child (Seen.min_elt seen)) ; assert (terminals <> []) ; let terminals_uid, terminals = export_terminals ~cache_term ~is_summary terminals in let children_uids = List.map (fun (chr, { uid; _ }) -> chr, uid) children in diff --git a/query/priority_queue.ml b/query/priority_queue.ml index 6c38416a..0d1a7676 100644 --- a/query/priority_queue.ml +++ b/query/priority_queue.ml @@ -23,8 +23,11 @@ let minimum = function let of_sorted_array arr = Array (0, arr) let of_automata s = - let elt = String_automata.minimum s in - All (elt, s) + if String_automata.size s = 0 + then Empty + else ( + let elt = String_automata.minimum s in + All (elt, s)) let of_list lst = let lst = List.filter (( <> ) Empty) lst in diff --git a/query/test/dune b/query/test/dune index 46d89d81..59295843 100644 --- a/query/test/dune +++ b/query/test/dune @@ -1,3 +1,3 @@ (test (name test) - (libraries alcotest query)) + (libraries alcotest query index)) diff --git a/query/test/test.ml b/query/test/test.ml index 11fcf5cf..d2928537 100644 --- a/query/test/test.ml +++ b/query/test/test.ml @@ -1,3 +1,8 @@ let () = let open Alcotest in - run "Query" [ "Succ", Test_succ.tests_to_seq; "Type_parser", Test_type_parser.tests ] + run + "Query" + [ "Succ", Test_succ.tests_to_seq + ; "Type_parser", Test_type_parser.tests + ; "Suffix_tree", Test_suffix_tree.tests + ] diff --git a/query/test/test_suffix_tree.ml b/query/test/test_suffix_tree.ml new file mode 100644 index 00000000..333e36e7 --- /dev/null +++ b/query/test/test_suffix_tree.ml @@ -0,0 +1,127 @@ +open Index.Private +open Query.Private + +let knuth_morris_pratt str ~sub = + let sublen = String.length sub in + if sublen = 0 + then true + else ( + let len = String.length str in + List.init len (fun i -> + if i + sublen > len then false else String.sub str i sublen = sub) + |> List.exists Fun.id) + +let pkg = Db.Entry.Package.v ~name:"" ~version:"" + +let elt cost = + Db.Entry.v ~cost ~name:"" ~kind:Db.Entry.Kind.Doc ~rhs:None ~doc_html:"" ~url:"" ~pkg () + +let entry_of_int int = + Db.Entry.v + ~name:"" + ~kind:Db.Entry.Kind.Module + ~rhs:None + ~doc_html:"" + ~url:"" + ~pkg:(Db.Entry.Package.v ~name:"" ~version:"") + ~cost:int + () + +let int_of_entry entry = Db.Entry.(entry.cost) + +(** This module does the same thing as Succ, but its correctness is obvious + and its performance terrible. *) +module Reference = struct + type t' = (string * int) list + type t = t' ref + + let make () = ref [] + let add t id entry = t := (id, entry) :: !t + let export t = !t + + let find li request = + li + |> List.filter_map (fun (id, v) -> + if knuth_morris_pratt ~sub:request id then Some v else None) + |> List.sort_uniq Int.compare +end + +module Real = struct + type t = Suffix_tree.t + type t' = Db.String_automata.t + + let make () = Suffix_tree.(make (Buf.make ())) + + let add t id v = + let entry = entry_of_int v in + Suffix_tree.add_suffixes t id entry + + let export = Suffix_tree.export ~summarize:false + + let find s req = + match Db.String_automata.find s req with + | None -> [] + | Some automata -> + automata + |> Succ.of_automata + |> Succ.to_seq + |> List.of_seq + |> List.map int_of_entry + |> List.sort_uniq Int.compare +end + +module Both = struct + let make () = Reference.make (), Real.make () + + let add (ref, real) id v = + Reference.add ref id v ; + Real.add real id v + + let export (ref, real) = Reference.export ref, Real.export real + let find (ref, real) req = Reference.find ref req, Real.find real req +end + +let random_string ?size () = + let size = + match size with + | None -> Random.int 64 + | Some size -> size + in + String.init size (fun _ -> + let int = Random.int (122 - 97) + 97 in + Char.chr int) + +let random_suffix_tree size = + let suffixes = Both.make () in + for i = 1 to size do + let id = random_string () in + let v = Random.int size in + Printf.printf "%S -> %i\n" id v ; + Both.add suffixes id v + done ; + suffixes + +let test_find tree req = + let tree = Both.export tree in + let ref, real = Both.find tree req in + Alcotest.(check (list int)) "same int list" ref real + +let test_random_requests tree = + let tree = Both.export tree in + for i = 0 to 64 do + let req = random_string ~size:i () in + Printf.printf "req : %S\n" req ; + let ref, real = Both.find tree req in + Alcotest.(check (list int)) "same int list" ref real + done + +let test_random_size size () = + let tree = random_suffix_tree size in + test_random_requests tree + +let tests = + List.init 100 (fun i -> + Alcotest.test_case + (Printf.sprintf "Suffix_tree size %i" i) + `Quick + (test_random_size i))