Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add unit tests to suffix tree and discutable bugfixes
Browse files Browse the repository at this point in the history
EmileTrotignon committed Oct 22, 2024
1 parent 0357233 commit 4c4f796
Showing 7 changed files with 148 additions and 5 deletions.
4 changes: 4 additions & 0 deletions index/index.ml
Original file line number Diff line number Diff line change
@@ -151,3 +151,7 @@ let term =
$ index_name
$ type_search
$ favoured_prefixes)

module Private = struct
module Suffix_tree = Suffix_tree
end
4 changes: 4 additions & 0 deletions index/index.mli
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
val term : (Db_store.db_format -> string -> unit) Cmdliner.Term.t

module Private : sig
module Suffix_tree = Suffix_tree
end
2 changes: 1 addition & 1 deletion index/suffix_tree.ml
Original file line number Diff line number Diff line change
@@ -389,7 +389,7 @@ let rec export ~cache ~cache_term ~summarize ~is_root node =
then min_child, min_child :: terminals
else min_terminal, terminals
in
assert (min_child == Seen.min_elt seen) ;
assert (Entry.equal min_child (Seen.min_elt seen)) ;
assert (terminals <> []) ;
let terminals_uid, terminals = export_terminals ~cache_term ~is_summary terminals in
let children_uids = List.map (fun (chr, { uid; _ }) -> chr, uid) children in
7 changes: 5 additions & 2 deletions query/priority_queue.ml
Original file line number Diff line number Diff line change
@@ -23,8 +23,11 @@ let minimum = function
let of_sorted_array arr = Array (0, arr)

let of_automata s =
let elt = String_automata.minimum s in
All (elt, s)
if String_automata.size s = 0
then Empty
else (
let elt = String_automata.minimum s in
All (elt, s))

let of_list lst =
let lst = List.filter (( <> ) Empty) lst in
2 changes: 1 addition & 1 deletion query/test/dune
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
(test
(name test)
(libraries alcotest query))
(libraries alcotest query index))
7 changes: 6 additions & 1 deletion query/test/test.ml
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
let () =
let open Alcotest in
run "Query" [ "Succ", Test_succ.tests_to_seq; "Type_parser", Test_type_parser.tests ]
run
"Query"
[ "Succ", Test_succ.tests_to_seq
; "Type_parser", Test_type_parser.tests
; "Suffix_tree", Test_suffix_tree.tests
]
127 changes: 127 additions & 0 deletions query/test/test_suffix_tree.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
open Index.Private
open Query.Private

let knuth_morris_pratt str ~sub =
let sublen = String.length sub in
if sublen = 0
then true
else (
let len = String.length str in
List.init len (fun i ->
if i + sublen > len then false else String.sub str i sublen = sub)
|> List.exists Fun.id)

let pkg = Db.Entry.Package.v ~name:"" ~version:""

let elt cost =
Db.Entry.v ~cost ~name:"" ~kind:Db.Entry.Kind.Doc ~rhs:None ~doc_html:"" ~url:"" ~pkg ()

let entry_of_int int =
Db.Entry.v
~name:""
~kind:Db.Entry.Kind.Module
~rhs:None
~doc_html:""
~url:""
~pkg:(Db.Entry.Package.v ~name:"" ~version:"")
~cost:int
()

let int_of_entry entry = Db.Entry.(entry.cost)

(** This module does the same thing as Succ, but its correctness is obvious
and its performance terrible. *)
module Reference = struct
type t' = (string * int) list
type t = t' ref

let make () = ref []
let add t id entry = t := (id, entry) :: !t
let export t = !t

let find li request =
li
|> List.filter_map (fun (id, v) ->
if knuth_morris_pratt ~sub:request id then Some v else None)
|> List.sort_uniq Int.compare
end

module Real = struct
type t = Suffix_tree.t
type t' = Db.String_automata.t

let make () = Suffix_tree.(make (Buf.make ()))

let add t id v =
let entry = entry_of_int v in
Suffix_tree.add_suffixes t id entry

let export = Suffix_tree.export ~summarize:false

let find s req =
match Db.String_automata.find s req with
| None -> []
| Some automata ->
automata
|> Succ.of_automata
|> Succ.to_seq
|> List.of_seq
|> List.map int_of_entry
|> List.sort_uniq Int.compare
end

module Both = struct
let make () = Reference.make (), Real.make ()

let add (ref, real) id v =
Reference.add ref id v ;
Real.add real id v

let export (ref, real) = Reference.export ref, Real.export real
let find (ref, real) req = Reference.find ref req, Real.find real req
end

let random_string ?size () =
let size =
match size with
| None -> Random.int 64
| Some size -> size
in
String.init size (fun _ ->
let int = Random.int (122 - 97) + 97 in
Char.chr int)

let random_suffix_tree size =
let suffixes = Both.make () in
for i = 1 to size do
let id = random_string () in
let v = Random.int size in
Printf.printf "%S -> %i\n" id v ;
Both.add suffixes id v
done ;
suffixes

let test_find tree req =
let tree = Both.export tree in
let ref, real = Both.find tree req in
Alcotest.(check (list int)) "same int list" ref real

let test_random_requests tree =
let tree = Both.export tree in
for i = 0 to 64 do
let req = random_string ~size:i () in
Printf.printf "req : %S\n" req ;
let ref, real = Both.find tree req in
Alcotest.(check (list int)) "same int list" ref real
done

let test_random_size size () =
let tree = random_suffix_tree size in
test_random_requests tree

let tests =
List.init 100 (fun i ->
Alcotest.test_case
(Printf.sprintf "Suffix_tree size %i" i)
`Quick
(test_random_size i))

0 comments on commit 4c4f796

Please sign in to comment.