Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve regressions #159

Merged
merged 1 commit into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 42 additions & 36 deletions examples/regressions.ml
Original file line number Diff line number Diff line change
@@ -1,51 +1,57 @@
(* This test that unicode_old.ml is a strict sub-set of
* new unicode.ml. *)
(* This test that unicode_old.ml is a strict sub-set of new unicode.ml. *)

let test_versions = ("14.0.0", "15.0.0")
let regressions = []
let interval s e = Array.to_list (Array.init (e - s) (fun pos -> s + pos))
module CSet = Sedlex_ppx.Sedlex_cset
module Unicode = Sedlex_ppx.Unicode

exception Found
let test_versions = ("14.0.0", "15.0.0")

let test_exception name x =
try
let l = List.assoc name regressions in
List.iter (fun (s, e) -> if s <= x && x <= e then raise Found) l
with Not_found -> ()
let regressions =
[ (* Example *)
(* ("lt", CSet.union (CSet.singleton 0x1c5) (CSet.singleton (0x0001))) *) ]

let compare name (old_l : (int * int) list) (new_l : Sedlex_ppx.Sedlex_cset.t) =
let new_l = (new_l :> (int * int) list) in
let code_points =
List.fold_left (fun res (s, e) -> res @ interval s e) [] old_l
let compare name (old_ : CSet.t) (new_ : CSet.t) =
let diff = CSet.difference old_ new_ in
let regressions =
match List.assoc name regressions with
| exception Not_found -> CSet.empty
| x -> x
in
let test x =
try
test_exception name x;
List.iter (fun (s, e) -> if s <= x && x <= e then raise Found) new_l;
false
with Found -> true
in
List.iter
let regressions_intersect = CSet.intersection regressions old_ in
let regressions = CSet.difference regressions regressions_intersect in
let regressions_useless = CSet.difference regressions new_ in
let diff = CSet.difference diff regressions in
Seq.iter
(fun x ->
Printf.printf
"Invalid regression for 0x%x in %s: already present in old set.\n" x
name)
(CSet.to_seq regressions_intersect);
Seq.iter
(fun x ->
if not (test x) then
Printf.printf "Code point 0x%x missing in %s!\n" x name)
code_points
Printf.printf "Invalid regression for 0x%x in %s: absent in new set.\n" x
name)
(CSet.to_seq regressions_useless);
Seq.iter
(fun x -> Printf.printf "Code point 0x%x missing in %s!\n" x name)
(CSet.to_seq diff)

let test new_l (name, old_l) =
(* Cn is for unassigned code points, which are allowed to be
* used in future version. *)
if name <> "cn" then compare name old_l (List.assoc name new_l)
if name <> "cn" then (
let old_l =
List.fold_left
(fun acc (a, b) -> CSet.union acc (CSet.interval a b))
CSet.empty old_l
in
compare name old_l (List.assoc name new_l))

let () =
if (Unicode_old.version, Sedlex_ppx.Unicode.version) <> test_versions then
if (Unicode_old.version, Unicode.version) <> test_versions then
failwith
(Printf.sprintf "Test written for versions: %s => %s\n%!"
Unicode_old.version Sedlex_ppx.Unicode.version);
Unicode_old.version Unicode.version);
Printf.printf "Testing Unicode regression: %s => %s\n%!" Unicode_old.version
Sedlex_ppx.Unicode.version;
List.iter
(test Sedlex_ppx.Unicode.Categories.list)
Unicode_old.Categories.list;
List.iter
(test Sedlex_ppx.Unicode.Properties.list)
Unicode_old.Properties.list
Unicode.version;
List.iter (test Unicode.Categories.list) Unicode_old.Categories.list;
List.iter (test Unicode.Properties.list) Unicode_old.Properties.list
6 changes: 6 additions & 0 deletions src/common/cset.ml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@

type t = (int * int) list

let rec range_to_seq a b next () =
if a = b then Seq.Cons (a, next) else Seq.Cons (a, range_to_seq (a + 1) b next)

let rec to_seq x () =
match x with [] -> Seq.Nil | (a, b) :: xs -> range_to_seq a b (to_seq xs) ()

let check_invariant l =
let rec loop prev = function
| [] -> ()
Expand Down
1 change: 1 addition & 0 deletions src/common/cset.mli
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ val is_empty : t -> bool
val eof : t
val singleton : int -> t
val interval : int -> int -> t
val to_seq : t -> int Seq.t
Loading