diff --git a/CHANGELOG.md b/CHANGELOG.md index 496dd83..0194fa7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,11 @@ -# v0.10.0 - Unreleased +# v0.11.0 - Unreleased + +- Move all signatures into a new `Sigs` module, also move the `cmp` and `snd` types + into `Sigs`. This is due to breaking up the single file into multiple modules. + Simply using includes would break the documentation + (see: https://github.com/ocaml/odoc/issues/1162) + +# v0.10.0 - 2024-06-01 ## Main changes diff --git a/README.md b/README.md index e94d95e..e961bcf 100644 --- a/README.md +++ b/README.md @@ -307,7 +307,7 @@ let map2 = ## Release status This should be close to a stable release. It is already being -used as part of a larger project successfully, and this usage as helped us mature +used as part of a [larger project](https://codex.top) successfully, and this usage as helped us mature the interface. As is, we believe the project is usable, and we don't anticipate any major change before 1.0.0. We didn't commit to a stable release straight away as we would like a bit more time using this library before doing so. @@ -322,7 +322,7 @@ type (_, 'b) snd = Snd of 'b [@@unboxed] It should not incur any extra performance cost as it is unboxed, but can appear when manipulating non-generic maps. -For more details about this issue, see [the OCaml discourse discussion](https://discuss.ocaml.org/t/weird-behaviors-with-first-order-polymorphism/13783). +For more details about this issue, see [the OCaml discourse discussion](https://discuss.ocaml.org/t/weird-behaviors-with-first-order-polymorphism/13783) or [the github issue](https://github.com/ocaml/ocaml/issues/13292). ## Comparison to other OCaml libraries diff --git a/src/PatriciaTree.ml b/src/PatriciaTree.ml new file mode 100644 index 0000000..9c6adac --- /dev/null +++ b/src/PatriciaTree.ml @@ -0,0 +1,26 @@ +(**************************************************************************) +(* This file is part of the Codex semantics library *) +(* (patricia-tree sub-component). *) +(* *) +(* Copyright (C) 2024 *) +(* CEA (Commissariat à l'énergie atomique et aux énergies *) +(* alternatives) *) +(* *) +(* You can redistribute it and/or modify it under the terms of the GNU *) +(* Lesser General Public License as published by the Free Software *) +(* Foundation, version 2.1. *) +(* *) +(* It is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) +(* GNU Lesser General Public License for more details. *) +(* *) +(* See the GNU Lesser General Public License version 2.1 *) +(* for more details (enclosed in the file LICENSE). *) +(**************************************************************************) + +include Ints +module Sigs = Sigs +include Key_value +include Functors +include Nodes diff --git a/src/PatriciaTree.mli b/src/PatriciaTree.mli new file mode 100644 index 0000000..3d24031 --- /dev/null +++ b/src/PatriciaTree.mli @@ -0,0 +1,143 @@ +(**************************************************************************) +(* This file is part of the Codex semantics library *) +(* (patricia-tree sub-component). *) +(* *) +(* Copyright (C) 2024 *) +(* CEA (Commissariat à l'énergie atomique et aux énergies *) +(* alternatives) *) +(* *) +(* You can redistribute it and/or modify it under the terms of the GNU *) +(* Lesser General Public License as published by the Free Software *) +(* Foundation, version 2.1. *) +(* *) +(* It is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) +(* GNU Lesser General Public License for more details. *) +(* *) +(* See the GNU Lesser General Public License version 2.1 *) +(* for more details (enclosed in the file LICENSE). *) +(**************************************************************************) + +(** Association maps from key to values, and sets, implemented with + Patricia Trees, allowing fast merge operations by making use of + physical equality between subtrees; and custom implementation of + {{!node_impl}tree nodes} (allowing normal maps, {{!hash_consed}hash-consed maps}, weak key or + value maps, sets, custom maps, etc.). + + The main entry points into this library are the functors that build maps + and sets: + {table + {tr + {th } + {th Map} + {th Set} + } + {tr + {th Homogeneous} + {td {!MakeMap}} + {td {!MakeSet}} + } + {tr + {th Heterogeneous} + {td {!MakeHeterogeneousMap}} + {td {!MakeHeterogeneousSet}} + } + {tr + {th {{!hash_consed}hashconsed} Homogeneous} + {td {!MakeHashconsedMap}} + {td {!MakeHashconsedSet}} + } + {tr + {th {{!hash_consed}Hashconsed} Heterogeneous} + {td {!MakeHashconsedHeterogeneousMap}} + {td {!MakeHashconsedHeterogeneousSet}} + } + } + + + Differences between this library and OCaml's {{: https://ocaml.org/api/Map.S.html}[Map]} include: + + {ul + {- The required signature for keys is different, in that we require + each key to be mapped to a unique integer identifier.} + + {- The implementation uses Patricia Tree, as described in Okasaki + and Gill's 1998 paper + {{: https://www.semanticscholar.org/paper/Fast-Mergeable-Integer-Maps-Okasaki-Gill/23003be706e5f586f23dd7fa5b2a410cc91b659d}{i Fast mergeable integer maps}}, + i.e. it is a space-efficient prefix trie over the big-endian representation of + the key's integer identifier. + + Example of a 5-bit patricia tree containing five numbers: 0 [0b0000], 1 [0b0001], + 5 [0b0101] and 7 [0b0111] and -8 [0b1111]: + {v + Branch + (prefix=0b?___) + / \ + Branch Leaf(-8) + (prefix=0b0?__) 0b1111 + / \ + Branch Branch + (prefix=0b000?) (prefix=0b01?_) + | | | | + Leaf(0) Leaf(1) Leaf(5) Leaf(7) + 0b0000 0b0001 0b0101 0b0111 + v} + + The main benefit of Patricia Tree is that their representation + is stable (contrary to maps, inserting nodes in any order will + return the same shape), which allows different versions of a map + to share more subtrees in memory, and the operations over two + maps to benefit from this sharing. The functions in this library + attempt to maximally preserve sharing and benefit from sharing, + allowing very important improvements in complexity and running + time when combining maps or sets is a frequent operation.} + + {- Finally, the implementation is more customizable, allowing + notably (key,value) pairs or different types to be in the same map, + or to choose the memory representation of the nodes of the tree.} + + {- Some operations like {{!Sigs.BASE_MAP.pop_unsigned_minimum}[pop_unsigned_minimum]} and + {{!Sigs.BASE_MAP.pop_unsigned_maximum}[pop_unsigned_maximum]} make our Set + suitable as priority queue (but remember that each element in the + queue must map to a distinct integer, and that using the {{!unsigned_lt}unsigned order} + means elements with negative priority are seen as greater than elements with + positive ones).} + } + + {b Note on complexity:} in the following, n represents the size of the + map when there is one (and [|map1|] is the number of elements in + [map1]). The term log(n) correspond to the maximum height of the + tree, which is log(n) if we assume an even distribution of numbers + in the map (e.g. random distribution, or integers chosen + contiguously using a counter). The worst-case height is + O(min(n,64)) which is actually constant, but not really + informative; log(n) corresponds to the real complexity in usual + distributions. *) + +(** {1 Signatures} *) + +module Sigs = Sigs + +(** {1 Functors} *) + +include module type of Functors + +(** {1 Miscellaneous utilities} *) + +include module type of Ints + +(** {1 Default KEY and VALUE implementations} *) +(** These can be used as parameters to {!MakeMap}/{!MakeSet} functors in the + most common use cases. *) + +include module type of Key_value + +(** {1:node_impl Some implementations of NODE} *) +(** We provide a few different implementations of {!Sigs.NODE}, the internal representation + of a PatriciaTree's nodes. They can be used with + the {!MakeCustomMap}, {!MakeCustomSet}, {!MakeCustomHeterogeneousMap} and + {!MakeCustomHeterogeneousSet} functors to build maps and sets with custom + internal representation. *) + +include module type of Nodes diff --git a/dune b/src/dune similarity index 86% rename from dune rename to src/dune index a9bbdae..00def75 100644 --- a/dune +++ b/src/dune @@ -22,7 +22,6 @@ (library (name PatriciaTree) (public_name patricia-tree) - (modules PatriciaTree) (foreign_stubs (language c) (names int_builtins))) @@ -32,15 +31,5 @@ (mdx (files *.mld *.mli) - (libraries patricia-tree)) - -;; For test purposes only. - -(library - (name PatriciaTreeTest) - (inline_tests - (libraries qcheck-core)) - (preprocess - (pps ppx_inline_test)) - (libraries PatriciaTree qcheck-core) - (modules PatriciaTreeTest)) + (libraries patricia-tree) + (preludes test/mdx_prelude.ml)) diff --git a/patriciaTree.ml b/src/functors.ml similarity index 61% rename from patriciaTree.ml rename to src/functors.ml index 195df15..4972a5e 100644 --- a/patriciaTree.ml +++ b/src/functors.ml @@ -19,785 +19,19 @@ (* for more details (enclosed in the file LICENSE). *) (**************************************************************************) -(** {1 Signatures} *) +open Ints +open Sigs +open Key_value +open Nodes -(** The integer associated with a key *) -type intkey = int +(** [match_prefix k p m] returns [true] if and only if the key [k] has prefix [p] up to bit [m]. *) +let match_prefix k p m = mask k m = p -(** A mask is an integer with a single bit set (i.e. a power of 2). *) -type mask = int +(** Returns true if the branch caracterized by the two first arguments + would contain the branch caracterized by the second (as right or left subtree) *) +let [@inline always] branches_before l_prefix (l_mask : mask) (r_prefix : intkey) (r_mask : mask) = + unsigned_lt (r_mask :> int) (l_mask :> int) && match_prefix (r_prefix :> int) l_prefix l_mask -module type NODE = sig - type 'key key - type ('key, 'map) value - type 'map t - - val empty : 'map t - val leaf : 'key key -> ('key, 'map) value -> 'map t - val branch : - prefix:intkey -> - branching_bit:mask -> tree0:'map t -> tree1:'map t -> 'map t - - type 'map view = private - | Empty : 'map view - | Branch : { prefix : intkey; branching_bit : mask; - tree0 : 'map t; tree1 : 'map t; } -> 'map view - | Leaf : { key : 'key key; value : ('key, 'map) value; } -> 'map view - - val is_empty: 'map t -> bool - val view: 'a t -> 'a view -end - -module type NODE_WITH_ID = sig - include NODE - val to_int: 'a t -> int -end - -module type HASH_CONSED_NODE = sig - include NODE_WITH_ID - val equal : 'a t -> 'a t -> bool - val compare : 'a t -> 'a t -> int -end - -module type BASE_MAP = sig - include NODE - - type 'map key_value_pair = - KeyValue : 'a key * ('a, 'map) value -> 'map key_value_pair - val unsigned_min_binding : 'a t -> 'a key_value_pair - val unsigned_max_binding : 'a t -> 'a key_value_pair - val singleton : 'a key -> ('a, 'b) value -> 'b t - val cardinal : 'a t -> int - val is_singleton : 'a t -> 'a key_value_pair option - val find : 'key key -> 'map t -> ('key, 'map) value - val find_opt : 'key key -> 'map t -> ('key, 'map) value option - val mem : 'key key -> 'map t -> bool - val remove : 'key key -> 'map t -> 'map t - val pop_unsigned_minimum: 'map t -> ('map key_value_pair * 'map t) option - val pop_unsigned_maximum: 'map t -> ('map key_value_pair * 'map t) option - - val insert: 'a key -> (('a,'map) value option -> ('a,'map) value) -> 'map t -> 'map t - val update: 'a key -> (('a,'map) value option -> ('a,'map) value option) -> 'map t -> 'map t - val add : 'key key -> ('key, 'map) value -> 'map t -> 'map t - val split : 'key key -> 'map t -> 'map t * ('key, 'map) value option * 'map t - - type 'map polyiter = { f : 'a. 'a key -> ('a, 'map) value -> unit; } [@@unboxed] - val iter : 'map polyiter -> 'map t -> unit - - type ('acc,'map) polyfold = { f: 'a. 'a key -> ('a,'map) value -> 'acc -> 'acc } [@@unboxed] - val fold : ('acc,'map) polyfold -> 'map t -> 'acc -> 'acc - - type ('acc,'map) polyfold2 = { f: 'a. 'a key -> ('a,'map) value -> ('a,'map) value -> 'acc -> 'acc } [@@unboxed] - val fold_on_nonequal_inter : ('acc,'map) polyfold2 -> 'map t -> 'map t -> 'acc -> 'acc - - type ('acc,'map) polyfold2_union = { f: 'a. 'a key -> ('a,'map) value option -> ('a,'map) value option -> 'acc -> 'acc } [@@unboxed] - val fold_on_nonequal_union : ('acc,'map) polyfold2_union -> 'map t -> 'map t -> 'acc -> 'acc - - type 'map polypredicate = { f: 'a. 'a key -> ('a,'map) value -> bool; } [@@unboxed] - val filter : 'map polypredicate -> 'map t -> 'map t - val for_all : 'map polypredicate -> 'map t -> bool - - type ('map1,'map2) polymap = { f : 'a. ('a, 'map1) value -> ('a, 'map2) value; } [@@unboxed] - val map : ('map,'map) polymap -> 'map t -> 'map t - val map_no_share : ('map1,'map2) polymap -> 'map1 t -> 'map2 t - - type ('map1,'map2) polymapi = - { f : 'a. 'a key -> ('a, 'map1) value -> ('a, 'map2) value; } [@@unboxed] - val mapi : ('map,'map) polymapi -> 'map t -> 'map t - val mapi_no_share : ('map1,'map2) polymapi -> 'map1 t -> 'map2 t - - type ('map1,'map2) polyfilter_map = - { f : 'a. 'a key -> ('a, 'map1) value -> ('a, 'map2) value option; } [@@unboxed] - val filter_map : ('map,'map) polyfilter_map -> 'map t -> 'map t - val filter_map_no_share : ('map1,'map2) polyfilter_map -> 'map1 t -> 'map2 t - - type 'map polypretty = { f: 'a. Format.formatter -> 'a key -> ('a, 'map) value -> unit } [@@unboxed] - val pretty : - ?pp_sep:(Format.formatter -> unit -> unit) -> 'map polypretty -> - Format.formatter -> 'map t -> unit - - type ('map1,'map2) polysame_domain_for_all2 = - { f : 'a. 'a key -> ('a, 'map1) value -> ('a, 'map2) value -> bool; } [@@unboxed] - val reflexive_same_domain_for_all2 : - ('map,'map) polysame_domain_for_all2 -> 'map t -> 'map t -> bool - val nonreflexive_same_domain_for_all2: - ('map1,'map2) polysame_domain_for_all2 -> 'map1 t -> 'map2 t -> bool - val reflexive_subset_domain_for_all2 : - ('map,'map) polysame_domain_for_all2 -> 'map t -> 'map t -> bool - - type ('map1, 'map2, 'map3) polyunion = { - f : 'a. 'a key -> ('a, 'map1) value -> ('a, 'map2) value -> ('a, 'map3) value; } [@@unboxed] - val idempotent_union : ('a, 'a, 'a) polyunion -> 'a t -> 'a t -> 'a t - - - type ('map1, 'map2, 'map3) polyinter = { - f : 'a. 'a key -> ('a, 'map1) value -> ('a, 'map2) value -> ('a, 'map3) value; - } [@@unboxed] - val idempotent_inter : ('a, 'a, 'a) polyinter -> 'a t -> 'a t -> 'a t - val nonidempotent_inter_no_share :('a, 'b, 'c) polyinter -> 'a t -> 'b t -> 'c t - - - type ('map1, 'map2, 'map3) polyinterfilter = { f : 'a. 'a key -> ('a, 'map1) value -> ('a, 'map2) value -> ('a, 'map3) value option; } [@@unboxed] - val idempotent_inter_filter : ('a, 'a, 'a) polyinterfilter -> 'a t -> 'a t -> 'a t - - type ('map1, 'map2, 'map3) polymerge = { - f : 'a. 'a key -> ('a, 'map1) value option -> ('a, 'map2) value option -> ('a, 'map3) value option; } [@@unboxed] - val slow_merge : ('map1, 'map2, 'map3) polymerge -> 'map1 t -> 'map2 t -> 'map3 t - val disjoint : 'a t -> 'a t -> bool - - val to_seq : 'a t -> 'a key_value_pair Seq.t - val to_rev_seq : 'a t -> 'a key_value_pair Seq.t - val add_seq : 'a key_value_pair Seq.t -> 'a t -> 'a t - val of_seq : 'a key_value_pair Seq.t -> 'a t - - val of_list : 'a key_value_pair list -> 'a t - val to_list : 'a t -> 'a key_value_pair list -end - -(** {2 Heterogeneous maps and sets} *) - -module type HETEROGENEOUS_MAP = sig - include BASE_MAP - - module WithForeign(Map2:BASE_MAP with type 'a key = 'a key):sig - type ('map1,'map2) polyinter_foreign = { f: 'a. 'a key -> ('a,'map1) value -> ('a,'map2) Map2.value -> ('a,'map1) value } [@@unboxed] - - val nonidempotent_inter : ('a,'b) polyinter_foreign -> 'a t -> 'b Map2.t -> 'a t - - type ('map2,'map1) polyfilter_map_foreign = - { f : 'a. 'a key -> ('a, 'map2) Map2.value -> ('a, 'map1) value option; } [@@unboxed] - val filter_map_no_share : ('map2,'map1) polyfilter_map_foreign -> 'map2 Map2.t -> 'map1 t - (** Like {!BASE_MAP.filter_map_no_share}, but allows to transform a foreigh map into the current one. *) - - type ('map1,'map2) polyupdate_multiple = { f: 'a. 'a key -> ('a,'map1) value option -> ('a,'map2) Map2.value -> ('a,'map1) value option } [@@unboxed] - val update_multiple_from_foreign : 'b Map2.t -> ('a,'b) polyupdate_multiple -> 'a t -> 'a t - - type ('map1,'map2) polyupdate_multiple_inter = { f: 'a. 'a key -> ('a,'map1) value -> ('a,'map2) Map2.value -> ('a,'map1) value option } [@@unboxed] - val update_multiple_from_inter_with_foreign : 'b Map2.t -> ('a,'b) polyupdate_multiple_inter -> 'a t -> 'a t - end -end - - -module type HETEROGENEOUS_SET = sig - type 'a elt - - module BaseMap : HETEROGENEOUS_MAP - with type 'a key = 'a elt - and type (_,_) value = unit - - type t = unit BaseMap.t - type 'a key = 'a elt - - type any_elt = Any : 'a elt -> any_elt - - val empty: t - val is_empty: t -> bool - val mem: 'a elt -> t -> bool - val add: 'a elt -> t -> t - val singleton: 'a elt -> t - val cardinal: t -> int - val is_singleton: t -> any_elt option - val remove: 'a elt -> t -> t - val unsigned_min_elt: t -> any_elt - val unsigned_max_elt: t -> any_elt - val pop_unsigned_minimum: t -> (any_elt * t) option - val pop_unsigned_maximum: t -> (any_elt * t) option - val union: t -> t -> t - val inter: t -> t -> t - val disjoint: t -> t -> bool - val equal : t -> t -> bool - val subset : t -> t -> bool - val split: 'a elt -> t -> t * bool * t - type polyiter = { f: 'a. 'a elt -> unit; } [@@unboxed] - val iter: polyiter -> t -> unit - - type polypredicate = { f: 'a. 'a elt -> bool; } [@@unboxed] - val filter: polypredicate -> t -> t - val for_all: polypredicate -> t -> bool - - type 'acc polyfold = { f: 'a. 'a elt -> 'acc -> 'acc } [@@unboxed] - val fold: 'acc polyfold -> t -> 'acc -> 'acc - - type polypretty = { f: 'a. Format.formatter -> 'a elt -> unit; } [@@unboxed] - val pretty : - ?pp_sep:(Format.formatter -> unit -> unit) -> polypretty -> Format.formatter -> t -> unit - - val to_seq : t -> any_elt Seq.t - val to_rev_seq : t -> any_elt Seq.t - val add_seq : any_elt Seq.t -> t -> t - val of_seq : any_elt Seq.t -> t - - val of_list : any_elt list -> t - val to_list : t -> any_elt list -end - - -(** {2 Homogeneous maps and sets} *) - -(** Signature for sets implemented using Patricia trees. *) -module type SET = sig - type elt - - module BaseMap : HETEROGENEOUS_MAP - with type _ key = elt - and type (_,_) value = unit - - type key = elt - type t = unit BaseMap.t - - val empty: t - val is_empty: t -> bool - val mem: elt -> t -> bool - val add: elt -> t -> t - val singleton: elt -> t - val cardinal: t -> int - val is_singleton: t -> elt option - val remove: elt -> t -> t - val unsigned_min_elt: t -> elt - val unsigned_max_elt: t -> elt - val pop_unsigned_minimum: t -> (elt * t) option - val pop_unsigned_maximum: t -> (elt * t) option - val iter: (elt -> unit) -> t -> unit - val filter: (elt -> bool) -> t -> t - val for_all: (elt -> bool) -> t -> bool - val fold: (elt -> 'b -> 'b) -> t -> 'b -> 'b - val split: elt -> t -> t * bool * t - val pretty : - ?pp_sep:(Format.formatter -> unit -> unit) -> - (Format.formatter -> elt -> unit) -> - Format.formatter -> t -> unit - val union: t -> t -> t - val inter: t -> t -> t - val disjoint: t -> t -> bool - val equal : t -> t -> bool - val subset : t -> t -> bool - val to_seq : t -> elt Seq.t - val to_rev_seq : t -> elt Seq.t - val add_seq : elt Seq.t -> t -> t - val of_seq : elt Seq.t -> t - - val of_list : elt list -> t - val to_list : t -> elt list -end - -type (_, 'b) snd = Snd of 'b [@@unboxed] - -module type MAP_WITH_VALUE = sig - type key - type 'a t - type 'a value - - module BaseMap : HETEROGENEOUS_MAP - with type 'a t = 'a t - and type _ key = key - and type ('a,'b) value = ('a,'b value) snd - - val empty : 'a t - val is_empty : 'a t -> bool - val unsigned_min_binding : 'a t -> (key * 'a value) - val unsigned_max_binding : 'a t -> (key * 'a value) - val singleton : key -> 'a value -> 'a t - val cardinal : 'a t -> int - val is_singleton : 'a t -> (key * 'a value) option - val find : key -> 'a t -> 'a value - val find_opt : key -> 'a t -> 'a value option - val mem : key -> 'a t -> bool - val remove : key -> 'a t -> 'a t - val pop_unsigned_minimum : 'a t -> (key * 'a value * 'a t) option - val pop_unsigned_maximum : 'a t -> (key * 'a value * 'a t) option - val insert : key -> ('a value option -> 'a value) -> 'a t -> 'a t - val update : key -> ('a value option -> 'a value option) -> 'a t -> 'a t - val add : key -> 'a value -> 'a t -> 'a t - val split : key -> 'a t -> 'a t * 'a value option * 'a t - val iter : (key -> 'a value -> unit) -> 'a t -> unit - val fold : (key -> 'a value -> 'acc -> 'acc) -> 'a t -> 'acc -> 'acc - val fold_on_nonequal_inter : (key -> 'a value -> 'a value -> 'acc -> 'acc) -> - 'a t -> 'a t -> 'acc -> 'acc - val fold_on_nonequal_union : - (key -> 'a value option -> 'a value option -> 'acc -> 'acc) -> - 'a t -> 'a t -> 'acc -> 'acc - val filter : (key -> 'a value -> bool) -> 'a t -> 'a t - val for_all : (key -> 'a value -> bool) -> 'a t -> bool - val map : ('a value -> 'a value) -> 'a t -> 'a t - val map_no_share : ('a value -> 'b value) -> 'a t -> 'b t - val mapi : (key -> 'a value -> 'a value) -> 'a t -> 'a t - val mapi_no_share : (key -> 'a value -> 'b value) -> 'a t -> 'b t - val filter_map : (key -> 'a value -> 'a value option) -> 'a t -> 'a t - val filter_map_no_share : (key -> 'a value -> 'b value option) -> 'a t -> 'b t - val reflexive_same_domain_for_all2 : (key -> 'a value -> 'a value -> bool) -> 'a t -> 'a t -> bool - val nonreflexive_same_domain_for_all2 : (key -> 'a value -> 'b value -> bool) -> 'a t -> 'b t -> bool - val reflexive_subset_domain_for_all2 : (key -> 'a value -> 'a value -> bool) -> 'a t -> 'a t -> bool - val idempotent_union : (key -> 'a value -> 'a value -> 'a value) -> 'a t -> 'a t -> 'a t - val idempotent_inter : (key -> 'a value -> 'a value -> 'a value) -> 'a t -> 'a t -> 'a t - val nonidempotent_inter_no_share : (key -> 'a value -> 'b value -> 'c value) -> 'a t -> 'b t -> 'c t - val idempotent_inter_filter : (key -> 'a value -> 'a value -> 'a value option) -> 'a t -> 'a t -> 'a t - val slow_merge : (key -> 'a value option -> 'b value option -> 'c value option) -> 'a t -> 'b t -> 'c t - val disjoint : 'a t -> 'a t -> bool - - module WithForeign(Map2 : BASE_MAP with type _ key = key):sig - type ('b,'c) polyfilter_map_foreign = { f: 'a. key -> ('a,'b) Map2.value -> 'c value option } [@@unboxed] - val filter_map_no_share : ('b, 'c) polyfilter_map_foreign -> 'b Map2.t -> 'c t - - type ('value,'map2) polyinter_foreign = - { f: 'a. 'a Map2.key -> 'value value -> ('a, 'map2) Map2.value -> 'value value } [@@unboxed] - val nonidempotent_inter : ('a, 'b) polyinter_foreign -> 'a t -> 'b Map2.t -> 'a t - - type ('map1,'map2) polyupdate_multiple = { f: 'a. key -> 'map1 value option -> ('a,'map2) Map2.value -> 'map1 value option } [@@unboxed] - val update_multiple_from_foreign : 'b Map2.t -> ('a,'b) polyupdate_multiple -> 'a t -> 'a t - - type ('map1,'map2) polyupdate_multiple_inter = { f: 'a. key -> 'map1 value -> ('a,'map2) Map2.value -> 'map1 value option } [@@unboxed] - val update_multiple_from_inter_with_foreign: 'b Map2.t -> ('a,'b) polyupdate_multiple_inter -> 'a t -> 'a t - end - - val pretty : - ?pp_sep:(Format.formatter -> unit -> unit) -> - (Format.formatter -> key -> 'a value -> unit) -> - Format.formatter -> 'a t -> unit - - val to_seq : 'a t -> (key * 'a value) Seq.t - val to_rev_seq : 'a t -> (key * 'a value) Seq.t - val add_seq : (key * 'a value) Seq.t -> 'a t -> 'a t - val of_seq : (key * 'a value) Seq.t -> 'a t - val of_list : (key * 'a value) list -> 'a t - val to_list : 'a t -> (key * 'a value) list -end - -module type MAP = MAP_WITH_VALUE with type 'a value = 'a - -(** {2 Keys and Value} *) - -module type KEY = sig - type t - val to_int: t -> int -end - -type (_,_) cmp = Eq: ('a,'a) cmp | Diff: ('a,'b) cmp - -module type HETEROGENEOUS_KEY = sig - type 'key t - val to_int: ('key) t -> int - val polyeq: 'a t -> 'b t -> ('a,'b) cmp -end - -module type VALUE = sig type 'a t end - -module type HETEROGENEOUS_VALUE = sig - type ('key,'map) t -end - -(** {1 Utility functions} *) - -(** Fast highest bit computation in c, using GCC's __builtin_clz - which compile to efficient instruction (bsr) when possible. *) -external highest_bit: int -> (int[@untagged]) = - "caml_int_builtin_highest_bit_byte" "caml_int_builtin_highest_bit" [@@noalloc] - -let unsigned_lt x y = x - min_int < y - min_int - (* if x >= 0 && y >= 0 - then x < y - else if x >= 0 - then (* pos < neg *) true - else if y >= 0 then false - else x < y *) - -(** Note: in the original version, okasaki give the masks as arguments - to optimize the computation of highest_bit. *) -let branching_bit a b = highest_bit (a lxor b) - -let mask i m = i land (lnot (2*m-1)) - -(** {1 Nodes} *) - -(** Simple node, with no hash consing. *) -module [@inline] SimpleNode(Key:sig type 'a t end)(Value:HETEROGENEOUS_VALUE) = struct - type 'a key = 'a Key.t - type ('key,'map) value = ('key,'map) Value.t - - type 'map view = - | Empty: 'map view - | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view - | Leaf: {key:'key key; value:('key,'map) value} -> 'map view - and 'map t = 'map view - let view x = x - - let empty = Empty - let is_empty x = x == Empty - let leaf key value = Leaf {key;value} - let branch ~prefix ~branching_bit ~tree0 ~tree1 = - match tree0,tree1 with - | Empty, x -> x - | x, Empty -> x - | _ -> Branch{prefix;branching_bit;tree0;tree1} -end - -module WeakNode(Key:sig type 'a t end)(Value:HETEROGENEOUS_VALUE)(* :NODE *) = struct - type 'a key = 'a Key.t - type ('key,'map) value = ('key,'map) Value.t - - type 'map view = - | Empty: 'map view - | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view - | Leaf: {key:'key key; value:('key,'map) value} -> 'map view - and 'a t = - | TEmpty: 'map t - | TBranch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map t - (* Additional hidden case: leaf, which is an Ephemeron.K1, whose - tag is 251, so it can be discriminated against the other - cases. This avoids an indirection. *) - - let empty = TEmpty - let is_empty x = x == TEmpty - let leaf key value = Obj.magic (Ephemeron.K1.make key value) - let branch ~prefix ~branching_bit ~tree0 ~tree1 = - match tree0,tree1 with - | TEmpty, x -> x - | x, TEmpty -> x - | _ -> TBranch{prefix;branching_bit;tree0;tree1} - - let view (type k) (type map) (t:map t) = - let obj = Obj.repr t in - if Obj.is_block obj && Obj.tag obj != 0 then - (* Ephemeron.K1.get_(key|value) are no longer available in 5.0, - so we do that instead. *) - let ephe:Obj.Ephemeron.t = Obj.magic obj in - let key:k key option = Obj.magic @@ Obj.Ephemeron.get_key ephe 0 in - let data:(k,map) Value.t option = Obj.magic @@ Obj.Ephemeron.get_data ephe in - match key,data with - | Some key, Some value -> Leaf{key;value} - | _ -> Empty - else match t with - | TEmpty -> Empty - | TBranch{prefix;branching_bit;tree0;tree1} -> Branch{prefix;branching_bit;tree0;tree1} - -end - - -(** Add a unique id to nodes, e.g. so that they can be used as keys in maps or sets. *) -module NodeWithId(Key:sig type 'a t end)(Value:HETEROGENEOUS_VALUE):NODE_WITH_ID - with type 'key key = 'key Key.t - and type ('key,'map) value = ('key,'map) Value.t -= struct - - type 'a key = 'a Key.t - type ('key,'map) value = ('key,'map) Value.t - - type 'map view = - | Empty: 'map view - | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view - | Leaf: {key:'key key; value:('key,'map) value} -> 'map view - and 'map t = - | NEmpty: 'map t - | NBranch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t;id:int} -> 'map t - | NLeaf: {key:'key key;value:('key,'map) value;id:int} -> 'map t - - let view = function - | NEmpty -> Empty - | NBranch{prefix;branching_bit;tree0;tree1;_} -> Branch{prefix;branching_bit;tree0;tree1} - | NLeaf{key;value;_} -> Leaf{key;value} - - let to_int = function - | NEmpty -> 0 - | NBranch{id;_} -> id - | NLeaf{id;_} -> id - - let count = ref 0 - - let empty = NEmpty - let is_empty x = x == NEmpty - let leaf key value = incr count; NLeaf {key;value;id=(!count)} - let branch ~prefix ~branching_bit ~tree0 ~tree1 = - match tree0,tree1 with - | NEmpty, x -> x - | x, NEmpty -> x - | _ -> incr count; NBranch{prefix;branching_bit;tree0;tree1;id=(!count)} -end - - -(** NODE for sets, i.e. when there is no associated values. *) -module SetNode(Key:sig type 'a t end):NODE - with type 'key key = 'key Key.t - and type ('key,'map) value = unit -= struct - - type 'a key = 'a Key.t - type ('key,'map) value = unit - - type 'map view = - | Empty: 'map view - | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view - | Leaf: {key:'key key; value:('key,'map) value} -> 'map view - and 'map t = - | NEmpty: 'map t - | NBranch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map t - | NLeaf: {key:'key key} -> 'map t - - - let view = function - | NEmpty -> Empty - | NBranch{prefix;branching_bit;tree0;tree1} -> Branch{prefix;branching_bit;tree0;tree1} - | NLeaf{key} -> Leaf{key;value=()} - - let empty = NEmpty - let is_empty x = x == NEmpty - let leaf key _value = NLeaf {key} - let branch ~prefix ~branching_bit ~tree0 ~tree1 = - match tree0,tree1 with - | NEmpty, x -> x - | x, NEmpty -> x - | _ -> NBranch{prefix;branching_bit;tree0;tree1} - -end - -module WeakSetNode(Key:sig type 'a t end)(* :NODE *) = struct - type 'a key = 'a Key.t - type ('key,'map) value = unit - - type 'map view = - | Empty: 'map view - | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view - | Leaf: {key:'key key; value:('key,'map) value} -> 'map view - and 'a t = - | TEmpty: 'map t - | TBranch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map t - (* Additional hidden case: leaf, which is a Weak array, whose tag - is 251, so it can be discriminated against the other - cases. This avoids an indirection. *) - - let empty = TEmpty - let is_empty x = x == TEmpty - let leaf key () = Obj.magic (let a = Weak.create 1 in Weak.set a 0 (Some key)) - let branch ~prefix ~branching_bit ~tree0 ~tree1 = - match tree0,tree1 with - | TEmpty, x -> x - | x, TEmpty -> x - | _ -> TBranch{prefix;branching_bit;tree0;tree1} - - let view t = - let obj = Obj.repr t in - if Obj.is_block obj && Obj.tag obj != 0 then - let weak = Obj.magic obj in - let key = Weak.get weak 0 in - match key with - | Some key -> Leaf{key;value=()} - | _ -> Empty - else match t with (* Identity in memory. *) - | TEmpty -> Empty - | TBranch{prefix;branching_bit;tree0;tree1} -> Branch{prefix;branching_bit;tree0;tree1} - -end - -let sdbm x y = y + (x lsl 16) + (x lsl 6) - x -(** Combine two numbers into a new hash *) - -module type HETEROGENEOUS_HASHED_VALUE = sig - include HETEROGENEOUS_VALUE - - val hash : ('a, 'b) t -> int - val polyeq : ('a, 'b) t -> ('a, 'c) t -> bool -end - -module type HASHED_VALUE = sig - type 'map t - - val hash : 'map t -> int - val polyeq : 'a t -> 'b t -> bool -end - -module HeterogeneousHashedValueFromHashedValue(Value: HASHED_VALUE) -: HETEROGENEOUS_HASHED_VALUE with type ('a, 'map) t = ('a, 'map Value.t) snd = struct - type ('a, 'map) t = ('a, 'map Value.t) snd - let hash (Snd x) = Value.hash x - let polyeq (Snd a) (Snd b) = Value.polyeq a b -end - -module HashconsedNode(Key:HETEROGENEOUS_KEY)(Value:HETEROGENEOUS_HASHED_VALUE)() -(* : HASH_CONSED_NODE - with type 'key key = 'key Key.t - and type ('key, 'map) value = ('key, 'map) Value.t *) -= struct - - type 'a key = 'a Key.t - type ('key, 'map) value = ('key, 'map) Value.t - - type 'map view = - | Empty: 'map view - | Branch: { prefix:intkey; branching_bit:mask; tree0:'map t; tree1:'map t } -> 'map view - | Leaf: { key:'key key; value:('key,'map) value } -> 'map view - and 'map t = - | NEmpty: 'map t - | NBranch: { prefix:intkey; branching_bit:mask; tree0:'map t; tree1:'map t; id:int } -> 'map t - | NLeaf: { key:'key key; value:('key, 'map) Value.t; id:int } -> 'map t - - let view = function - | NEmpty -> Empty - | NBranch{prefix;branching_bit;tree0;tree1;_} -> Branch{prefix;branching_bit;tree0;tree1} - | NLeaf{key;value;_} -> Leaf{key;value} - - let to_int = function - | NEmpty -> 0 - | NBranch{ id; _ } -> id - | NLeaf{ id; _ } -> id - - let count = ref 1 (** Start at 1 as we increment in post *) - - type any_map = AnyMap : 'a t -> any_map [@@unboxed] - - module HashArg = struct - type t = any_map - let equal (AnyMap a) (AnyMap b) = match a, b with - | NEmpty, NEmpty -> true - | NLeaf{key=key1;value=value1;_}, NLeaf{key=key2;value=value2;_} -> - begin match Key.polyeq key1 key2 with - | Eq -> Value.polyeq value1 value2 - | Diff -> false - end - | NBranch{prefix=prefixa;branching_bit=branching_bita;tree0=tree0a;tree1=tree1a;_}, - NBranch{prefix=prefixb;branching_bit=branching_bitb;tree0=tree0b;tree1=tree1b;_} -> - prefixa == prefixb && branching_bita == branching_bitb && - to_int tree0a = to_int tree0b && to_int tree1a = to_int tree1b - | _ -> false - - let hash (AnyMap x) = match x with - | NEmpty -> 0 - | NLeaf{key; value; _} -> - let hash = sdbm (Key.to_int key) (Value.hash value) in - (hash lsl 1) lor 1 - (* All leaf hashes are odd *) - | NBranch{prefix; branching_bit; tree0; tree1; _} -> (* All branch hashes are even *) - (sdbm (prefix lor branching_bit) @@ sdbm (to_int tree0) (to_int tree1)) lsl 1 - end - - module WeakHash = Weak.Make(HashArg) - - let weakh = WeakHash.create 120 - - let empty = NEmpty - let is_empty x = x == NEmpty - - let try_find (tentative : 'a t) = - let AnyMap x = WeakHash.merge weakh (AnyMap tentative) in - let x : 'a t = Obj.magic x in - if x == tentative then incr count; - x - - let leaf key value = try_find (NLeaf{key;value;id= !count}) - - let branch ~prefix ~branching_bit ~tree0 ~tree1 = - match tree0,tree1 with - | NEmpty, x -> x - | x, NEmpty -> x - | _ -> try_find (NBranch{prefix;branching_bit;tree0;tree1;id=(!count)}) - - let equal x y = x == y - let compare x y = Int.compare (to_int x) (to_int y) -end - -module HashconsedSetNode(Key:HETEROGENEOUS_KEY)(): HASH_CONSED_NODE - with type 'key key = 'key Key.t - and type ('key,'map) value = unit -= struct - - type 'a key = 'a Key.t - type ('key,'map) value = unit - - type map = - | NEmpty: map - | NBranch: { prefix:intkey; branching_bit:mask; tree0:map; tree1:map; id:int } -> map - | NLeaf: { key:'key key; id:int } -> map - type 'map view = - | Empty: 'map view - | Branch: { prefix:intkey; branching_bit:mask; tree0:'map t; tree1:'map t } -> 'map view - | Leaf: { key:'key key; value:unit } -> 'map view - and _ t = map - - let view = function - | NEmpty -> Empty - | NBranch{prefix;branching_bit;tree0;tree1;_} -> Branch{prefix;branching_bit;tree0;tree1} - | NLeaf{ key; _ } -> Leaf{ key; value=() } - - let to_int = function - | NEmpty -> 0 - | NBranch{ id; _ } -> id - | NLeaf{ id; _ } -> id - - let count = ref 1 (** Start at 1 as we increment in post *) - - module HashArg = struct - type t = map - let equal a b = match a, b with - | NEmpty, NEmpty -> true - | NLeaf{key=key1;_}, NLeaf{key=key2;_} -> - begin match Key.polyeq key1 key2 with - | Eq -> true - | Diff -> false - end - | NBranch{prefix=prefixa;branching_bit=branching_bita;tree0=tree0a;tree1=tree1a;_}, - NBranch{prefix=prefixb;branching_bit=branching_bitb;tree0=tree0b;tree1=tree1b;_} -> - prefixa == prefixb && branching_bita == branching_bitb && - tree0a == tree0b && tree1a == tree1b - | _ -> false - - let hash a = match a with - | NEmpty -> 0 - | NLeaf{key; _} -> ((Key.to_int key) lsl 1) lor 1 (* All leaf hashes are odd *) - | NBranch{prefix; branching_bit; tree0; tree1; _} -> (* All branch hashes are even *) - (sdbm (prefix lor branching_bit) @@ sdbm (to_int tree0) (to_int tree1)) lsl 1 - end - - module WeakHash = Weak.Make(HashArg) - - let weakh = WeakHash.create 120 - - let empty = NEmpty - let is_empty x = x == NEmpty - - let try_find tentative = - let x = WeakHash.merge weakh tentative in - if x == tentative then incr count; - x - - let leaf key () = try_find (NLeaf{ key; id = !count }) - - let branch ~prefix ~branching_bit ~tree0 ~tree1 = - match tree0,tree1 with - | NEmpty, x -> x - | x, NEmpty -> x - | _ -> try_find (NBranch{prefix;branching_bit;tree0;tree1;id=(!count)}) - - let equal x y = x == y - let compare x y = Int.compare (to_int x) (to_int y) -end - -(** {1 Keys and values} *) - -module HomogeneousValue = struct - type ('a,'map) t = 'map -end - -module WrappedHomogeneousValue = struct - type ('a, 'map) t = ('a, 'map) snd -end - -module HeterogeneousKeyFromKey(Key:KEY):(HETEROGENEOUS_KEY with type 'a t = Key.t) = struct - type _ t = Key.t - - (** The type-safe way to do it would be to define this type, to - guarantee that 'a is always bound to the same type, and Eq is - safe. But this requires a lot of conversion code, and identity - functions that may not be well detected. [polyeq] is unsafe in - that it allows arbitrary conversion of t1 by t2 in t1 t, but - this unsafety is not exported, and I don't think we can do - something wrong using it. *) - (* type 'a t = K: Key.t -> unit t [@@unboxed] *) - let polyeq: type a b. a t -> b t -> (a,b) cmp = - fun a b -> match a,b with - | a, b when (Key.to_int a) == (Key.to_int b) -> Obj.magic Eq - | _ -> Diff - let to_int = Key.to_int -end - - -(** {1 Functors} *) module MakeCustomHeterogeneousMap (Key:HETEROGENEOUS_KEY) @@ -823,16 +57,15 @@ module MakeCustomHeterogeneousMap (* Merge trees whose prefix disagree. *) let join pa treea pb treeb = (* Printf.printf "join %d %d\n" pa pb; *) - let m = branching_bit pa pb in - let p = mask pa (* for instance *) m in - if (pa land m) = 0 then + let m = branching_bit (pa :> int) (pb :> int) in + let p = mask (pa :> int) (* for instance *) m in + if ((pa :> int) land (m :> int)) = 0 then branch ~prefix:p ~branching_bit:m ~tree0:treea ~tree1:treeb else branch ~prefix:p ~branching_bit:m ~tree0:treeb ~tree1:treea - (** [match_prefix k p m] returns [true] if and only if the key [k] has prefix [p] up to bit [m]. *) - let match_prefix k p m = mask k m = p + let singleton = leaf @@ -856,7 +89,7 @@ module MakeCustomHeterogeneousMap end | Branch{branching_bit;tree0;tree1;_} -> (* Optional if not (match_prefix searched prefix branching_bit) then raise Not_found - else *) if (branching_bit land searched == 0) + else *) if ((branching_bit :> int) land searched == 0) then findint witness searched tree0 else findint witness searched tree1 | Empty -> raise Not_found @@ -873,10 +106,10 @@ module MakeCustomHeterogeneousMap end | Branch{prefix;branching_bit;tree0;tree1} -> if not (match_prefix split_key_int prefix branching_bit) then - if unsigned_lt prefix split_key_int + if unsigned_lt (prefix :> int) split_key_int then m, None, NODE.empty else NODE.empty, None, m - else if (branching_bit land split_key_int == 0) then + else if ((branching_bit :> int) land split_key_int == 0) then let left, found, right = split split_key split_key_int tree0 in left, found, NODE.branch ~prefix ~branching_bit ~tree0:right ~tree1 else @@ -958,7 +191,7 @@ module MakeCustomHeterogeneousMap | Leaf{key;_} when (Key.to_int key) == to_remove -> empty | (Empty | Leaf _) -> m | Branch{prefix;branching_bit;tree0;tree1} -> - if (branching_bit land to_remove) == 0 + if ((branching_bit :> int) land to_remove) == 0 then begin let tree0' = removeint to_remove tree0 in if tree0' == empty then tree1 @@ -1018,10 +251,10 @@ module MakeCustomHeterogeneousMap end | Branch{prefix;branching_bit;tree0;tree1} -> if match_prefix thekeyint prefix branching_bit then - if (thekeyint land branching_bit) == 0 + if ((branching_bit :> int) land thekeyint) == 0 then branch ~prefix ~branching_bit ~tree0:(loop tree0) ~tree1 else branch ~prefix ~branching_bit ~tree0 ~tree1:(loop tree1) - else join thekeyint (leaf thekey (f None)) prefix t + else join thekeyint (leaf thekey (f None)) (prefix :> int) t in loop t with Unmodified -> t @@ -1057,12 +290,12 @@ module MakeCustomHeterogeneousMap end | Branch{prefix;branching_bit;tree0;tree1} -> if match_prefix thekeyint prefix branching_bit then - if (thekeyint land branching_bit) == 0 + if (thekeyint land (branching_bit :> int)) == 0 then branch ~prefix ~branching_bit ~tree0:(loop tree0) ~tree1 else branch ~prefix ~branching_bit ~tree0 ~tree1:(loop tree1) else begin match f None with | None -> raise Unmodified - | Some value -> join thekeyint (leaf thekey value) prefix t + | Some value -> join thekeyint (leaf thekey value) (prefix :> int) t end in loop t with Unmodified -> t @@ -1090,10 +323,10 @@ module MakeCustomHeterogeneousMap end | Branch{prefix;branching_bit;tree0;tree1} -> if match_prefix thekeyint prefix branching_bit then - if (thekeyint land branching_bit) == 0 + if (thekeyint land (branching_bit :> int)) == 0 then branch ~prefix ~branching_bit ~tree0:(loop tree0) ~tree1 else branch ~prefix ~branching_bit ~tree0 ~tree1:(loop tree1) - else join thekeyint (leaf thekey value) prefix t + else join thekeyint (leaf thekey value) (prefix :> int) t in loop t with Unmodified -> t @@ -1147,7 +380,7 @@ module MakeCustomHeterogeneousMap | Eq -> f.f keya valuea valueb end | Branch{branching_bit;tree0;tree1;_} -> - if (branching_bit land searched == 0) + if ((branching_bit :> int) land searched == 0) then search (NODE.view tree0) else search (NODE.view tree1) | Empty -> false (* Can only happen on weak nodes. *) @@ -1160,13 +393,14 @@ module MakeCustomHeterogeneousMap (reflexive_subset_domain_for_all2 f ta0 tb0) && (reflexive_subset_domain_for_all2 f ta1 tb1) (* Case where ta have to be included in one of tb0 or tb1. *) - else if unsigned_lt ma mb && match_prefix pa pb mb - then if mb land pa == 0 + else if branches_before pb mb pa ma + then if (mb :> int) land (pa :> int) == 0 then reflexive_subset_domain_for_all2 f ta tb0 else reflexive_subset_domain_for_all2 f ta tb1 (* Any other case: there are elements in ta that are unmatched in tb. *) else false + let rec disjoint ta tb = if ta == tb then is_empty ta else match NODE.view ta,NODE.view tb with @@ -1178,12 +412,12 @@ module MakeCustomHeterogeneousMap if ma == mb && pa == pb (* Same prefix: check both subtrees *) then disjoint ta0 tb0 && disjoint ta1 tb1 - else if unsigned_lt mb ma && match_prefix pb pa ma (* tb included in ta0 or ta1 *) - then if ma land pb == 0 + else if branches_before pa ma pb mb (* tb included in ta0 or ta1 *) + then if (ma :> int) land (pb :> int) == 0 then disjoint ta0 tb else disjoint ta1 tb - else if unsigned_lt ma mb && match_prefix pa pb mb (* ta included in tb0 or tb1 *) - then if mb land pa == 0 + else if branches_before pb mb pa ma (* ta included in tb0 or tb1 *) + then if (mb :> int) land (pa :> int) == 0 then disjoint ta tb0 else disjoint ta tb1 else true (* Different prefixes => no intersection *) @@ -1207,15 +441,15 @@ module MakeCustomHeterogeneousMap let tree0 = idempotent_union f ta0 tb0 in let tree1 = idempotent_union f ta1 tb1 in branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if unsigned_lt mb ma && match_prefix pb pa ma - then if ma land pb == 0 + else if branches_before pa ma pb mb + then if (ma :> int) land (pb :> int) == 0 then branch ~prefix:pa ~branching_bit:ma ~tree0:(idempotent_union f ta0 tb) ~tree1:ta1 else branch ~prefix:pa ~branching_bit:ma ~tree0:ta0 ~tree1:(idempotent_union f ta1 tb) - else if unsigned_lt ma mb && match_prefix pa pb mb - then if mb land pa == 0 + else if branches_before pb mb pa ma + then if (mb :> int) land (pa :> int) == 0 then branch ~prefix:pb ~branching_bit:mb ~tree0:(idempotent_union f ta tb0) ~tree1:tb1 else branch ~prefix:pb ~branching_bit:mb ~tree0:tb0 ~tree1:(idempotent_union f ta tb1) - else join pa ta pb tb + else join (pa :> int) ta (pb :> int) tb type ('map1,'map2,'map3) polyinter = { f: 'a. 'a Key.t -> ('a,'map1) Value.t -> ('a,'map2) Value.t -> ('a,'map3) Value.t } [@@unboxed] let rec idempotent_inter f ta tb = @@ -1244,12 +478,12 @@ module MakeCustomHeterogeneousMap let tree0 = idempotent_inter f ta0 tb0 in let tree1 = idempotent_inter f ta1 tb1 in branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if unsigned_lt mb ma && match_prefix pb pa ma - then if ma land pb == 0 + else if branches_before pa ma pb mb + then if (ma :> int) land (pb :> int) == 0 then idempotent_inter f ta0 tb else idempotent_inter f ta1 tb - else if unsigned_lt ma mb && match_prefix pa pb mb - then if mb land pa == 0 + else if branches_before pb mb pa ma + then if (mb :> int) land (pa :> int) == 0 then idempotent_inter f ta tb0 else idempotent_inter f ta tb1 else empty @@ -1274,12 +508,12 @@ module MakeCustomHeterogeneousMap let tree0 = nonidempotent_inter_no_share f ta0 tb0 in let tree1 = nonidempotent_inter_no_share f ta1 tb1 in branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if unsigned_lt mb ma && match_prefix pb pa ma - then if ma land pb == 0 + else if branches_before pa ma pb mb + then if (ma :> int) land (pb :> int) == 0 then nonidempotent_inter_no_share f ta0 tb else nonidempotent_inter_no_share f ta1 tb - else if unsigned_lt ma mb && match_prefix pa pb mb - then if mb land pa == 0 + else if branches_before pb mb pa ma + then if (mb :> int) land (pa :> int) == 0 then nonidempotent_inter_no_share f ta tb0 else nonidempotent_inter_no_share f ta tb1 else empty @@ -1313,12 +547,12 @@ module MakeCustomHeterogeneousMap let tree0 = idempotent_inter_filter f ta0 tb0 in let tree1 = idempotent_inter_filter f ta1 tb1 in branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if unsigned_lt mb ma && match_prefix pb pa ma - then if ma land pb == 0 + else if branches_before pa ma pb mb + then if (ma :> int) land (pb :> int) == 0 then idempotent_inter_filter f ta0 tb else idempotent_inter_filter f ta1 tb - else if unsigned_lt ma mb && match_prefix pa pb mb - then if mb land pa == 0 + else if branches_before pb mb pa ma + then if (mb :> int) land (pa :> int) == 0 then idempotent_inter_filter f ta tb0 else idempotent_inter_filter f ta tb1 else empty @@ -1368,15 +602,15 @@ module MakeCustomHeterogeneousMap (* Same prefix: merge the subtrees *) then branch ~prefix:pa ~branching_bit:ma ~tree0:(slow_merge f ta0 tb0) ~tree1:(slow_merge f ta1 tb1) - else if unsigned_lt mb ma && match_prefix pb pa ma - then if ma land pb == 0 + else if branches_before pa ma pb mb + then if (ma :> int) land (pb :> int) == 0 then branch ~prefix:pa ~branching_bit:ma ~tree0:(slow_merge f ta0 tb) ~tree1:(upd_ta ta1) else branch ~prefix:pa ~branching_bit:ma ~tree0:(upd_ta ta0) ~tree1:(slow_merge f ta1 tb) - else if unsigned_lt ma mb && match_prefix pa pb mb - then if mb land pa == 0 + else if branches_before pb mb pa ma + then if (mb :> int) land (pa :> int) == 0 then branch ~prefix:pb ~branching_bit:mb ~tree0:(slow_merge f ta tb0) ~tree1:(upd_tb tb1) else branch ~prefix:pb ~branching_bit:mb ~tree0:(upd_tb tb0) ~tree1:(slow_merge f ta tb1) - else join pa (upd_ta ta) pb (upd_tb tb) + else join (pa :> int) (upd_ta ta) (pb :> int) (upd_tb tb) type 'map polyiter = { f: 'a. 'a Key.t -> ('a,'map) Value.t -> unit } [@@unboxed] let rec iter f x = match NODE.view x with @@ -1416,12 +650,12 @@ module MakeCustomHeterogeneousMap let acc = fold_on_nonequal_inter f ta0 tb0 acc in let acc = fold_on_nonequal_inter f ta1 tb1 acc in acc - else if unsigned_lt mb ma && match_prefix pb pa ma - then if ma land pb == 0 + else if branches_before pa ma pb mb + then if (ma :> int) land (pb :> int) == 0 then fold_on_nonequal_inter f ta0 tb acc else fold_on_nonequal_inter f ta1 tb acc - else if unsigned_lt ma mb && match_prefix pa pb mb - then if mb land pa == 0 + else if branches_before pb mb pa ma + then if (mb :> int) land (pa :> int) == 0 then fold_on_nonequal_inter f ta tb0 acc else fold_on_nonequal_inter f ta tb1 acc else acc @@ -1496,8 +730,8 @@ module MakeCustomHeterogeneousMap let acc = fold_on_nonequal_union f ta0 tb0 acc in let acc = fold_on_nonequal_union f ta1 tb1 acc in acc - else if unsigned_lt mb ma && match_prefix pb pa ma - then if ma land pb == 0 + else if branches_before pa ma pb mb + then if (ma :> int) land (pb :> int) == 0 then let acc = fold_on_nonequal_union f ta0 tb acc in let acc = fold fleft ta1 acc in @@ -1506,8 +740,8 @@ module MakeCustomHeterogeneousMap let acc = fold fleft ta0 acc in let acc = fold_on_nonequal_union f ta1 tb acc in acc - else if unsigned_lt ma mb && match_prefix pa pb mb - then if mb land pa == 0 + else if branches_before pb mb pa ma + then if (mb :> int) land (pa :> int) == 0 then let acc = fold_on_nonequal_union f ta tb0 acc in let acc = fold fright tb1 acc in @@ -1518,7 +752,7 @@ module MakeCustomHeterogeneousMap acc else (* Distinct subtrees: process them in increasing order of keys. *) - if unsigned_lt pa pb then + if unsigned_lt (pa :> int) (pb :> int) then let acc = fold fleft ta acc in let acc = fold fright tb acc in acc @@ -1563,12 +797,12 @@ module MakeCustomHeterogeneousMap if(ta0 == tree0 && ta1 == tree1) then ta else NODE.branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if unsigned_lt mb ma && match_prefix pb pa ma - then if ma land pb == 0 + else if branches_before pa ma pb mb + then if (ma :> int) land (pb :> int) == 0 then nonidempotent_inter f ta0 tb else nonidempotent_inter f ta1 tb - else if unsigned_lt ma mb && match_prefix pa pb mb - then if mb land pa == 0 + else if branches_before pb mb pa ma + then if (mb :> int) land (pa :> int) == 0 then nonidempotent_inter f ta tb0 else nonidempotent_inter f ta tb1 else NODE.empty @@ -1610,8 +844,8 @@ module MakeCustomHeterogeneousMap let tree1 = update_multiple_from_foreign tb1 f ta1 in if tree0 == ta0 && tree1 == ta1 then ta else branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if unsigned_lt mb ma && match_prefix pb pa ma - then if ma land pb == 0 + else if branches_before pa ma pb mb + then if (ma :> int) land (pb :> int) == 0 then let ta0' = update_multiple_from_foreign tb f ta0 in if ta0' == ta0 then ta @@ -1620,8 +854,8 @@ module MakeCustomHeterogeneousMap let ta1' = update_multiple_from_foreign tb f ta1 in if ta1' == ta1 then ta else branch ~prefix:pa ~branching_bit:ma ~tree0:ta0 ~tree1:ta1' - else if unsigned_lt ma mb && match_prefix pa pb mb - then if mb land pa == 0 + else if branches_before pb mb pa ma + then if (mb :> int) land (pa :> int) == 0 then let tree0 = update_multiple_from_foreign tb0 f ta in let tree1 = upd_tb tb1 in @@ -1630,7 +864,7 @@ module MakeCustomHeterogeneousMap let tree0 = upd_tb tb0 in let tree1 = update_multiple_from_foreign tb1 f ta in branch ~prefix:pb ~branching_bit:mb ~tree0 ~tree1 - else join pa ta pb (upd_tb tb) + else join (pa :> int) ta (pb :> int) (upd_tb tb) (* Map difference: (possibly) remove from ta elements that are in tb, the other are preserved, no element is added. *) @@ -1660,8 +894,8 @@ module MakeCustomHeterogeneousMap let tree1 = update_multiple_from_inter_with_foreign tb1 f ta1 in if tree0 == ta0 && tree1 == ta1 then ta else branch ~prefix:pa ~branching_bit:ma ~tree0 ~tree1 - else if unsigned_lt mb ma && match_prefix pb pa ma - then if ma land pb == 0 + else if branches_before pa ma pb mb + then if (ma :> int) land (pb :> int) == 0 then let ta0' = update_multiple_from_inter_with_foreign tb f ta0 in if ta0' == ta0 then ta @@ -1670,8 +904,8 @@ module MakeCustomHeterogeneousMap let ta1' = update_multiple_from_inter_with_foreign tb f ta1 in if ta1' == ta1 then ta else branch ~prefix:pa ~branching_bit:ma ~tree0:ta0 ~tree1:ta1' - else if unsigned_lt ma mb && match_prefix pa pb mb - then if mb land pa == 0 + else if branches_before pb mb pa ma + then if (mb :> int) land (pa :> int) == 0 then update_multiple_from_inter_with_foreign tb0 f ta else update_multiple_from_inter_with_foreign tb1 f ta else ta @@ -1889,18 +1123,6 @@ module MakeCustomMap let to_list s = List.of_seq (to_seq s) end -module Value : VALUE with type 'a t = 'a = struct type 'a t = 'a end - -module HashedValue : HASHED_VALUE with type 'a t = 'a = struct - include Value - let hash x = Hashtbl.hash x - let polyeq: type a b. a -> b -> bool = fun a b -> a == Obj.magic b -end -module HeterogeneousHashedValue : HETEROGENEOUS_HASHED_VALUE with type ('k, 'm) t = 'm = struct - include HomogeneousValue - let hash x = Hashtbl.hash x - let polyeq: type a b. a -> b -> bool = fun a b -> a == Obj.magic b -end module MakeMap(Key: KEY) = struct module NKey = struct type 'a t = Key.t end diff --git a/src/functors.mli b/src/functors.mli new file mode 100644 index 0000000..98c3661 --- /dev/null +++ b/src/functors.mli @@ -0,0 +1,191 @@ +(**************************************************************************) +(* This file is part of the Codex semantics library *) +(* (patricia-tree sub-component). *) +(* *) +(* Copyright (C) 2024 *) +(* CEA (Commissariat à l'énergie atomique et aux énergies *) +(* alternatives) *) +(* *) +(* You can redistribute it and/or modify it under the terms of the GNU *) +(* Lesser General Public License as published by the Free Software *) +(* Foundation, version 2.1. *) +(* *) +(* It is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) +(* GNU Lesser General Public License for more details. *) +(* *) +(* See the GNU Lesser General Public License version 2.1 *) +(* for more details (enclosed in the file LICENSE). *) +(**************************************************************************) + +open Sigs + +(** This section presents the functors which can be used to build patricia tree + maps and sets. *) + +(** {1 Homogeneous maps and sets} *) +(** These are homogeneous maps and set, their keys/elements are a single + non-generic type, just like the standard library's [Map] and [Set] modules. *) + +module MakeMap(Key: KEY) : MAP with type key = Key.t +module MakeSet(Key: KEY) : SET with type elt = Key.t + +(** {1 Heterogeneous maps and sets} *) +(** Heterogeneous maps are ['map map], which store bindings of ['key key] + to [('key, 'map) value], where ['key key] is a GADT, as we must be able + to compare keys of different types together. + + Similarly, heterogeneous sets store sets of ['key key]. *) + +module MakeHeterogeneousSet(Key: HETEROGENEOUS_KEY) : HETEROGENEOUS_SET + with type 'a elt = 'a Key.t +module MakeHeterogeneousMap(Key: HETEROGENEOUS_KEY)(Value: HETEROGENEOUS_VALUE) : HETEROGENEOUS_MAP + with type 'a key = 'a Key.t + and type ('k,'m) value = ('k,'m) Value.t + + +(** {1 Maps and sets with custom nodes} *) +(** We can also customize the representation and creation of nodes, to + gain space or time. + + Possibitities include having weak key and/or values, hash-consing, + giving unique number to nodes or keeping them in sync with the + disk, lazy evaluation and/or caching, adding size information for + constant time [cardinal] functions, etc. + + See {!node_impl} for the provided implementations of {!Sigs.NODE}, or create your own. *) + +(** Create a homogeneous map with a custom {!Sigs.NODE}. Also allows + customizing the map values *) +module MakeCustomMap + (Key: KEY) + (Value: VALUE) + (Node: NODE with type 'a key = Key.t and type ('key,'map) value = ('key, 'map Value.t) snd) + : MAP_WITH_VALUE + with type key = Key.t + and type 'm value = 'm Value.t + and type 'm t = 'm Node.t + + +(** Create a homogeneous set with a custom {!Sigs.NODE}. + @since v0.10.0 *) +module MakeCustomSet + (Key: KEY) + (Node: NODE with type 'a key = Key.t and type ('key,'map) value = unit) + : SET + with type elt = Key.t + and type 'a BaseMap.t = 'a Node.t + +(** Create an heterogeneous map with a custom {!Sigs.NODE}. *) +module MakeCustomHeterogeneousMap + (Key: HETEROGENEOUS_KEY) + (Value: HETEROGENEOUS_VALUE) + (Node: NODE with type 'a key = 'a Key.t and type ('key,'map) value = ('key,'map) Value.t) + : HETEROGENEOUS_MAP + with type 'a key = 'a Key.t + and type ('k,'m) value = ('k,'m) Value.t + and type 'm t = 'm Node.t + +(** Create an heterogeneous set with a custom {!Sigs.NODE}. + @since v0.10.0 *) +module MakeCustomHeterogeneousSet + (Key: HETEROGENEOUS_KEY) + (NODE: NODE with type 'a key = 'a Key.t and type ('key,'map) value = unit) + : HETEROGENEOUS_SET + with type 'a elt = 'a Key.t + and type 'a BaseMap.t = 'a NODE.t + +(** {1:hash_consed Hash-consed maps and sets} *) +(** Hash-consed maps and sets uniquely number each of their nodes. + Upon creation, they check whether a similar node has been created before, + if so they return it, else they return a new node with a new number. + With this unique numbering: + - [equal] and [compare] become constant time operations; + - two maps with the same bindings (where keys are compared by {!Sigs.KEY.to_int} and + values by {!Sigs.HASHED_VALUE.polyeq}) will always be physically equal; + - functions that benefit from sharing, like {!Sigs.BASE_MAP.idempotent_union} and + {!Sigs.BASE_MAP.idempotent_inter} will see improved performance; + - constructors are slightly slower, as they now require a hash-table lookup; + - memory usage is increased: nodes store their tags inside themselves, and + a global hash-table of all built nodes must be maintained. + This is quickly amortized if multiple identical nodes are built, + as only one will be kept in memory. + - hash-consed maps assume their keys and values are immutable, where regular + maps can mutate values freely; + - {b WARNING:} when using physical equality as {!Sigs.HASHED_VALUE.polyeq}, some + {b maps of different types may be given the same identifier}. See the end of + the documentation of {!Sigs.HASHED_VALUE.polyeq} for details. + Note that this is the case in the default implementations {!HashedValue} + and {!HeterogeneousHashedValue}. + + All hash-consing functors are {b generative}, since each functor call will + create a new hash-table to store the created nodes. Calling a functor + twice with same arguments will lead to two numbering systems for identifiers, + and thus the types should not be considered compatible. *) + +(** Hash-consed version of {!Sigs.MAP}. See {!hash_consed} for the differences between + hash-consed and non hash-consed maps. + + This is a generative functor, as calling it creates a new hash-table to store + the created nodes, and a reference to store the next unallocated identifier. + Maps/sets from different hash-consing functors (even if these functors have + the same arguments) will have different (incompatible) numbering systems and + be stored in different hash-tables (thus they will never be physically equal). + + @since v0.10.0 *) +module MakeHashconsedMap(Key: KEY)(Value: HASHED_VALUE)() : sig + include MAP_WITH_VALUE with type key = Key.t and type 'a value = 'a Value.t (** @closed *) + + include HASH_CONSED_OPERATIONS with type 'a t := 'a t (** @inline *) +end + +(** Hash-consed version of {!Sigs.SET}. See {!hash_consed} for the differences between + hash-consed and non hash-consed sets. + + This is a generative functor, as calling it creates a new hash-table to store + the created nodes, and a reference to store the next unallocated identifier. + Maps/sets from different hash-consing functors (even if these functors have + the same arguments) will have different (incompatible) numbering systems and + be stored in different hash-tables (thus they will never be physically equal). + + @since v0.10.0 *) +module MakeHashconsedSet(Key: KEY)() : sig + include SET with type elt = Key.t (** @closed *) + + include HASH_CONSED_OPERATIONS with type 'a t := t (** @inline *) +end + +(** Hash-consed version of {!Sigs.HETEROGENEOUS_SET}. See {!hash_consed} for the differences between + hash-consed and non hash-consed sets. + + This is a generative functor, as calling it creates a new hash-table to store + the created nodes, and a reference to store the next unallocated identifier. + Maps/sets from different hash-consing functors (even if these functors have + the same arguments) will have different (incompatible) numbering systems and + be stored in different hash-tables (thus they will never be physically equal). + + @since v0.10.0 *) +module MakeHashconsedHeterogeneousSet(Key: HETEROGENEOUS_KEY)() : sig + include HETEROGENEOUS_SET with type 'a elt = 'a Key.t (** @closed *) + + include HASH_CONSED_OPERATIONS with type 'a t := t (** @inline *) +end + +(** Hash-consed version of {!Sigs.HETEROGENEOUS_MAP}. See {!hash_consed} for the differences between + hash-consed and non hash-consed maps. + + This is a generative functor, as calling it creates a new hash-table to store + the created nodes, and a reference to store the next unallocated identifier. + Maps/sets from different hash-consing functors (even if these functors have + the same arguments) will have different (incompatible) numbering systems and + be stored in different hash-tables (thus they will never be physically equal). + + @since v0.10.0 *) +module MakeHashconsedHeterogeneousMap(Key: HETEROGENEOUS_KEY)(Value: HETEROGENEOUS_HASHED_VALUE)() : sig + include HETEROGENEOUS_MAP + with type 'a key = 'a Key.t + and type ('k,'m) value = ('k, 'm) Value.t (** @closed *) + + include HASH_CONSED_OPERATIONS with type 'a t := 'a t (** @inline *) +end diff --git a/index.mld b/src/index.mld similarity index 80% rename from index.mld rename to src/index.mld index b3ec86c..1a71376 100644 --- a/index.mld +++ b/src/index.mld @@ -46,8 +46,8 @@ See the {{!examples}examples} to jump right into using this library. using the same function names when possible and the same convention for order of arguments. This should allow switching to and from Patricia Tree with minimal effort.} -{li The functor parameters ({{!PatriciaTree.KEY}[KEY]} module) requires an injective [to_int : t -> int] - function instead of a [compare] function. {{!PatriciaTree.KEY.to_int}[KEY.to_int]} should be fast, +{li The functor parameters ({{!PatriciaTree.Sigs.KEY}[Sigs.KEY]} module) requires an injective [to_int : t -> int] + function instead of a [compare] function. {{!PatriciaTree.Sigs.KEY.to_int}[Sigs.KEY.to_int]} should be fast, and injective. This works well with {{: https://en.wikipedia.org/wiki/Hash_consing}hash-consed} types.} {li The Patricia Tree representation is stable, contrary to maps, inserting nodes @@ -71,8 +71,8 @@ See the {{!examples}examples} to jump right into using this library. {{: https://www.cs.tufts.edu/comp/150FP/archive/jan-midtgaard/qc-patricia.pdf}{i QuickChecking Patricia Trees}} by Jan Mitgaard. - It also affects functions like {{!PatriciaTree.BASE_MAP.unsigned_min_binding}[unsigned_min_binding]} - and {{!PatriciaTree.BASE_MAP.pop_unsigned_minimum}[pop_unsigned_minimum]}. They will return the smallest + It also affects functions like {{!PatriciaTree.Sigs.BASE_MAP.unsigned_min_binding}[unsigned_min_binding]} + and {{!PatriciaTree.Sigs.BASE_MAP.pop_unsigned_minimum}[pop_unsigned_minimum]}. They will return the smallest positive integer of both positive and negative keys are present; and not the smallest negative, as one might expect.} {li Supports generic maps and sets: a ['m map] that maps ['k key] to [('k, 'm) value]. @@ -80,11 +80,11 @@ See the {{!examples}examples} to jump right into using this library. for the type of keys. This is also sometimes called a dependent map.} {li Allows easy and fast operations across different types of maps and set which have the same type of keys (e.g. an intersection between a map and a set).} -{li Multiple choices for internal representation ({{!PatriciaTree.NODE}[NODE]}), which allows for efficient +{li Multiple choices for internal representation ({{!PatriciaTree.Sigs.NODE}[Sigs.NODE]}), which allows for efficient storage (no need to store a value for sets), or using weak nodes only (values removed from the tree if no other pointer to it exists). This system can also be extended to store size information in nodes if needed.} -{li Exposes a common interface ({!type:PatriciaTree.NODE.view}) to allow users to write their own pattern - matching on the tree structure without depending on the {{!PatriciaTree.NODE}[NODE]} being used.} +{li Exposes a common interface ({!type:PatriciaTree.Sigs.NODE.view}) to allow users to write their own pattern + matching on the tree structure without depending on the {{!PatriciaTree.Sigs.NODE}[Sigs.NODE]} being used.} {li Additionally, hashconsed versions of heterogeneous/homogeneous maps/sets are available. These provide constant time equality and comparison, and ensure maps/set with the same constants are always physically equal. It comes at the cost @@ -101,17 +101,17 @@ The functors used to build maps and sets are the following: {li For homogeneous (non-generic) maps and sets: {{!PatriciaTree.MakeMap}[MakeMap]} and {{!PatriciaTree.MakeSet}[MakeSet]}. These are similar to the standard library's maps and sets. {@ocaml skip[ - module MakeMap(Key: KEY) : MAP with type key = Key.t - module MakeSet(Key: KEY) : SET with type elt = Key.t + module MakeMap(Key: Sigs.KEY) : Sigs.MAP with type key = Key.t + module MakeSet(Key: Sigs.KEY) : Sigs.SET with type elt = Key.t ]}} {li For Heterogeneous (generic) maps and sets: {{!PatriciaTree.MakeHeterogeneousMap}[MakeHeterogeneousMap]} and {{!PatriciaTree.MakeHeterogeneousSet}[MakeHeterogeneousSet]}. {@ocaml skip[ - module MakeHeterogeneousMap(Key: HETEROGENEOUS_KEY)(Value: HETEROGENEOUS_VALUE) : + module MakeHeterogeneousMap(Key: Sigs.HETEROGENEOUS_KEY)(Value: Sigs.HETEROGENEOUS_VALUE) : HETEROGENEOUS_MAP with type 'a key = 'a Key.t and type ('k,'m) value = ('k,'m) Value.t - module MakeHeterogeneousSet(Key: HETEROGENEOUS_KEY) : HETEROGENEOUS_SET + module MakeHeterogeneousSet(Key: Sigs.HETEROGENEOUS_KEY) : Sigs.HETEROGENEOUS_SET with type 'a elt = 'a Key.t ]}} {li @@ -120,16 +120,16 @@ The functors used to build maps and sets are the following: {{!PatriciaTree.MakeHashconsedHeterogeneousMap}[MakeHashconsedHeterogeneousMap]} and {{!PatriciaTree.MakeHashconsedHeterogeneousSet}[MakeHashconsedHeterogeneousSet]}. These uniquely number their nodes, which means: - [equal] and [compare] become constant time operations; - - two maps with the same bindings (where keys are compared by {{!PatriciaTree.KEY.to_int}[KEY.to_int]} and - values by {{!PatriciaTree.HASHED_VALUE.polyeq}[HASHED_VALUE.polyeq]}) will always be physically equal; + - two maps with the same bindings (where keys are compared by {{!PatriciaTree.Sigs.KEY.to_int}[Sigs.KEY.to_int]} and + values by {{!PatriciaTree.Sigs.HASHED_VALUE.polyeq}[Sigs.HASHED_VALUE.polyeq]}) will always be physically equal; - functions that benefit from sharing will see improved performance; - constructors are slightly slower, as they now require a hash-table lookup; - memory usage is increased: nodes store their tags inside themselves, and a global hash-table of all built nodes must be maintained; - hash-consed maps assume their values are immutable; - - {b WARNING:} when using physical equality as {{!PatriciaTree.HASHED_VALUE.polyeq}[HASHED_VALUE.polyeq]}, some maps of different + - {b WARNING:} when using physical equality as {{!PatriciaTree.Sigs.HASHED_VALUE.polyeq}[Sigs.HASHED_VALUE.polyeq]}, some maps of different types may be given the same identifier. See the end of - the documentation of {{!PatriciaTree.HASHED_VALUE.polyeq}[HASHED_VALUE.polyeq]} for details. + the documentation of {{!PatriciaTree.Sigs.HASHED_VALUE.polyeq}[Sigs.HASHED_VALUE.polyeq]} for details. Note that this is the case in the default implementations {{!PatriciaTree.HashedValue}[HashedValue]} and {{!PatriciaTree.HeterogeneousHashedValue}[HeterogeneousHashedValue]}. @@ -141,9 +141,10 @@ The functors used to build maps and sets are the following: {2 Interfaces} -Here is a brief overview of the various module types of our library: +Here is a brief overview of the various module types of our library. They are placed +inside the {{!PatriciaTree.Sigs}[Sigs]} module: {ul -{li {{!PatriciaTree.BASE_MAP}[BASE_MAP]}: the underlying module type of all our trees (maps end sets). It +{li {{!PatriciaTree.Sigs.BASE_MAP}[BASE_MAP]}: the underlying module type of all our trees (maps end sets). It represents a ['b map] binding ['a key] to [('a,'b) value], as well as all functions needed to manipulate them. @@ -151,26 +152,26 @@ Here is a brief overview of the various module types of our library: unified representation, useful for cross map operations. However, for practical purposes, it is often best to use the more specific interfaces: {ul - {li {{!PatriciaTree.HETEROGENEOUS_MAP}[HETEROGENEOUS_MAP]} for heterogeneous maps (this is just {{!PatriciaTree.BASE_MAP}[BASE_MAP]} with a + {li {{!PatriciaTree.Sigs.HETEROGENEOUS_MAP}[HETEROGENEOUS_MAP]} for heterogeneous maps (this is just {{!PatriciaTree.Sigs.BASE_MAP}[BASE_MAP]} with a [WithForeign] functor).} - {li {{!PatriciaTree.MAP}[MAP]} for homogeneous maps, this interface is close to {{: https://ocaml.org/api/Map.S.html}[Stdlib.Map.S]}.} - {li {{!PatriciaTree.HETEROGENEOUS_SET}[HETEROGENEOUS_SET]} for heterogeneous sets (sets of ['a elt]). These are just + {li {{!PatriciaTree.Sigs.MAP}[MAP]} for homogeneous maps, this interface is close to {{: https://ocaml.org/api/Map.S.html}[Stdlib.Map.S]}.} + {li {{!PatriciaTree.Sigs.HETEROGENEOUS_SET}[HETEROGENEOUS_SET]} for heterogeneous sets (sets of ['a elt]). These are just maps to [unit], but with a custom node representation to avoid storing [unit] in nodes.} - {li {{!PatriciaTree.SET}[SET]} for homogeneous sets, this interface is close to {{: https://ocaml.org/api/Set.S.html}[Stdlib.Set.S]}.} + {li {{!PatriciaTree.Sigs.SET}[SET]} for homogeneous sets, this interface is close to {{: https://ocaml.org/api/Set.S.html}[Stdlib.Set.S]}.} }} -{li The parameter of our functor are either {{!PatriciaTree.KEY}[KEY]} or {{!PatriciaTree.HETEROGENEOUS_KEY}[HETEROGENEOUS_KEY]}. +{li The parameter of our functor are either {{!PatriciaTree.Sigs.KEY}[KEY]} or {{!PatriciaTree.Sigs.HETEROGENEOUS_KEY}[HETEROGENEOUS_KEY]}. These just consist of a type, a (polymorphic) equality function, and an injective [to_int] coercion. - The heterogeneous map functor also has a {{!PatriciaTree.HETEROGENEOUS_VALUE}[HETEROGENEOUS_VALUE]} parameter to specify the + The heterogeneous map functor also has a {{!PatriciaTree.Sigs.HETEROGENEOUS_VALUE}[HETEROGENEOUS_VALUE]} parameter to specify the [('a, 'b) value] type.} {li The internal representations of our tree can be customized to use different - internal {{!PatriciaTree.NODE}[NODE]}. Each node come with its own private constructors and destructors, - as well as a cast to a uniform {{!type:PatriciaTree.NODE.view}[NODE.view]} type used for pattern matching. + internal {{!PatriciaTree.Sigs.NODE}[NODE]}. Each node come with its own private constructors and destructors, + as well as a cast to a uniform {{!type:PatriciaTree.Sigs.NODE.view}[NODE.view]} type used for pattern matching. A number of implementations are provided: - - {{!PatriciaTree.SimpleNode}[SimpleNode]}: exactly the {{!type:PatriciaTree.NODE.view}[NODE.view]} type; + - {{!PatriciaTree.SimpleNode}[SimpleNode]}: exactly the {{!type:PatriciaTree.Sigs.NODE.view}[NODE.view]} type; - {{!PatriciaTree.WeakNode}[WeakNode]}: only store weak pointer to its elements; - {{!PatriciaTree.NodeWithId}[NodeWithId]}: node which contains a unique identifier; - {{!PatriciaTree.SetNode}[SetNode]}: optimized for sets, doesn't store the [unit] value; @@ -201,19 +202,26 @@ dune files: Here is a small example of a non-generic map: {ol -{li Start by creating a key module: +{li Start by creating a key module. We use [type int] for keys in this example, + but you can use any type, so long as it supports an efficient and injective + {{!PatriciaTree.Sigs.KEY.to_int}[to_int]} function. {@ocaml[ - module IntKey : PatriciaTree.KEY with type t = int = struct + module IntKey : PatriciaTree.Sigs.KEY with type t = int = struct type t = int let to_int x = x end ]}} {li Use it to instanciate the map/set functors: {[ - module IMap : PatriciaTree.MAP with type key = int = PatriciaTree.MakeMap(IntKey);; - module ISet : PatriciaTree.SET with type elt = int = PatriciaTree.MakeSet(IntKey);; + module IMap : PatriciaTree.Sigs.MAP with type key = int + = PatriciaTree.MakeMap(IntKey) + module ISet : PatriciaTree.Sigs.SET with type elt = int + = PatriciaTree.MakeSet(IntKey) ]}} -{li You can now use it as you would any other map: +{li You can now use it as you would any other map, most of the interface is + shared with the standard library's {{: https://ocaml.org/api/Map.S.html}[Map]} + and {{: https://ocaml.org/api/Set.S.html}[Set]} (some functions have + been renamed to highlight their differing requirements). {[ # let map = IMap.empty |> @@ -222,17 +230,18 @@ Here is a small example of a non-generic map: IMap.add 3 "how do you do?";; val map : string IMap.t = ]} - (We also have {{!PatriciaTree.MAP.of_list}[of_list]} and - {{!PatriciaTree.MAP.of_seq}[of_seq]} functions for quick initialization) + (We also have {{!PatriciaTree.Sigs.MAP.of_list}[of_list]} and + {{!PatriciaTree.Sigs.MAP.of_seq}[of_seq]} functions for quick initialization) {[ # IMap.find 1 map;; - : string = "hello" + # IMap.cardinal map;; - : int = 3 ]}} {li The strength of Patricia Tree is the speedup of operations on multiple maps with common subtrees. For example, in the following, the - {{!PatriciaTree.MAP.idempotent_inter_filter}[idempotent_inter_filter]} function + {{!PatriciaTree.Sigs.MAP.idempotent_inter_filter}[idempotent_inter_filter]} function will skip recursive calls to physically equal subtrees (kept as-is in the intersection). This allows faster than [O(n)] intersections. {[ @@ -241,6 +250,7 @@ Here is a small example of a non-generic map: (IMap.add 4 "something" map) (IMap.add 5 "something else" map);; val map2 : string IMap.t = + # map == map2;; - : bool = true ]} @@ -250,8 +260,10 @@ Here is a small example of a non-generic map: {[ # let str = IMap.find 1 map;; val str : string = "hello" + # IMap.add 1 str map == map (* already present *);; - : bool = true + # IMap.add 1 "hello" map == map (* new string copy isn't physically equal to the old one *);; - : bool = false @@ -262,6 +274,7 @@ Here is a small example of a non-generic map: {[ # let map3 = IMap.remove 2 map;; val map3 : string IMap.t = + # IMap.add 2 (IMap.find 2 map) map3 == map;; - : bool = false ]} @@ -269,7 +282,7 @@ Here is a small example of a non-generic map: cheap equality test between maps), use the provided {{!PatriciaTree.section-hash_consed}hash-consed maps and sets}.} {li Our library also allows cross map/set operations through the - {{!PatriciaTree.MAP.WithForeign}[WithForeign]} functors: + {{!PatriciaTree.Sigs.MAP.WithForeign}[WithForeign]} functors: {[ module CrossOperations = IMap.WithForeign(ISet.BaseMap) ]} @@ -278,11 +291,14 @@ Here is a small example of a non-generic map: {[ # let set = ISet.of_list [1; 3];; val set : ISet.t = + # let restricted_map = CrossOperations.nonidempotent_inter { f = fun _key value () -> value } map set;; val restricted_map : string IMap.t = + # IMap.to_list map;; - : (int * string) list = [(1, "hello"); (2, "world"); (3, "how do you do?")] + # IMap.to_list restricted_map;; - : (int * string) list = [(1, "hello"); (3, "how do you do?")] ]} @@ -302,10 +318,10 @@ liberty of having a generic type as a key. | G_Addition : int expr * int expr -> int expr | G_Equal : 'a expr * 'a expr -> bool expr ]} - We can create our {{!PatriciaTree.HETEROGENEOUS_KEY}[HETEROGENEOUS_KEY]} functor + We can create our {{!PatriciaTree.Sigs.HETEROGENEOUS_KEY}[Sigs.HETEROGENEOUS_KEY]} functor parameter using this type as follows: {[ - module Expr : PatriciaTree.HETEROGENEOUS_KEY with type 'a t = 'a expr = struct + module Expr : PatriciaTree.Sigs.HETEROGENEOUS_KEY with type 'a t = 'a expr = struct type 'a t = 'a expr (** Injective, so long as expressions are small enough @@ -317,8 +333,8 @@ liberty of having a generic type as a key. | G_Addition(l,r) -> 2 + 4*(to_int l mod 10000 + 10000*(to_int r)) | G_Equal(l,r) -> 3 + 4*(to_int l mod 10000 + 10000*(to_int r)) - (** Full polymorphic equality *) - let rec polyeq : type a b. a expr -> b expr -> (a, b) PatriciaTree.cmp = + (** Full polymorphic equality, requires annotation to type properly *) + let rec polyeq : type a b. a expr -> b expr -> (a, b) PatriciaTree.Sigs.cmp = fun l r -> match l, r with | G_Const_Int l, G_Const_Int r -> if l = r then Eq else Diff | G_Const_Bool l, G_Const_Bool r -> if l = r then Eq else Diff @@ -328,7 +344,8 @@ liberty of having a generic type as a key. | Diff -> Diff) | G_Equal(ll, lr), G_Equal(rl, rr) -> ( match polyeq ll rl with - | Eq -> (match polyeq lr rr with Eq -> Eq | Diff -> Diff) (* Match required by typechecker *) + | Eq -> (* this match is no-op, but it is required to typecheck *) + (match polyeq lr rr with Eq -> Eq | Diff -> Diff) | Diff -> Diff) | _ -> Diff end @@ -338,7 +355,9 @@ liberty of having a generic type as a key. on the key type (first parameter) and the map type (second parameter). Here the value only depends on the type of the key, not that of the map {[ - module EMap = PatriciaTree.MakeHeterogeneousMap(Expr)(struct type ('a, _) t = 'a end) + module EMap = PatriciaTree.MakeHeterogeneousMap + (Expr) + (struct type ('key, 'map) t = 'key end) ]}} {li You can now use this as you would any other dependent map: {[ @@ -349,10 +368,13 @@ liberty of having a generic type as a key. EMap.add (G_Addition (G_Const_Int 3, G_Const_Int 6)) 9 |> EMap.add (G_Equal (G_Const_Bool false, G_Equal (G_Const_Int 5, G_Const_Int 7))) true val map : unit EMap.t = + # EMap.find (G_Const_Bool false) map;; - : bool = false + # EMap.find (G_Const_Int 5) map;; - : int = 5 + # EMap.cardinal map;; - : int = 4 ]}} @@ -382,24 +404,24 @@ liberty of having a generic type as a key. {1 Release status} This should be close to a stable release. It is already being -used as part of a larger project successfully, and this usage as helped us mature +used as part of a {{: https://codex.top}larger project} successfully, and this usage as helped us mature the interface. As is, we believe the project is usable, and we don't anticipate any major change before 1.0.0. We didn't commit to a stable release straight away as we would like a bit more time using this library before doing so. - {1 Known issues} There is a bug in the OCaml typechecker which prevents us from directly defining non-generic maps as instances of generic maps. To avoid this, non-generic maps -use a separate value type {{!PatriciaTree.snd}[('a, 'b) snd]} (instead of just using ['b]) +use a separate value type {{!PatriciaTree.Sigs.snd}[('a, 'b) snd]} (instead of just using ['b]) {[ type (_, 'b) snd = Snd of 'b [@@unboxed] ]} It should not incur any extra performance cost as it is unboxed, but can appear when manipulating non-generic maps. -For more details about this issue, see {{: https://discuss.ocaml.org/t/weird-behaviors-with-first-order-polymorphism/13783}the OCaml discourse discussion}. +For more details about this issue, see {{: https://discuss.ocaml.org/t/weird-behaviors-with-first-order-polymorphism/13783}the OCaml discourse discussion} +or {{: https://github.com/ocaml/ocaml/issues/13292}the github issue}. {1 Comparison to other OCaml libraries} diff --git a/int_builtins.c b/src/int_builtins.c similarity index 100% rename from int_builtins.c rename to src/int_builtins.c diff --git a/src/ints.ml b/src/ints.ml new file mode 100644 index 0000000..f1dffca --- /dev/null +++ b/src/ints.ml @@ -0,0 +1,45 @@ +(**************************************************************************) +(* This file is part of the Codex semantics library *) +(* (patricia-tree sub-component). *) +(* *) +(* Copyright (C) 2024 *) +(* CEA (Commissariat à l'énergie atomique et aux énergies *) +(* alternatives) *) +(* *) +(* You can redistribute it and/or modify it under the terms of the GNU *) +(* Lesser General Public License as published by the Free Software *) +(* Foundation, version 2.1. *) +(* *) +(* It is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) +(* GNU Lesser General Public License for more details. *) +(* *) +(* See the GNU Lesser General Public License version 2.1 *) +(* for more details (enclosed in the file LICENSE). *) +(**************************************************************************) + +(** The integer associated with a key *) +type intkey = int + +(** A mask is an integer with a single bit set (i.e. a power of 2). *) +type mask = int + +(** Fast highest bit computation in c, using GCC's __builtin_clz + which compile to efficient instruction (bsr) when possible. *) +external highest_bit: int -> (int[@untagged]) = + "caml_int_builtin_highest_bit_byte" "caml_int_builtin_highest_bit" [@@noalloc] + +let unsigned_lt x y = x - min_int < y - min_int + (* if x >= 0 && y >= 0 + then x < y + else if x >= 0 + then (* pos < neg *) true + else if y >= 0 then false + else x < y *) + +(** Note: in the original version, okasaki give the masks as arguments + to optimize the computation of highest_bit. *) +let branching_bit a b = highest_bit (a lxor b) + +let mask i m = i land (lnot (2*m-1)) diff --git a/src/ints.mli b/src/ints.mli new file mode 100644 index 0000000..3e81aab --- /dev/null +++ b/src/ints.mli @@ -0,0 +1,76 @@ +(**************************************************************************) +(* This file is part of the Codex semantics library *) +(* (patricia-tree sub-component). *) +(* *) +(* Copyright (C) 2024 *) +(* CEA (Commissariat à l'énergie atomique et aux énergies *) +(* alternatives) *) +(* *) +(* You can redistribute it and/or modify it under the terms of the GNU *) +(* Lesser General Public License as published by the Free Software *) +(* Foundation, version 2.1. *) +(* *) +(* It is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) +(* GNU Lesser General Public License for more details. *) +(* *) +(* See the GNU Lesser General Public License version 2.1 *) +(* for more details (enclosed in the file LICENSE). *) +(**************************************************************************) + +(** Small utilities used to manipulate the integer masks and branching bits *) + +type intkey = private int +(** Private type used to represent prefix stored in nodes. + These are integers with all bits after branching bit (included) set to zero *) + +type mask = private int +(** Private type: integers with a single bit set. *) + +val unsigned_lt : int -> int -> bool +(** All integers comparisons in this library are done according to their + {b unsigned representation}. This is the same as signed comparison for same + sign integers, but all negative integers are greater than the positives. + This means [-1] is the greatest possible number, and [0] is the smallest. + {[ + # unsigned_lt 2 (-1);; + - : bool = true + # unsigned_lt max_int min_int;; + - : bool = true + # unsigned_lt 3 2;; + - : bool = false + # unsigned_lt 2 3;; + - : bool = true + # unsigned_lt (-2) (-3);; + - : bool = false + # unsigned_lt (-4) (-3);; + - : bool = true + # unsigned_lt 0 0;; + - : bool = false + ]} + + Using this unsigned order helps avoid a bug described in + {{: https://www.cs.tufts.edu/comp/150FP/archive/jan-midtgaard/qc-patricia.pdf}{i QuickChecking Patricia Trees}} + by Jan Mitgaard. + + @since 0.10.0 *) + +(**/**) +(** For internal use and testing *) + +val branching_bit : int -> int -> mask +(** Returns the {!mask} corresponding to the highest bit that differs between + both arguments. *) + +val mask : int -> mask -> intkey +(** Only keeps the bits above mask set *) + +external highest_bit: int -> (int[@untagged]) = + "caml_int_builtin_highest_bit_byte" "caml_int_builtin_highest_bit" [@@noalloc] +(** [highest_bit x] is an integer with a single bit set: the highest set bit of [x]. + exported for test purposes only. + + @since 0.10.0 *) + +(**/**) diff --git a/src/key_value.ml b/src/key_value.ml new file mode 100644 index 0000000..a522121 --- /dev/null +++ b/src/key_value.ml @@ -0,0 +1,74 @@ +(**************************************************************************) +(* This file is part of the Codex semantics library *) +(* (patricia-tree sub-component). *) +(* *) +(* Copyright (C) 2024 *) +(* CEA (Commissariat à l'énergie atomique et aux énergies *) +(* alternatives) *) +(* *) +(* You can redistribute it and/or modify it under the terms of the GNU *) +(* Lesser General Public License as published by the Free Software *) +(* Foundation, version 2.1. *) +(* *) +(* It is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) +(* GNU Lesser General Public License for more details. *) +(* *) +(* See the GNU Lesser General Public License version 2.1 *) +(* for more details (enclosed in the file LICENSE). *) +(**************************************************************************) + +open Sigs + +(** {1 Keys and values} *) + +module HomogeneousValue = struct + type ('a,'map) t = 'map +end + +module WrappedHomogeneousValue = struct + type ('a, 'map) t = ('a, 'map) snd +end + +module HeterogeneousKeyFromKey(Key: KEY): HETEROGENEOUS_KEY with type 'a t = Key.t = +struct + type _ t = Key.t + + (** The type-safe way to do it would be to define this type, to + guarantee that 'a is always bound to the same type, and Eq is + safe. But this requires a lot of conversion code, and identity + functions that may not be well detected. [polyeq] is unsafe in + that it allows arbitrary conversion of t1 by t2 in t1 t, but + this unsafety is not exported, and I don't think we can do + something wrong using it. *) + (* type 'a t = K: Key.t -> unit t [@@unboxed] *) + let polyeq: type a b. a t -> b t -> (a,b) cmp = + fun a b -> match a,b with + | a, b when (Key.to_int a) == (Key.to_int b) -> Obj.magic Eq + | _ -> Diff + let to_int = Key.to_int +end + + +module Value : VALUE with type 'a t = 'a = struct type 'a t = 'a end + +module HashedValue : HASHED_VALUE with type 'a t = 'a = struct + include Value + let hash x = Hashtbl.hash x + let polyeq: type a b. a -> b -> bool = fun a b -> a == Obj.magic b +end +module HeterogeneousHashedValue : HETEROGENEOUS_HASHED_VALUE with type ('k, 'm) t = 'm = +struct + include HomogeneousValue + let hash x = Hashtbl.hash x + let polyeq: type a b. a -> b -> bool = fun a b -> a == Obj.magic b +end + +module HeterogeneousHashedValueFromHashedValue(Value: HASHED_VALUE) + : HETEROGENEOUS_HASHED_VALUE with type ('a, 'map) t = ('a, 'map Value.t) snd = +struct + type ('a, 'map) t = ('a, 'map Value.t) snd + let hash (Snd x) = Value.hash x + let polyeq (Snd a) (Snd b) = Value.polyeq a b +end diff --git a/src/key_value.mli b/src/key_value.mli new file mode 100644 index 0000000..4280c36 --- /dev/null +++ b/src/key_value.mli @@ -0,0 +1,67 @@ +(**************************************************************************) +(* This file is part of the Codex semantics library *) +(* (patricia-tree sub-component). *) +(* *) +(* Copyright (C) 2024 *) +(* CEA (Commissariat à l'énergie atomique et aux énergies *) +(* alternatives) *) +(* *) +(* You can redistribute it and/or modify it under the terms of the GNU *) +(* Lesser General Public License as published by the Free Software *) +(* Foundation, version 2.1. *) +(* *) +(* It is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) +(* GNU Lesser General Public License for more details. *) +(* *) +(* See the GNU Lesser General Public License version 2.1 *) +(* for more details (enclosed in the file LICENSE). *) +(**************************************************************************) + +open Sigs + +module Value : VALUE with type 'a t = 'a +(** Default implementation of {!Sigs.VALUE}, used in {!MakeMap}. + @since 0.10.0 *) + +module HomogeneousValue : HETEROGENEOUS_VALUE with type ('a,'map) t = 'map +(** Default implementation of {!Sigs.HETEROGENEOUS_VALUE}, to use when the type of the + value in a heterogeneous map does not depend on the type of the key, only on + the type of the map. *) + +module WrappedHomogeneousValue : HETEROGENEOUS_VALUE with type ('a,'map) t = ('a,'map) snd +(** Same as {!HomogeneousValue}, but uses a wrapper (unboxed) type instead of direct + equality. This avoids a problem in the typechecker with overly eager simplification of aliases. + More info on + {{: https://discuss.ocaml.org/t/weird-behaviors-with-first-order-polymorphism/13783} the OCaml discourse post} + and {{: https://github.com/ocaml/ocaml/issues/13292}the github issue}. *) + +module HashedValue : HASHED_VALUE with type 'a t = 'a +(** Generic implementation of {!Sigs.HASHED_VALUE}. + Uses {{: https://ocaml.org/api/Hashtbl.html#VALhash}[Hashtbl.hash]} for hashing + and physical equality for equality. + Note that this may lead to maps of different types having the same identifier + ({!MakeHashconsedMap.to_int}), see the documentation of {!Sigs.HASHED_VALUE.polyeq} + for details on this. *) + +module HeterogeneousHashedValue : HETEROGENEOUS_HASHED_VALUE with type ('k, 'm) t = 'm +(** Generic implementation of {!Sigs.HETEROGENEOUS_HASHED_VALUE}. + Uses {{: https://ocaml.org/api/Hashtbl.html#VALhash}[Hashtbl.hash]} for hashing + and physical equality for equality. + Note that this may lead to maps of different types having the same identifier + ({!MakeHashconsedHeterogeneousMap.to_int}), see the documentation of + {!Sigs.HASHED_VALUE.polyeq} for details on this. *) + + +(**/**) +(** For local library use only *) + +module HeterogeneousKeyFromKey(Key: KEY): HETEROGENEOUS_KEY with type 'a t = Key.t +(** Create a {!Sigs.HETEROGENEOUS_KEY} from a non-polymorphic {!Sigs.KEY} *) + +module HeterogeneousHashedValueFromHashedValue(Value: HASHED_VALUE) + : HETEROGENEOUS_HASHED_VALUE with type ('a, 'map) t = ('a, 'map Value.t) snd +(** Create a {!Sigs.HETEROGENEOUS_HASHED_VALUE} from a {!Sigs.HASHED_VALUE} *) + +(**/**) diff --git a/src/nodes.ml b/src/nodes.ml new file mode 100644 index 0000000..caec96e --- /dev/null +++ b/src/nodes.ml @@ -0,0 +1,361 @@ +(**************************************************************************) +(* This file is part of the Codex semantics library *) +(* (patricia-tree sub-component). *) +(* *) +(* Copyright (C) 2024 *) +(* CEA (Commissariat à l'énergie atomique et aux énergies *) +(* alternatives) *) +(* *) +(* You can redistribute it and/or modify it under the terms of the GNU *) +(* Lesser General Public License as published by the Free Software *) +(* Foundation, version 2.1. *) +(* *) +(* It is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) +(* GNU Lesser General Public License for more details. *) +(* *) +(* See the GNU Lesser General Public License version 2.1 *) +(* for more details (enclosed in the file LICENSE). *) +(**************************************************************************) + +open Ints +open Sigs + +let sdbm x y = y + (x lsl 16) + (x lsl 6) - x +(** Combine two numbers into a new hash *) + +(** Simple node, with no hash consing. *) +module [@inline] SimpleNode(Key:sig type 'a t end)(Value:HETEROGENEOUS_VALUE) = struct + type 'a key = 'a Key.t + type ('key,'map) value = ('key,'map) Value.t + + type 'map view = + | Empty: 'map view + | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view + | Leaf: {key:'key key; value:('key,'map) value} -> 'map view + and 'map t = 'map view + let view x = x + + let empty = Empty + let is_empty x = x == Empty + let leaf key value = Leaf {key;value} + let branch ~prefix ~branching_bit ~tree0 ~tree1 = + match tree0,tree1 with + | Empty, x -> x + | x, Empty -> x + | _ -> Branch{prefix;branching_bit;tree0;tree1} +end + +module WeakNode(Key:sig type 'a t end)(Value:HETEROGENEOUS_VALUE)(* :NODE *) = struct + type 'a key = 'a Key.t + type ('key,'map) value = ('key,'map) Value.t + + type 'map view = + | Empty: 'map view + | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view + | Leaf: {key:'key key; value:('key,'map) value} -> 'map view + and 'a t = + | TEmpty: 'map t + | TBranch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map t + (* Additional hidden case: leaf, which is an Ephemeron.K1, whose + tag is 251, so it can be discriminated against the other + cases. This avoids an indirection. *) + + let empty = TEmpty + let is_empty x = x == TEmpty + let leaf key value = Obj.magic (Ephemeron.K1.make key value) + let branch ~prefix ~branching_bit ~tree0 ~tree1 = + match tree0,tree1 with + | TEmpty, x -> x + | x, TEmpty -> x + | _ -> TBranch{prefix;branching_bit;tree0;tree1} + + let view (type k) (type map) (t:map t) = + let obj = Obj.repr t in + if Obj.is_block obj && Obj.tag obj != 0 then + (* Ephemeron.K1.get_(key|value) are no longer available in 5.0, + so we do that instead. *) + let ephe:Obj.Ephemeron.t = Obj.magic obj in + let key:k key option = Obj.magic @@ Obj.Ephemeron.get_key ephe 0 in + let data:(k,map) Value.t option = Obj.magic @@ Obj.Ephemeron.get_data ephe in + match key,data with + | Some key, Some value -> Leaf{key;value} + | _ -> Empty + else match t with + | TEmpty -> Empty + | TBranch{prefix;branching_bit;tree0;tree1} -> Branch{prefix;branching_bit;tree0;tree1} + +end + + +(** Add a unique id to nodes, e.g. so that they can be used as keys in maps or sets. *) +module NodeWithId(Key:sig type 'a t end)(Value:HETEROGENEOUS_VALUE):NODE_WITH_ID + with type 'key key = 'key Key.t + and type ('key,'map) value = ('key,'map) Value.t += struct + + type 'a key = 'a Key.t + type ('key,'map) value = ('key,'map) Value.t + + type 'map view = + | Empty: 'map view + | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view + | Leaf: {key:'key key; value:('key,'map) value} -> 'map view + and 'map t = + | NEmpty: 'map t + | NBranch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t;id:int} -> 'map t + | NLeaf: {key:'key key;value:('key,'map) value;id:int} -> 'map t + + let view = function + | NEmpty -> Empty + | NBranch{prefix;branching_bit;tree0;tree1;_} -> Branch{prefix;branching_bit;tree0;tree1} + | NLeaf{key;value;_} -> Leaf{key;value} + + let to_int = function + | NEmpty -> 0 + | NBranch{id;_} -> id + | NLeaf{id;_} -> id + + let count = ref 0 + + let empty = NEmpty + let is_empty x = x == NEmpty + let leaf key value = incr count; NLeaf {key;value;id=(!count)} + let branch ~prefix ~branching_bit ~tree0 ~tree1 = + match tree0,tree1 with + | NEmpty, x -> x + | x, NEmpty -> x + | _ -> incr count; NBranch{prefix;branching_bit;tree0;tree1;id=(!count)} +end + + +(** NODE for sets, i.e. when there is no associated values. *) +module SetNode(Key:sig type 'a t end):NODE + with type 'key key = 'key Key.t + and type ('key,'map) value = unit += struct + + type 'a key = 'a Key.t + type ('key,'map) value = unit + + type 'map view = + | Empty: 'map view + | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view + | Leaf: {key:'key key; value:('key,'map) value} -> 'map view + and 'map t = + | NEmpty: 'map t + | NBranch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map t + | NLeaf: {key:'key key} -> 'map t + + + let view = function + | NEmpty -> Empty + | NBranch{prefix;branching_bit;tree0;tree1} -> Branch{prefix;branching_bit;tree0;tree1} + | NLeaf{key} -> Leaf{key;value=()} + + let empty = NEmpty + let is_empty x = x == NEmpty + let leaf key _value = NLeaf {key} + let branch ~prefix ~branching_bit ~tree0 ~tree1 = + match tree0,tree1 with + | NEmpty, x -> x + | x, NEmpty -> x + | _ -> NBranch{prefix;branching_bit;tree0;tree1} + +end + +module WeakSetNode(Key:sig type 'a t end)(* :NODE *) = struct + type 'a key = 'a Key.t + type ('key,'map) value = unit + + type 'map view = + | Empty: 'map view + | Branch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map view + | Leaf: {key:'key key; value:('key,'map) value} -> 'map view + and 'a t = + | TEmpty: 'map t + | TBranch: {prefix:intkey;branching_bit:mask;tree0:'map t;tree1:'map t} -> 'map t + (* Additional hidden case: leaf, which is a Weak array, whose tag + is 251, so it can be discriminated against the other + cases. This avoids an indirection. *) + + let empty = TEmpty + let is_empty x = x == TEmpty + let leaf key () = Obj.magic (let a = Weak.create 1 in Weak.set a 0 (Some key)) + let branch ~prefix ~branching_bit ~tree0 ~tree1 = + match tree0,tree1 with + | TEmpty, x -> x + | x, TEmpty -> x + | _ -> TBranch{prefix;branching_bit;tree0;tree1} + + let view t = + let obj = Obj.repr t in + if Obj.is_block obj && Obj.tag obj != 0 then + let weak = Obj.magic obj in + let key = Weak.get weak 0 in + match key with + | Some key -> Leaf{key;value=()} + | _ -> Empty + else match t with (* Identity in memory. *) + | TEmpty -> Empty + | TBranch{prefix;branching_bit;tree0;tree1} -> Branch{prefix;branching_bit;tree0;tree1} +end + +module HashconsedNode(Key:HETEROGENEOUS_KEY)(Value:HETEROGENEOUS_HASHED_VALUE)() +(* : HASH_CONSED_NODE + with type 'key key = 'key Key.t + and type ('key, 'map) value = ('key, 'map) Value.t *) += struct + + type 'a key = 'a Key.t + type ('key, 'map) value = ('key, 'map) Value.t + + type 'map view = + | Empty: 'map view + | Branch: { prefix:intkey; branching_bit:mask; tree0:'map t; tree1:'map t } -> 'map view + | Leaf: { key:'key key; value:('key,'map) value } -> 'map view + and 'map t = + | NEmpty: 'map t + | NBranch: { prefix:intkey; branching_bit:mask; tree0:'map t; tree1:'map t; id:int } -> 'map t + | NLeaf: { key:'key key; value:('key, 'map) Value.t; id:int } -> 'map t + + let view = function + | NEmpty -> Empty + | NBranch{prefix;branching_bit;tree0;tree1;_} -> Branch{prefix;branching_bit;tree0;tree1} + | NLeaf{key;value;_} -> Leaf{key;value} + + let to_int = function + | NEmpty -> 0 + | NBranch{ id; _ } -> id + | NLeaf{ id; _ } -> id + + let count = ref 1 (** Start at 1 as we increment in post *) + + type any_map = AnyMap : 'a t -> any_map [@@unboxed] + + module HashArg = struct + type t = any_map + let equal (AnyMap a) (AnyMap b) = match a, b with + | NEmpty, NEmpty -> true + | NLeaf{key=key1;value=value1;_}, NLeaf{key=key2;value=value2;_} -> + begin match Key.polyeq key1 key2 with + | Eq -> Value.polyeq value1 value2 + | Diff -> false + end + | NBranch{prefix=prefixa;branching_bit=branching_bita;tree0=tree0a;tree1=tree1a;_}, + NBranch{prefix=prefixb;branching_bit=branching_bitb;tree0=tree0b;tree1=tree1b;_} -> + prefixa == prefixb && branching_bita == branching_bitb && + to_int tree0a = to_int tree0b && to_int tree1a = to_int tree1b + | _ -> false + + let hash (AnyMap x) = match x with + | NEmpty -> 0 + | NLeaf{key; value; _} -> + let hash = sdbm (Key.to_int key) (Value.hash value) in + (hash lsl 1) lor 1 + (* All leaf hashes are odd *) + | NBranch{prefix; branching_bit; tree0; tree1; _} -> (* All branch hashes are even *) + (sdbm ((prefix :> int) lor (branching_bit :> int)) @@ sdbm (to_int tree0) (to_int tree1)) lsl 1 + end + + module WeakHash = Weak.Make(HashArg) + + let weakh = WeakHash.create 120 + + let empty = NEmpty + let is_empty x = x == NEmpty + + let try_find (tentative : 'a t) = + let AnyMap x = WeakHash.merge weakh (AnyMap tentative) in + let x : 'a t = Obj.magic x in + if x == tentative then incr count; + x + + let leaf key value = try_find (NLeaf{key;value;id= !count}) + + let branch ~prefix ~branching_bit ~tree0 ~tree1 = + match tree0,tree1 with + | NEmpty, x -> x + | x, NEmpty -> x + | _ -> try_find (NBranch{prefix;branching_bit;tree0;tree1;id=(!count)}) + + let equal x y = x == y + let compare x y = Int.compare (to_int x) (to_int y) +end + +module HashconsedSetNode(Key:HETEROGENEOUS_KEY)(): HASH_CONSED_NODE + with type 'key key = 'key Key.t + and type ('key,'map) value = unit += struct + + type 'a key = 'a Key.t + type ('key,'map) value = unit + + type map = + | NEmpty: map + | NBranch: { prefix:intkey; branching_bit:mask; tree0:map; tree1:map; id:int } -> map + | NLeaf: { key:'key key; id:int } -> map + type 'map view = + | Empty: 'map view + | Branch: { prefix:intkey; branching_bit:mask; tree0:'map t; tree1:'map t } -> 'map view + | Leaf: { key:'key key; value:unit } -> 'map view + and _ t = map + + let view = function + | NEmpty -> Empty + | NBranch{prefix;branching_bit;tree0;tree1;_} -> Branch{prefix;branching_bit;tree0;tree1} + | NLeaf{ key; _ } -> Leaf{ key; value=() } + + let to_int = function + | NEmpty -> 0 + | NBranch{ id; _ } -> id + | NLeaf{ id; _ } -> id + + let count = ref 1 (** Start at 1 as we increment in post *) + + module HashArg = struct + type t = map + let equal a b = match a, b with + | NEmpty, NEmpty -> true + | NLeaf{key=key1;_}, NLeaf{key=key2;_} -> + begin match Key.polyeq key1 key2 with + | Eq -> true + | Diff -> false + end + | NBranch{prefix=prefixa;branching_bit=branching_bita;tree0=tree0a;tree1=tree1a;_}, + NBranch{prefix=prefixb;branching_bit=branching_bitb;tree0=tree0b;tree1=tree1b;_} -> + prefixa == prefixb && branching_bita == branching_bitb && + tree0a == tree0b && tree1a == tree1b + | _ -> false + + let hash a = match a with + | NEmpty -> 0 + | NLeaf{key; _} -> ((Key.to_int key) lsl 1) lor 1 (* All leaf hashes are odd *) + | NBranch{prefix; branching_bit; tree0; tree1; _} -> (* All branch hashes are even *) + (sdbm ((prefix :> int) lor (branching_bit :> int)) @@ sdbm (to_int tree0) (to_int tree1)) lsl 1 + end + + module WeakHash = Weak.Make(HashArg) + + let weakh = WeakHash.create 120 + + let empty = NEmpty + let is_empty x = x == NEmpty + + let try_find tentative = + let x = WeakHash.merge weakh tentative in + if x == tentative then incr count; + x + + let leaf key () = try_find (NLeaf{ key; id = !count }) + + let branch ~prefix ~branching_bit ~tree0 ~tree1 = + match tree0,tree1 with + | NEmpty, x -> x + | x, NEmpty -> x + | _ -> try_find (NBranch{prefix;branching_bit;tree0;tree1;id=(!count)}) + + let equal x y = x == y + let compare x y = Int.compare (to_int x) (to_int y) +end diff --git a/src/nodes.mli b/src/nodes.mli new file mode 100644 index 0000000..65c5f36 --- /dev/null +++ b/src/nodes.mli @@ -0,0 +1,91 @@ +(**************************************************************************) +(* This file is part of the Codex semantics library *) +(* (patricia-tree sub-component). *) +(* *) +(* Copyright (C) 2024 *) +(* CEA (Commissariat à l'énergie atomique et aux énergies *) +(* alternatives) *) +(* *) +(* You can redistribute it and/or modify it under the terms of the GNU *) +(* Lesser General Public License as published by the Free Software *) +(* Foundation, version 2.1. *) +(* *) +(* It is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) +(* GNU Lesser General Public License for more details. *) +(* *) +(* See the GNU Lesser General Public License version 2.1 *) +(* for more details (enclosed in the file LICENSE). *) +(**************************************************************************) + +open Sigs + +(** {1 Basic nodes} *) + +(** This module is such that ['map t = 'map view]. + This is the node used in {!MakeHeterogeneousMap} and {!MakeMap}. *) + module SimpleNode(Key: sig type 'k t end)(Value: HETEROGENEOUS_VALUE) : NODE + with type 'a key = 'a Key.t + and type ('key,'map) value = ('key,'map) Value.t + +(** Here, nodes also contain a unique id, e.g. so that they can be + used as keys of maps or hash-tables. *) +module NodeWithId(Key: sig type 'k t end)(Value: HETEROGENEOUS_VALUE) : NODE_WITH_ID + with type 'a key = 'a Key.t + and type ('key,'map) value = ('key,'map) Value.t + + +(* Maybe: we can make variations around NodeWithId; e.g. a version + that does HashConsing, or a version that replicates the node to a + key-value store on disk, etc. *) + +(** An optimized representation for sets, i.e. maps to unit: we do not + store a reference to unit (note that you can further optimize when + you know the representation of the key). + This is the node used in {!MakeHeterogeneousSet} and {!MakeSet}. *) +module SetNode(Key: sig type 'k t end) : NODE + with type 'a key = 'a Key.t + and type ('key,'map) value = unit + +(** {1 Weak nodes} *) + +(** NODE used to implement weak key hashes (the key-binding pair is an + Ephemeron, the reference to the key is weak, and if the key is + garbage collected, the binding disappears from the map *) +module WeakNode(Key: sig type 'k t end)(Value: HETEROGENEOUS_VALUE) : NODE + with type 'a key = 'a Key.t + and type ('key,'map) value = ('key,'map) Value.t + +(** Both a {!WeakNode} and a {!SetNode}, useful to implement Weak sets. *) +module WeakSetNode(Key: sig type 'k t end) : NODE + with type 'a key = 'a Key.t + and type ('key,'map) value = unit + + +(** {1 Hashconsed nodes} *) + +(** Gives a unique number to each node like {!NodeWithId}, + but also performs hash-consing. So two maps with the same bindings will + always be physically equal. See {!hash_consed} for more details on this. + + This is a generative functor, as calling it creates a new hash-table to store + the created nodes, and a reference to store the next unallocated identifier. + Maps/sets from different hash-consing functors (even if these functors have + the same arguments) will have different (incompatible) numbering systems and + be stored in different hash-tables (thus they will never be physically equal). + + Using a single {!HashconsedNode} in multiple {!MakeCustomMap} functors will result in + all those maps being hash-consed together (stored in the same hash-table, + same numbering system). + + @since v0.10.0 *) +module HashconsedNode(Key: HETEROGENEOUS_KEY)(Value: HETEROGENEOUS_HASHED_VALUE)() : HASH_CONSED_NODE + with type 'a key = 'a Key.t + and type ('key,'map) value = ('key, 'map) Value.t + +(** Both a {!HashconsedNode} and a {!SetNode}. + @since v0.10.0 *) +module HashconsedSetNode(Key: HETEROGENEOUS_KEY)() : HASH_CONSED_NODE + with type 'a key = 'a Key.t + and type ('key,'map) value = unit diff --git a/patriciaTree.mli b/src/sigs.ml similarity index 66% rename from patriciaTree.mli rename to src/sigs.ml index 8d41267..74aa18a 100644 --- a/patriciaTree.mli +++ b/src/sigs.ml @@ -19,139 +19,14 @@ (* for more details (enclosed in the file LICENSE). *) (**************************************************************************) -(** Association maps from key to values, and sets, implemented with - Patricia Trees, allowing fast merge operations by making use of - physical equality between subtrees; and custom implementation of - tree nodes (allowing normal maps, hash-consed maps, weak key or - value maps, sets, custom maps, etc.) - - This is similar to OCaml's Map, except that: - - {ul - {- The required signature for keys is different, in that we require - each key to be mapped to a unique integer identifier.} - - {- The implementation uses Patricia Tree, as described in Okasaki - and Gill's 1998 paper - {{: https://www.semanticscholar.org/paper/Fast-Mergeable-Integer-Maps-Okasaki-Gill/23003be706e5f586f23dd7fa5b2a410cc91b659d}{i Fast mergeable integer maps}}, - i.e. it is a space-efficient prefix trie over the big-endian representation of - the key's integer identifier. - - Example of a 5-bit patricia tree containing five numbers: 0 [0b0000], 1 [0b0001], - 5 [0b0101] and 7 [0b0111] and -8 [0b1111]: - {v - Branch - (prefix=0b?___) - / \ - Branch Leaf(-8) - (prefix=0b0?__) 0b1111 - / \ - Branch Branch - (prefix=0b000?) (prefix=0b01?_) - | | | | - Leaf(0) Leaf(1) Leaf(5) Leaf(7) - 0b0000 0b0001 0b0101 0b0111 - v} - - The main benefit of Patricia Tree is that their representation - is stable (contrary to maps, inserting nodes in any order will - return the same shape), which allows different versions of a map - to share more subtrees in memory, and the operations over two - maps to benefit from this sharing. The functions in this library - attempt to maximally preserve sharing and benefit from sharing, - allowing very important improvements in complexity and running - time when combining maps or sets is a frequent operation.} - - {- Finally, the implementation is more customizable, allowing - notably (key,value) pairs or different types to be in the same map, - or to choose the memory representation of the nodes of the tree.} - - {- Some operations like {{!BASE_MAP.pop_unsigned_minimum}[pop_unsigned_minimum]} and - {{!BASE_MAP.pop_unsigned_maximum}[pop_unsigned_maximum]} make our Set - suitable as priority queue (but remember that each element in the - queue must map to a distinct integer, and that using the {{!unsigned_lt}unsigned order} - means elements with negative priority are seen as greater than elements with - positive ones).} - } *) - -(** Note on complexity: in the following, n represents the size of the - map when there is one (and [|map1|] is the number of elements in - [map1]). The term log(n) correspond to the maximum height of the - tree, which is log(n) if we assume an even distribution of numbers - in the map (e.g. random distribution, or integers chosen - contiguously using a counter). The worst-case height is - O(min(n,64)) which is actually constant, but not really - informative; log(n) corresponds to the real complexity in usual - distributions. *) - -(**/**) -(** mdx requires opening PatriciaTree, but we don't want that to appear in the doc. - also contains some quick placeholder code: -{[ - open PatriciaTree - - type foo - - module IntKey = struct - type 'a t = int - let to_int x = x - let polyeq : type a b. a t -> b t -> (a, b) cmp = fun a b -> - if a == Obj.magic b then Obj.magic Eq else Diff - end - module MyValue = Int - module MyMap = MakeHeterogeneousMap(IntKey)(struct type ('a,'b) t = int end) -]} -*) -(**/**) - -val unsigned_lt : int -> int -> bool -(** All integers comparisons in this library are done according to their - {b unsigned representation}. This is the same as signed comparison for same - sign integers, but all negative integers are greater than the positives. - This means [-1] is the greatest possible number, and [0] is the smallest. - {[ - # unsigned_lt 2 (-1);; - - : bool = true - # unsigned_lt max_int min_int;; - - : bool = true - # unsigned_lt 3 2;; - - : bool = false - # unsigned_lt 2 3;; - - : bool = true - # unsigned_lt (-2) (-3);; - - : bool = false - # unsigned_lt (-4) (-3);; - - : bool = true - # unsigned_lt 0 0;; - - : bool = false - ]} - - Using this unsigned order helps avoid a bug described in - {{: https://www.cs.tufts.edu/comp/150FP/archive/jan-midtgaard/qc-patricia.pdf}{i QuickChecking Patricia Trees}} - by Jan Mitgaard. +(** All signatures used in this library *) - @since 0.10.0 *) - - -type intkey = private int -(** Private type used to represent prefix stored in nodes. - These are integers with all bits after branching bit (included) set to zero *) - -type mask = private int -(** Private type: integers with a single bit set. *) - -(**/**) - -external highest_bit: int -> (int[@untagged]) = - "caml_int_builtin_highest_bit_byte" "caml_int_builtin_highest_bit" [@@noalloc] -(** [highest_bit x] is an integer with a single bit set: the highest set bit of [x]. - exported for test purposes only. - - @since 0.10.0 *) - -(**/**) +open Ints (** {1 Nodes} *) +(** Nodes are the underlying representation used to build a patricia-tree. + The module type specifies the constructors they must provide, and a common + interface used for pattern-matching. *) (** This module explains how a node is stored in memory, with functions to create and view nodes. *) @@ -178,7 +53,7 @@ module type NODE = sig (** The empty map *) val leaf : 'key key -> ('key, 'map) value -> 'map t - (** A singleton leaf, similar to {!BASE_MAP.singleton} *) + (** A singleton leaf, similar to {!Sigs.BASE_MAP.singleton} *) val branch : prefix:intkey -> @@ -239,7 +114,7 @@ module type NODE_WITH_ID = sig end (** Hash-consed nodes also associate a unique number to each node, - Unlike {!NODE_WITH_ID}, they also check before instanciating the node whether + Unlike {!Sigs.NODE_WITH_ID}, they also check before instanciating the node whether a similar node already exists. This results in slightly slower constructors (they perform an extra hash-table lookup), but allows for constant time equality and comparison. @@ -252,24 +127,24 @@ module type HASH_CONSED_NODE = sig val to_int : 'a t -> int (** Returns a unique number for each map, the {{!hash_consed}hash-consed} identifier of the map. - Unlike {!NODE_WITH_ID.to_int}, hash-consing ensures that maps - which contain the same keys (compared by {!KEY.to_int}) and values (compared - by {!HASHED_VALUE.polyeq}) will always be physically equal + Unlike {!Sigs.NODE_WITH_ID.to_int}, hash-consing ensures that maps + which contain the same keys (compared by {!Sigs.KEY.to_int}) and values (compared + by {!Sigs.HASHED_VALUE.polyeq}) will always be physically equal and have the same identifier. Maps with the same identifier are also physically equal: [to_int m1 = to_int m2] implies [m1 == m2]. - Note that when using physical equality as {!HASHED_VALUE.polyeq}, some + Note that when using physical equality as {!Sigs.HASHED_VALUE.polyeq}, some maps of different types [a t] and [b t] may be given the same identifier. - See the end of the documentation of {!HASHED_VALUE.polyeq} for details. *) + See the end of the documentation of {!Sigs.HASHED_VALUE.polyeq} for details. *) val equal : 'a t -> 'a t -> bool (** Constant time equality using the {{!hash_consed}hash-consed} nodes identifiers. This is equivalent to physical equality. Two nodes are equal if their trees contain the same bindings, - where keys are compared by {!KEY.to_int} and values are compared by - {!HASHED_VALUE.polyeq}. *) + where keys are compared by {!Sigs.KEY.to_int} and values are compared by + {!Sigs.HASHED_VALUE.polyeq}. *) val compare : 'a t -> 'a t -> int (** Constant time comparison using the {{!hash_consed}hash-consed} node identifiers. @@ -289,30 +164,30 @@ end of ['a key] to [('a,'b) values]. All maps and set are a variation of this type, sometimes with a simplified interface. - - {!HETEROGENEOUS_MAP} is just a {!BASE_MAP} with a functor {!HETEROGENEOUS_MAP.WithForeign} + - {!Sigs.HETEROGENEOUS_MAP} is just a {!Sigs.BASE_MAP} with a functor {!Sigs.HETEROGENEOUS_MAP.WithForeign} for building operations that operate on two maps of different base types; - - {!MAP} specializes the interface for non-generic keys ([key] instead of ['a key]); - - {!HETEROGENEOUS_SET} specializes {!BASE_MAP} for sets ([('a,'b) value = unit]) and + - {!Sigs.MAP} specializes the interface for non-generic keys ([key] instead of ['a key]); + - {!Sigs.HETEROGENEOUS_SET} specializes {!Sigs.BASE_MAP} for sets ([('a,'b) value = unit]) and removes the value argument from most operations; - - {!SET} specializes {!HETEROGENEOUS_SET} further by making elements (keys) + - {!Sigs.SET} specializes {!Sigs.HETEROGENEOUS_SET} further by making elements (keys) non-generic ([elt] instead of ['a elt]). *) module type BASE_MAP = sig - include NODE + include NODE (** @closed *) (** Existential wrapper for the ['a] parameter in a ['a key], [('a,'map) value] pair *) type 'map key_value_pair = KeyValue : 'a key * ('a, 'map) value -> 'map key_value_pair - (** {3 Basic functions} *) + (** {1 Basic functions} *) val unsigned_min_binding : 'a t -> 'a key_value_pair (** [unsigned_min_binding m] is minimal binding [KeyValue(k,v)] of the map, - using the {{!unsigned_lt}unsigned order} on {!KEY.to_int}. + using the {{!unsigned_lt}unsigned order} on {!Sigs.KEY.to_int}. @raises Not_found if the map is empty *) val unsigned_max_binding : 'a t -> 'a key_value_pair (** [unsigned_max_binding m] is maximal binding [KeyValue(k,v)] of the map, - using the {{!unsigned_lt}unsigned order} on {!KEY.to_int}. + using the {{!unsigned_lt}unsigned order} on {!Sigs.KEY.to_int}. @raises Not_found if the map is empty *) val singleton : 'a key -> ('a, 'b) value -> 'b t @@ -342,13 +217,13 @@ module type BASE_MAP = sig val pop_unsigned_minimum: 'map t -> ('map key_value_pair * 'map t) option (** [pop_unsigned_minimum m] returns [None] if [is_empty m], or [Some(key,value,m')] where [(key,value) = unsigned_min_binding m] and [m' = remove m key]. - Uses the {{!unsigned_lt}unsigned order} on {!KEY.to_int}. + Uses the {{!unsigned_lt}unsigned order} on {!Sigs.KEY.to_int}. O(log(n)) complexity. *) val pop_unsigned_maximum: 'map t -> ('map key_value_pair * 'map t) option (** [pop_unsigned_maximum m] returns [None] if [is_empty m], or [Some(key,value,m')] where [(key,value) = unsigned_max_binding m] and [m' = remove m key]. - Uses the {{!unsigned_lt}unsigned order} on {!KEY.to_int}. + Uses the {{!unsigned_lt}unsigned order} on {!Sigs.KEY.to_int}. O(log(n)) complexity. *) val insert: 'a key -> (('a,'map) value option -> ('a,'map) value) -> 'map t -> 'map t @@ -372,7 +247,7 @@ module type BASE_MAP = sig whether the old value existed). O(log(n)) complexity. Preserves physical equality if the new value is physically equal to the old. *) - (** {3 Iterators} *) + (** {1 Iterators} *) val split : 'key key -> 'map t -> 'map t * ('key, 'map) value option * 'map t (** [split key map] splits the map into: @@ -380,18 +255,18 @@ module type BASE_MAP = sig - value associated to [key] (if present) - submap of [map] whose keys are bigger than [key] - Where the order is given by the {{!unsigned_lt}unsigned order} on {!KEY.to_int}. *) + Where the order is given by the {{!unsigned_lt}unsigned order} on {!Sigs.KEY.to_int}. *) type 'map polyiter = { f : 'a. 'a key -> ('a, 'map) value -> unit; } [@@unboxed] val iter : 'map polyiter -> 'map t -> unit (** [iter f m] calls [f.f] on all bindings of [m], - in the {{!unsigned_lt}unsigned order} on {!KEY.to_int} *) + in the {{!unsigned_lt}unsigned order} on {!Sigs.KEY.to_int} *) type ('acc,'map) polyfold = { f: 'a. 'a key -> ('a,'map) value -> 'acc -> 'acc } [@@unboxed] val fold : ('acc,'map) polyfold -> 'map t -> 'acc -> 'acc (** [fold f m acc] returns [f.f key_n value_n (... (f.f key_1 value_1 acc))] where [(key_1, value_1) ... (key_n, value_n)] are the bindings of [m], in - the {{!unsigned_lt}unsigned order} on {!KEY.to_int}. *) + the {{!unsigned_lt}unsigned order} on {!Sigs.KEY.to_int}. *) type ('acc,'map) polyfold2 = { f: 'a. 'a key -> ('a,'map) value -> ('a,'map) value -> 'acc -> 'acc } [@@unboxed] val fold_on_nonequal_inter : ('acc,'map) polyfold2 -> 'map t -> 'map t -> 'acc -> 'acc @@ -399,7 +274,7 @@ module type BASE_MAP = sig [f.f key_n value1_n value2n (... (f.f key_1 value1_1 value2_1 acc))] where [(key_1, value1_1, value2_1) ... (key_n, value1_n, value2_n)] are the bindings that exist in both maps ([m1 ∩ m2]) whose values are physically different. - Calls to [f.f] are performed in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + Calls to [f.f] are performed in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) type ('acc,'map) polyfold2_union = { f: 'a. 'a key -> ('a,'map) value option -> ('a,'map) value option -> 'acc -> 'acc } [@@unboxed] @@ -409,13 +284,13 @@ module type BASE_MAP = sig [(key_1, value1_1, value2_1) ... (key_n, value1_n, value2_n)] are the bindings that exists in either map ([m1 ∪ m2]) whose values are physically different. - Calls to [f.f] are performed in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + Calls to [f.f] are performed in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) type 'map polypredicate = { f: 'a. 'a key -> ('a,'map) value -> bool; } [@@unboxed] val filter : 'map polypredicate -> 'map t -> 'map t (** [filter f m] returns the submap of [m] containing the bindings [k->v] such that [f.f k v = true]. - [f.f] is called in the {{!unsigned_lt}unsigned order} of {!KEY.to_int} *) + [f.f] is called in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int} *) val for_all : 'map polypredicate -> 'map t -> bool (** [for_all f m] checks that [f] holds on all bindings of [m]. @@ -431,14 +306,14 @@ module type BASE_MAP = sig val map : ('map,'map) polymap -> 'map t -> 'map t val map_no_share : ('map1,'map2) polymap -> 'map1 t -> 'map2 t (** [map f m] and [map_no_share f m] replace all bindings [(k,v)] by [(k, f.f v)]. - Bindings are examined in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + Bindings are examined in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) type ('map1,'map2) polymapi = { f : 'a. 'a key -> ('a, 'map1) value -> ('a, 'map2) value; } [@@unboxed] val mapi : ('map,'map) polymapi -> 'map t -> 'map t val mapi_no_share : ('map1,'map2) polymapi -> 'map1 t -> 'map2 t (** [mapi f m] and [mapi_no_share f m] replace all bindings [(k,v)] by [(k, f.f k v)]. - Bindings are examined in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + Bindings are examined in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) type ('map1,'map2) polyfilter_map = { f : 'a. 'a key -> ('a, 'map1) value -> ('a, 'map2) value option; } [@@unboxed] @@ -447,7 +322,7 @@ module type BASE_MAP = sig (** [filter_map m f] and [filter_map_no_share m f] remove the bindings [(k,v)] for which [f.f k v] is [None], and replaces the bindings [(k,v)] for which [f.f k v] is [Some v'] by [(k,v')]. - Bindings are examined in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + Bindings are examined in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) type 'map polypretty = { f: 'a. Format.formatter -> 'a key -> ('a, 'map) value -> unit } [@@unboxed] val pretty : @@ -456,9 +331,9 @@ module type BASE_MAP = sig (** Pretty-prints a map using the given formatter. [pp_sep] is called once between each binding, it defaults to {{: https://v2.ocaml.org/api/Format.html#VALpp_print_cut}[Format.pp_print_cut]}. - Bindings are printed in the {{!unsigned_lt}unsigned order} of {!KEY.to_int} *) + Bindings are printed in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int} *) - (** {3 Functions on pairs of maps} *) + (** {1 Functions on pairs of maps} *) type ('map1,'map2) polysame_domain_for_all2 = { f : 'a. 'a key -> ('a, 'map1) value -> ('a, 'map2) value -> bool; } [@@unboxed] @@ -470,7 +345,7 @@ module type BASE_MAP = sig - for all bindings [(k, v1)] in [m1] and [(k, v2)] in [m2], [f.f k v1 v2] holds {b Assumes} [f.f] is reflexive, i.e. [f.f k v v = true] to skip calls to equal subtrees. - Calls [f.f] in ascending {{!unsigned_lt}unsigned order} of {!KEY.to_int}. + Calls [f.f] in ascending {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. Exits early if the domains mismatch or if [f.f] returns false. It is useful to implement equality on maps: @@ -486,7 +361,7 @@ module type BASE_MAP = sig ('map1,'map2) polysame_domain_for_all2 -> 'map1 t -> 'map2 t -> bool (** [nonreflexive_same_domain_for_all2 f m1 m2] is the same as {!reflexive_same_domain_for_all2}, but doesn't assume [f.f] is reflexive. - It thus calls [f.f] on every binding, in ascending {{!unsigned_lt}unsigned order} of {!KEY.to_int}. + It thus calls [f.f] on every binding, in ascending {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. Exits early if the domains mismatch or if [f.f] returns false. *) val reflexive_subset_domain_for_all2 : @@ -496,7 +371,7 @@ module type BASE_MAP = sig - for all bindings [(k, v1)] in [m1] and [(k, v2)] in [m2], [f.f k v1 v2] holds {b Assumes} [f.f] is reflexive, i.e. [f.f k v v = true] to skip calls to equal subtrees. - Calls [f.f] in ascending {{!unsigned_lt}unsigned order} of {!KEY.to_int}. + Calls [f.f] in ascending {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. Exits early if the domains mismatch. *) type ('map1, 'map2, 'map3) polyunion = { @@ -507,7 +382,7 @@ module type BASE_MAP = sig the values of keys mapped in both maps. {b Assumes} [f.f] idempotent (i.e. [f key value value == value]) - [f.f] is called in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. + [f.f] is called in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. [f.f] is never called on physically equal values. Preserves physical equality as much as possible. Complexity is O(log(n)*Delta) where Delta is the number of @@ -522,7 +397,7 @@ module type BASE_MAP = sig the values a key is mapped in both maps. {b Assumes} [f.f] idempotent (i.e. [f key value value == value]) - [f.f] is called in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. + [f.f] is called in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. [f.f] is never called on physically equal values. Preserves physical equality as much as possible. Complexity is O(log(n)*Delta) where Delta is the number of @@ -549,13 +424,13 @@ module type BASE_MAP = sig val disjoint : 'a t -> 'a t -> bool (** [disjoint m1 m2] is [true] iff [m1] and [m2] have disjoint domains *) - (** {3 Conversion functions} *) + (** {1 Conversion functions} *) val to_seq : 'a t -> 'a key_value_pair Seq.t - (** [to_seq m] iterates the whole map, in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int} *) + (** [to_seq m] iterates the whole map, in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int} *) val to_rev_seq : 'a t -> 'a key_value_pair Seq.t - (** [to_rev_seq m] iterates the whole map, in decreasing {{!unsigned_lt}unsigned order} of {!KEY.to_int} *) + (** [to_rev_seq m] iterates the whole map, in decreasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int} *) val add_seq : 'a key_value_pair Seq.t -> 'a t -> 'a t (** [add_seq s m] adds all bindings of the sequence [s] to [m] in order. *) @@ -569,17 +444,17 @@ module type BASE_MAP = sig If a key is bound multiple times in [l], the latest binding is kept *) val to_list : 'a t -> 'a key_value_pair list - (** [to_list m] returns the bindings of [m] as a list, in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int} *) + (** [to_list m] returns the bindings of [m] as a list, in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int} *) end (** {2 Heterogeneous maps and sets} *) (** Maps and sets with generic keys ['a key] and values [('a,'b) value] *) module type HETEROGENEOUS_MAP = sig - (** This is the same as {!MAP}, but with simple type [key] being replaced by type + (** This is the same as {!Sigs.MAP}, but with simple type [key] being replaced by type constructor ['a key] and ['b value] being replaced by [('a,'b) value]. - The main changes from {!MAP} are: + The main changes from {!Sigs.MAP} are: - The type of {!key} is replaced by a type constructor ['k key]. Because of that, most higher-order arguments require higher-ranking polymorphism, and we provide records that allows to @@ -593,17 +468,17 @@ module type HETEROGENEOUS_MAP = sig include BASE_MAP (** @closed *) (** Operation with maps/set of different types. - [Map2] must use the same {!KEY.to_int} function. *) + [Map2] must use the same {!Sigs.KEY.to_int} function. *) module WithForeign(Map2:BASE_MAP with type 'a key = 'a key):sig type ('map1,'map2) polyinter_foreign = { f: 'a. 'a key -> ('a,'map1) value -> ('a,'map2) Map2.value -> ('a,'map1) value } [@@unboxed] val nonidempotent_inter : ('a,'b) polyinter_foreign -> 'a t -> 'b Map2.t -> 'a t - (** Like {!BASE_MAP.idempotent_inter}. Tries to preserve physical equality on the first argument when possible. *) + (** Like {!Sigs.BASE_MAP.idempotent_inter}. Tries to preserve physical equality on the first argument when possible. *) type ('map2,'map1) polyfilter_map_foreign = { f : 'a. 'a key -> ('a, 'map2) Map2.value -> ('a, 'map1) value option; } [@@unboxed] val filter_map_no_share : ('map2,'map1) polyfilter_map_foreign -> 'map2 Map2.t -> 'map1 t - (** Like {!BASE_MAP.filter_map_no_share}, but allows to transform a foreigh map into the current one. *) + (** Like {!Sigs.BASE_MAP.filter_map_no_share}, but allows to transform a foreigh map into the current one. *) type ('map1,'map2) polyupdate_multiple = { f: 'a. 'a key -> ('a,'map1) value option -> ('a,'map2) Map2.value -> ('a,'map1) value option } [@@unboxed] val update_multiple_from_foreign : 'b Map2.t -> ('a,'b) polyupdate_multiple -> 'a t -> 'a t @@ -614,7 +489,7 @@ module type HETEROGENEOUS_MAP = sig i.e. [update_multiple_from_foreign m_from f m_to] calls [f.f] on every key of [m_from], says if the corresponding value also exists in [m_to], and adds or remove the element in [m_to] depending on the value of [f.f]. - [f.f] is called in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. + [f.f] is called in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. O(size(m_from) + size(m_to)) complexity. *) type ('map1,'map2) polyupdate_multiple_inter = { f: 'a. 'a key -> ('a,'map1) value -> ('a,'map2) Map2.value -> ('a,'map1) value option } [@@unboxed] @@ -628,9 +503,9 @@ end module type HETEROGENEOUS_SET = sig (** A set containing different keys, very similar to - {!SET}, but with simple type [elt] being replaced by type + {!Sigs.SET}, but with simple type [elt] being replaced by type constructor ['a elt]. *) - (** The main changes from {!SET} are: + (** The main changes from {!Sigs.SET} are: - The type of {!elt} is replaced by a type constructor ['k elt]. Because of that, most higher-order arguments require higher-ranking polymorphism, and we provide records that allows to @@ -733,22 +608,22 @@ module type HETEROGENEOUS_SET = sig type polyiter = { f: 'a. 'a elt -> unit; } [@@unboxed] val iter: polyiter -> t -> unit - (** [iter f set] calls [f.f] on all elements of [set], in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + (** [iter f set] calls [f.f] on all elements of [set], in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) type polypredicate = { f: 'a. 'a elt -> bool; } [@@unboxed] val filter: polypredicate -> t -> t (** [filter f set] is the subset of [set] that only contains the elements that - satisfy [f.f]. [f.f] is called in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + satisfy [f.f]. [f.f] is called in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) val for_all: polypredicate -> t -> bool (** [for_all f set] is [true] if [f.f] is [true] on all elements of [set]. - Short-circuits on first [false]. [f.f] is called in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + Short-circuits on first [false]. [f.f] is called in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) type 'acc polyfold = { f: 'a. 'a elt -> 'acc -> 'acc } [@@unboxed] val fold: 'acc polyfold -> t -> 'acc -> 'acc (** [fold f set acc] returns [f.f elt_n (... (f.f elt_1 acc) ...)], where [elt_1, ..., elt_n] are the elements of [set], in increasing {{!unsigned_lt}unsigned order} of - {!KEY.to_int} *) + {!Sigs.KEY.to_int} *) type polypretty = { f: 'a. Format.formatter -> 'a elt -> unit; } [@@unboxed] val pretty : @@ -759,10 +634,10 @@ module type HETEROGENEOUS_SET = sig (** {3 Conversion functions} *) val to_seq : t -> any_elt Seq.t - (** [to_seq st] iterates the whole set, in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int} *) + (** [to_seq st] iterates the whole set, in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int} *) val to_rev_seq : t -> any_elt Seq.t - (** [to_rev_seq st] iterates the whole set, in decreasing {{!unsigned_lt}unsigned order} of {!KEY.to_int} *) + (** [to_rev_seq st] iterates the whole set, in decreasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int} *) val add_seq : any_elt Seq.t -> t -> t (** [add_seq s st] adds all elements of the sequence [s] to [st] in order. *) @@ -774,7 +649,7 @@ module type HETEROGENEOUS_SET = sig (** [of_list l] creates a new set from the elements of [l]. *) val to_list : t -> any_elt list - (** [to_list s] returns the elements of [s] as a list, in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int} *) + (** [to_list s] returns the elements of [s] as a list, in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int} *) end @@ -794,7 +669,7 @@ module type SET = sig (** Underlying basemap, for cross map/set operations *) module BaseMap : HETEROGENEOUS_MAP with type _ key = elt - and type (_,_) value = unit + and type (_,_) value = unit type t = unit BaseMap.t (** The set type *) @@ -829,46 +704,46 @@ module type SET = sig Returns a value physically equal to [set] if [elt] is not present. *) val unsigned_min_elt: t -> elt - (** The minimal element (according to the {{!unsigned_lt}unsigned order} on {!KEY.to_int}) if non empty. + (** The minimal element (according to the {{!unsigned_lt}unsigned order} on {!Sigs.KEY.to_int}) if non empty. @raises Not_found *) val unsigned_max_elt: t -> elt - (** The maximal element (according to the {{!unsigned_lt}unsigned order} on {!KEY.to_int}) if non empty. + (** The maximal element (according to the {{!unsigned_lt}unsigned order} on {!Sigs.KEY.to_int}) if non empty. @raises Not_found *) val pop_unsigned_minimum: t -> (elt * t) option (** [pop_unsigned_minimum s] is [Some (elt, s')] where [elt = unsigned_min_elt s] and [s' = remove elt s] if [s] is non empty. - Uses the {{!unsigned_lt}unsigned order} on {!KEY.to_int}. *) + Uses the {{!unsigned_lt}unsigned order} on {!Sigs.KEY.to_int}. *) val pop_unsigned_maximum: t -> (elt * t) option (** [pop_unsigned_maximum s] is [Some (elt, s')] where [elt = unsigned_max_elt s] and [s' = remove elt s] if [s] is non empty. - Uses the {{!unsigned_lt}unsigned order} on {!KEY.to_int}. *) + Uses the {{!unsigned_lt}unsigned order} on {!Sigs.KEY.to_int}. *) (** {3 Iterators} *) val iter: (elt -> unit) -> t -> unit - (** [iter f set] calls [f] on all elements of [set], in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + (** [iter f set] calls [f] on all elements of [set], in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) val filter: (elt -> bool) -> t -> t (** [filter f set] is the subset of [set] that only contains the elements that - satisfy [f]. [f] is called in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + satisfy [f]. [f] is called in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) val for_all: (elt -> bool) -> t -> bool (** [for_all f set] is [true] if [f] is [true] on all elements of [set]. - Short-circuits on first [false]. [f] is called in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + Short-circuits on first [false]. [f] is called in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) val fold: (elt -> 'acc -> 'acc) -> t -> 'acc -> 'acc (** [fold f set acc] returns [f elt_n (... (f elt_1 acc) ...)], where [elt_1, ..., elt_n] are the elements of [set], in increasing {{!unsigned_lt}unsigned order} of - {!KEY.to_int} *) + {!Sigs.KEY.to_int} *) val split: elt -> t -> t * bool * t (** [split elt set] returns [s_lt, present, s_gt] where [s_lt] contains all elements of [set] smaller than [elt], [s_gt] all those greater than [elt], and [present] is [true] if [elt] is in [set]. - Uses the {{!unsigned_lt}unsigned order} on {!KEY.to_int}.*) + Uses the {{!unsigned_lt}unsigned order} on {!Sigs.KEY.to_int}.*) val pretty : ?pp_sep:(Format.formatter -> unit -> unit) -> @@ -898,10 +773,10 @@ module type SET = sig (** {3 Conversion functions} *) val to_seq : t -> elt Seq.t - (** [to_seq st] iterates the whole set, in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int} *) + (** [to_seq st] iterates the whole set, in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int} *) val to_rev_seq : t -> elt Seq.t - (** [to_rev_seq st] iterates the whole set, in decreasing {{!unsigned_lt}unsigned order} of {!KEY.to_int} *) + (** [to_rev_seq st] iterates the whole set, in decreasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int} *) val add_seq : elt Seq.t -> t -> t (** [add_seq s st] adds all elements of the sequence [s] to [st] in order. *) @@ -913,7 +788,7 @@ module type SET = sig (** [of_list l] creates a new set from the elements of [l]. *) val to_list : t -> elt list - (** [to_list s] returns the elements of [s] as a list, in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int} *) + (** [to_list s] returns the elements of [s] as a list, in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int} *) end (** The typechecker struggles with forall quantification on values if they @@ -922,13 +797,14 @@ end Since the type is unboxed, it doesn't introduce any performance overhead. This is due to a bug in the typechecker, more info on - {{: https://discuss.ocaml.org/t/weird-behaviors-with-first-order-polymorphism/13783} the OCaml discourse post}. *) + {{: https://discuss.ocaml.org/t/weird-behaviors-with-first-order-polymorphism/13783} the OCaml discourse post} + and {{: https://github.com/ocaml/ocaml/issues/13292}the github issue}. *) type (_, 'b) snd = Snd of 'b [@@unboxed] (** The signature for maps with a single type for keys and values, a ['a map] binds [key] to ['a value]. - This is slightly more generic than {!MAP}, which just binds to ['a]. + This is slightly more generic than {!Sigs.MAP}, which just binds to ['a]. It is used for maps that need to restrict their value type, namely {!hash_consed}. *) module type MAP_WITH_VALUE = sig type key @@ -939,7 +815,7 @@ module type MAP_WITH_VALUE = sig type 'a value (** Type for values, this is a divergence from Stdlib's [Map], - but becomes equivalent to it when using {!MAP}, + but becomes equivalent to it when using {!Sigs.MAP}, which is just [MAP_WITH_VALUE with type 'a value = 'a]. On the other hand, it allows defining maps with fixed values, which is useful for hash-consing. @@ -996,12 +872,12 @@ module type MAP_WITH_VALUE = sig val pop_unsigned_minimum : 'a t -> (key * 'a value * 'a t) option (** [pop_unsigned_minimum m] returns [None] if [is_empty m], or [Some(key,value,m')] where [(key,value) = unsigned_min_binding m] and [m' = remove m key]. O(log(n)) complexity. - Uses the {{!unsigned_lt}unsigned order} on {!KEY.to_int}. *) + Uses the {{!unsigned_lt}unsigned order} on {!Sigs.KEY.to_int}. *) val pop_unsigned_maximum : 'a t -> (key * 'a value * 'a t) option (** [pop_unsigned_maximum m] returns [None] if [is_empty m], or [Some(key,value,m')] where [(key,value) = unsigned_max_binding m] and [m' = remove m key]. O(log(n)) complexity. - Uses the {{!unsigned_lt}unsigned order} on {!KEY.to_int}. *) + Uses the {{!unsigned_lt}unsigned order} on {!Sigs.KEY.to_int}. *) val insert : key -> ('a value option -> 'a value) -> 'a t -> 'a t (** [insert key f map] modifies or insert an element of the map; [f] @@ -1032,13 +908,13 @@ module type MAP_WITH_VALUE = sig - value associated to [key] (if present) - submap of [map] whose keys are bigger than [key] - Uses the {{!unsigned_lt}unsigned order} on {!KEY.to_int}. *) + Uses the {{!unsigned_lt}unsigned order} on {!Sigs.KEY.to_int}. *) val iter : (key -> 'a value -> unit) -> 'a t -> unit - (** Iterate on each [(key,value)] pair of the map, in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + (** Iterate on each [(key,value)] pair of the map, in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) val fold : (key -> 'a value -> 'acc -> 'acc) -> 'a t -> 'acc -> 'acc - (** Fold on each [(key,value)] pair of the map, in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + (** Fold on each [(key,value)] pair of the map, in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) val fold_on_nonequal_inter : (key -> 'a value -> 'a value -> 'acc -> 'acc) -> 'a t -> 'a t -> 'acc -> 'acc @@ -1046,7 +922,7 @@ module type MAP_WITH_VALUE = sig [f key_n value1_n value2n (... (f key_1 value1_1 value2_1 acc))] where [(key_1, value1_1, value2_1) ... (key_n, value1_n, value2_n)] are the bindings that exist in both maps ([m1 ∩ m2]) whose values are physically different. - Calls to [f] are performed in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + Calls to [f] are performed in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) val fold_on_nonequal_union: (key -> 'a value option -> 'a value option -> 'acc -> 'acc) -> 'a t -> 'a t -> 'acc -> 'acc @@ -1055,15 +931,15 @@ module type MAP_WITH_VALUE = sig [(key_1, value1_1, value2_1) ... (key_n, value1_n, value2_n)] are the bindings that exists in either map ([m1 ∪ m2]) whose values are physically different. - Calls to [f.f] are performed in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + Calls to [f.f] are performed in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) val filter : (key -> 'a value -> bool) -> 'a t -> 'a t (** Returns the submap containing only the key->value pairs satisfying the - given predicate. [f] is called in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + given predicate. [f] is called in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) val for_all : (key -> 'a value -> bool) -> 'a t -> bool (** Returns true if the predicate holds on all map bindings. Short-circuiting. - [f] is called in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) (** In the following, the *no_share function allows taking arguments of different types (but cannot share subtrees of the map), while @@ -1077,12 +953,12 @@ module type MAP_WITH_VALUE = sig value is physically the same (i.e. [f key value == value] for all the keys in the subtree) are guaranteed to be physically equal to the original subtree. O(n) complexity. - [f] is called in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) val map_no_share : ('a value -> 'b value) -> 'a t -> 'b t (** [map_no_share f m] returns a map where the [value] bound to each [key] is replaced by [f value]. O(n) complexity. - [f] is called in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) val mapi : (key -> 'a value -> 'a value) -> 'a t -> 'a t (** [mapi f m] returns a map where the [value] bound to each [key] is @@ -1090,12 +966,12 @@ module type MAP_WITH_VALUE = sig value is physically the same (i.e. [f key value == value] for all the keys in the subtree) are guaranteed to be physically equal to the original subtree. O(n) complexity. - [f] is called in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) val mapi_no_share : (key -> 'a value -> 'b value) -> 'a t -> 'b t (** [mapi_no_share f m] returns a map where the [value] bound to each [key] is replaced by [f key value]. O(n) complexity. - [f] is called in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) val filter_map : (key -> 'a value -> 'a value option) -> 'a t -> 'a t (** [filter_map m f] returns a map where the [value] bound to each @@ -1105,14 +981,14 @@ module type MAP_WITH_VALUE = sig (i.e. [f key value = Some v] with [value == v] for all the keys in the subtree) are guaranteed to be physically equal to the original subtree. O(n) complexity. - [f] is called in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) val filter_map_no_share : (key -> 'a value -> 'b value option) -> 'a t -> 'b t (** [filter_map m f] returns a map where the [value] bound to each [key] is removed (if [f key value] returns [None]), or is replaced by [v] ((if [f key value] returns [Some v]). O(n) complexity. - [f] is called in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int}. *) + [f] is called in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. *) (** {3 Operations on pairs of maps} *) @@ -1139,7 +1015,7 @@ module type MAP_WITH_VALUE = sig returns true for each mapping pair of keys. We assume that [f] is reflexive (i.e. [f key value value] returns [true]) to avoid visiting physically equal subtrees of [map1] and [map2]. The - complexity is O(log(n)*Delta) where Delta is the number of + complexity is O(log(n)+Delta) where Delta is the number of different keys between [map1] and [map2]. *) val nonreflexive_same_domain_for_all2 : (key -> 'a value -> 'b value -> bool) -> 'a t -> 'b t -> bool @@ -1150,10 +1026,10 @@ module type MAP_WITH_VALUE = sig val reflexive_subset_domain_for_all2 : (key -> 'a value -> 'a value -> bool) -> 'a t -> 'a t -> bool (** [reflexive_subset_domain_for_all2 f map1 map2] returns true if - all the keys of [map1] also are in [map2], and [f key (find map1 - key) (find map2 key)] returns [true] when both keys are present - in the map. We assume that [f] is reflexive (i.e. [f key value - value] returns true) to avoid visiting physically equal subtrees + all the keys of [map1] also are in [map2], and + [f key (find map1 key) (find map2 key)] returns [true] when both keys are present + in the map. We assume that [f] is reflexive (i.e. + [f key value value] returns true) to avoid visiting physically equal subtrees of [map1] and [map2]. The complexity is O(log(n)*Delta) where Delta is the number of different keys between [map1] and [map2]. *) @@ -1167,7 +1043,7 @@ module type MAP_WITH_VALUE = sig preserve physical equality of the subtreess in that case. The complexity is O(log(n)*Delta) where Delta is the number of different keys between [map1] and [map2]. - [f] is called in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int}. + [f] is called in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. [f] is never called on physically equal values. *) val idempotent_inter : (key -> 'a value -> 'a value -> 'a value) -> 'a t -> 'a t -> 'a t @@ -1179,7 +1055,7 @@ module type MAP_WITH_VALUE = sig preserve physical equality of the subtrees in that case. The complexity is O(log(n)*Delta) where Delta is the number of different keys between [map1] and [map2]. - [f] is called in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int}!. + [f] is called in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}!. [f] is never called on physically equal values. *) val nonidempotent_inter_no_share : (key -> 'a value -> 'b value -> 'c value) -> 'a t -> 'b t -> 'c t @@ -1189,7 +1065,7 @@ module type MAP_WITH_VALUE = sig need to be idempotent, which imply that we have to visit physically equal subtrees of [map1] and [map2]. The complexity is O(log(n)*min(|map1|,|map2|)). - [f] is called in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int}. + [f] is called in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. [f] is called on every shared binding. *) val idempotent_inter_filter : (key -> 'a value -> 'a value -> 'a value option) -> 'a t -> 'a t -> 'a t @@ -1209,7 +1085,7 @@ module type MAP_WITH_VALUE = sig (** [disjoint a b] is [true] if and only if [a] and [b] have disjoint domains. *) (** Combination with other kinds of maps. - [Map2] must use the same {!KEY.to_int} function. *) + [Map2] must use the same {!Sigs.KEY.to_int} function. *) module WithForeign(Map2 : BASE_MAP with type _ key = key):sig type ('b,'c) polyfilter_map_foreign = { f: 'a. key -> ('a,'b) Map2.value -> 'c value option } [@@unboxed] @@ -1231,7 +1107,7 @@ module type MAP_WITH_VALUE = sig i.e. [update_multiple_from_foreign m_from f m_to] calls [f.f] on every key of [m_from], says if the corresponding value also exists in [m_to], and adds or remove the element in [m_to] depending on the value of [f.f]. - [f.f] is called in the {{!unsigned_lt}unsigned order} of {!KEY.to_int}. + [f.f] is called in the {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int}. O(size(m_from) + size(m_to)) complexity. *) @@ -1253,10 +1129,10 @@ module type MAP_WITH_VALUE = sig (** {3 Conversion functions} *) val to_seq : 'a t -> (key * 'a value) Seq.t - (** [to_seq m] iterates the whole map, in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int} *) + (** [to_seq m] iterates the whole map, in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int} *) val to_rev_seq : 'a t -> (key * 'a value) Seq.t - (** [to_rev_seq m] iterates the whole map, in decreasing {{!unsigned_lt}unsigned order} of {!KEY.to_int} *) + (** [to_rev_seq m] iterates the whole map, in decreasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int} *) val add_seq : (key * 'a value) Seq.t -> 'a t -> 'a t (** [add_seq s m] adds all bindings of the sequence [s] to [m] in order. *) @@ -1271,7 +1147,7 @@ module type MAP_WITH_VALUE = sig val to_list : 'a t -> (key * 'a value) list (** [to_list m] returns the bindings of [m] as a list, - in increasing {{!unsigned_lt}unsigned order} of {!KEY.to_int} *) + in increasing {{!unsigned_lt}unsigned order} of {!Sigs.KEY.to_int} *) end (** The signature for maps with a single type for keys and values, @@ -1279,8 +1155,42 @@ end Most of this interface should be shared with {{: https://ocaml.org/api/Map.S.html}[Stdlib.Map.S]}. *) module type MAP = MAP_WITH_VALUE with type 'a value = 'a +(** Operations added/changed in {{!hash_consed}hash-consed} maps and sets. *) +module type HASH_CONSED_OPERATIONS = sig + type 'a t + + (** {1 Hash-consing specific operations} *) + + val to_int : 'a t -> int + (** Returns the {{!hash_consed}hash-consed} id of the map. + Unlike {!Sigs.NODE_WITH_ID.to_int}, hash-consing ensures that maps + which contain the same keys (compared by {!Sigs.KEY.to_int}) and values (compared + by {!Sigs.HASHED_VALUE.polyeq}) will always be physically equal + and have the same identifier. + + Note that when using physical equality as {!Sigs.HASHED_VALUE.polyeq}, some + maps of different types [a t] and [b t] may be given the same identifier. + See the end of the documentation of {!Sigs.HASHED_VALUE.polyeq} for details. *) + + val equal : 'a t -> 'a t -> bool + (** Constant time equality using the {{!hash_consed}hash-consed} nodes identifiers. + This is equivalent to physical equality. + Two nodes are equal if their trees contain the same bindings, + where keys are compared by {!Sigs.KEY.to_int} and values are compared by + {!Sigs.HASHED_VALUE.polyeq}. *) + + val compare : 'a t -> 'a t -> int + (** Constant time comparison using the {{!hash_consed}hash-consed} node identifiers. + This order is fully arbitrary, but it is total and can be used to sort nodes. + It is based on node ids which depend on the order in which the nodes where created + (older nodes having smaller ids). + + One useful property of this order is that + child nodes will always have a smaller identifier than their parents. *) +end + (** {1 Keys} *) -(** Keys are the functor arguments used to build the maps. *) +(** Functor argument used to specify the key type when building the maps. *) (** The signature of homogeneous keys (non-generic, unparameterized keys). *) module type KEY = sig @@ -1304,7 +1214,7 @@ module type KEY = sig Note that since Patricia Trees use {{!unsigned_lt}unsigned order}, negative keys are seen as bigger than positive keys. Be wary of this when using negative keys combined with functions like - {{!BASE_MAP.unsigned_max_binding}[unsigned_max_binding]} and {{!BASE_MAP.pop_unsigned_maximum}[pop_unsigned_maximum]}. *) + {{!Sigs.BASE_MAP.unsigned_max_binding}[unsigned_max_binding]} and {{!Sigs.BASE_MAP.pop_unsigned_maximum}[pop_unsigned_maximum]}. *) val to_int: t -> int end @@ -1337,7 +1247,7 @@ module type HETEROGENEOUS_KEY = sig Note that since Patricia Trees use {{!unsigned_lt}unsigned order}, negative keys are seen as bigger than positive keys. Be wary of this when using negative keys combined with functions like - {{!BASE_MAP.unsigned_max_binding}[unsigned_max_binding]} and {{!BASE_MAP.pop_unsigned_maximum}[pop_unsigned_maximum]}. *) + {{!Sigs.BASE_MAP.unsigned_max_binding}[unsigned_max_binding]} and {{!Sigs.BASE_MAP.pop_unsigned_maximum}[pop_unsigned_maximum]}. *) val polyeq : 'a t -> 'b t -> ('a, 'b) cmp (** Polymorphic equality function used to compare our keys. @@ -1346,6 +1256,7 @@ module type HETEROGENEOUS_KEY = sig end (** {1 Values} *) +(** Functor argument used to specify the value type when building the maps. *) (** Module type used for specifying custom homogeneous value types in {!MakeCustomMap}. For most purposes, use the provided {!Value} implementation. @@ -1353,41 +1264,26 @@ end This is the case in {!MakeMap}. However, for maps like {!hash_consed}, it can be useful to restrict the type of values in order to implement [hash] and [polyeq] functions on values. - See the {!HASHED_VALUE} module type for more details. + See the {!Sigs.HASHED_VALUE} module type for more details. @since 0.10.0 *) module type VALUE = sig - type 'a t - (** The type of values. A ['map map] maps [key] to ['map value]. - Can be mutable if desired, unless it is being used in {!hash_consed}. *) + type 'a t + (** The type of values. A ['map map] maps [key] to ['map value]. + Can be mutable if desired, unless it is being used in {!hash_consed}. *) end -(** Default implementation of {!VALUE}, used in {!MakeMap}. - @since 0.10.0 *) -module Value : VALUE with type 'a t = 'a - (** The module type of values, which can be heterogeneous. This can be used to specify how the type of the value depends on that of the key. If the value doesn't depend on the key type, you can use the provided default implementations {!HomogeneousValue} and {!WrappedHomogeneousValue}. *) module type HETEROGENEOUS_VALUE = sig - type ('key, 'map) t - (** The type of values. A ['map map] maps ['key key] to [('key, 'map) value]. - Can be mutable if desired, unless it is being used in {!hash_consed}. *) + type ('key, 'map) t + (** The type of values. A ['map map] maps ['key key] to [('key, 'map) value]. + Can be mutable if desired, unless it is being used in {!hash_consed}. *) end -(** Default implementation of {!HETEROGENEOUS_VALUE}, to use when the type of the - value in a heterogeneous map does not depend on the type of the key, only on - the type of the map. *) -module HomogeneousValue : HETEROGENEOUS_VALUE with type ('a,'map) t = 'map - -(** Same as {!HomogeneousValue}, but uses a wrapper (unboxed) type instead of direct - equality. This avoids a problem in the typechecker with overly eager simplification of aliases. - More info on - {{: https://discuss.ocaml.org/t/weird-behaviors-with-first-order-polymorphism/13783} the OCaml discourse post}. *) -module WrappedHomogeneousValue : HETEROGENEOUS_VALUE with type ('a,'map) t = ('a,'map) snd - -(** {!VALUE} parameter for {!hash_consed}, as hash-consing requires hashing and comparing values. +(** {!Sigs.VALUE} parameter for {!hash_consed}, as hash-consing requires hashing and comparing values. This is the parameter type for homogeneous maps, used in {!MakeHashconsedMap}. A default implementation is provided in {!HashedValue}, using @@ -1399,7 +1295,7 @@ module type HASHED_VALUE = sig type 'a t (** The type of values for a hash-consed maps. - Unlike {!VALUE.t}, {b hash-consed values should be immutable}. + Unlike {!Sigs.VALUE.t}, {b hash-consed values should be immutable}. Or, if they do mutate, they must not change their {!hash} value, and still be equal to the same values via {!polyeq} *) @@ -1495,7 +1391,7 @@ end (** In order to build {!hash_consed}, we need to be able to hash and compare values. - This is the heterogeneous version of {!HASHED_VALUE}, used to specify a value + This is the heterogeneous version of {!Sigs.HASHED_VALUE}, used to specify a value for heterogeneous maps (in {!MakeHashconsedHeterogeneousMap}). A default implementation is provided in {!HeterogeneousHashedValue}, using {{: https://ocaml.org/api/Hashtbl.html#VALhash}[Hashtbl.hash]} @@ -1503,423 +1399,59 @@ end @since 0.10.0 *) module type HETEROGENEOUS_HASHED_VALUE = sig - type ('key, 'map) t - (** The type of values for a hash-consed maps. - - Unlike {!HETEROGENEOUS_VALUE.t}, {b hash-consed values should be immutable}. - Or, if they do mutate, they must not change their {!hash} value, and - still be equal to the same values via {!polyeq} *) - - val hash : ('key, 'map) t -> int - (** [hash v] should return an integer hash for the value [v]. - It is used for {{!hash_consed}hash-consing}. - - Hashing should be fast, avoid mapping too many values to the same integer - and compatible with {!polyeq} (equal values must have the same hash: - [polyeq v1 v2 = true ==> hash v1 = hash v2]). *) - - val polyeq : ('key, 'map_a) t -> ('key, 'map_b) t -> bool - (** Polymorphic equality on values. - - {b WARNING: if [polyeq a b] is true, then casting [b] to the type of [a] - (and [a] to the type of [b]) must be type-safe.} Eg. if [a : (k, t1) t] and [b : (k, t2) t] - yield [polyeq a b = true], then [let a' : (k,t2) t = Obj.magic a] and - [let b' : (k,t1) t = Obj.magic b] must be safe. - - Examples of safe implementations include: - {ul - {li Having a type [('key, 'map) t] which doesn't depend on ['map] (i can depend on ['key]), in which case casting - form [('key, 'a) t] to [('key, 'b) t] is always safe: - {[ - type ('k, _) t = 'k list - let cast : type a b. ('k, a) t -> ('k, b) t = fun x -> x - let polyeq : type a b. ('k, a) t -> ('k, b) t -> bool = fun x y -> x = y - ]}} - {li Using a GADT type and examining its constructors to only return [true] - when the constructors are equal: - {[ - type (_, _) t = - | T_Int : int -> (unit, int) t - | T_Bool : bool -> (unit, bool) t - let polyeq : type k a b. (k, a) t -> (k, b) t -> bool = fun x y -> - match x, y with - | T_Int i, T_Int j -> i = j (* Here type a = b = int, we can return true *) - | T_Bool i, T_Bool j -> i && j (* same here, but with a = b = bool *) - | _ -> false (* never return true on heterogeneous cases. *) - ]}} - {li Using physical equality: - {[ - let polyeq a b = a == Obj.magic b - ]} - While this contains an [Obj.magic], it is still type safe (OCaml just compares - the immediate values) and we can safely cast values from one type to the - other if they satisfy this (since they are already physically equal). - - This is the implementation used in {!HeterogeneousHashedValue}. Note however that - using this function can lead to {b identifiers no longer being unique across - types}. See {!HASHED_VALUE.polyeq} for more information on this.}} *) -end - -module HashedValue : HASHED_VALUE with type 'a t = 'a -(** Generic implementation of {!HASHED_VALUE}. - Uses {{: https://ocaml.org/api/Hashtbl.html#VALhash}[Hashtbl.hash]} for hashing - and physical equality for equality. - Note that this may lead to maps of different types having the same identifier - ({!MakeHashconsedMap.to_int}), see the documentation of {!HASHED_VALUE.polyeq} - for details on this. *) - -module HeterogeneousHashedValue : HETEROGENEOUS_HASHED_VALUE with type ('k, 'm) t = 'm -(** Generic implementation of {!HETEROGENEOUS_HASHED_VALUE}. - Uses {{: https://ocaml.org/api/Hashtbl.html#VALhash}[Hashtbl.hash]} for hashing - and physical equality for equality. - Note that this may lead to maps of different types having the same identifier - ({!MakeHashconsedHeterogeneousMap.to_int}), see the documentation of - {!HASHED_VALUE.polyeq} for details on this. *) - - -(** {1 Functors} *) -(** This section presents the functors which can be used to build patricia tree - maps and sets. *) - -(** {2 Homogeneous maps and sets} *) -(** These are homogeneous maps and set, their keys/elements are a single - non-generic type, just like the standard library's [Map] and [Set] modules. *) - -module MakeMap(Key: KEY) : MAP with type key = Key.t -module MakeSet(Key: KEY) : SET with type elt = Key.t - -(** {2 Heterogeneous maps and sets} *) -(** Heterogeneous maps are ['map map], which store bindings of ['key key] - to [('key, 'map) value], where ['key key] is a GADT, as we must be able - to compare keys of different types together. - - Similarly, heterogeneous sets store sets of ['key key]. *) - -module MakeHeterogeneousSet(Key: HETEROGENEOUS_KEY) : HETEROGENEOUS_SET - with type 'a elt = 'a Key.t -module MakeHeterogeneousMap(Key: HETEROGENEOUS_KEY)(Value: HETEROGENEOUS_VALUE) : HETEROGENEOUS_MAP - with type 'a key = 'a Key.t - and type ('k,'m) value = ('k,'m) Value.t - - -(** {2 Maps and sets with custom nodes} *) -(** We can also customize the representation and creation of nodes, to - gain space or time. - - Possibitities include having weak key and/or values, hash-consing, - giving unique number to nodes or keeping them in sync with the - disk, lazy evaluation and/or caching, adding size information for - constant time [cardinal] functions, etc. - - See {!node_impl} for the provided implementations of {!NODE}, or create your own. *) - -(** Create a homogeneous map with a custom {!NODE}. Also allows - customizing the map values *) -module MakeCustomMap - (Key: KEY) - (Value: VALUE) - (Node: NODE with type 'a key = Key.t and type ('key,'map) value = ('key, 'map Value.t) snd) - : MAP_WITH_VALUE - with type key = Key.t - and type 'm value = 'm Value.t - and type 'm t = 'm Node.t - - -(** Create a homogeneous set with a custom {!NODE}. - @since v0.10.0 *) -module MakeCustomSet - (Key: KEY) - (Node: NODE with type 'a key = Key.t and type ('key,'map) value = unit) - : SET - with type elt = Key.t - and type 'a BaseMap.t = 'a Node.t - -(** Create an heterogeneous map with a custom {!NODE}. *) -module MakeCustomHeterogeneousMap - (Key: HETEROGENEOUS_KEY) - (Value: HETEROGENEOUS_VALUE) - (Node: NODE with type 'a key = 'a Key.t and type ('key,'map) value = ('key,'map) Value.t) - : HETEROGENEOUS_MAP - with type 'a key = 'a Key.t - and type ('k,'m) value = ('k,'m) Value.t - and type 'm t = 'm Node.t - -(** Create an heterogeneous set with a custom {!NODE}. - @since v0.10.0 *) -module MakeCustomHeterogeneousSet - (Key: HETEROGENEOUS_KEY) - (NODE: NODE with type 'a key = 'a Key.t and type ('key,'map) value = unit) - : HETEROGENEOUS_SET - with type 'a elt = 'a Key.t - and type 'a BaseMap.t = 'a NODE.t - -(** {2:hash_consed Hash-consed maps and sets} *) -(** Hash-consed maps and sets uniquely number each of their nodes. - Upon creation, they check whether a similar node has been created before, - if so they return it, else they return a new node with a new number. - With this unique numbering: - - [equal] and [compare] become constant time operations; - - two maps with the same bindings (where keys are compared by {!KEY.to_int} and - values by {!HASHED_VALUE.polyeq}) will always be physically equal; - - functions that benefit from sharing, like {!BASE_MAP.idempotent_union} and - {!BASE_MAP.idempotent_inter} will see improved performance; - - constructors are slightly slower, as they now require a hash-table lookup; - - memory usage is increased: nodes store their tags inside themselves, and - a global hash-table of all built nodes must be maintained; - - hash-consed maps assume their values are immutable; - - {b WARNING:} when using physical equality as {!HASHED_VALUE.polyeq}, some - {b maps of different types may be given the same identifier}. See the end of - the documentation of {!HASHED_VALUE.polyeq} for details. - Note that this is the case in the default implementations {!HashedValue} - and {!HeterogeneousHashedValue}. - - All hash-consing functors are {b generative}, since each functor call will - create a new hash-table to store the created nodes. Calling a functor - twice with same arguments will lead to two numbering systems for identifiers, - and thus the types should not be considered compatible. *) - -(** Hash-consed version of {!MAP}. See {!hash_consed} for the differences between - hash-consed and non hash-consed maps. - - This is a generative functor, as calling it creates a new hash-table to store - the created nodes, and a reference to store the next unallocated identifier. - Maps/sets from different hash-consing functors (even if these functors have - the same arguments) will have different (incompatible) numbering systems and - be stored in different hash-tables (thus they will never be physically equal). - - @since v0.10.0 *) -module MakeHashconsedMap(Key: KEY)(Value: HASHED_VALUE)() : sig - include MAP_WITH_VALUE with type key = Key.t and type 'a value = 'a Value.t (** @closed *) - - val to_int : 'a t -> int - (** Returns the {{!hash_consed}hash-consed} id of the map. - Unlike {!NODE_WITH_ID.to_int}, hash-consing ensures that maps - which contain the same keys (compared by {!KEY.to_int}) and values (compared - by {!HASHED_VALUE.polyeq}) will always be physically equal - and have the same identifier. - - Note that when using physical equality as {!HASHED_VALUE.polyeq}, some - maps of different types [a t] and [b t] may be given the same identifier. - See the end of the documentation of {!HASHED_VALUE.polyeq} for details. *) - - val equal : 'a t -> 'a t -> bool - (** Constant time equality using the {{!hash_consed}hash-consed} nodes identifiers. - This is equivalent to physical equality. - Two nodes are equal if their trees contain the same bindings, - where keys are compared by {!KEY.to_int} and values are compared by - {!HASHED_VALUE.polyeq}. *) - - val compare : 'a t -> 'a t -> int - (** Constant time comparison using the {{!hash_consed}hash-consed} node identifiers. - This order is fully arbitrary, but it is total and can be used to sort nodes. - It is based on node ids which depend on the order in which the nodes where created - (older nodes having smaller ids). - - One useful property of this order is that - child nodes will always have a smaller identifier than their parents. *) -end - -(** Hash-consed version of {!SET}. See {!hash_consed} for the differences between - hash-consed and non hash-consed sets. - - This is a generative functor, as calling it creates a new hash-table to store - the created nodes, and a reference to store the next unallocated identifier. - Maps/sets from different hash-consing functors (even if these functors have - the same arguments) will have different (incompatible) numbering systems and - be stored in different hash-tables (thus they will never be physically equal). - - @since v0.10.0 *) -module MakeHashconsedSet(Key: KEY)() : sig - include SET with type elt = Key.t (** @closed *) - - val to_int : t -> int - (** Returns the {{!hash_consed}hash-consed} id of the map. - Unlike {!NODE_WITH_ID.to_int}, hash-consing ensures that maps - which contain the same keys (compared by {!KEY.to_int}) and values (compared - by {!HASHED_VALUE.polyeq}) will always be physically equal - and have the same identifier. - - Note that when using physical equality as {!HASHED_VALUE.polyeq}, some - maps of different types [a t] and [b t] may be given the same identifier. - See the end of the documentation of {!HASHED_VALUE.polyeq} for details. *) - - val equal : t -> t -> bool - (** Constant time equality using the {{!hash_consed}hash-consed} nodes identifiers. - This is equivalent to physical equality. - Two nodes are equal if their trees contain the same bindings, - where keys are compared by {!KEY.to_int} and values are compared by - {!HASHED_VALUE.polyeq}. *) - - val compare : t -> t -> int - (** Constant time comparison using the {{!hash_consed}hash-consed} node identifiers. - This order is fully arbitrary, but it is total and can be used to sort nodes. - It is based on node ids which depend on the order in which the nodes where created - (older nodes having smaller ids). - - One useful property of this order is that - child nodes will always have a smaller identifier than their parents. *) -end - -(** Hash-consed version of {!HETEROGENEOUS_SET}. See {!hash_consed} for the differences between - hash-consed and non hash-consed sets. - - This is a generative functor, as calling it creates a new hash-table to store - the created nodes, and a reference to store the next unallocated identifier. - Maps/sets from different hash-consing functors (even if these functors have - the same arguments) will have different (incompatible) numbering systems and - be stored in different hash-tables (thus they will never be physically equal). - - @since v0.10.0 *) -module MakeHashconsedHeterogeneousSet(Key: HETEROGENEOUS_KEY)() : sig - include HETEROGENEOUS_SET with type 'a elt = 'a Key.t (** @closed *) - - val to_int : t -> int - (** Returns the {{!hash_consed}hash-consed} id of the map. - Unlike {!NODE_WITH_ID.to_int}, hash-consing ensures that maps - which contain the same keys (compared by {!KEY.to_int}) and values (compared - by {!HASHED_VALUE.polyeq}) will always be physically equal - and have the same identifier. - - Note that when using physical equality as {!HASHED_VALUE.polyeq}, some - maps of different types [a t] and [b t] may be given the same identifier. - See the end of the documentation of {!HASHED_VALUE.polyeq} for details. *) - - val equal : t -> t -> bool - (** Constant time equality using the {{!hash_consed}hash-consed} nodes identifiers. - This is equivalent to physical equality. - Two nodes are equal if their trees contain the same bindings, - where keys are compared by {!KEY.to_int} and values are compared by - {!HASHED_VALUE.polyeq}. *) - - val compare : t -> t -> int - (** Constant time comparison using the {{!hash_consed}hash-consed} node identifiers. - This order is fully arbitrary, but it is total and can be used to sort nodes. - It is based on node ids which depend on the order in which the nodes where created - (older nodes having smaller ids). - - One useful property of this order is that - child nodes will always have a smaller identifier than their parents. *) -end - -(** Hash-consed version of {!HETEROGENEOUS_MAP}. See {!hash_consed} for the differences between - hash-consed and non hash-consed maps. - - This is a generative functor, as calling it creates a new hash-table to store - the created nodes, and a reference to store the next unallocated identifier. - Maps/sets from different hash-consing functors (even if these functors have - the same arguments) will have different (incompatible) numbering systems and - be stored in different hash-tables (thus they will never be physically equal). - - @since v0.10.0 *) -module MakeHashconsedHeterogeneousMap(Key: HETEROGENEOUS_KEY)(Value: HETEROGENEOUS_HASHED_VALUE)() : sig - include HETEROGENEOUS_MAP - with type 'a key = 'a Key.t - and type ('k,'m) value = ('k, 'm) Value.t (** @closed *) + type ('key, 'map) t + (** The type of values for a hash-consed maps. - val to_int : 'a t -> int - (** Returns the {{!hash_consed}hash-consed} id of the map. - Unlike {!NODE_WITH_ID.to_int}, hash-consing ensures that maps - which contain the same keys (compared by {!KEY.to_int}) and values (compared - by {!HASHED_VALUE.polyeq}) will always be physically equal - and have the same identifier. + Unlike {!Sigs.HETEROGENEOUS_VALUE.t}, {b hash-consed values should be immutable}. + Or, if they do mutate, they must not change their {!hash} value, and + still be equal to the same values via {!polyeq} *) - Note that when using physical equality as {!HASHED_VALUE.polyeq}, some - maps of different types [a t] and [b t] may be given the same identifier. - See the end of the documentation of {!HASHED_VALUE.polyeq} for details. *) + val hash : ('key, 'map) t -> int + (** [hash v] should return an integer hash for the value [v]. + It is used for {{!hash_consed}hash-consing}. - val equal : 'a t -> 'a t -> bool - (** Constant time equality using the {{!hash_consed}hash-consed} nodes identifiers. - This is equivalent to physical equality. - Two nodes are equal if their trees contain the same bindings, - where keys are compared by {!KEY.to_int} and values are compared by - {!HASHED_VALUE.polyeq}. *) + Hashing should be fast, avoid mapping too many values to the same integer + and compatible with {!polyeq} (equal values must have the same hash: + [polyeq v1 v2 = true ==> hash v1 = hash v2]). *) - val compare : 'a t -> 'a t -> int - (** Constant time comparison using the {{!hash_consed}hash-consed} node identifiers. - This order is fully arbitrary, but it is total and can be used to sort nodes. - It is based on node ids which depend on the order in which the nodes where created - (older nodes having smaller ids). + val polyeq : ('key, 'map_a) t -> ('key, 'map_b) t -> bool + (** Polymorphic equality on values. - One useful property of this order is that - child nodes will always have a smaller identifier than their parents. *) + {b WARNING: if [polyeq a b] is true, then casting [b] to the type of [a] + (and [a] to the type of [b]) must be type-safe.} Eg. if [a : (k, t1) t] and [b : (k, t2) t] + yield [polyeq a b = true], then [let a' : (k,t2) t = Obj.magic a] and + [let b' : (k,t1) t = Obj.magic b] must be safe. + + Examples of safe implementations include: + {ul + {li Having a type [('key, 'map) t] which doesn't depend on ['map] (i can depend on ['key]), in which case casting + form [('key, 'a) t] to [('key, 'b) t] is always safe: + {[ + type ('k, _) t = 'k list + let cast : type a b. ('k, a) t -> ('k, b) t = fun x -> x + let polyeq : type a b. ('k, a) t -> ('k, b) t -> bool = fun x y -> x = y + ]}} + {li Using a GADT type and examining its constructors to only return [true] + when the constructors are equal: + {[ + type (_, _) t = + | T_Int : int -> (unit, int) t + | T_Bool : bool -> (unit, bool) t + let polyeq : type k a b. (k, a) t -> (k, b) t -> bool = fun x y -> + match x, y with + | T_Int i, T_Int j -> i = j (* Here type a = b = int, we can return true *) + | T_Bool i, T_Bool j -> i && j (* same here, but with a = b = bool *) + | _ -> false (* never return true on heterogeneous cases. *) + ]}} + {li Using physical equality: + {[ + let polyeq a b = a == Obj.magic b + ]} + While this contains an [Obj.magic], it is still type safe (OCaml just compares + the immediate values) and we can safely cast values from one type to the + other if they satisfy this (since they are already physically equal). + + This is the implementation used in {!HeterogeneousHashedValue}. Note however that + using this function can lead to {b identifiers no longer being unique across + types}. See {!Sigs.HASHED_VALUE.polyeq} for more information on this.}} *) end - - -(** {1:node_impl Some implementations of NODE} *) -(** We provide a few different implementations of {!NODE}, they can be used with - the {!MakeCustomMap}, {!MakeCustomSet}, {!MakeCustomHeterogeneousMap} and - {!MakeCustomHeterogeneousSet} functors. *) - -(** {2 Basic nodes} *) - -(** This module is such that ['map t = 'map view]. - This is the node used in {!MakeHeterogeneousMap} and {!MakeMap}. *) -module SimpleNode(Key: sig type 'k t end)(Value: HETEROGENEOUS_VALUE) : NODE - with type 'a key = 'a Key.t - and type ('key,'map) value = ('key,'map) Value.t - -(** Here, nodes also contain a unique id, e.g. so that they can be - used as keys of maps or hash-tables. *) -module NodeWithId(Key: sig type 'k t end)(Value: HETEROGENEOUS_VALUE) : NODE_WITH_ID - with type 'a key = 'a Key.t - and type ('key,'map) value = ('key,'map) Value.t - - -(* Maybe: we can make variations around NodeWithId; e.g. a version - that does HashConsing, or a version that replicates the node to a - key-value store on disk, etc. *) - -(** An optimized representation for sets, i.e. maps to unit: we do not - store a reference to unit (note that you can further optimize when - you know the representation of the key). - This is the node used in {!MakeHeterogeneousSet} and {!MakeSet}. *) -module SetNode(Key: sig type 'k t end) : NODE - with type 'a key = 'a Key.t - and type ('key,'map) value = unit - -(** {2 Weak nodes} *) - -(** NODE used to implement weak key hashes (the key-binding pair is an - Ephemeron, the reference to the key is weak, and if the key is - garbage collected, the binding disappears from the map *) -module WeakNode(Key: sig type 'k t end)(Value: HETEROGENEOUS_VALUE) : NODE - with type 'a key = 'a Key.t - and type ('key,'map) value = ('key,'map) Value.t - -(** Both a {!WeakNode} and a {!SetNode}, useful to implement Weak sets. *) -module WeakSetNode(Key: sig type 'k t end) : NODE - with type 'a key = 'a Key.t - and type ('key,'map) value = unit - - -(** {2 Hashconsed nodes} *) - -(** Gives a unique number to each node like {!NodeWithId}, - but also performs hash-consing. So two maps with the same bindings will - always be physically equal. See {!hash_consed} for more details on this. - - This is a generative functor, as calling it creates a new hash-table to store - the created nodes, and a reference to store the next unallocated identifier. - Maps/sets from different hash-consing functors (even if these functors have - the same arguments) will have different (incompatible) numbering systems and - be stored in different hash-tables (thus they will never be physically equal). - - Using a single {!HashconsedNode} in multiple {!MakeCustomMap} functors will result in - all those maps being hash-consed together (stored in the same hash-table, - same numbering system). - - @since v0.10.0 *) -module HashconsedNode(Key: HETEROGENEOUS_KEY)(Value: HETEROGENEOUS_HASHED_VALUE)() : HASH_CONSED_NODE - with type 'a key = 'a Key.t - and type ('key,'map) value = ('key, 'map) Value.t - -(** Both a {!HashconsedNode} and a {!SetNode}. - @since v0.10.0 *) -module HashconsedSetNode(Key: HETEROGENEOUS_KEY)() : HASH_CONSED_NODE - with type 'a key = 'a Key.t - and type ('key,'map) value = unit - -(* TODO: Functor to make sets from maps. *) -(* TODO: A possibility of customizing the fixpoint in the recursive - calls, so that we can cache operations or make lazy some of the - operations. *) diff --git a/src/test/dune b/src/test/dune new file mode 100644 index 0000000..daec6b6 --- /dev/null +++ b/src/test/dune @@ -0,0 +1,29 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; This file is part of the Codex semantics library ;; +;; (patricia-tree sub-component). ;; +;; ;; +;; Copyright (C) 2024 ;; +;; CEA (Commissariat à l'énergie atomique et aux énergies ;; +;; alternatives) ;; +;; ;; +;; You can redistribute it and/or modify it under the terms of the GNU ;; +;; Lesser General Public License as published by the Free Software ;; +;; Foundation, version 2.1. ;; +;; ;; +;; It is distributed in the hope that it will be useful, ;; +;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; +;; GNU Lesser General Public License for more details. ;; +;; ;; +;; See the GNU Lesser General Public License version 2.1 ;; +;; for more details (enclosed in the file LICENSE). ;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(library + (name PatriciaTreeTest) + (inline_tests + (libraries qcheck-core)) + (preprocess + (pps ppx_inline_test)) + (libraries PatriciaTree qcheck-core) + (modules PatriciaTreeTest)) diff --git a/src/test/mdx_prelude.ml b/src/test/mdx_prelude.ml new file mode 100644 index 0000000..3acc4b2 --- /dev/null +++ b/src/test/mdx_prelude.ml @@ -0,0 +1,37 @@ +(**************************************************************************) +(* This file is part of the Codex semantics library *) +(* (patricia-tree sub-component). *) +(* *) +(* Copyright (C) 2024 *) +(* CEA (Commissariat à l'énergie atomique et aux énergies *) +(* alternatives) *) +(* *) +(* You can redistribute it and/or modify it under the terms of the GNU *) +(* Lesser General Public License as published by the Free Software *) +(* Foundation, version 2.1. *) +(* *) +(* It is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) +(* GNU Lesser General Public License for more details. *) +(* *) +(* See the GNU Lesser General Public License version 2.1 *) +(* for more details (enclosed in the file LICENSE). *) +(**************************************************************************) + +(** File run by MDX before running all others, sets up some stuff so the + comments don't have to *) + +open PatriciaTree +open Sigs + +type foo + +module IntKey = struct + type 'a t = int + let to_int x = x + let polyeq : type a b. a t -> b t -> (a, b) cmp = fun a b -> + if a == Obj.magic b then Obj.magic Eq else Diff +end +module MyValue = Int +module MyMap = MakeHeterogeneousMap(IntKey)(struct type ('a,'b) t = int end) diff --git a/patriciaTreeTest.ml b/src/test/patriciaTreeTest.ml similarity index 99% rename from patriciaTreeTest.ml rename to src/test/patriciaTreeTest.ml index 4fce639..7d4ccdc 100644 --- a/patriciaTreeTest.ml +++ b/src/test/patriciaTreeTest.ml @@ -20,6 +20,7 @@ (**************************************************************************) open PatriciaTree +open Sigs let check_highest_bit x res = (* Printf.printf "CHECK_HIGHEST_BIT: %x %x\n%!" x res; *)