From d98fed93e7f7f24406ff5fab02ca6e1273501c2c Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Wed, 27 Mar 2024 18:04:03 +0100 Subject: [PATCH 01/10] Detect subpaths for dirs --- src/lib/ast.ml | 58 +++++++++++++++++++++++++++---- src/lib/ast.mli | 3 +- src/lib/dotrenderer.ml | 9 +++-- src/test/expect/test_dot.expected | 28 +++++++-------- 4 files changed, 74 insertions(+), 24 deletions(-) diff --git a/src/lib/ast.ml b/src/lib/ast.ml index e6070a4d..37d841fe 100644 --- a/src/lib/ast.ml +++ b/src/lib/ast.ml @@ -1,10 +1,26 @@ +open Astring + module DataFile = struct - type t = { id : int; path : string } + type t = { id : int; path : string; subpath : string option } - let create id path = { id; path } + let create ?(subpath = None) id path = { id; path; subpath } let id d = d.id - let path d = d.path + + let path d = + match Filename.extension d.path = "" with + | false -> d.path + | true -> + let p = d.path in + let last_char = String.sub ~start:(String.length p) p in + if String.Sub.to_string last_char = "/" then p else p ^ "/" + + let path_nc d = d.path + let subpath d = d.subpath let compare a b = Int.compare a.id b.id + + let is_dir d = + (* a little hacky, we probably need to do something in the sharkdown here *) + Filename.extension d.path = "" end module Leaf = struct @@ -35,10 +51,38 @@ type t = CommandGroup.t list let to_list cg = cg +let find_matching_datafile datafile_map path = + match List.assoc_opt path datafile_map with + | Some p -> Some p + | None -> + (* No full match, but can we find a prefix dir *) + List.fold_left + (fun acc i -> + match acc with + | Some x -> Some x + | None -> ( + let ipath, df = i in + match DataFile.is_dir df with + | false -> None + | true -> ( + match String.is_prefix ~affix:(DataFile.path df) path with + | true -> + Some + (DataFile.create + ~subpath: + (Some + (String.Sub.to_string + (String.sub ~start:(String.length ipath) path))) + (DataFile.id df) (DataFile.path df)) + | false -> None))) + None datafile_map + let order_command_list metadata command_groups = let input_map = List.mapi - (fun i f -> (f, DataFile.create i f)) + (fun i f -> + let df = DataFile.create i f in + (f, df)) (Frontmatter.inputs metadata) in let counter = ref (List.length input_map) in @@ -56,14 +100,14 @@ let order_command_list metadata command_groups = (* TODO: dedup *) let inputs = List.filter_map - (fun path -> List.assoc_opt path datafile_map) + (fun p -> find_matching_datafile datafile_map p) file_args in let outputs = List.filter_map (fun path -> - match List.assoc_opt path datafile_map with + match find_matching_datafile datafile_map path with | None -> let id = !counter in counter := !counter + 1; @@ -79,7 +123,7 @@ let order_command_list metadata command_groups = (List.concat [ datafile_map; - List.map (fun o -> (DataFile.path o, o)) outputs; + List.map (fun o -> (DataFile.path_nc o, o)) outputs; ]) in (updated_map, x :: rest) diff --git a/src/lib/ast.mli b/src/lib/ast.mli index a3a1e9b8..59a9f87c 100644 --- a/src/lib/ast.mli +++ b/src/lib/ast.mli @@ -9,11 +9,12 @@ module DataFile : sig type t - val create : int -> string -> t + val create : ?subpath:string option -> int -> string -> t (** Creates a new datafile with an integer ID and a file path. *) val id : t -> int val path : t -> string + val subpath : t -> string option val compare : t -> t -> int end diff --git a/src/lib/dotrenderer.ml b/src/lib/dotrenderer.ml index c4819360..610397e9 100644 --- a/src/lib/dotrenderer.ml +++ b/src/lib/dotrenderer.ml @@ -16,8 +16,13 @@ let render_command_to_dot ppf command = let process_index = Ast.Leaf.id command in List.iter (fun datafile -> - Format.fprintf ppf "\tn%d->n%d[penwidth=\"2.0\"];\n" - (DataFile.id datafile) process_index) + let label = + match DataFile.subpath datafile with + | Some x -> Printf.sprintf ",label=\"%s\"" x + | None -> "" + in + Format.fprintf ppf "\tn%d->n%d[penwidth=\"2.0\"%s];\n" + (DataFile.id datafile) process_index label) (Ast.Leaf.inputs command); Format.fprintf ppf "\tn%d[shape=\"%s\",label=\"%s\"];\n" process_index "box" (Uri.pct_encode (Command.name (Ast.Leaf.command command))); diff --git a/src/test/expect/test_dot.expected b/src/test/expect/test_dot.expected index e1eb0ff3..8efc965e 100644 --- a/src/test/expect/test_dot.expected +++ b/src/test/expect/test_dot.expected @@ -1,32 +1,32 @@ digraph{ n0[shape="cylinder",label="/data/tmf/project_boundaries/123.geojson"]; - n1[shape="cylinder",label="/data/tmf/project_boundaries"]; - n2[shape="cylinder",label="/data/tmf/jrc/zips"]; - n3[shape="cylinder",label="/data/tmf/jrc/tif"]; - n5[shape="cylinder",label="/data/tmf/fcc-cpcs"]; + n1[shape="cylinder",label="/data/tmf/project_boundaries/"]; + n2[shape="cylinder",label="/data/tmf/jrc/zips/"]; + n3[shape="cylinder",label="/data/tmf/jrc/tif/"]; + n5[shape="cylinder",label="/data/tmf/fcc-cpcs/"]; n7[shape="cylinder",label="/data/tmf/ecoregions/ecoregions.geojson"]; - n9[shape="cylinder",label="/data/tmf/ecoregions"]; + n9[shape="cylinder",label="/data/tmf/ecoregions/"]; n11[shape="cylinder",label="/data/tmf/access/raw.tif"]; - n13[shape="cylinder",label="/data/tmf/access"]; + n13[shape="cylinder",label="/data/tmf/access/"]; n15[shape="cylinder",label="/data/tmf/osm_borders.geojson"]; n17[shape="cylinder",label="/data/tmf/123/buffer.geojson"]; n19[shape="cylinder",label="/data/tmf/123/leakage.geojson"]; n21[shape="cylinder",label="/data/tmf/123/luc.tif"]; - n23[shape="cylinder",label="/data/tmf/gedi"]; + n23[shape="cylinder",label="/data/tmf/gedi/"]; n26[shape="cylinder",label="/data/tmf/123/carbon-density.csv"]; n28[shape="cylinder",label="/data/tmf/123/country-list.json"]; n30[shape="cylinder",label="/data/tmf/123/matching-area.geojson"]; - n32[shape="cylinder",label="/data/tmf/srtm/zip"]; - n33[shape="cylinder",label="/data/tmf/srtm/tif"]; - n35[shape="cylinder",label="/data/tmf/slopes"]; - n37[shape="cylinder",label="/data/tmf/rescaled-elevation"]; - n39[shape="cylinder",label="/data/tmf/rescaled-slopes"]; + n32[shape="cylinder",label="/data/tmf/srtm/zip/"]; + n33[shape="cylinder",label="/data/tmf/srtm/tif/"]; + n35[shape="cylinder",label="/data/tmf/slopes/"]; + n37[shape="cylinder",label="/data/tmf/rescaled-elevation/"]; + n39[shape="cylinder",label="/data/tmf/rescaled-slopes/"]; n41[shape="cylinder",label="/data/tmf/123/countries.tif"]; n43[shape="cylinder",label="/data/tmf/123/k.parquet"]; - n45[shape="cylinder",label="/data/tmf/123/matches"]; + n45[shape="cylinder",label="/data/tmf/123/matches/"]; n47[shape="cylinder",label="/data/tmf/123/matches.tif"]; n49[shape="cylinder",label="/data/tmf/123/matches.parquet"]; - n51[shape="cylinder",label="/data/tmf/123/pairs"]; + n51[shape="cylinder",label="/data/tmf/123/pairs/"]; n53[shape="cylinder",label="/data/tmf/123/additionality.csv"]; subgraph "cluster_0" { label = "JRC" From cc0e8887e2791e490981a3e28672d50472fffb84 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Thu, 28 Mar 2024 16:01:03 +0000 Subject: [PATCH 02/10] Attempt to encode a map operation over a directory of files --- src/lib/ast.ml | 40 ++++++++++++++++++++++++++++++++++++---- src/lib/ast.mli | 6 +++++- src/lib/dotrenderer.ml | 7 ++++++- 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/src/lib/ast.ml b/src/lib/ast.ml index 37d841fe..b5fc4605 100644 --- a/src/lib/ast.ml +++ b/src/lib/ast.ml @@ -1,9 +1,28 @@ open Astring module DataFile = struct - type t = { id : int; path : string; subpath : string option } + type t = { id : int; path : string; subpath : string option; wildcard : bool } + + let create ?(subpath = None) id path = + let wildcard = + match subpath with + | None -> false + | Some p -> + let last_char = + String.Sub.to_string (String.sub ~start:(String.length p - 1) p) + in + last_char = "*" + in + (* Printf.printf "%s\n" last_char; *) + if wildcard then + { + id; + path = String.Sub.to_string (String.sub ~stop:(String.length path) path); + subpath = None; + wildcard = true; + } + else { id; path; subpath; wildcard = false } - let create ?(subpath = None) id path = { id; path; subpath } let id d = d.id let path d = @@ -16,6 +35,7 @@ module DataFile = struct let path_nc d = d.path let subpath d = d.subpath + let is_wildcard d = d.wildcard let compare a b = Int.compare a.id b.id let is_dir d = @@ -24,17 +44,23 @@ module DataFile = struct end module Leaf = struct + type style = Command | Map + type t = { id : int; command : Command.t; + style : style; inputs : DataFile.t list; outputs : DataFile.t list; } - let create id command inputs outputs = { id; command; inputs; outputs } + let create id command style inputs outputs = + { id; command; style; inputs; outputs } + let command o = o.command let inputs o = o.inputs let outputs o = o.outputs + let command_style o = o.style let id o = o.id end @@ -104,6 +130,12 @@ let order_command_list metadata command_groups = file_args in + let style : Leaf.style = + match List.exists DataFile.is_wildcard inputs with + | true -> Map + | false -> Command + in + let outputs = List.filter_map (fun path -> @@ -116,7 +148,7 @@ let order_command_list metadata command_groups = file_args in - let x = Leaf.create !counter hd inputs outputs in + let x = Leaf.create !counter hd style inputs outputs in counter := !counter + 1; let updated_map, rest = loop tl diff --git a/src/lib/ast.mli b/src/lib/ast.mli index 59a9f87c..45639a9c 100644 --- a/src/lib/ast.mli +++ b/src/lib/ast.mli @@ -15,20 +15,24 @@ module DataFile : sig val id : t -> int val path : t -> string val subpath : t -> string option + val is_wildcard : t -> bool val compare : t -> t -> int end module Leaf : sig (** A Leaf is an atomic exection unit the in the pipeline graph. *) + type style = Command | Map type t - val create : int -> Command.t -> DataFile.t list -> DataFile.t list -> t + val create : + int -> Command.t -> style -> DataFile.t list -> DataFile.t list -> t (** Creats a new leaf node, taking an integer identifier, the command to execute and a list of inputs and a list of outputs. *) val id : t -> int val command : t -> Command.t + val command_style : t -> style val inputs : t -> DataFile.t list val outputs : t -> DataFile.t list end diff --git a/src/lib/dotrenderer.ml b/src/lib/dotrenderer.ml index 610397e9..9878215d 100644 --- a/src/lib/dotrenderer.ml +++ b/src/lib/dotrenderer.ml @@ -24,7 +24,12 @@ let render_command_to_dot ppf command = Format.fprintf ppf "\tn%d->n%d[penwidth=\"2.0\"%s];\n" (DataFile.id datafile) process_index label) (Ast.Leaf.inputs command); - Format.fprintf ppf "\tn%d[shape=\"%s\",label=\"%s\"];\n" process_index "box" + let shape = + match Ast.Leaf.command_style command with + | Command -> "box" + | Map -> "box3d" + in + Format.fprintf ppf "\tn%d[shape=\"%s\",label=\"%s\"];\n" process_index shape (Uri.pct_encode (Command.name (Ast.Leaf.command command))); List.iter (fun datafile -> From 3dbc95edeed9e53e623b4539ab7906481a480aae Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Fri, 29 Mar 2024 11:18:53 +0000 Subject: [PATCH 03/10] Add some datafile tests --- src/lib/ast.ml | 31 ++++++++++++-------- src/lib/ast.mli | 17 ++++++++--- src/test/test.ml | 76 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 107 insertions(+), 17 deletions(-) diff --git a/src/lib/ast.ml b/src/lib/ast.ml index b5fc4605..052e97dd 100644 --- a/src/lib/ast.ml +++ b/src/lib/ast.ml @@ -1,9 +1,13 @@ open Astring +open Sexplib.Conv module DataFile = struct type t = { id : int; path : string; subpath : string option; wildcard : bool } + [@@deriving sexp] - let create ?(subpath = None) id path = + let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) + + let v ?(subpath = None) id path = let wildcard = match subpath with | None -> false @@ -13,7 +17,6 @@ module DataFile = struct in last_char = "*" in - (* Printf.printf "%s\n" last_char; *) if wildcard then { id; @@ -30,7 +33,7 @@ module DataFile = struct | false -> d.path | true -> let p = d.path in - let last_char = String.sub ~start:(String.length p) p in + let last_char = String.sub ~start:(String.length p - 1) p in if String.Sub.to_string last_char = "/" then p else p ^ "/" let path_nc d = d.path @@ -44,7 +47,7 @@ module DataFile = struct end module Leaf = struct - type style = Command | Map + type style = Command | Map [@@deriving sexp] type t = { id : int; @@ -53,8 +56,11 @@ module Leaf = struct inputs : DataFile.t list; outputs : DataFile.t list; } + [@@deriving sexp] + + let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) - let create id command style inputs outputs = + let v id command style inputs outputs = { id; command; style; inputs; outputs } let command o = o.command @@ -65,9 +71,10 @@ module Leaf = struct end module CommandGroup = struct - type t = { name : string; children : Leaf.t list } + type t = { name : string; children : Leaf.t list } [@@deriving sexp] - let create name children = { name; children } + let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) + let v name children = { name; children } let name g = g.name let children g = g.children end @@ -94,7 +101,7 @@ let find_matching_datafile datafile_map path = match String.is_prefix ~affix:(DataFile.path df) path with | true -> Some - (DataFile.create + (DataFile.v ~subpath: (Some (String.Sub.to_string @@ -107,7 +114,7 @@ let order_command_list metadata command_groups = let input_map = List.mapi (fun i f -> - let df = DataFile.create i f in + let df = DataFile.v i f in (f, df)) (Frontmatter.inputs metadata) in @@ -143,12 +150,12 @@ let order_command_list metadata command_groups = | None -> let id = !counter in counter := !counter + 1; - Some (DataFile.create id path) + Some (DataFile.v id path) | Some _ -> None) file_args in - let x = Leaf.create !counter hd style inputs outputs in + let x = Leaf.v !counter hd style inputs outputs in counter := !counter + 1; let updated_map, rest = loop tl @@ -161,7 +168,7 @@ let order_command_list metadata command_groups = (updated_map, x :: rest) in let updated_map, commands = loop commands input_map in - (updated_map, CommandGroup.create name commands)) + (updated_map, CommandGroup.v name commands)) input_map command_groups in ordered diff --git a/src/lib/ast.mli b/src/lib/ast.mli index 45639a9c..128c0050 100644 --- a/src/lib/ast.mli +++ b/src/lib/ast.mli @@ -9,13 +9,17 @@ module DataFile : sig type t - val create : ?subpath:string option -> int -> string -> t + val v : ?subpath:string option -> int -> string -> t (** Creates a new datafile with an integer ID and a file path. *) + val pp : t Fmt.t + (** A pretty printer for datafiles. *) + val id : t -> int val path : t -> string val subpath : t -> string option val is_wildcard : t -> bool + val is_dir : t -> bool val compare : t -> t -> int end @@ -25,11 +29,13 @@ module Leaf : sig type style = Command | Map type t - val create : - int -> Command.t -> style -> DataFile.t list -> DataFile.t list -> t + val v : int -> Command.t -> style -> DataFile.t list -> DataFile.t list -> t (** Creats a new leaf node, taking an integer identifier, the command to execute and a list of inputs and a list of outputs. *) + val pp : t Fmt.t + (** A pretty printer for leaves. *) + val id : t -> int val command : t -> Command.t val command_style : t -> style @@ -42,9 +48,12 @@ module CommandGroup : sig type t - val create : string -> Leaf.t list -> t + val v : string -> Leaf.t list -> t (** Creates a command group made up of a series of leaf nodes and given a name. *) + val pp : t Fmt.t + (** A pretty printer for command groups. *) + val name : t -> string val children : t -> Leaf.t list end diff --git a/src/test/test.ml b/src/test/test.ml index f87eaa8f..8e9c227c 100644 --- a/src/test/test.ml +++ b/src/test/test.ml @@ -103,6 +103,80 @@ module CommandParsing = struct ] end +module DataFile = struct + (* let datafile = Alcotest.of_pp Shark.Ast.DataFile.pp *) + + let test_basic_file_path () = + let testcase = "/data/test/example.tif" in + let test = Shark.Ast.DataFile.v 42 testcase in + Alcotest.(check int) "Same id" 42 (Shark.Ast.DataFile.id test); + Alcotest.(check string) "Same path" testcase (Shark.Ast.DataFile.path test); + Alcotest.(check (option string)) + "No subpath" None + (Shark.Ast.DataFile.subpath test); + Alcotest.(check bool) + "Isn't wildcard" false + (Shark.Ast.DataFile.is_wildcard test); + Alcotest.(check bool) "Isn't dir" false (Shark.Ast.DataFile.is_dir test) + + let test_basic_dir_noncanonical () = + let testcase = "/data/test" in + let test = Shark.Ast.DataFile.v 42 testcase in + Alcotest.(check int) "Same id" 42 (Shark.Ast.DataFile.id test); + Alcotest.(check string) + "Extended path" "/data/test/" + (Shark.Ast.DataFile.path test); + Alcotest.(check (option string)) + "No subpath" None + (Shark.Ast.DataFile.subpath test); + Alcotest.(check bool) + "Isn't wildcard" false + (Shark.Ast.DataFile.is_wildcard test); + Alcotest.(check bool) "Is dir" true (Shark.Ast.DataFile.is_dir test) + + let test_basic_dir_canonical () = + let testcase = "/data/test/" in + let test = Shark.Ast.DataFile.v 42 testcase in + Alcotest.(check int) "Same id" 42 (Shark.Ast.DataFile.id test); + Alcotest.(check string) "Same path" testcase (Shark.Ast.DataFile.path test); + Alcotest.(check (option string)) + "No subpath" None + (Shark.Ast.DataFile.subpath test); + Alcotest.(check bool) + "Isn't wildcard" false + (Shark.Ast.DataFile.is_wildcard test); + Alcotest.(check bool) "Is dir" true (Shark.Ast.DataFile.is_dir test) + + let test_basic_dir_canonical_with_wildcard () = + let testcase = "/data/test/" in + let test = Shark.Ast.DataFile.v ~subpath:(Some "*") 42 testcase in + Alcotest.(check int) "Same id" 42 (Shark.Ast.DataFile.id test); + Alcotest.(check string) + "Extended path" "/data/test/" + (Shark.Ast.DataFile.path test); + Alcotest.(check (option string)) + "No subpath" None + (Shark.Ast.DataFile.subpath test); + Alcotest.(check bool) + "Is wildcard" true + (Shark.Ast.DataFile.is_wildcard test); + Alcotest.(check bool) "Is dir" true (Shark.Ast.DataFile.is_dir test) + + let tests = + [ + ("Basic file", `Quick, test_basic_file_path); + ("Non-canonical dir", `Quick, test_basic_dir_noncanonical); + ("Canonical dir", `Quick, test_basic_dir_canonical); + ( "Canonical dir with wildcard", + `Quick, + test_basic_dir_canonical_with_wildcard ); + ] +end + let () = Alcotest.run "shark" - [ ("basic", Basic.tests); ("command parsing", CommandParsing.tests) ] + [ + ("basic", Basic.tests); + ("command parsing", CommandParsing.tests); + ("datafile modeling", DataFile.tests); + ] From 9aa3cd6b89b0d1bc412d1e5f2df37e2e85a669bf Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Tue, 2 Apr 2024 09:30:37 +0100 Subject: [PATCH 04/10] Review by Patrick: remove unnecessary optional --- src/lib/ast.ml | 7 +++---- src/lib/ast.mli | 2 +- src/test/test.ml | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/lib/ast.ml b/src/lib/ast.ml index 052e97dd..755a09b8 100644 --- a/src/lib/ast.ml +++ b/src/lib/ast.ml @@ -7,7 +7,7 @@ module DataFile = struct let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) - let v ?(subpath = None) id path = + let v ?subpath id path = let wildcard = match subpath with | None -> false @@ -103,9 +103,8 @@ let find_matching_datafile datafile_map path = Some (DataFile.v ~subpath: - (Some - (String.Sub.to_string - (String.sub ~start:(String.length ipath) path))) + (String.Sub.to_string + (String.sub ~start:(String.length ipath) path)) (DataFile.id df) (DataFile.path df)) | false -> None))) None datafile_map diff --git a/src/lib/ast.mli b/src/lib/ast.mli index 128c0050..a87ecdf8 100644 --- a/src/lib/ast.mli +++ b/src/lib/ast.mli @@ -9,7 +9,7 @@ module DataFile : sig type t - val v : ?subpath:string option -> int -> string -> t + val v : ?subpath:string -> int -> string -> t (** Creates a new datafile with an integer ID and a file path. *) val pp : t Fmt.t diff --git a/src/test/test.ml b/src/test/test.ml index 8e9c227c..f526f09a 100644 --- a/src/test/test.ml +++ b/src/test/test.ml @@ -149,7 +149,7 @@ module DataFile = struct let test_basic_dir_canonical_with_wildcard () = let testcase = "/data/test/" in - let test = Shark.Ast.DataFile.v ~subpath:(Some "*") 42 testcase in + let test = Shark.Ast.DataFile.v ~subpath:"*" 42 testcase in Alcotest.(check int) "Same id" 42 (Shark.Ast.DataFile.id test); Alcotest.(check string) "Extended path" "/data/test/" From dd8da09fcca261c79c256909f3a81e89bd688dea Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Tue, 2 Apr 2024 09:47:18 +0100 Subject: [PATCH 05/10] Review by Patrick: additional code improvements --- src/lib/ast.ml | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/lib/ast.ml b/src/lib/ast.ml index 755a09b8..ce7520d0 100644 --- a/src/lib/ast.ml +++ b/src/lib/ast.ml @@ -11,16 +11,12 @@ module DataFile = struct let wildcard = match subpath with | None -> false - | Some p -> - let last_char = - String.Sub.to_string (String.sub ~start:(String.length p - 1) p) - in - last_char = "*" + | Some p -> Char.equal p.[String.length p - 1] '*' in if wildcard then { id; - path = String.Sub.to_string (String.sub ~stop:(String.length path) path); + path = String.Sub.to_string (String.sub ~stop:((String.length path) - 1) path); subpath = None; wildcard = true; } @@ -33,8 +29,9 @@ module DataFile = struct | false -> d.path | true -> let p = d.path in - let last_char = String.sub ~start:(String.length p - 1) p in - if String.Sub.to_string last_char = "/" then p else p ^ "/" + match Char.equal p.[String.length p - 1] '/' with + | true -> p + | false -> p ^ "/" let path_nc d = d.path let subpath d = d.subpath @@ -90,11 +87,10 @@ let find_matching_datafile datafile_map path = | None -> (* No full match, but can we find a prefix dir *) List.fold_left - (fun acc i -> + (fun acc (ipath, df) -> match acc with | Some x -> Some x | None -> ( - let ipath, df = i in match DataFile.is_dir df with | false -> None | true -> ( From 649de57874768ac1774ade77f0f782f2be6bc526 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Tue, 2 Apr 2024 13:47:37 +0100 Subject: [PATCH 06/10] Move to Fpath --- src/lib/ast.ml | 80 +++++++++--------------------------------- src/lib/ast.mli | 25 ++----------- src/lib/command.ml | 21 ++++++++--- src/lib/dotrenderer.ml | 16 ++++----- src/lib/dune | 12 ++++++- src/lib/frontmatter.ml | 39 ++++++++++++++++---- src/test/dune | 2 +- src/test/test.ml | 73 ++------------------------------------ 8 files changed, 91 insertions(+), 177 deletions(-) diff --git a/src/lib/ast.ml b/src/lib/ast.ml index ce7520d0..a256bbda 100644 --- a/src/lib/ast.ml +++ b/src/lib/ast.ml @@ -1,48 +1,5 @@ -open Astring open Sexplib.Conv -module DataFile = struct - type t = { id : int; path : string; subpath : string option; wildcard : bool } - [@@deriving sexp] - - let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) - - let v ?subpath id path = - let wildcard = - match subpath with - | None -> false - | Some p -> Char.equal p.[String.length p - 1] '*' - in - if wildcard then - { - id; - path = String.Sub.to_string (String.sub ~stop:((String.length path) - 1) path); - subpath = None; - wildcard = true; - } - else { id; path; subpath; wildcard = false } - - let id d = d.id - - let path d = - match Filename.extension d.path = "" with - | false -> d.path - | true -> - let p = d.path in - match Char.equal p.[String.length p - 1] '/' with - | true -> p - | false -> p ^ "/" - - let path_nc d = d.path - let subpath d = d.subpath - let is_wildcard d = d.wildcard - let compare a b = Int.compare a.id b.id - - let is_dir d = - (* a little hacky, we probably need to do something in the sharkdown here *) - Filename.extension d.path = "" -end - module Leaf = struct type style = Command | Map [@@deriving sexp] @@ -50,8 +7,8 @@ module Leaf = struct id : int; command : Command.t; style : style; - inputs : DataFile.t list; - outputs : DataFile.t list; + inputs : Datafile.t list; + outputs : Datafile.t list; } [@@deriving sexp] @@ -81,35 +38,32 @@ type t = CommandGroup.t list let to_list cg = cg -let find_matching_datafile datafile_map path = - match List.assoc_opt path datafile_map with +let find_matching_datafile datafile_map fpath = + match List.assoc_opt fpath datafile_map with | Some p -> Some p | None -> (* No full match, but can we find a prefix dir *) List.fold_left - (fun acc (ipath, df) -> + (fun acc (_ipath, df) -> match acc with | Some x -> Some x | None -> ( - match DataFile.is_dir df with + match Datafile.is_dir df with | false -> None | true -> ( - match String.is_prefix ~affix:(DataFile.path df) path with - | true -> + match Fpath.rem_prefix (Datafile.path df) fpath with + | None -> None + | Some subpath -> Some - (DataFile.v - ~subpath: - (String.Sub.to_string - (String.sub ~start:(String.length ipath) path)) - (DataFile.id df) (DataFile.path df)) - | false -> None))) + (Datafile.v ~subpath:(Fpath.to_string subpath) + (Datafile.id df) (Datafile.path df))))) None datafile_map let order_command_list metadata command_groups = let input_map = List.mapi (fun i f -> - let df = DataFile.v i f in + let df = Datafile.v i f in (f, df)) (Frontmatter.inputs metadata) in @@ -133,19 +87,19 @@ let order_command_list metadata command_groups = in let style : Leaf.style = - match List.exists DataFile.is_wildcard inputs with + match List.exists Datafile.is_wildcard inputs with | true -> Map | false -> Command in let outputs = List.filter_map - (fun path -> - match find_matching_datafile datafile_map path with + (fun fpath -> + match find_matching_datafile datafile_map fpath with | None -> let id = !counter in counter := !counter + 1; - Some (DataFile.v id path) + Some (Datafile.v id fpath) | Some _ -> None) file_args in @@ -157,7 +111,7 @@ let order_command_list metadata command_groups = (List.concat [ datafile_map; - List.map (fun o -> (DataFile.path_nc o, o)) outputs; + List.map (fun o -> (Datafile.path o, o)) outputs; ]) in (updated_map, x :: rest) diff --git a/src/lib/ast.mli b/src/lib/ast.mli index a87ecdf8..62154fea 100644 --- a/src/lib/ast.mli +++ b/src/lib/ast.mli @@ -4,32 +4,13 @@ The AST is the logical representation of the workflow described in a sharkdown file, including the structure of groups (aka basic blocks in PL, but block is an overloaded term in this context). *) -module DataFile : sig - (** A named file/directory that acts as an input and/or output of a process. *) - - type t - - val v : ?subpath:string -> int -> string -> t - (** Creates a new datafile with an integer ID and a file path. *) - - val pp : t Fmt.t - (** A pretty printer for datafiles. *) - - val id : t -> int - val path : t -> string - val subpath : t -> string option - val is_wildcard : t -> bool - val is_dir : t -> bool - val compare : t -> t -> int -end - module Leaf : sig (** A Leaf is an atomic exection unit the in the pipeline graph. *) type style = Command | Map type t - val v : int -> Command.t -> style -> DataFile.t list -> DataFile.t list -> t + val v : int -> Command.t -> style -> Datafile.t list -> Datafile.t list -> t (** Creats a new leaf node, taking an integer identifier, the command to execute and a list of inputs and a list of outputs. *) @@ -39,8 +20,8 @@ module Leaf : sig val id : t -> int val command : t -> Command.t val command_style : t -> style - val inputs : t -> DataFile.t list - val outputs : t -> DataFile.t list + val inputs : t -> Datafile.t list + val outputs : t -> Datafile.t list end module CommandGroup : sig diff --git a/src/lib/command.ml b/src/lib/command.ml index 7388c8d6..faac747b 100644 --- a/src/lib/command.ml +++ b/src/lib/command.ml @@ -1,20 +1,31 @@ open Astring open Sexplib.Conv -type t = { name : string; args : string list; file_args : string list } +type path = Fpath.t + +let path_of_sexp = function + | Ppx_sexp_conv_lib.Sexp.Atom s -> Fpath.v s + | List _ -> Fpath.v "" + +let sexp_of_path v = Ppx_sexp_conv_lib.Sexp.Atom (Fpath.to_string v) + +type t = { name : string; args : string list; file_args : path list } [@@deriving sexp] let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) let v ~name ~args ~file_args = { name; args; file_args } +let magic_path_regex = Str.regexp "^/data" let find_file_args args = (* gross liberties, we assume for now that any arg with a doubeldash might be a file. though ultimately this will have to rely on convention, annotation, or guesses, so it's not exactly that bad, just limited as is. I imagine we can have a common prefix for all files, like example.com should be used for domains. *) - List.filter + List.filter_map (fun arg -> - let regex = Str.regexp "^/data" in - Str.string_match regex arg 0) + match Str.string_match magic_path_regex arg 0 with + | false -> None + | true -> ( + match Fpath.of_string arg with Error _e -> None | Ok r -> Some r)) args let parse_python_command args = @@ -69,4 +80,4 @@ let of_string command_str = | name :: args -> Some (parse_generic_commmand (name :: args)) let name c = c.name -let file_args c = c.file_args +let file_args c : Fpath.t list = c.file_args diff --git a/src/lib/dotrenderer.ml b/src/lib/dotrenderer.ml index 9878215d..99e81162 100644 --- a/src/lib/dotrenderer.ml +++ b/src/lib/dotrenderer.ml @@ -1,6 +1,5 @@ open Astring -module DataFile = Ast.DataFile -module DataFileSet = Set.Make (DataFile) +module DatafileSet = Set.Make (Datafile) (* In theory this could be a recursive structure that attempts to maintain the heirarchy of the document, markdown doesn't enforce that the section levels @@ -17,12 +16,12 @@ let render_command_to_dot ppf command = List.iter (fun datafile -> let label = - match DataFile.subpath datafile with + match Datafile.subpath datafile with | Some x -> Printf.sprintf ",label=\"%s\"" x | None -> "" in Format.fprintf ppf "\tn%d->n%d[penwidth=\"2.0\"%s];\n" - (DataFile.id datafile) process_index label) + (Datafile.id datafile) process_index label) (Ast.Leaf.inputs command); let shape = match Ast.Leaf.command_style command with @@ -34,13 +33,14 @@ let render_command_to_dot ppf command = List.iter (fun datafile -> Format.fprintf ppf "\tn%d->n%d[penwidth=\"2.0\"];\n" process_index - (DataFile.id datafile)) + (Datafile.id datafile)) (Ast.Leaf.outputs command); Format.fprintf ppf "\n" let datafile_to_dot ppf datafile = Format.fprintf ppf "\tn%d[shape=\"cylinder\",label=\"%s\"];\n" - (DataFile.id datafile) (DataFile.path datafile) + (Datafile.id datafile) + (Fpath.to_string (Datafile.path datafile)) let render_ast_to_dot ppf ast : unit = Format.fprintf ppf "digraph{\n"; @@ -54,8 +54,8 @@ let render_ast_to_dot ppf ast : unit = List.concat [ inputs; outputs ]) commands) ast - |> DataFileSet.of_list - |> DataFileSet.iter (datafile_to_dot ppf); + |> DatafileSet.of_list + |> DatafileSet.iter (datafile_to_dot ppf); List.iteri (fun i group -> diff --git a/src/lib/dune b/src/lib/dune index 87b73683..950be284 100644 --- a/src/lib/dune +++ b/src/lib/dune @@ -1,6 +1,16 @@ (library (name shark) - (libraries eio cohttp-eio str yaml lwt_eio cmarkit obuilder htmlit routes) + (libraries + eio + cohttp-eio + str + yaml + lwt_eio + cmarkit + obuilder + htmlit + routes + fpath) (preprocess (pps ppx_sexp_conv))) diff --git a/src/lib/frontmatter.ml b/src/lib/frontmatter.ml index f2a78cf1..17349682 100644 --- a/src/lib/frontmatter.ml +++ b/src/lib/frontmatter.ml @@ -1,6 +1,19 @@ -type t = { variables : (string * string list) list } +open Sexplib.Conv -let empty = { variables = [] } +type path = Fpath.t + +let path_of_sexp = function + | Ppx_sexp_conv_lib.Sexp.Atom s -> Fpath.v s + | List _ -> Fpath.v "" + +let sexp_of_path v = Ppx_sexp_conv_lib.Sexp.Atom (Fpath.to_string v) + +type t = { variables : (string * string list) list; inputs : path list } +[@@deriving sexp] + +let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) +let v variables inputs = { variables; inputs } +let empty = { variables = []; inputs = [] } let yaml_to_string = function | `String s -> s @@ -18,10 +31,24 @@ let string_list_of_yaml = function let of_yaml = function | `O assoc -> let vars = List.map (fun (k, v) -> (k, string_list_of_yaml v)) assoc in - { variables = vars } + let raw_inputs = + match List.assoc_opt "inputs" vars with None -> [] | Some v -> v + in + let inputs = + List.map + (fun p -> + match Fpath.of_string p with + | Error e -> + failwith + (Printf.sprintf "Malformed input path %s" + (match e with `Msg x -> x)) + | Ok p -> Fpath.normalize p) + raw_inputs + in + { variables = vars; inputs } + | `Null -> empty | _ -> failwith "Malformed variables in markdown frontmatter" let of_string s = String.trim s |> Yaml.of_string |> Result.map of_yaml - -let inputs t = - match List.assoc_opt "inputs" t.variables with None -> [] | Some v -> v +let variables t = t.variables +let inputs t = t.inputs diff --git a/src/test/dune b/src/test/dune index 24146953..20e0933d 100644 --- a/src/test/dune +++ b/src/test/dune @@ -1,3 +1,3 @@ (test (name test) - (libraries alcotest shark)) + (libraries alcotest shark fpath)) diff --git a/src/test/test.ml b/src/test/test.ml index f526f09a..cd326dbe 100644 --- a/src/test/test.ml +++ b/src/test/test.ml @@ -103,80 +103,11 @@ module CommandParsing = struct ] end -module DataFile = struct - (* let datafile = Alcotest.of_pp Shark.Ast.DataFile.pp *) - - let test_basic_file_path () = - let testcase = "/data/test/example.tif" in - let test = Shark.Ast.DataFile.v 42 testcase in - Alcotest.(check int) "Same id" 42 (Shark.Ast.DataFile.id test); - Alcotest.(check string) "Same path" testcase (Shark.Ast.DataFile.path test); - Alcotest.(check (option string)) - "No subpath" None - (Shark.Ast.DataFile.subpath test); - Alcotest.(check bool) - "Isn't wildcard" false - (Shark.Ast.DataFile.is_wildcard test); - Alcotest.(check bool) "Isn't dir" false (Shark.Ast.DataFile.is_dir test) - - let test_basic_dir_noncanonical () = - let testcase = "/data/test" in - let test = Shark.Ast.DataFile.v 42 testcase in - Alcotest.(check int) "Same id" 42 (Shark.Ast.DataFile.id test); - Alcotest.(check string) - "Extended path" "/data/test/" - (Shark.Ast.DataFile.path test); - Alcotest.(check (option string)) - "No subpath" None - (Shark.Ast.DataFile.subpath test); - Alcotest.(check bool) - "Isn't wildcard" false - (Shark.Ast.DataFile.is_wildcard test); - Alcotest.(check bool) "Is dir" true (Shark.Ast.DataFile.is_dir test) - - let test_basic_dir_canonical () = - let testcase = "/data/test/" in - let test = Shark.Ast.DataFile.v 42 testcase in - Alcotest.(check int) "Same id" 42 (Shark.Ast.DataFile.id test); - Alcotest.(check string) "Same path" testcase (Shark.Ast.DataFile.path test); - Alcotest.(check (option string)) - "No subpath" None - (Shark.Ast.DataFile.subpath test); - Alcotest.(check bool) - "Isn't wildcard" false - (Shark.Ast.DataFile.is_wildcard test); - Alcotest.(check bool) "Is dir" true (Shark.Ast.DataFile.is_dir test) - - let test_basic_dir_canonical_with_wildcard () = - let testcase = "/data/test/" in - let test = Shark.Ast.DataFile.v ~subpath:"*" 42 testcase in - Alcotest.(check int) "Same id" 42 (Shark.Ast.DataFile.id test); - Alcotest.(check string) - "Extended path" "/data/test/" - (Shark.Ast.DataFile.path test); - Alcotest.(check (option string)) - "No subpath" None - (Shark.Ast.DataFile.subpath test); - Alcotest.(check bool) - "Is wildcard" true - (Shark.Ast.DataFile.is_wildcard test); - Alcotest.(check bool) "Is dir" true (Shark.Ast.DataFile.is_dir test) - - let tests = - [ - ("Basic file", `Quick, test_basic_file_path); - ("Non-canonical dir", `Quick, test_basic_dir_noncanonical); - ("Canonical dir", `Quick, test_basic_dir_canonical); - ( "Canonical dir with wildcard", - `Quick, - test_basic_dir_canonical_with_wildcard ); - ] -end - let () = Alcotest.run "shark" [ ("basic", Basic.tests); ("command parsing", CommandParsing.tests); - ("datafile modeling", DataFile.tests); + ("datafile modeling", Datafile.tests); + ("frontmatter parsing", Frontmatter.tests); ] From b5174b5ea47c502d5d6c5a5d3c40045c4230f938 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Tue, 2 Apr 2024 15:01:34 +0100 Subject: [PATCH 07/10] Revert test changes as we no longer assume no ext == dir --- src/test/expect/test_dot.expected | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/test/expect/test_dot.expected b/src/test/expect/test_dot.expected index 8efc965e..e1eb0ff3 100644 --- a/src/test/expect/test_dot.expected +++ b/src/test/expect/test_dot.expected @@ -1,32 +1,32 @@ digraph{ n0[shape="cylinder",label="/data/tmf/project_boundaries/123.geojson"]; - n1[shape="cylinder",label="/data/tmf/project_boundaries/"]; - n2[shape="cylinder",label="/data/tmf/jrc/zips/"]; - n3[shape="cylinder",label="/data/tmf/jrc/tif/"]; - n5[shape="cylinder",label="/data/tmf/fcc-cpcs/"]; + n1[shape="cylinder",label="/data/tmf/project_boundaries"]; + n2[shape="cylinder",label="/data/tmf/jrc/zips"]; + n3[shape="cylinder",label="/data/tmf/jrc/tif"]; + n5[shape="cylinder",label="/data/tmf/fcc-cpcs"]; n7[shape="cylinder",label="/data/tmf/ecoregions/ecoregions.geojson"]; - n9[shape="cylinder",label="/data/tmf/ecoregions/"]; + n9[shape="cylinder",label="/data/tmf/ecoregions"]; n11[shape="cylinder",label="/data/tmf/access/raw.tif"]; - n13[shape="cylinder",label="/data/tmf/access/"]; + n13[shape="cylinder",label="/data/tmf/access"]; n15[shape="cylinder",label="/data/tmf/osm_borders.geojson"]; n17[shape="cylinder",label="/data/tmf/123/buffer.geojson"]; n19[shape="cylinder",label="/data/tmf/123/leakage.geojson"]; n21[shape="cylinder",label="/data/tmf/123/luc.tif"]; - n23[shape="cylinder",label="/data/tmf/gedi/"]; + n23[shape="cylinder",label="/data/tmf/gedi"]; n26[shape="cylinder",label="/data/tmf/123/carbon-density.csv"]; n28[shape="cylinder",label="/data/tmf/123/country-list.json"]; n30[shape="cylinder",label="/data/tmf/123/matching-area.geojson"]; - n32[shape="cylinder",label="/data/tmf/srtm/zip/"]; - n33[shape="cylinder",label="/data/tmf/srtm/tif/"]; - n35[shape="cylinder",label="/data/tmf/slopes/"]; - n37[shape="cylinder",label="/data/tmf/rescaled-elevation/"]; - n39[shape="cylinder",label="/data/tmf/rescaled-slopes/"]; + n32[shape="cylinder",label="/data/tmf/srtm/zip"]; + n33[shape="cylinder",label="/data/tmf/srtm/tif"]; + n35[shape="cylinder",label="/data/tmf/slopes"]; + n37[shape="cylinder",label="/data/tmf/rescaled-elevation"]; + n39[shape="cylinder",label="/data/tmf/rescaled-slopes"]; n41[shape="cylinder",label="/data/tmf/123/countries.tif"]; n43[shape="cylinder",label="/data/tmf/123/k.parquet"]; - n45[shape="cylinder",label="/data/tmf/123/matches/"]; + n45[shape="cylinder",label="/data/tmf/123/matches"]; n47[shape="cylinder",label="/data/tmf/123/matches.tif"]; n49[shape="cylinder",label="/data/tmf/123/matches.parquet"]; - n51[shape="cylinder",label="/data/tmf/123/pairs/"]; + n51[shape="cylinder",label="/data/tmf/123/pairs"]; n53[shape="cylinder",label="/data/tmf/123/additionality.csv"]; subgraph "cluster_0" { label = "JRC" From d3bba387c3ea36e35fae3cd16e7f676c7430d8a1 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Tue, 2 Apr 2024 15:54:59 +0100 Subject: [PATCH 08/10] Add missing files --- src/lib/command.mli | 10 ++++++++++ src/lib/datafile.ml | 33 +++++++++++++++++++++++++++++++++ src/lib/datafile.mli | 17 +++++++++++++++++ src/lib/frontmatter.mli | 8 ++++++++ src/test/datafile.ml | 31 +++++++++++++++++++++++++++++++ src/test/frontmatter.ml | 39 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 138 insertions(+) create mode 100644 src/lib/command.mli create mode 100644 src/lib/datafile.ml create mode 100644 src/lib/datafile.mli create mode 100644 src/lib/frontmatter.mli create mode 100644 src/test/datafile.ml create mode 100644 src/test/frontmatter.ml diff --git a/src/lib/command.mli b/src/lib/command.mli new file mode 100644 index 00000000..0c28ec99 --- /dev/null +++ b/src/lib/command.mli @@ -0,0 +1,10 @@ +type t [@@deriving sexp] + +val v : name:string -> args:string list -> file_args:Fpath.t list -> t + +val pp : t Fmt.t +(** A pretty printer for blocks. *) + +val of_string : string -> t option +val name : t -> string +val file_args : t -> Fpath.t list diff --git a/src/lib/datafile.ml b/src/lib/datafile.ml new file mode 100644 index 00000000..71d89efd --- /dev/null +++ b/src/lib/datafile.ml @@ -0,0 +1,33 @@ +open Astring +open Sexplib.Conv + +type path = Fpath.t + +let path_of_sexp = function + | Ppx_sexp_conv_lib.Sexp.Atom s -> Fpath.v s + | List _ -> Fpath.v "" + +let sexp_of_path v = Ppx_sexp_conv_lib.Sexp.Atom (Fpath.to_string v) + +type t = { id : int; path : path; subpath : string option; wildcard : bool } +[@@deriving sexp] + +type err = InvalidPath of string | InvalidSubpath of string + +let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) + +let v ?subpath id path = + let wildcard = + match subpath with + | None -> false + | Some p -> Char.equal p.[String.length p - 1] '*' + in + if wildcard then { id; path; subpath = None; wildcard = true } + else { id; path; subpath; wildcard = false } + +let id d = d.id +let path d = d.path +let subpath d = d.subpath +let is_wildcard d = d.wildcard +let compare a b = Int.compare a.id b.id +let is_dir d = Fpath.is_dir_path d.path diff --git a/src/lib/datafile.mli b/src/lib/datafile.mli new file mode 100644 index 00000000..4d116e49 --- /dev/null +++ b/src/lib/datafile.mli @@ -0,0 +1,17 @@ +(** A named file/directory that acts as an input and/or output of a process. *) + +type t [@@deriving sexp] +type err = InvalidPath of string | InvalidSubpath of string + +val v : ?subpath:string -> int -> Fpath.t -> t +(** Creates a new datafile with an integer ID and a file path. *) + +val pp : t Fmt.t +(** A pretty printer for datafiles. *) + +val id : t -> int +val path : t -> Fpath.t +val subpath : t -> string option +val is_wildcard : t -> bool +val is_dir : t -> bool +val compare : t -> t -> int diff --git a/src/lib/frontmatter.mli b/src/lib/frontmatter.mli new file mode 100644 index 00000000..d7a5d5da --- /dev/null +++ b/src/lib/frontmatter.mli @@ -0,0 +1,8 @@ +type t [@@deriving sexp] + +val pp : t Fmt.t +val v : (string * string list) list -> Fpath.t list -> t +val empty : t +val of_string : string -> (t, [ `Msg of string ]) result +val variables : t -> (string * string list) list +val inputs : t -> Fpath.t list diff --git a/src/test/datafile.ml b/src/test/datafile.ml new file mode 100644 index 00000000..505a796b --- /dev/null +++ b/src/test/datafile.ml @@ -0,0 +1,31 @@ +let test_basic_file_path () = + let testcase = Fpath.v "/data/test/example.tif" in + let test = Shark.Datafile.v 42 testcase in + Alcotest.(check int) "Same id" 42 (Shark.Datafile.id test); + Alcotest.(check string) + "Same path" (Fpath.to_string testcase) + (Fpath.to_string (Shark.Datafile.path test)); + Alcotest.(check (option string)) + "No subpath" None + (Shark.Datafile.subpath test); + Alcotest.(check bool) "Isn't wildcard" false (Shark.Datafile.is_wildcard test); + Alcotest.(check bool) "Isn't dir" false (Shark.Datafile.is_dir test) + +let test_basic_dir_with_wildcard () = + let testcase = Fpath.v "/data/test/" in + let test = Shark.Datafile.v ~subpath:"*" 42 testcase in + Alcotest.(check int) "Same id" 42 (Shark.Datafile.id test); + Alcotest.(check string) + "Same path" (Fpath.to_string testcase) + (Fpath.to_string (Shark.Datafile.path test)); + Alcotest.(check (option string)) + "No subpath" None + (Shark.Datafile.subpath test); + Alcotest.(check bool) "Is wildcard" true (Shark.Datafile.is_wildcard test); + Alcotest.(check bool) "Is dir" true (Shark.Datafile.is_dir test) + +let tests = + [ + ("Basic file", `Quick, test_basic_file_path); + ("Canonical dir with wildcard", `Quick, test_basic_dir_with_wildcard); + ] diff --git a/src/test/frontmatter.ml b/src/test/frontmatter.ml new file mode 100644 index 00000000..8c9da8b1 --- /dev/null +++ b/src/test/frontmatter.ml @@ -0,0 +1,39 @@ +let frontmatter = Alcotest.of_pp Shark.Frontmatter.pp + +let test_empty () = + let testcase = "" in + match Shark.Frontmatter.of_string testcase with + | Error e -> + Alcotest.fail (Printf.sprintf "Failed %s" (match e with `Msg x -> x)) + | Ok fm -> + Alcotest.(check frontmatter) "Empty expected" Shark.Frontmatter.empty fm + +let test_inputs () = + let testcase = + {| +inputs: +- /data/stuff +- /data/other/ +- /data/file.txt + |} + in + match Shark.Frontmatter.of_string testcase with + | Error e -> + Alcotest.fail (Printf.sprintf "Failed %s" (match e with `Msg x -> x)) + | Ok fm -> + let expected = + Shark.Frontmatter.v + [ ("inputs", [ "/data/stuff"; "/data/other/"; "/data/file.txt" ]) ] + [ + Fpath.v "/data/stuff"; + Fpath.v "/data/other/"; + Fpath.v "/data/file.txt"; + ] + in + Alcotest.(check frontmatter) "Empty expected" expected fm + +let tests = + [ + ("empty front matter", `Quick, test_empty); + ("simple input list", `Quick, test_inputs); + ] From 610452302f8566c26080642e5400173f99197199 Mon Sep 17 00:00:00 2001 From: Patrick Ferris Date: Tue, 2 Apr 2024 16:27:02 +0100 Subject: [PATCH 09/10] Use result combinator --- src/test/frontmatter.ml | 33 ++++++++++++++------------------- src/test/import.ml | 2 ++ 2 files changed, 16 insertions(+), 19 deletions(-) create mode 100644 src/test/import.ml diff --git a/src/test/frontmatter.ml b/src/test/frontmatter.ml index 8c9da8b1..30efed88 100644 --- a/src/test/frontmatter.ml +++ b/src/test/frontmatter.ml @@ -1,12 +1,12 @@ +open Import + let frontmatter = Alcotest.of_pp Shark.Frontmatter.pp let test_empty () = let testcase = "" in - match Shark.Frontmatter.of_string testcase with - | Error e -> - Alcotest.fail (Printf.sprintf "Failed %s" (match e with `Msg x -> x)) - | Ok fm -> - Alcotest.(check frontmatter) "Empty expected" Shark.Frontmatter.empty fm + let res = Shark.Frontmatter.of_string testcase in + Alcotest.(check (result frontmatter msg)) + "Empty expected" (Ok Shark.Frontmatter.empty) res let test_inputs () = let testcase = @@ -17,20 +17,15 @@ inputs: - /data/file.txt |} in - match Shark.Frontmatter.of_string testcase with - | Error e -> - Alcotest.fail (Printf.sprintf "Failed %s" (match e with `Msg x -> x)) - | Ok fm -> - let expected = - Shark.Frontmatter.v - [ ("inputs", [ "/data/stuff"; "/data/other/"; "/data/file.txt" ]) ] - [ - Fpath.v "/data/stuff"; - Fpath.v "/data/other/"; - Fpath.v "/data/file.txt"; - ] - in - Alcotest.(check frontmatter) "Empty expected" expected fm + let res = Shark.Frontmatter.of_string testcase in + let expected = + Shark.Frontmatter.v + [ ("inputs", [ "/data/stuff"; "/data/other/"; "/data/file.txt" ]) ] + [ + Fpath.v "/data/stuff"; Fpath.v "/data/other/"; Fpath.v "/data/file.txt"; + ] + in + Alcotest.(check (result frontmatter msg)) "Empty expected" (Ok expected) res let tests = [ diff --git a/src/test/import.ml b/src/test/import.ml new file mode 100644 index 00000000..0bdd8848 --- /dev/null +++ b/src/test/import.ml @@ -0,0 +1,2 @@ +let msg : [ `Msg of string ] Alcotest.testable = + Alcotest.of_pp (fun ppf (`Msg m) -> Fmt.pf ppf "msg: %s" m) From 4a657360a7c98ab0c4b4b935528280a2e24b3b23 Mon Sep 17 00:00:00 2001 From: Michael Dales Date: Tue, 2 Apr 2024 16:44:23 +0100 Subject: [PATCH 10/10] Remove unused type --- src/lib/datafile.ml | 2 -- src/lib/datafile.mli | 1 - 2 files changed, 3 deletions(-) diff --git a/src/lib/datafile.ml b/src/lib/datafile.ml index 71d89efd..a006444f 100644 --- a/src/lib/datafile.ml +++ b/src/lib/datafile.ml @@ -12,8 +12,6 @@ let sexp_of_path v = Ppx_sexp_conv_lib.Sexp.Atom (Fpath.to_string v) type t = { id : int; path : path; subpath : string option; wildcard : bool } [@@deriving sexp] -type err = InvalidPath of string | InvalidSubpath of string - let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) let v ?subpath id path = diff --git a/src/lib/datafile.mli b/src/lib/datafile.mli index 4d116e49..407bb3c9 100644 --- a/src/lib/datafile.mli +++ b/src/lib/datafile.mli @@ -1,7 +1,6 @@ (** A named file/directory that acts as an input and/or output of a process. *) type t [@@deriving sexp] -type err = InvalidPath of string | InvalidSubpath of string val v : ?subpath:string -> int -> Fpath.t -> t (** Creates a new datafile with an integer ID and a file path. *)