diff --git a/src/lib/ast.ml b/src/lib/ast.ml index ce7520d0..a256bbda 100644 --- a/src/lib/ast.ml +++ b/src/lib/ast.ml @@ -1,48 +1,5 @@ -open Astring open Sexplib.Conv -module DataFile = struct - type t = { id : int; path : string; subpath : string option; wildcard : bool } - [@@deriving sexp] - - let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) - - let v ?subpath id path = - let wildcard = - match subpath with - | None -> false - | Some p -> Char.equal p.[String.length p - 1] '*' - in - if wildcard then - { - id; - path = String.Sub.to_string (String.sub ~stop:((String.length path) - 1) path); - subpath = None; - wildcard = true; - } - else { id; path; subpath; wildcard = false } - - let id d = d.id - - let path d = - match Filename.extension d.path = "" with - | false -> d.path - | true -> - let p = d.path in - match Char.equal p.[String.length p - 1] '/' with - | true -> p - | false -> p ^ "/" - - let path_nc d = d.path - let subpath d = d.subpath - let is_wildcard d = d.wildcard - let compare a b = Int.compare a.id b.id - - let is_dir d = - (* a little hacky, we probably need to do something in the sharkdown here *) - Filename.extension d.path = "" -end - module Leaf = struct type style = Command | Map [@@deriving sexp] @@ -50,8 +7,8 @@ module Leaf = struct id : int; command : Command.t; style : style; - inputs : DataFile.t list; - outputs : DataFile.t list; + inputs : Datafile.t list; + outputs : Datafile.t list; } [@@deriving sexp] @@ -81,35 +38,32 @@ type t = CommandGroup.t list let to_list cg = cg -let find_matching_datafile datafile_map path = - match List.assoc_opt path datafile_map with +let find_matching_datafile datafile_map fpath = + match List.assoc_opt fpath datafile_map with | Some p -> Some p | None -> (* No full match, but can we find a prefix dir *) List.fold_left - (fun acc (ipath, df) -> + (fun acc (_ipath, df) -> match acc with | Some x -> Some x | None -> ( - match DataFile.is_dir df with + match Datafile.is_dir df with | false -> None | true -> ( - match String.is_prefix ~affix:(DataFile.path df) path with - | true -> + match Fpath.rem_prefix (Datafile.path df) fpath with + | None -> None + | Some subpath -> Some - (DataFile.v - ~subpath: - (String.Sub.to_string - (String.sub ~start:(String.length ipath) path)) - (DataFile.id df) (DataFile.path df)) - | false -> None))) + (Datafile.v ~subpath:(Fpath.to_string subpath) + (Datafile.id df) (Datafile.path df))))) None datafile_map let order_command_list metadata command_groups = let input_map = List.mapi (fun i f -> - let df = DataFile.v i f in + let df = Datafile.v i f in (f, df)) (Frontmatter.inputs metadata) in @@ -133,19 +87,19 @@ let order_command_list metadata command_groups = in let style : Leaf.style = - match List.exists DataFile.is_wildcard inputs with + match List.exists Datafile.is_wildcard inputs with | true -> Map | false -> Command in let outputs = List.filter_map - (fun path -> - match find_matching_datafile datafile_map path with + (fun fpath -> + match find_matching_datafile datafile_map fpath with | None -> let id = !counter in counter := !counter + 1; - Some (DataFile.v id path) + Some (Datafile.v id fpath) | Some _ -> None) file_args in @@ -157,7 +111,7 @@ let order_command_list metadata command_groups = (List.concat [ datafile_map; - List.map (fun o -> (DataFile.path_nc o, o)) outputs; + List.map (fun o -> (Datafile.path o, o)) outputs; ]) in (updated_map, x :: rest) diff --git a/src/lib/ast.mli b/src/lib/ast.mli index a87ecdf8..62154fea 100644 --- a/src/lib/ast.mli +++ b/src/lib/ast.mli @@ -4,32 +4,13 @@ The AST is the logical representation of the workflow described in a sharkdown file, including the structure of groups (aka basic blocks in PL, but block is an overloaded term in this context). *) -module DataFile : sig - (** A named file/directory that acts as an input and/or output of a process. *) - - type t - - val v : ?subpath:string -> int -> string -> t - (** Creates a new datafile with an integer ID and a file path. *) - - val pp : t Fmt.t - (** A pretty printer for datafiles. *) - - val id : t -> int - val path : t -> string - val subpath : t -> string option - val is_wildcard : t -> bool - val is_dir : t -> bool - val compare : t -> t -> int -end - module Leaf : sig (** A Leaf is an atomic exection unit the in the pipeline graph. *) type style = Command | Map type t - val v : int -> Command.t -> style -> DataFile.t list -> DataFile.t list -> t + val v : int -> Command.t -> style -> Datafile.t list -> Datafile.t list -> t (** Creats a new leaf node, taking an integer identifier, the command to execute and a list of inputs and a list of outputs. *) @@ -39,8 +20,8 @@ module Leaf : sig val id : t -> int val command : t -> Command.t val command_style : t -> style - val inputs : t -> DataFile.t list - val outputs : t -> DataFile.t list + val inputs : t -> Datafile.t list + val outputs : t -> Datafile.t list end module CommandGroup : sig diff --git a/src/lib/command.ml b/src/lib/command.ml index 7388c8d6..faac747b 100644 --- a/src/lib/command.ml +++ b/src/lib/command.ml @@ -1,20 +1,31 @@ open Astring open Sexplib.Conv -type t = { name : string; args : string list; file_args : string list } +type path = Fpath.t + +let path_of_sexp = function + | Ppx_sexp_conv_lib.Sexp.Atom s -> Fpath.v s + | List _ -> Fpath.v "" + +let sexp_of_path v = Ppx_sexp_conv_lib.Sexp.Atom (Fpath.to_string v) + +type t = { name : string; args : string list; file_args : path list } [@@deriving sexp] let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) let v ~name ~args ~file_args = { name; args; file_args } +let magic_path_regex = Str.regexp "^/data" let find_file_args args = (* gross liberties, we assume for now that any arg with a doubeldash might be a file. though ultimately this will have to rely on convention, annotation, or guesses, so it's not exactly that bad, just limited as is. I imagine we can have a common prefix for all files, like example.com should be used for domains. *) - List.filter + List.filter_map (fun arg -> - let regex = Str.regexp "^/data" in - Str.string_match regex arg 0) + match Str.string_match magic_path_regex arg 0 with + | false -> None + | true -> ( + match Fpath.of_string arg with Error _e -> None | Ok r -> Some r)) args let parse_python_command args = @@ -69,4 +80,4 @@ let of_string command_str = | name :: args -> Some (parse_generic_commmand (name :: args)) let name c = c.name -let file_args c = c.file_args +let file_args c : Fpath.t list = c.file_args diff --git a/src/lib/dotrenderer.ml b/src/lib/dotrenderer.ml index 9878215d..99e81162 100644 --- a/src/lib/dotrenderer.ml +++ b/src/lib/dotrenderer.ml @@ -1,6 +1,5 @@ open Astring -module DataFile = Ast.DataFile -module DataFileSet = Set.Make (DataFile) +module DatafileSet = Set.Make (Datafile) (* In theory this could be a recursive structure that attempts to maintain the heirarchy of the document, markdown doesn't enforce that the section levels @@ -17,12 +16,12 @@ let render_command_to_dot ppf command = List.iter (fun datafile -> let label = - match DataFile.subpath datafile with + match Datafile.subpath datafile with | Some x -> Printf.sprintf ",label=\"%s\"" x | None -> "" in Format.fprintf ppf "\tn%d->n%d[penwidth=\"2.0\"%s];\n" - (DataFile.id datafile) process_index label) + (Datafile.id datafile) process_index label) (Ast.Leaf.inputs command); let shape = match Ast.Leaf.command_style command with @@ -34,13 +33,14 @@ let render_command_to_dot ppf command = List.iter (fun datafile -> Format.fprintf ppf "\tn%d->n%d[penwidth=\"2.0\"];\n" process_index - (DataFile.id datafile)) + (Datafile.id datafile)) (Ast.Leaf.outputs command); Format.fprintf ppf "\n" let datafile_to_dot ppf datafile = Format.fprintf ppf "\tn%d[shape=\"cylinder\",label=\"%s\"];\n" - (DataFile.id datafile) (DataFile.path datafile) + (Datafile.id datafile) + (Fpath.to_string (Datafile.path datafile)) let render_ast_to_dot ppf ast : unit = Format.fprintf ppf "digraph{\n"; @@ -54,8 +54,8 @@ let render_ast_to_dot ppf ast : unit = List.concat [ inputs; outputs ]) commands) ast - |> DataFileSet.of_list - |> DataFileSet.iter (datafile_to_dot ppf); + |> DatafileSet.of_list + |> DatafileSet.iter (datafile_to_dot ppf); List.iteri (fun i group -> diff --git a/src/lib/dune b/src/lib/dune index 87b73683..950be284 100644 --- a/src/lib/dune +++ b/src/lib/dune @@ -1,6 +1,16 @@ (library (name shark) - (libraries eio cohttp-eio str yaml lwt_eio cmarkit obuilder htmlit routes) + (libraries + eio + cohttp-eio + str + yaml + lwt_eio + cmarkit + obuilder + htmlit + routes + fpath) (preprocess (pps ppx_sexp_conv))) diff --git a/src/lib/frontmatter.ml b/src/lib/frontmatter.ml index f2a78cf1..17349682 100644 --- a/src/lib/frontmatter.ml +++ b/src/lib/frontmatter.ml @@ -1,6 +1,19 @@ -type t = { variables : (string * string list) list } +open Sexplib.Conv -let empty = { variables = [] } +type path = Fpath.t + +let path_of_sexp = function + | Ppx_sexp_conv_lib.Sexp.Atom s -> Fpath.v s + | List _ -> Fpath.v "" + +let sexp_of_path v = Ppx_sexp_conv_lib.Sexp.Atom (Fpath.to_string v) + +type t = { variables : (string * string list) list; inputs : path list } +[@@deriving sexp] + +let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) +let v variables inputs = { variables; inputs } +let empty = { variables = []; inputs = [] } let yaml_to_string = function | `String s -> s @@ -18,10 +31,24 @@ let string_list_of_yaml = function let of_yaml = function | `O assoc -> let vars = List.map (fun (k, v) -> (k, string_list_of_yaml v)) assoc in - { variables = vars } + let raw_inputs = + match List.assoc_opt "inputs" vars with None -> [] | Some v -> v + in + let inputs = + List.map + (fun p -> + match Fpath.of_string p with + | Error e -> + failwith + (Printf.sprintf "Malformed input path %s" + (match e with `Msg x -> x)) + | Ok p -> Fpath.normalize p) + raw_inputs + in + { variables = vars; inputs } + | `Null -> empty | _ -> failwith "Malformed variables in markdown frontmatter" let of_string s = String.trim s |> Yaml.of_string |> Result.map of_yaml - -let inputs t = - match List.assoc_opt "inputs" t.variables with None -> [] | Some v -> v +let variables t = t.variables +let inputs t = t.inputs diff --git a/src/test/dune b/src/test/dune index 24146953..20e0933d 100644 --- a/src/test/dune +++ b/src/test/dune @@ -1,3 +1,3 @@ (test (name test) - (libraries alcotest shark)) + (libraries alcotest shark fpath)) diff --git a/src/test/test.ml b/src/test/test.ml index f526f09a..cd326dbe 100644 --- a/src/test/test.ml +++ b/src/test/test.ml @@ -103,80 +103,11 @@ module CommandParsing = struct ] end -module DataFile = struct - (* let datafile = Alcotest.of_pp Shark.Ast.DataFile.pp *) - - let test_basic_file_path () = - let testcase = "/data/test/example.tif" in - let test = Shark.Ast.DataFile.v 42 testcase in - Alcotest.(check int) "Same id" 42 (Shark.Ast.DataFile.id test); - Alcotest.(check string) "Same path" testcase (Shark.Ast.DataFile.path test); - Alcotest.(check (option string)) - "No subpath" None - (Shark.Ast.DataFile.subpath test); - Alcotest.(check bool) - "Isn't wildcard" false - (Shark.Ast.DataFile.is_wildcard test); - Alcotest.(check bool) "Isn't dir" false (Shark.Ast.DataFile.is_dir test) - - let test_basic_dir_noncanonical () = - let testcase = "/data/test" in - let test = Shark.Ast.DataFile.v 42 testcase in - Alcotest.(check int) "Same id" 42 (Shark.Ast.DataFile.id test); - Alcotest.(check string) - "Extended path" "/data/test/" - (Shark.Ast.DataFile.path test); - Alcotest.(check (option string)) - "No subpath" None - (Shark.Ast.DataFile.subpath test); - Alcotest.(check bool) - "Isn't wildcard" false - (Shark.Ast.DataFile.is_wildcard test); - Alcotest.(check bool) "Is dir" true (Shark.Ast.DataFile.is_dir test) - - let test_basic_dir_canonical () = - let testcase = "/data/test/" in - let test = Shark.Ast.DataFile.v 42 testcase in - Alcotest.(check int) "Same id" 42 (Shark.Ast.DataFile.id test); - Alcotest.(check string) "Same path" testcase (Shark.Ast.DataFile.path test); - Alcotest.(check (option string)) - "No subpath" None - (Shark.Ast.DataFile.subpath test); - Alcotest.(check bool) - "Isn't wildcard" false - (Shark.Ast.DataFile.is_wildcard test); - Alcotest.(check bool) "Is dir" true (Shark.Ast.DataFile.is_dir test) - - let test_basic_dir_canonical_with_wildcard () = - let testcase = "/data/test/" in - let test = Shark.Ast.DataFile.v ~subpath:"*" 42 testcase in - Alcotest.(check int) "Same id" 42 (Shark.Ast.DataFile.id test); - Alcotest.(check string) - "Extended path" "/data/test/" - (Shark.Ast.DataFile.path test); - Alcotest.(check (option string)) - "No subpath" None - (Shark.Ast.DataFile.subpath test); - Alcotest.(check bool) - "Is wildcard" true - (Shark.Ast.DataFile.is_wildcard test); - Alcotest.(check bool) "Is dir" true (Shark.Ast.DataFile.is_dir test) - - let tests = - [ - ("Basic file", `Quick, test_basic_file_path); - ("Non-canonical dir", `Quick, test_basic_dir_noncanonical); - ("Canonical dir", `Quick, test_basic_dir_canonical); - ( "Canonical dir with wildcard", - `Quick, - test_basic_dir_canonical_with_wildcard ); - ] -end - let () = Alcotest.run "shark" [ ("basic", Basic.tests); ("command parsing", CommandParsing.tests); - ("datafile modeling", DataFile.tests); + ("datafile modeling", Datafile.tests); + ("frontmatter parsing", Frontmatter.tests); ]