Skip to content

Commit

Permalink
Merge pull request #14 from quantifyearth/mwd-detect-subpaths-for-dirs
Browse files Browse the repository at this point in the history
Detect subpaths for dirs
  • Loading branch information
mdales authored Apr 2, 2024
2 parents cc524e7 + 4a65736 commit 1d9ceb8
Show file tree
Hide file tree
Showing 15 changed files with 283 additions and 61 deletions.
74 changes: 53 additions & 21 deletions src/lib/ast.ml
Original file line number Diff line number Diff line change
@@ -1,31 +1,34 @@
module DataFile = struct
type t = { id : int; path : string }

let create id path = { id; path }
let id d = d.id
let path d = d.path
let compare a b = Int.compare a.id b.id
end
open Sexplib.Conv

module Leaf = struct
type style = Command | Map [@@deriving sexp]

type t = {
id : int;
command : Command.t;
inputs : DataFile.t list;
outputs : DataFile.t list;
style : style;
inputs : Datafile.t list;
outputs : Datafile.t list;
}
[@@deriving sexp]

let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t)

let v id command style inputs outputs =
{ id; command; style; inputs; outputs }

let create id command inputs outputs = { id; command; inputs; outputs }
let command o = o.command
let inputs o = o.inputs
let outputs o = o.outputs
let command_style o = o.style
let id o = o.id
end

module CommandGroup = struct
type t = { name : string; children : Leaf.t list }
type t = { name : string; children : Leaf.t list } [@@deriving sexp]

let create name children = { name; children }
let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t)
let v name children = { name; children }
let name g = g.name
let children g = g.children
end
Expand All @@ -35,10 +38,33 @@ type t = CommandGroup.t list

let to_list cg = cg

let find_matching_datafile datafile_map fpath =
match List.assoc_opt fpath datafile_map with
| Some p -> Some p
| None ->
(* No full match, but can we find a prefix dir *)
List.fold_left
(fun acc (_ipath, df) ->
match acc with
| Some x -> Some x
| None -> (
match Datafile.is_dir df with
| false -> None
| true -> (
match Fpath.rem_prefix (Datafile.path df) fpath with
| None -> None
| Some subpath ->
Some
(Datafile.v ~subpath:(Fpath.to_string subpath)
(Datafile.id df) (Datafile.path df)))))
None datafile_map

let order_command_list metadata command_groups =
let input_map =
List.mapi
(fun i f -> (f, DataFile.create i f))
(fun i f ->
let df = Datafile.v i f in
(f, df))
(Frontmatter.inputs metadata)
in
let counter = ref (List.length input_map) in
Expand All @@ -56,36 +82,42 @@ let order_command_list metadata command_groups =
(* TODO: dedup *)
let inputs =
List.filter_map
(fun path -> List.assoc_opt path datafile_map)
(fun p -> find_matching_datafile datafile_map p)
file_args
in

let style : Leaf.style =
match List.exists Datafile.is_wildcard inputs with
| true -> Map
| false -> Command
in

let outputs =
List.filter_map
(fun path ->
match List.assoc_opt path datafile_map with
(fun fpath ->
match find_matching_datafile datafile_map fpath with
| None ->
let id = !counter in
counter := !counter + 1;
Some (DataFile.create id path)
Some (Datafile.v id fpath)
| Some _ -> None)
file_args
in

let x = Leaf.create !counter hd inputs outputs in
let x = Leaf.v !counter hd style inputs outputs in
counter := !counter + 1;
let updated_map, rest =
loop tl
(List.concat
[
datafile_map;
List.map (fun o -> (DataFile.path o, o)) outputs;
List.map (fun o -> (Datafile.path o, o)) outputs;
])
in
(updated_map, x :: rest)
in
let updated_map, commands = loop commands input_map in
(updated_map, CommandGroup.create name commands))
(updated_map, CommandGroup.v name commands))
input_map command_groups
in
ordered
29 changes: 12 additions & 17 deletions src/lib/ast.mli
Original file line number Diff line number Diff line change
Expand Up @@ -4,42 +4,37 @@ The AST is the logical representation of the workflow described in a
sharkdown file, including the structure of groups (aka basic blocks
in PL, but block is an overloaded term in this context). *)

module DataFile : sig
(** A named file/directory that acts as an input and/or output of a process. *)

type t

val create : int -> string -> t
(** Creates a new datafile with an integer ID and a file path. *)

val id : t -> int
val path : t -> string
val compare : t -> t -> int
end

module Leaf : sig
(** A Leaf is an atomic exection unit the in the pipeline graph. *)

type style = Command | Map
type t

val create : int -> Command.t -> DataFile.t list -> DataFile.t list -> t
val v : int -> Command.t -> style -> Datafile.t list -> Datafile.t list -> t
(** Creats a new leaf node, taking an integer identifier, the command to execute
and a list of inputs and a list of outputs. *)

val pp : t Fmt.t
(** A pretty printer for leaves. *)

val id : t -> int
val command : t -> Command.t
val inputs : t -> DataFile.t list
val outputs : t -> DataFile.t list
val command_style : t -> style
val inputs : t -> Datafile.t list
val outputs : t -> Datafile.t list
end

module CommandGroup : sig
(** A named basic-block in PL terms. *)

type t

val create : string -> Leaf.t list -> t
val v : string -> Leaf.t list -> t
(** Creates a command group made up of a series of leaf nodes and given a name. *)

val pp : t Fmt.t
(** A pretty printer for command groups. *)

val name : t -> string
val children : t -> Leaf.t list
end
Expand Down
21 changes: 16 additions & 5 deletions src/lib/command.ml
Original file line number Diff line number Diff line change
@@ -1,20 +1,31 @@
open Astring
open Sexplib.Conv

type t = { name : string; args : string list; file_args : string list }
type path = Fpath.t

let path_of_sexp = function
| Ppx_sexp_conv_lib.Sexp.Atom s -> Fpath.v s
| List _ -> Fpath.v ""

let sexp_of_path v = Ppx_sexp_conv_lib.Sexp.Atom (Fpath.to_string v)

type t = { name : string; args : string list; file_args : path list }
[@@deriving sexp]

let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t)
let v ~name ~args ~file_args = { name; args; file_args }
let magic_path_regex = Str.regexp "^/data"

let find_file_args args =
(* gross liberties, we assume for now that any arg with a doubeldash might be a file. though ultimately this
will have to rely on convention, annotation, or guesses, so it's not exactly that bad, just limited as is. I imagine
we can have a common prefix for all files, like example.com should be used for domains. *)
List.filter
List.filter_map
(fun arg ->
let regex = Str.regexp "^/data" in
Str.string_match regex arg 0)
match Str.string_match magic_path_regex arg 0 with
| false -> None
| true -> (
match Fpath.of_string arg with Error _e -> None | Ok r -> Some r))
args

let parse_python_command args =
Expand Down Expand Up @@ -69,4 +80,4 @@ let of_string command_str =
| name :: args -> Some (parse_generic_commmand (name :: args))

let name c = c.name
let file_args c = c.file_args
let file_args c : Fpath.t list = c.file_args
10 changes: 10 additions & 0 deletions src/lib/command.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
type t [@@deriving sexp]

val v : name:string -> args:string list -> file_args:Fpath.t list -> t

val pp : t Fmt.t
(** A pretty printer for blocks. *)

val of_string : string -> t option
val name : t -> string
val file_args : t -> Fpath.t list
31 changes: 31 additions & 0 deletions src/lib/datafile.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
open Astring
open Sexplib.Conv

type path = Fpath.t

let path_of_sexp = function
| Ppx_sexp_conv_lib.Sexp.Atom s -> Fpath.v s
| List _ -> Fpath.v ""

let sexp_of_path v = Ppx_sexp_conv_lib.Sexp.Atom (Fpath.to_string v)

type t = { id : int; path : path; subpath : string option; wildcard : bool }
[@@deriving sexp]

let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t)

let v ?subpath id path =
let wildcard =
match subpath with
| None -> false
| Some p -> Char.equal p.[String.length p - 1] '*'
in
if wildcard then { id; path; subpath = None; wildcard = true }
else { id; path; subpath; wildcard = false }

let id d = d.id
let path d = d.path
let subpath d = d.subpath
let is_wildcard d = d.wildcard
let compare a b = Int.compare a.id b.id
let is_dir d = Fpath.is_dir_path d.path
16 changes: 16 additions & 0 deletions src/lib/datafile.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
(** A named file/directory that acts as an input and/or output of a process. *)

type t [@@deriving sexp]

val v : ?subpath:string -> int -> Fpath.t -> t
(** Creates a new datafile with an integer ID and a file path. *)

val pp : t Fmt.t
(** A pretty printer for datafiles. *)

val id : t -> int
val path : t -> Fpath.t
val subpath : t -> string option
val is_wildcard : t -> bool
val is_dir : t -> bool
val compare : t -> t -> int
28 changes: 19 additions & 9 deletions src/lib/dotrenderer.ml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
open Astring
module DataFile = Ast.DataFile
module DataFileSet = Set.Make (DataFile)
module DatafileSet = Set.Make (Datafile)

(* In theory this could be a recursive structure that attempts to maintain the
heirarchy of the document, markdown doesn't enforce that the section levels
Expand All @@ -16,21 +15,32 @@ let render_command_to_dot ppf command =
let process_index = Ast.Leaf.id command in
List.iter
(fun datafile ->
Format.fprintf ppf "\tn%d->n%d[penwidth=\"2.0\"];\n"
(DataFile.id datafile) process_index)
let label =
match Datafile.subpath datafile with
| Some x -> Printf.sprintf ",label=\"%s\"" x
| None -> ""
in
Format.fprintf ppf "\tn%d->n%d[penwidth=\"2.0\"%s];\n"
(Datafile.id datafile) process_index label)
(Ast.Leaf.inputs command);
Format.fprintf ppf "\tn%d[shape=\"%s\",label=\"%s\"];\n" process_index "box"
let shape =
match Ast.Leaf.command_style command with
| Command -> "box"
| Map -> "box3d"
in
Format.fprintf ppf "\tn%d[shape=\"%s\",label=\"%s\"];\n" process_index shape
(Uri.pct_encode (Command.name (Ast.Leaf.command command)));
List.iter
(fun datafile ->
Format.fprintf ppf "\tn%d->n%d[penwidth=\"2.0\"];\n" process_index
(DataFile.id datafile))
(Datafile.id datafile))
(Ast.Leaf.outputs command);
Format.fprintf ppf "\n"

let datafile_to_dot ppf datafile =
Format.fprintf ppf "\tn%d[shape=\"cylinder\",label=\"%s\"];\n"
(DataFile.id datafile) (DataFile.path datafile)
(Datafile.id datafile)
(Fpath.to_string (Datafile.path datafile))

let render_ast_to_dot ppf ast : unit =
Format.fprintf ppf "digraph{\n";
Expand All @@ -44,8 +54,8 @@ let render_ast_to_dot ppf ast : unit =
List.concat [ inputs; outputs ])
commands)
ast
|> DataFileSet.of_list
|> DataFileSet.iter (datafile_to_dot ppf);
|> DatafileSet.of_list
|> DatafileSet.iter (datafile_to_dot ppf);

List.iteri
(fun i group ->
Expand Down
12 changes: 11 additions & 1 deletion src/lib/dune
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
(library
(name shark)
(libraries eio cohttp-eio str yaml lwt_eio cmarkit obuilder htmlit routes)
(libraries
eio
cohttp-eio
str
yaml
lwt_eio
cmarkit
obuilder
htmlit
routes
fpath)
(preprocess
(pps ppx_sexp_conv)))

Expand Down
Loading

0 comments on commit 1d9ceb8

Please sign in to comment.