Skip to content

Commit

Permalink
Move to Fpath
Browse files Browse the repository at this point in the history
  • Loading branch information
mdales committed Apr 2, 2024
1 parent dd8da09 commit 649de57
Show file tree
Hide file tree
Showing 8 changed files with 91 additions and 177 deletions.
80 changes: 17 additions & 63 deletions src/lib/ast.ml
Original file line number Diff line number Diff line change
@@ -1,57 +1,14 @@
open Astring
open Sexplib.Conv

module DataFile = struct
type t = { id : int; path : string; subpath : string option; wildcard : bool }
[@@deriving sexp]

let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t)

let v ?subpath id path =
let wildcard =
match subpath with
| None -> false
| Some p -> Char.equal p.[String.length p - 1] '*'
in
if wildcard then
{
id;
path = String.Sub.to_string (String.sub ~stop:((String.length path) - 1) path);
subpath = None;
wildcard = true;
}
else { id; path; subpath; wildcard = false }

let id d = d.id

let path d =
match Filename.extension d.path = "" with
| false -> d.path
| true ->
let p = d.path in
match Char.equal p.[String.length p - 1] '/' with
| true -> p
| false -> p ^ "/"

let path_nc d = d.path
let subpath d = d.subpath
let is_wildcard d = d.wildcard
let compare a b = Int.compare a.id b.id

let is_dir d =
(* a little hacky, we probably need to do something in the sharkdown here *)
Filename.extension d.path = ""
end

module Leaf = struct
type style = Command | Map [@@deriving sexp]

type t = {
id : int;
command : Command.t;
style : style;
inputs : DataFile.t list;
outputs : DataFile.t list;
inputs : Datafile.t list;
outputs : Datafile.t list;
}
[@@deriving sexp]

Expand Down Expand Up @@ -81,35 +38,32 @@ type t = CommandGroup.t list

let to_list cg = cg

let find_matching_datafile datafile_map path =
match List.assoc_opt path datafile_map with
let find_matching_datafile datafile_map fpath =
match List.assoc_opt fpath datafile_map with
| Some p -> Some p
| None ->
(* No full match, but can we find a prefix dir *)
List.fold_left
(fun acc (ipath, df) ->
(fun acc (_ipath, df) ->
match acc with
| Some x -> Some x
| None -> (
match DataFile.is_dir df with
match Datafile.is_dir df with
| false -> None
| true -> (
match String.is_prefix ~affix:(DataFile.path df) path with
| true ->
match Fpath.rem_prefix (Datafile.path df) fpath with
| None -> None
| Some subpath ->
Some
(DataFile.v
~subpath:
(String.Sub.to_string
(String.sub ~start:(String.length ipath) path))
(DataFile.id df) (DataFile.path df))
| false -> None)))
(Datafile.v ~subpath:(Fpath.to_string subpath)
(Datafile.id df) (Datafile.path df)))))
None datafile_map

let order_command_list metadata command_groups =
let input_map =
List.mapi
(fun i f ->
let df = DataFile.v i f in
let df = Datafile.v i f in
(f, df))
(Frontmatter.inputs metadata)
in
Expand All @@ -133,19 +87,19 @@ let order_command_list metadata command_groups =
in

let style : Leaf.style =
match List.exists DataFile.is_wildcard inputs with
match List.exists Datafile.is_wildcard inputs with
| true -> Map
| false -> Command
in

let outputs =
List.filter_map
(fun path ->
match find_matching_datafile datafile_map path with
(fun fpath ->
match find_matching_datafile datafile_map fpath with
| None ->
let id = !counter in
counter := !counter + 1;
Some (DataFile.v id path)
Some (Datafile.v id fpath)
| Some _ -> None)
file_args
in
Expand All @@ -157,7 +111,7 @@ let order_command_list metadata command_groups =
(List.concat
[
datafile_map;
List.map (fun o -> (DataFile.path_nc o, o)) outputs;
List.map (fun o -> (Datafile.path o, o)) outputs;
])
in
(updated_map, x :: rest)
Expand Down
25 changes: 3 additions & 22 deletions src/lib/ast.mli
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,13 @@ The AST is the logical representation of the workflow described in a
sharkdown file, including the structure of groups (aka basic blocks
in PL, but block is an overloaded term in this context). *)

module DataFile : sig
(** A named file/directory that acts as an input and/or output of a process. *)

type t

val v : ?subpath:string -> int -> string -> t
(** Creates a new datafile with an integer ID and a file path. *)

val pp : t Fmt.t
(** A pretty printer for datafiles. *)

val id : t -> int
val path : t -> string
val subpath : t -> string option
val is_wildcard : t -> bool
val is_dir : t -> bool
val compare : t -> t -> int
end

module Leaf : sig
(** A Leaf is an atomic exection unit the in the pipeline graph. *)

type style = Command | Map
type t

val v : int -> Command.t -> style -> DataFile.t list -> DataFile.t list -> t
val v : int -> Command.t -> style -> Datafile.t list -> Datafile.t list -> t
(** Creats a new leaf node, taking an integer identifier, the command to execute
and a list of inputs and a list of outputs. *)

Expand All @@ -39,8 +20,8 @@ module Leaf : sig
val id : t -> int
val command : t -> Command.t
val command_style : t -> style
val inputs : t -> DataFile.t list
val outputs : t -> DataFile.t list
val inputs : t -> Datafile.t list
val outputs : t -> Datafile.t list
end

module CommandGroup : sig
Expand Down
21 changes: 16 additions & 5 deletions src/lib/command.ml
Original file line number Diff line number Diff line change
@@ -1,20 +1,31 @@
open Astring
open Sexplib.Conv

type t = { name : string; args : string list; file_args : string list }
type path = Fpath.t

let path_of_sexp = function
| Ppx_sexp_conv_lib.Sexp.Atom s -> Fpath.v s
| List _ -> Fpath.v ""

let sexp_of_path v = Ppx_sexp_conv_lib.Sexp.Atom (Fpath.to_string v)

type t = { name : string; args : string list; file_args : path list }
[@@deriving sexp]

let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t)
let v ~name ~args ~file_args = { name; args; file_args }
let magic_path_regex = Str.regexp "^/data"

let find_file_args args =
(* gross liberties, we assume for now that any arg with a doubeldash might be a file. though ultimately this
will have to rely on convention, annotation, or guesses, so it's not exactly that bad, just limited as is. I imagine
we can have a common prefix for all files, like example.com should be used for domains. *)
List.filter
List.filter_map
(fun arg ->
let regex = Str.regexp "^/data" in
Str.string_match regex arg 0)
match Str.string_match magic_path_regex arg 0 with
| false -> None
| true -> (
match Fpath.of_string arg with Error _e -> None | Ok r -> Some r))
args

let parse_python_command args =
Expand Down Expand Up @@ -69,4 +80,4 @@ let of_string command_str =
| name :: args -> Some (parse_generic_commmand (name :: args))

let name c = c.name
let file_args c = c.file_args
let file_args c : Fpath.t list = c.file_args
16 changes: 8 additions & 8 deletions src/lib/dotrenderer.ml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
open Astring
module DataFile = Ast.DataFile
module DataFileSet = Set.Make (DataFile)
module DatafileSet = Set.Make (Datafile)

(* In theory this could be a recursive structure that attempts to maintain the
heirarchy of the document, markdown doesn't enforce that the section levels
Expand All @@ -17,12 +16,12 @@ let render_command_to_dot ppf command =
List.iter
(fun datafile ->
let label =
match DataFile.subpath datafile with
match Datafile.subpath datafile with
| Some x -> Printf.sprintf ",label=\"%s\"" x
| None -> ""
in
Format.fprintf ppf "\tn%d->n%d[penwidth=\"2.0\"%s];\n"
(DataFile.id datafile) process_index label)
(Datafile.id datafile) process_index label)
(Ast.Leaf.inputs command);
let shape =
match Ast.Leaf.command_style command with
Expand All @@ -34,13 +33,14 @@ let render_command_to_dot ppf command =
List.iter
(fun datafile ->
Format.fprintf ppf "\tn%d->n%d[penwidth=\"2.0\"];\n" process_index
(DataFile.id datafile))
(Datafile.id datafile))
(Ast.Leaf.outputs command);
Format.fprintf ppf "\n"

let datafile_to_dot ppf datafile =
Format.fprintf ppf "\tn%d[shape=\"cylinder\",label=\"%s\"];\n"
(DataFile.id datafile) (DataFile.path datafile)
(Datafile.id datafile)
(Fpath.to_string (Datafile.path datafile))

let render_ast_to_dot ppf ast : unit =
Format.fprintf ppf "digraph{\n";
Expand All @@ -54,8 +54,8 @@ let render_ast_to_dot ppf ast : unit =
List.concat [ inputs; outputs ])
commands)
ast
|> DataFileSet.of_list
|> DataFileSet.iter (datafile_to_dot ppf);
|> DatafileSet.of_list
|> DatafileSet.iter (datafile_to_dot ppf);

List.iteri
(fun i group ->
Expand Down
12 changes: 11 additions & 1 deletion src/lib/dune
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
(library
(name shark)
(libraries eio cohttp-eio str yaml lwt_eio cmarkit obuilder htmlit routes)
(libraries
eio
cohttp-eio
str
yaml
lwt_eio
cmarkit
obuilder
htmlit
routes
fpath)
(preprocess
(pps ppx_sexp_conv)))

Expand Down
39 changes: 33 additions & 6 deletions src/lib/frontmatter.ml
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
type t = { variables : (string * string list) list }
open Sexplib.Conv

let empty = { variables = [] }
type path = Fpath.t

let path_of_sexp = function
| Ppx_sexp_conv_lib.Sexp.Atom s -> Fpath.v s
| List _ -> Fpath.v ""

let sexp_of_path v = Ppx_sexp_conv_lib.Sexp.Atom (Fpath.to_string v)

type t = { variables : (string * string list) list; inputs : path list }
[@@deriving sexp]

let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t)
let v variables inputs = { variables; inputs }
let empty = { variables = []; inputs = [] }

let yaml_to_string = function
| `String s -> s
Expand All @@ -18,10 +31,24 @@ let string_list_of_yaml = function
let of_yaml = function
| `O assoc ->
let vars = List.map (fun (k, v) -> (k, string_list_of_yaml v)) assoc in
{ variables = vars }
let raw_inputs =
match List.assoc_opt "inputs" vars with None -> [] | Some v -> v
in
let inputs =
List.map
(fun p ->
match Fpath.of_string p with
| Error e ->
failwith
(Printf.sprintf "Malformed input path %s"
(match e with `Msg x -> x))
| Ok p -> Fpath.normalize p)
raw_inputs
in
{ variables = vars; inputs }
| `Null -> empty
| _ -> failwith "Malformed variables in markdown frontmatter"

let of_string s = String.trim s |> Yaml.of_string |> Result.map of_yaml

let inputs t =
match List.assoc_opt "inputs" t.variables with None -> [] | Some v -> v
let variables t = t.variables
let inputs t = t.inputs
2 changes: 1 addition & 1 deletion src/test/dune
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
(test
(name test)
(libraries alcotest shark))
(libraries alcotest shark fpath))
Loading

0 comments on commit 649de57

Please sign in to comment.