From 03d4733596881d1d143455df09aff4e21d206680 Mon Sep 17 00:00:00 2001 From: Patrick Ferris Date: Wed, 14 Aug 2024 10:01:57 +0100 Subject: [PATCH] Refactor AST into standalone library --- .ocamlformat | 2 +- Dockerfile | 3 +- shark-ast.opam | 39 ++ shark.opam | 2 + src/bin/dune | 1 + src/bin/main.ml | 23 +- src/lib/ast/ast.ml | 357 +----------------- src/lib/ast/ast.mli | 48 --- src/lib/ast/block.ml | 180 +++++++++ src/lib/ast/block.mli | 64 ++++ src/lib/{dotrenderer.ml => ast/dot.ml} | 14 +- src/lib/ast/dune | 6 + src/lib/{frontmatter.ml => ast/metadata.ml} | 9 +- src/lib/{frontmatter.mli => ast/metadata.mli} | 2 +- src/lib/ast/shark_ast.ml | 42 +++ src/lib/ast/shark_ast.mli | 41 ++ src/lib/block.ml | 169 +-------- src/lib/block.mli | 65 +--- src/lib/dune | 4 +- src/lib/md.ml | 40 +- src/lib/md.mli | 3 +- src/lib/md_to_ast.ml | 282 ++++++++++++++ src/lib/md_to_ast.mli | 6 + src/lib/run_block.ml | 1 + src/lib/run_block.mli | 2 + src/lib/server/shark_server.ml | 5 +- src/test/ast.ml | 22 +- src/test/block.ml | 42 ++- src/test/command.ml | 44 ++- src/test/datafile.ml | 49 ++- src/test/expect/test_dot.ml | 4 +- src/test/frontmatter.ml | 16 +- src/test/leaf.ml | 76 ++-- src/test/run_block.ml | 34 +- 34 files changed, 884 insertions(+), 813 deletions(-) create mode 100644 shark-ast.opam delete mode 100644 src/lib/ast/ast.mli create mode 100644 src/lib/ast/block.ml create mode 100644 src/lib/ast/block.mli rename src/lib/{dotrenderer.ml => ast/dot.ml} (90%) create mode 100644 src/lib/ast/dune rename src/lib/{frontmatter.ml => ast/metadata.ml} (89%) rename src/lib/{frontmatter.mli => ast/metadata.mli} (85%) create mode 100644 src/lib/ast/shark_ast.ml create mode 100644 src/lib/ast/shark_ast.mli create mode 100644 src/lib/md_to_ast.ml create mode 100644 src/lib/md_to_ast.mli diff --git a/.ocamlformat b/.ocamlformat index dbfb7bbf3..a587feadb 100644 --- a/.ocamlformat +++ b/.ocamlformat @@ -1 +1 @@ -version=0.26.1 \ No newline at end of file +version=0.26.2 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 5cf599309..fce1e073b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,8 +4,9 @@ RUN sudo apt-get update \ RUN sudo ln -f /usr/bin/opam-2.1 /usr/bin/opam USER 1000:1000 RUN cd ~/opam-repository && git pull origin -q master && git reset --hard 3505e93828fa76861e82d09d92a37a6272d46da5 && opam update -COPY --chown=opam shark.opam /src/ +COPY --chown=opam shark.opam shark-ast.opam /src/ WORKDIR /src +RUN opam pin . -yn RUN opam install -y --deps-only --with-test . ADD --chown=opam . . RUN opam exec -- dune build @runtest @install @check diff --git a/shark-ast.opam b/shark-ast.opam new file mode 100644 index 000000000..0d0ccf878 --- /dev/null +++ b/shark-ast.opam @@ -0,0 +1,39 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +homepage:"https://github.com/RyanGibb/shark" +bug-reports:"https://github.com/RyanGibb/shark/issues" +synopsis:"Shhhhhark!" +authors:[ + "Ryan Gibb" + "Patrick Ferris" + "Michael Dales" +] +# Didn't want to burden you @Ryan! +maintainer:"patrick@sirref.org" +depends: [ + "dune" {>= "3.3"} + "ocaml" + "obuilder-spec" + "yaml" + + "digestif" + + "patdiff" {with-test} + "mdx" {with-test} + "alcotest" {with-test} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] + diff --git a/shark.opam b/shark.opam index d354f1ca7..28d866c9f 100644 --- a/shark.opam +++ b/shark.opam @@ -12,6 +12,8 @@ maintainer:"patrick@sirref.org" depends: [ "dune" {>= "3.3"} "ocaml" + + "shark-ast" # "obuilder" "obuilder-spec" "cmarkit" diff --git a/src/bin/dune b/src/bin/dune index 2f79c0b39..bb03a8a4c 100644 --- a/src/bin/dune +++ b/src/bin/dune @@ -1,6 +1,7 @@ (executable (name main) (public_name shark) + (package shark) (libraries eio_posix shark diff --git a/src/bin/main.ml b/src/bin/main.ml index aa1b7f66f..20288dd1b 100644 --- a/src/bin/main.ml +++ b/src/bin/main.ml @@ -1,3 +1,5 @@ +open Shark_ast + let ( / ) = Filename.concat module Sandbox = Obuilder.Native_sandbox @@ -142,7 +144,7 @@ let md ~fs ~net ~domain_mgr ~proc () no_run store conf file port fetcher jobs let file_path = Eio.Path.(fs / file) in let template_markdown = Eio.Path.load file_path in let ast, markdown = - Shark.Ast.of_sharkdown ~concrete_paths:import_map template_markdown + Shark.Md_to_ast.of_sharkdown ~concrete_paths:import_map template_markdown in let doc = Cmarkit.Doc.of_string markdown in @@ -154,7 +156,7 @@ let md ~fs ~net ~domain_mgr ~proc () no_run store conf file port fetcher jobs let f ~build_cache code_block block = if no_run then (code_block, `Continue) else - match Shark.Block.kind block with + match Ast.Block.Raw.kind block with | `Import -> ( (* First we translate the import statement to a build block *) let uid = string_of_int !import_uid in @@ -164,11 +166,12 @@ let md ~fs ~net ~domain_mgr ~proc () no_run store conf file port fetcher jobs (* Now we build the block *) (* Import block digests need to be mapped to this build hash *) let hb = - match Shark.Ast.find_ast_block_from_shark_block ast block with + match Ast.find_block_from_raw_block ast block with | Some hb -> hb | None -> Logs.info (fun f -> - f "Failed to find the astblock for %a" Shark.Block.pp block); + f "Failed to find the astblock for %a" Ast.Block.Raw.pp + block); failwith "Block not found" in let res = @@ -181,9 +184,9 @@ let md ~fs ~net ~domain_mgr ~proc () no_run store conf file port fetcher jobs | `Stop msg -> Error (msg, cb) | `Continue -> Ok - ( Shark.Block.alias blk, + ( Ast.Block.Raw.alias blk, option_get ~msg:"Block hash for import" - (Shark.Block.hash blk), + (Ast.Block.Raw.hash blk), cb ) in match res with @@ -205,9 +208,9 @@ let md ~fs ~net ~domain_mgr ~proc () no_run store conf file port fetcher jobs | `Stop msg -> Error (msg, cb) | `Continue -> Ok - ( Shark.Block.alias blk, + ( Ast.Block.Raw.alias blk, option_get ~msg:"Block hash for build" - (Shark.Block.hash blk), + (Ast.Block.Raw.hash blk), cb ) in match res with @@ -266,8 +269,8 @@ let dot ~fs () file = run_eventloop @@ fun () -> let file_path = Eio.Path.(fs / file) in let template_markdown = Eio.Path.load file_path in - let s = Shark.Dotrenderer.render ~template_markdown in - Format.pp_print_string Format.std_formatter s; + let ast, _ = Shark.Md_to_ast.of_sharkdown template_markdown in + Shark_ast.Ast.pp_dot Fmt.stdout ast; Ok () open Cmdliner diff --git a/src/lib/ast/ast.ml b/src/lib/ast/ast.ml index 39e55810e..931bbac6d 100644 --- a/src/lib/ast/ast.ml +++ b/src/lib/ast/ast.ml @@ -1,364 +1,13 @@ -open Astring open Sexplib.Conv -module DatafileSet = Set.Make (Datafile) - -module Astblock = struct - type t = { - hash : string list ref; - context : string; - block : Block.t; - commands : Leaf.t list; - } - [@@deriving sexp] - - let v context block commands = { hash = ref []; context; block; commands } - let block h = h.block - let commands h = h.commands - let context h = h.context - let digest h = Block.digest h.block - let hash h = match !(h.hash) with [] -> None | hd :: _ -> Some hd - let hashes h = !(h.hash) - let update_hash h hash = h.hash := hash :: !(h.hash) - - let io h = - let all_inputs, all_outputs = - List.fold_left - (fun acc v -> - let inputs, outputs = acc in - ( DatafileSet.union inputs (DatafileSet.of_list (Leaf.inputs v)), - DatafileSet.union outputs (DatafileSet.of_list (Leaf.outputs v)) )) - (DatafileSet.empty, DatafileSet.empty) - h.commands - in - ( DatafileSet.to_list (DatafileSet.diff all_inputs all_outputs), - DatafileSet.to_list all_outputs ) - - let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) -end - -module Section = struct - type t = { name : string; blocks : Astblock.t list } - - let v name blocks = { name; blocks } - let name s = s.name - let blocks s = s.blocks -end type block_id = int [@@deriving sexp] type t = { - nodes : (block_id * Astblock.t) list; + nodes : (block_id * Block.t) list; edges : (block_id * block_id) list; - metadata : Frontmatter.t; + metadata : Metadata.t; } [@@deriving sexp] -let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) - -(* ----- front matter parser ----- *) - -let parse_frontmatter frontmatter = - match Frontmatter.of_string frontmatter with - | Ok frontmatter -> frontmatter - | Error (`Msg m) -> failwith ("Failed to parse frontmatter: " ^ m) - -(* ----- markdown body parser ----- *) - -(* In theory this could be a recursive structure that attempts to maintain the - heirarchy of the document, markdown doesn't enforce that the section levels - make any sort of sense, so for now I'm just going to assume a single level. - - I did initially try to implement that but got a lot of complexity for little - initial benefit. *) -type section_group = { name : string; children : Block.t list } - -let parse_markdown markdown = - let doc = Cmarkit.Doc.of_string markdown in - - let block _ acc = function - | Cmarkit.Block.Heading (node, _meta) -> - let title = - Cmarkit.Block.Heading.inline node - |> Cmarkit.Inline.to_plain_text ~break_on_soft:false - |> List.map (String.concat ~sep:"") - |> String.concat ~sep:" / " - in - Cmarkit.Folder.ret ({ name = title; children = [] } :: acc) - | Cmarkit.Block.Code_block (node, _meta) -> ( - match Block.of_code_block node with - | None -> Cmarkit.Folder.default - | Some b -> ( - match Block.kind b with - | _ -> ( - match acc with - | [] -> - Cmarkit.Folder.ret - [ { name = "Top level"; children = [ b ] } ] - | hd :: tl -> - Cmarkit.Folder.ret - ({ - name = hd.name; - children = List.rev (b :: List.rev hd.children); - } - :: tl)))) - | _ -> Cmarkit.Folder.default - in - let folder = Cmarkit.Folder.make ~block () in - List.rev (Cmarkit.Folder.fold_doc folder [] doc) - |> List.filter_map (fun x -> match x.children with [] -> None | _ -> Some x) - -(* ----- internal tree building code ----- *) - -type superblock = { block : Block.t; commands : Command.t list } - -let block_to_superblock (block : Block.t) : superblock = - { - block; - commands = - Block.command_list block |> List.filter_map Command.of_string - (* |> List.filter_map (fun c -> - match Command.file_args c with [] -> None | _ -> Some c); *); - } - -let build_initial_input_map inputs = - let input_map = - List.mapi - (fun i f -> - let df = Datafile.v i f in - (f, df)) - inputs - in - let counter = ref (List.length input_map) in - (input_map, counter) - -let find_matching_datafile datafile_map fpath = - match List.assoc_opt fpath datafile_map with - | Some p -> Some p - | None -> - (* No full match, but can we find a prefix dir *) - List.fold_left - (fun acc (_ipath, df) -> - match acc with - | Some x -> Some x - | None -> ( - match Datafile.is_dir df with - | false -> None - | true -> ( - match Fpath.rem_prefix (Datafile.path df) fpath with - | None -> None - | Some subpath -> - Some - (Datafile.v ~subpath:(Fpath.to_string subpath) - (Datafile.id df) (Datafile.path df))))) - None datafile_map - -let rec pass_one_process_commands_loop counter commands datafile_map = - match commands with - | [] -> (datafile_map, []) - | hd :: tl -> - let file_args = Command.file_args hd in - - (* TODO: dedup *) - let inputs = - List.filter_map - (fun p -> find_matching_datafile datafile_map p) - file_args - in - - let style : Leaf.style = - match List.exists Datafile.is_wildcard inputs with - | true -> Map - | false -> Command - in - - let outputs = - List.filter_map - (fun fpath -> - match find_matching_datafile datafile_map fpath with - | None -> - let id = !counter in - counter := !counter + 1; - Some (Datafile.v id fpath) - | Some _ -> None) - file_args - in - - let x = Leaf.v !counter hd style inputs outputs in - counter := !counter + 1; - let updated_map, rest = - pass_one_process_commands_loop counter tl - (List.concat - [ datafile_map; List.map (fun o -> (Datafile.path o, o)) outputs ]) - in - (updated_map, x :: rest) - -let pass_one_on_list inputs section_list = - let input_map, counter = build_initial_input_map inputs in - let _, processed = - List.fold_left_map - (fun input_map section -> - let name, superblocks = section in - let updated_map, processed_section = - List.fold_left_map - (fun input_map superblock -> - let updated_map, leaves = - pass_one_process_commands_loop counter superblock.commands - input_map - in - (updated_map, Astblock.v name superblock.block leaves)) - input_map superblocks - in - (updated_map, Section.v name processed_section)) - input_map section_list - in - processed - -(* ----- public interface ----- *) - +let v ~nodes ~edges metadata = { nodes; edges; metadata } let to_list ast = List.map snd ast.nodes - -let synthesize_import_block input_map input_override_map = - let imports = - List.map - (fun (k, p) -> - let dest = List.assoc k input_map in - (p, dest)) - input_override_map - in - let codeblock = - List.fold_left - (fun acc (src, dst) -> - acc - ^ Printf.sprintf "%s %s\n" (Fpath.to_string src) (Fpath.to_string dst)) - "" imports - in - let block = Block.import codeblock in - ("imports", [ block_to_superblock block ]) - -let synthesize_unmapped_import_block input_map = - let codeblock = - List.fold_left - (fun acc (src, dst) -> - acc ^ Printf.sprintf "%s %s\n" src (Fpath.to_string dst)) - "" input_map - in - let block = Block.import codeblock in - ("imports", [ block_to_superblock block ]) - -let of_sharkdown ?concrete_paths template_markdown = - let metadata, sections, markdown = - match String.cuts ~sep:"---" template_markdown with - | [ frontmatter; markdown ] | [ ""; frontmatter; markdown ] -> - (parse_frontmatter frontmatter, parse_markdown markdown, markdown) - | [ markdown ] -> (Frontmatter.empty, parse_markdown markdown, markdown) - | _ -> failwith "Malformed frontmatter/markdown file" - in - - (* Now I have a list of sections with a name and a list of blocks. The list nature has an - implicit dependnacy order, but that's wrong, so we need to turn this into a DAG of blocks. - - 1: Markdown -> list of sections, where a section is a name and a list of blocks. - 2: -> List of sections, where a section is a list a name, and a list of block/commands per block - 3: -> build the data dependnacy graph - *) - let detailed_sections = - List.map - (fun sgroup -> - (sgroup.name, List.map block_to_superblock sgroup.children)) - sections - in - - let input_map = Frontmatter.input_map metadata in - let synthesized_sections = - match input_map with - | [] -> [] - | _ -> ( - match concrete_paths with - | Some concrete_paths -> - [ synthesize_import_block input_map concrete_paths ] - | None -> [ synthesize_unmapped_import_block input_map ]) - in - - let expanded_markdown = - List.fold_left - (fun acc (name, bs) -> - let title = Printf.sprintf "# %s\n\n" name in - let body = - List.fold_left - (fun acc b -> - Printf.sprintf "```%s\n%s\n```\n\n" - (Block.to_info_string b.block) - (Block.body b.block) - ^ acc) - "\n" bs - in - - (title ^ body) ^ acc) - markdown synthesized_sections - in - - let expanded_sections = synthesized_sections @ detailed_sections in - - (* we can only infer the dependancy graph globally, so we need to do this at the top level before - then working out the DAG. *) - let pass1 = pass_one_on_list [] expanded_sections in - - (* Now I have the global graph implicitly, turn the list into a graph of blocks *) - let all_astblocks = List.concat_map Section.blocks pass1 in - let id_all_astblocks = List.mapi (fun i h -> (i, h)) all_astblocks in - - (* All files will have one writer and zero or more readers *) - let writers = - List.concat - (List.map - (fun (hbid, h) -> - let _, outputs = Astblock.io h in - List.map (fun o -> (Datafile.id o, (o, hbid))) outputs) - id_all_astblocks) - in - - let edges = - List.concat - (List.map - (fun (hbid, h) -> - let inputs, _ = Astblock.io h in - List.filter_map - (fun i -> - match List.assoc_opt (Datafile.id i) writers with - | None -> None - | Some (_, writerid) -> Some (writerid, hbid)) - inputs) - id_all_astblocks) - in - - ({ nodes = id_all_astblocks; edges; metadata }, expanded_markdown) - -let find_id_of_block ast ib = - let d = Block.digest ib in - let rec loop l = - match l with - | [] -> None - | hd :: tl -> - let id, hb = hd in - let b = Astblock.block hb in - if Block.digest b = d then Some id else loop tl - in - loop ast.nodes - -let block_by_id ast id = List.assoc_opt id ast.nodes - -let find_ast_block_from_shark_block ast block = - let id = find_id_of_block ast block in - Option.bind id (block_by_id ast) - -let find_dependencies ast id = - List.filter_map - (fun (edge : block_id * block_id) : block_id option -> - let from, too = edge in - if too = id then Some from else None) - ast.edges - |> List.sort_uniq (fun a b -> a - b) - (* remove duplicates if we take more than one output from a block *) - |> List.map (fun id -> List.assoc id ast.nodes) - -let default_container_path ast = Frontmatter.default_container_path ast.metadata diff --git a/src/lib/ast/ast.mli b/src/lib/ast/ast.mli deleted file mode 100644 index 831824f7c..000000000 --- a/src/lib/ast/ast.mli +++ /dev/null @@ -1,48 +0,0 @@ -(** {1 AST} - -The AST is the logical representation of the workflow described in a -sharkdown file, including the structure of groups (aka basic blocks -in PL, but block is an overloaded term in this context). *) - -module Astblock : sig - type t [@@deriving sexp] - - val block : t -> Block.t - val hash : t -> string option - val hashes : t -> string list - val update_hash : t -> string -> unit - val commands : t -> Leaf.t list - val context : t -> string - val io : t -> Datafile.t list * Datafile.t list - val digest : t -> string - val pp : t Fmt.t -end - -module Section : sig - type t - - val name : t -> string -end - -type block_id [@@deriving sexp] - -type t [@@deriving sexp] -(** An AST instance *) - -val pp : t Fmt.t - -val of_sharkdown : - ?concrete_paths:(string * Fpath.t) list -> string -> t * string -(** [of_sharkdown] takes in the sharkdown document and generates and AST. If the frontmatter contains - declarations of external inputs they can be overridden by supplying [concerte_paths] that maps the input - name to a file path. In addition to the AST the sharkdown document is returned, with the body section - being updated for any autogenerated blocks. *) - -val find_id_of_block : t -> Block.t -> block_id option -val block_by_id : t -> block_id -> Astblock.t option -val find_ast_block_from_shark_block : t -> Block.t -> Astblock.t option -val find_dependencies : t -> block_id -> Astblock.t list -val default_container_path : t -> Fpath.t - -val to_list : t -> Astblock.t list -(** Convert the AST to a list of command blocks. *) diff --git a/src/lib/ast/block.ml b/src/lib/ast/block.ml new file mode 100644 index 000000000..6910ea9e6 --- /dev/null +++ b/src/lib/ast/block.ml @@ -0,0 +1,180 @@ +open Sexplib.Conv +open Astring +module DatafileSet = Set.Make (Datafile) + +(* Raw block information defining the kind of block it is *) +module Raw = struct + type build_and_run = { hash : string option; alias : string; body : string } + [@@deriving sexp] + + type publish = { body : string; output : [ `Directory of string ] } + [@@deriving sexp] + + type import = { body : string; alias : string option; hash : string option } + [@@deriving sexp] + + type t = + | Build of build_and_run + | Run of build_and_run + | Publish of publish + | Import of import + [@@deriving sexp] + + let build_or_run ?hash ~alias ~body kind = + match kind with + | `Run -> Run { alias; body; hash } + | `Build -> Build { alias; body; hash } + + let publish ?(output = `Directory "./_shark") body = Publish { body; output } + let import ?hash ?alias body = Import { body; alias; hash } + let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) + + let body : t -> string = function + | Publish { body; _ } + | Run { body; _ } + | Build { body; _ } + | Import { body; _ } -> + body + + let alias = function + | Import { alias = Some alias; _ } | Run { alias; _ } | Build { alias; _ } + -> + alias + | _ -> invalid_arg "No alias found for block" + + let hash = function + | Publish _ -> None + | Run b -> b.hash + | Build b -> b.hash + | Import b -> b.hash + + let kind = function + | Build _ -> `Build + | Run _ -> `Run + | Publish _ -> `Publish + | Import _ -> `Import + + let output = function + | Publish { output; _ } -> output + | _ -> invalid_arg "Expected a publish block" + + let with_hash (b : t) hash = + match b with + | Build b -> Build { b with hash = Some hash } + | Run b -> Run { b with hash = Some hash } + | Publish b -> Publish b + | Import i -> Import i + + let command_list : t -> string list = function + | Import { body; _ } + | Publish { body; _ } + | Run { body; _ } + | Build { body; _ } -> + let regex_newline = Str.regexp "\\\\\n" + and regex_comment = Str.regexp "#.*$" + and regex_whitespace = Str.regexp "[\t ]+" in + Str.global_replace regex_newline "" body + |> Str.global_replace regex_comment "" + |> String.cuts ~sep:"\n" |> List.map String.trim + |> List.map (Str.global_replace regex_whitespace " ") + |> List.filter_map (fun l -> match l with "" -> None | x -> Some x) + + let imports = function + | Build _ | Run _ | Publish _ -> invalid_arg "Expected an import block" + | Import { body; _ } -> + let cut_import s = + match String.cut ~sep:" " s with + | Some (url, path) -> ( + match Fpath.of_string path with + | Ok path -> (Uri.of_string url, path) + | Error (`Msg msg) -> + Fmt.failwith "Error parsing path %s: %s" path msg) + | None -> Fmt.failwith "Invalid import statement '%s'" s + in + String.cuts ~sep:"\n" (String.trim body) |> List.map cut_import + + let digest : t -> string = function + | Import { body; _ } + | Publish { body; _ } + | Run { body; _ } + | Build { body; _ } -> + Digest.string body + + let import_spec b = + let open Obuilder_spec in + let commands = + imports b + |> List.concat_map (fun (url, target_path) -> + match Uri.scheme url with + | None | Some "file" -> + let fpath = + match Fpath.of_string (Uri.path url) with + | Ok p -> p + | Error (`Msg msg) -> + Fmt.failwith "Failed to parse path %s: %s" (Uri.path url) + msg + in + let relpath = + Option.get (Fpath.relativize ~root:(Fpath.v "/") fpath) + in + [ + copy + [ Fpath.to_string relpath ] + ~dst:(Fpath.to_string target_path); + ] + | Some "http" | Some "https" -> ( + let src_path = Uri.path url in + match String.cut ~rev:true ~sep:"." src_path with + | Some (_, "git") -> + [ + shell [ "/bin/sh"; "-c" ]; + run ~network:[ "host" ] "apk add --no-cache git"; + run ~network:[ "host" ] + "mkdir -p /data && git clone %s %s" (Uri.to_string url) + (Fpath.to_string target_path); + ] + | _ -> + [ + shell [ "/bin/sh"; "-c" ]; + run ~network:[ "host" ] "apk add --no-cache curl"; + run ~network:[ "host" ] "mkdir -p /data && curl -O %s %s" + (Fpath.to_string target_path) + (Uri.to_string url); + ]) + | Some scheme -> Fmt.failwith "Unsupported import scheme %s" scheme) + in + stage ~from:(`Image "alpine") commands +end + +type t = { + hash : string list ref; + context : string; + raw : Raw.t; + commands : Leaf.t list; +} +[@@deriving sexp] + +let v context raw commands = { hash = ref []; context; raw; commands } +let raw h = h.raw +let kind h = Raw.kind h.raw +let commands h = h.commands +let context h = h.context +let digest h = Raw.digest h.raw +let hash h = match !(h.hash) with [] -> None | hd :: _ -> Some hd +let hashes h = !(h.hash) +let update_hash h hash = h.hash := hash :: !(h.hash) + +let io h = + let all_inputs, all_outputs = + List.fold_left + (fun acc v -> + let inputs, outputs = acc in + ( DatafileSet.union inputs (DatafileSet.of_list (Leaf.inputs v)), + DatafileSet.union outputs (DatafileSet.of_list (Leaf.outputs v)) )) + (DatafileSet.empty, DatafileSet.empty) + h.commands + in + ( DatafileSet.to_list (DatafileSet.diff all_inputs all_outputs), + DatafileSet.to_list all_outputs ) + +let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) diff --git a/src/lib/ast/block.mli b/src/lib/ast/block.mli new file mode 100644 index 000000000..410d7bb61 --- /dev/null +++ b/src/lib/ast/block.mli @@ -0,0 +1,64 @@ +module Raw : sig + type t [@@deriving sexp] + (** A shark block *) + + val build_or_run : + ?hash:string -> alias:string -> body:string -> [ `Run | `Build ] -> t + (** Construct a custom block. *) + + val publish : ?output:[ `Directory of string ] -> string -> t + (** A publish block with a body. Default output is [`Directory "./_shark"] *) + + val import : ?hash:string -> ?alias:string -> string -> t + (** A shark import statement with a body *) + + val pp : t Fmt.t + (** A pretty printer for blocks. *) + + val with_hash : t -> string -> t + (** [with_hash block] is [block] with a new hash. Publish blocks remain unchanged. *) + + val alias : t -> string + (** A block's alias *) + + val command_list : t -> string list + (** [command_list block] parses the [block]'s body for commands. *) + + val import_spec : t -> Obuilder_spec.t + (** For a shark-import block generate the spec to execute to enact the import. *) + + val hash : t -> string option + (** If a block has been run it will hash a build hash *) + + val kind : t -> [ `Run | `Build | `Publish | `Import ] + (** The kind of block *) + + val body : t -> string + (** The body of the block *) + + val output : t -> [ `Directory of string ] + (** The output of a publish block *) + + val imports : t -> (Uri.t * Fpath.t) list + (** The imports from an import block i.e. a list of [URL, Path] pairs. *) + + val digest : t -> string +end + +type t [@@deriving sexp] + +val v : string -> Raw.t -> Leaf.t list -> t +(* Constructor for a new block *) + +val kind : t -> [ `Run | `Build | `Publish | `Import ] +(** The kind of block *) + +val raw : t -> Raw.t +val hash : t -> string option +val hashes : t -> string list +val update_hash : t -> string -> unit +val commands : t -> Leaf.t list +val context : t -> string +val io : t -> Datafile.t list * Datafile.t list +val digest : t -> string +val pp : t Fmt.t diff --git a/src/lib/dotrenderer.ml b/src/lib/ast/dot.ml similarity index 90% rename from src/lib/dotrenderer.ml rename to src/lib/ast/dot.ml index 488918a17..2945de6a3 100644 --- a/src/lib/dotrenderer.ml +++ b/src/lib/ast/dot.ml @@ -73,7 +73,7 @@ let render_ast_to_dot ppf astblocks : unit = Format.fprintf ppf "digraph{\n"; List.concat_map (fun hb -> - let commands = Ast.Astblock.commands hb in + let commands = Block.commands hb in List.concat_map (fun command -> let inputs = Leaf.inputs command and outputs = Leaf.outputs command in @@ -85,12 +85,12 @@ let render_ast_to_dot ppf astblocks : unit = List.iteri (fun i hb -> - let kind = Block.kind (Ast.Astblock.block hb) in + let kind = Block.Raw.kind (Block.raw hb) in let name, style = match kind with | `Publish -> ("Publish", "bold") - | _ -> (Ast.Astblock.context hb, "solid") - and commands = Ast.Astblock.commands hb in + | _ -> (Block.context hb, "solid") + and commands = Block.commands hb in Format.fprintf ppf "subgraph \"cluster_%d\" {\n" i; Format.fprintf ppf "\tstyle = %s\n" style; Format.fprintf ppf "\tlabel = \"%s\"\n" name; @@ -118,8 +118,4 @@ let render_ast_to_dot ppf astblocks : unit = astblocks; Format.fprintf ppf "}\n" -let render ~template_markdown = - Ast.of_sharkdown template_markdown - |> fst |> Ast.to_list - |> render_ast_to_dot Format.str_formatter; - Format.flush_str_formatter () +let render ppf ast = Ast.to_list ast |> render_ast_to_dot ppf diff --git a/src/lib/ast/dune b/src/lib/ast/dune new file mode 100644 index 000000000..36bd95355 --- /dev/null +++ b/src/lib/ast/dune @@ -0,0 +1,6 @@ +(library + (name shark_ast) + (preprocess + (pps ppx_sexp_conv)) + (public_name shark-ast) + (libraries fmt fpath hmap sexplib uri str obuilder-spec yaml)) diff --git a/src/lib/frontmatter.ml b/src/lib/ast/metadata.ml similarity index 89% rename from src/lib/frontmatter.ml rename to src/lib/ast/metadata.ml index 05c4f4da3..4d8d8213c 100644 --- a/src/lib/frontmatter.ml +++ b/src/lib/ast/metadata.ml @@ -32,7 +32,7 @@ let string_list_of_yaml = function let dict_of_yaml = function `O assoc -> assoc | _ -> [] -let of_toplevel_yaml = function +let of_yaml = function | `O assoc -> let vars = List.filter_map @@ -58,11 +58,10 @@ let of_toplevel_yaml = function | _ -> None) raw_inputs in - { variables = vars; inputs } - | `Null -> empty - | _ -> failwith "Malformed variables in markdown frontmatter" + Ok { variables = vars; inputs } + | `Null -> Ok empty + | _ -> Error (`Msg "Malformed variables in markdown frontmatter") -let of_string s = String.trim s |> Yaml.of_string |> Result.map of_toplevel_yaml let variables t = t.variables let inputs t = List.map (fun (_, v) -> v) t.inputs let input_map t = t.inputs diff --git a/src/lib/frontmatter.mli b/src/lib/ast/metadata.mli similarity index 85% rename from src/lib/frontmatter.mli rename to src/lib/ast/metadata.mli index 7f95dfce8..714a2aa6e 100644 --- a/src/lib/frontmatter.mli +++ b/src/lib/ast/metadata.mli @@ -3,7 +3,7 @@ type t [@@deriving sexp] val pp : t Fmt.t val v : (string * string list) list -> (string * Fpath.t) list -> t val empty : t -val of_string : string -> (t, [ `Msg of string ]) result +val of_yaml : Yaml.value -> (t, [ `Msg of string ]) result val variables : t -> (string * string list) list (* These are specific to shark rather than general frontmatter *) diff --git a/src/lib/ast/shark_ast.ml b/src/lib/ast/shark_ast.ml new file mode 100644 index 000000000..376fb1cdb --- /dev/null +++ b/src/lib/ast/shark_ast.ml @@ -0,0 +1,42 @@ +open Sexplib.Conv +module Leaf = Leaf +module Command = Command +module Metadata = Metadata +module Datafile = Datafile + +module Ast = struct + module Block = Block + include Ast + + let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) + let pp_dot = Dot.render + + let find_id_of_block ast ib = + let d = Block.Raw.digest ib in + let rec loop l = + match l with + | [] -> None + | hd :: tl -> + let id, hb = hd in + if Block.digest hb = d then Some id else loop tl + in + loop ast.nodes + + let block_by_id ast id = List.assoc_opt id ast.nodes + + let find_dependencies ast id = + List.filter_map + (fun (edge : block_id * block_id) : block_id option -> + let from, too = edge in + if too = id then Some from else None) + ast.edges + |> List.sort_uniq (fun a b -> a - b) + (* remove duplicates if we take more than one output from a block *) + |> List.map (fun id -> List.assoc id ast.nodes) + + let find_block_from_raw_block ast block = + let id = find_id_of_block ast block in + Option.bind id (block_by_id ast) + + let default_container_path ast = Metadata.default_container_path ast.metadata +end diff --git a/src/lib/ast/shark_ast.mli b/src/lib/ast/shark_ast.mli new file mode 100644 index 000000000..22e93acdf --- /dev/null +++ b/src/lib/ast/shark_ast.mli @@ -0,0 +1,41 @@ +(** {1 AST} + +The AST is the logical representation of the workflow described in a +sharkdown file, including the structure of groups (aka basic blocks +in PL, but block is an overloaded term in this context). *) + +module Leaf = Leaf +module Command = Command +module Metadata = Metadata +module Datafile = Datafile + +module Ast : sig + module Block = Block + + type block_id = int [@@deriving sexp] + + type t [@@deriving sexp] + (** An AST *) + + val v : + nodes:(block_id * Block.t) list -> + edges:(block_id * block_id) list -> + Metadata.t -> + t + (** Construct a new AST from edges and nodes *) + + val pp : t Fmt.t + (** A pretty printer for ASTs *) + + val pp_dot : t Fmt.t + (** The dot graph of the AST. *) + + val find_id_of_block : t -> Block.Raw.t -> block_id option + val block_by_id : t -> block_id -> Block.t option + val find_block_from_raw_block : t -> Block.Raw.t -> Block.t option + val find_dependencies : t -> block_id -> Block.t list + val default_container_path : t -> Fpath.t + + val to_list : t -> Block.t list + (** Convert the AST to a list of command blocks. *) +end diff --git a/src/lib/block.ml b/src/lib/block.ml index 06b5a090d..8b8acaf5e 100644 --- a/src/lib/block.ml +++ b/src/lib/block.ml @@ -1,30 +1,7 @@ open Astring -open Sexplib.Conv +module Rb = Shark_ast.Ast.Block.Raw -type build_and_run = { hash : string option; alias : string; body : string } -[@@deriving sexp] - -type publish = { body : string; output : [ `Directory of string ] } -[@@deriving sexp] - -type import = { body : string; alias : string option; hash : string option } -[@@deriving sexp] - -type t = - | Build of build_and_run - | Run of build_and_run - | Publish of publish - | Import of import -[@@deriving sexp] - -let build_or_run ?hash ~alias ~body kind = - match kind with - | `Run -> Run { alias; body; hash } - | `Build -> Build { alias; body; hash } - -let publish ?(output = `Directory "./_shark") body = Publish { body; output } -let import ?hash ?alias body = Import { body; alias; hash } -let pp ppf t = Sexplib.Sexp.pp_hum ppf (sexp_of_t t) +type t = Rb.t let of_info_string ?(default = fun ~info:_ ~body:_ -> None) ~body s = match Astring.String.cuts ~sep:":" s with @@ -35,7 +12,8 @@ let of_info_string ?(default = fun ~info:_ ~body:_ -> None) ~body s = | [ env; hash ] -> (env, Some hash) | _ -> failwith "Malformed env and hash" in - Some (Build { hash; alias = env; body }) + let build = Rb.build_or_run ?hash ~alias:env ~body `Build in + Some build | "shark-run" :: rest -> let env, hash = match rest with @@ -43,7 +21,8 @@ let of_info_string ?(default = fun ~info:_ ~body:_ -> None) ~body s = | [ env; hash ] -> (env, Some hash) | _ -> failwith "Malformed env and hash" in - Some (Run { hash; alias = env; body }) + let run = Rb.build_or_run ?hash ~alias:env ~body `Run in + Some run | "shark-publish" :: rest -> let output = match rest with @@ -51,7 +30,7 @@ let of_info_string ?(default = fun ~info:_ ~body:_ -> None) ~body s = | [ dir ] -> Some (`Directory dir) | _ -> failwith "Unknown publishing output" in - Some (publish ?output body) + Some (Rb.publish ?output body) | "shark-import" :: rest -> let alias, hash = match rest with @@ -59,7 +38,7 @@ let of_info_string ?(default = fun ~info:_ ~body:_ -> None) ~body s = | [ env; hash ] -> (Some env, Some hash) | _ -> (None, None) in - Some (Import { body; alias; hash }) + Some (Rb.import ?hash ?alias body) | _ -> default ~info:s ~body let of_code_block ?default cb = @@ -71,130 +50,22 @@ let of_code_block ?default cb = | None -> None | Some (info, _) -> of_info_string ?default ~body info -let to_info_string = function - | Build { hash; alias; _ } -> ( +let to_info_string v = + match Rb.kind v with + | `Build -> ( + let alias = Rb.alias v in + let hash = Rb.hash v in Fmt.str "shark-build:%s" alias ^ match hash with Some hash -> ":" ^ hash | None -> "") - | Run { hash; alias; _ } -> ( + | `Run -> ( + let alias = Rb.alias v in + let hash = Rb.hash v in Fmt.str "shark-run:%s" alias ^ match hash with Some hash -> ":" ^ hash | None -> "") - | Publish _ -> "shark-publish" - | Import { hash; alias; _ } -> ( + | `Publish -> "shark-publish" + | `Import -> ( + let alias = try Some (Rb.alias v) with Invalid_argument _ -> None in + let hash = Rb.hash v in Fmt.str "shark-import" ^ (match alias with Some alias -> ":" ^ alias | None -> "") ^ match hash with Some hash -> ":" ^ hash | None -> "") - -let body : t -> string = function - | Publish { body; _ } - | Run { body; _ } - | Build { body; _ } - | Import { body; _ } -> - body - -let alias = function - | Import { alias = Some alias; _ } | Run { alias; _ } | Build { alias; _ } -> - alias - | _ -> invalid_arg "No alias found for block" - -let hash = function - | Publish _ -> None - | Run b -> b.hash - | Build b -> b.hash - | Import b -> b.hash - -let kind = function - | Build _ -> `Build - | Run _ -> `Run - | Publish _ -> `Publish - | Import _ -> `Import - -let output = function - | Publish { output; _ } -> output - | _ -> invalid_arg "Expected a publish block" - -let with_hash (b : t) hash = - match b with - | Build b -> Build { b with hash = Some hash } - | Run b -> Run { b with hash = Some hash } - | Publish b -> Publish b - | Import i -> Import i - -let command_list : t -> string list = function - | Import { body; _ } - | Publish { body; _ } - | Run { body; _ } - | Build { body; _ } -> - let regex_newline = Str.regexp "\\\\\n" - and regex_comment = Str.regexp "#.*$" - and regex_whitespace = Str.regexp "[\t ]+" in - Str.global_replace regex_newline "" body - |> Str.global_replace regex_comment "" - |> String.cuts ~sep:"\n" |> List.map String.trim - |> List.map (Str.global_replace regex_whitespace " ") - |> List.filter_map (fun l -> match l with "" -> None | x -> Some x) - -let imports = function - | Build _ | Run _ | Publish _ -> invalid_arg "Expected an import block" - | Import { body; _ } -> - let cut_import s = - match String.cut ~sep:" " s with - | Some (url, path) -> ( - match Fpath.of_string path with - | Ok path -> (Uri.of_string url, path) - | Error (`Msg msg) -> - Fmt.failwith "Error parsing path %s: %s" path msg) - | None -> Fmt.failwith "Invalid import statement '%s'" s - in - String.cuts ~sep:"\n" (String.trim body) |> List.map cut_import - -let digest : t -> string = function - | Import { body; _ } - | Publish { body; _ } - | Run { body; _ } - | Build { body; _ } -> - Digest.string body - -let import_spec b = - let open Obuilder_spec in - let commands = - imports b - |> List.concat_map (fun (url, target_path) -> - match Uri.scheme url with - | None | Some "file" -> - let fpath = - match Fpath.of_string (Uri.path url) with - | Ok p -> p - | Error (`Msg msg) -> - Fmt.failwith "Failed to parse path %s: %s" (Uri.path url) - msg - in - let relpath = - Option.get (Fpath.relativize ~root:(Fpath.v "/") fpath) - in - [ - copy - [ Fpath.to_string relpath ] - ~dst:(Fpath.to_string target_path); - ] - | Some "http" | Some "https" -> ( - let src_path = Uri.path url in - match String.cut ~rev:true ~sep:"." src_path with - | Some (_, "git") -> - [ - shell [ "/bin/sh"; "-c" ]; - run ~network:[ "host" ] "apk add --no-cache git"; - run ~network:[ "host" ] "mkdir -p /data && git clone %s %s" - (Uri.to_string url) - (Fpath.to_string target_path); - ] - | _ -> - [ - shell [ "/bin/sh"; "-c" ]; - run ~network:[ "host" ] "apk add --no-cache curl"; - run ~network:[ "host" ] "mkdir -p /data && curl -O %s %s" - (Fpath.to_string target_path) - (Uri.to_string url); - ]) - | Some scheme -> Fmt.failwith "Unsupported import scheme %s" scheme) - in - stage ~from:(`Image "alpine") commands diff --git a/src/lib/block.mli b/src/lib/block.mli index f1e4a7682..426a86c8e 100644 --- a/src/lib/block.mli +++ b/src/lib/block.mli @@ -1,38 +1,13 @@ -(** {1 Shark Blocks} - -A shark block is a code block in a markdown file. These blocks -are distinguished by either [shark-build] or [shark-run] as the -code blocks "language". +open Shark_ast +(** {1 Markdown Blocks} + + We map between markdown code blocks and Sharks {! Shark_ast.Block}. +*) -After [shark-build] and separated by a [:] there is the build's alias -which [shark-run] commands can reference in the same way to tell -{e shark} to use that build environment to execute the command. - -[shark-publish] blocks are different. They allow you to export data -from shark along with its build description. At the moment only -exporting to the local filesystem is supported. *) - -type t [@@deriving sexp] -(** A shark block *) - -val build_or_run : - ?hash:string -> alias:string -> body:string -> [ `Run | `Build ] -> t -(** Construct a custom block. *) - -val publish : ?output:[ `Directory of string ] -> string -> t -(** A publish block with a body. Default output is [`Directory "./_shark"] *) - -val import : ?hash:string -> ?alias:string -> string -> t -(** A shark import statement with a body *) - -val pp : t Fmt.t -(** A pretty printer for blocks. *) - -val with_hash : t -> string -> t -(** [with_hash block] is [block] with a new hash. Publish blocks remain unchanged. *) +type t = Ast.Block.Raw.t val of_info_string : - ?default:(info:string -> body:string -> t option) -> + ?default:(info:string -> body:string -> Ast.Block.Raw.t option) -> body:string -> string -> t option @@ -49,29 +24,3 @@ val of_code_block : val to_info_string : t -> string (** Convert the block back to the info string *) - -val command_list : t -> string list -(** [command_list block] parses the [block]'s body for commands. *) - -val alias : t -> string -(** A block's alias *) - -val hash : t -> string option -(** If a block has been run it will hash a build hash *) - -val kind : t -> [ `Run | `Build | `Publish | `Import ] -(** The kind of block *) - -val body : t -> string -(** The body of the block *) - -val output : t -> [ `Directory of string ] -(** The output of a publish block *) - -val imports : t -> (Uri.t * Fpath.t) list -(** The imports from an import block i.e. a list of [URL, Path] pairs. *) - -val digest : t -> string - -val import_spec : t -> Obuilder_spec.t -(** For a shark-import block generate the spec to execute to enact the import. *) diff --git a/src/lib/dune b/src/lib/dune index 79fda625a..77bb26f2d 100644 --- a/src/lib/dune +++ b/src/lib/dune @@ -1,8 +1,6 @@ -(include_subdirs unqualified) - (library (name shark) (public_name shark) - (libraries eio str yaml lwt_eio cmarkit obuilder fpath uri) + (libraries eio str yaml lwt_eio cmarkit obuilder fpath uri shark-ast) (preprocess (pps ppx_sexp_conv))) diff --git a/src/lib/md.ml b/src/lib/md.ml index 4cb16e455..63cee8996 100644 --- a/src/lib/md.ml +++ b/src/lib/md.ml @@ -1,6 +1,7 @@ open Astring open Eio open Import +open Shark_ast let ( / ) = Eio.Path.( / ) @@ -43,10 +44,11 @@ let log kind buffer tag msg = let process_build_block ?(src_dir = ".") ?hb (Builder ((module Builder), builder)) ast (code_block, block) = - match Block.kind block with + match Ast.Block.Raw.kind block with | `Build -> ( let spec = - Obuilder_spec.t_of_sexp (Sexplib.Sexp.of_string (Block.body block)) + Obuilder_spec.t_of_sexp + (Sexplib.Sexp.of_string (Ast.Block.Raw.body block)) in let buf = Buffer.create 128 in let log = log `Build buf in @@ -60,16 +62,16 @@ let process_build_block ?(src_dir = ".") ?hb let cb = Cmarkit.Block.Code_block.make ~info_string body in (cb, block, `Stop (Printf.sprintf "%s: %s" id msg)) | Ok id -> - let block_with_hash = Block.with_hash block id in + let block_with_hash = Ast.Block.Raw.with_hash block id in (* Update astblock hash *) let hb = match hb with | Some hb -> hb | None -> - Ast.find_ast_block_from_shark_block ast block + Ast.find_block_from_raw_block ast block |> Option.get ~err:"No astblock for build block" in - Ast.Astblock.update_hash hb id; + Ast.Block.update_hash hb id; let new_code_block = let info_string = Block.to_info_string block_with_hash in Cmarkit.Block.Code_block.make @@ -89,7 +91,7 @@ let input_hashes ast block = (* The input Datafile has the wildcard flag, which won't be set on the output flag, so we need to swap them over *) let input_map = - Ast.Astblock.io + Ast.Block.io (Option.get ~err:"No block ID for input map" (Ast.block_by_id ast block_id)) |> fst @@ -97,9 +99,9 @@ let input_hashes ast block = in let map_to_inputs hb = - let hashes = Ast.Astblock.hashes hb in + let hashes = Ast.Block.hashes hb in let inputs = - Ast.Astblock.io hb |> snd |> List.map Datafile.id + Ast.Block.io hb |> snd |> List.map Datafile.id |> List.filter_map (fun o -> List.assoc_opt o input_map) in List.map (fun h -> (h, inputs)) hashes @@ -154,16 +156,18 @@ let get_paths ~fs (Obuilder.Store_spec.Store ((module Store), store)) hash let process_run_block ?(environment_override = []) ~fs ~build_cache ~pool store ast (Builder ((module Builder), builder)) (_code_block, block) = - match Block.kind block with + match Ast.Block.Raw.kind block with | `Run -> let astblock = - Ast.find_ast_block_from_shark_block ast block + Ast.find_block_from_raw_block ast block |> Option.get ~err:"No astblock for run block" in let inputs = input_hashes ast block in - let build = Build_cache.find_exn build_cache (Block.alias block) in + let build = + Build_cache.find_exn build_cache (Ast.Block.Raw.alias block) + in let rom = List.map @@ -398,7 +402,7 @@ let process_run_block ?(environment_override = []) ~fs ~build_cache ~pool store ~workdir:(Fpath.to_string (Ast.default_container_path ast)) ~environment:[] in - let commands = Ast.Astblock.commands astblock in + let commands = Ast.Block.commands astblock in let ids_and_output_and_cmd = List.fold_left process_single_command [ [ initial_state ] ] commands in @@ -425,10 +429,10 @@ let process_run_block ?(environment_override = []) ~fs ~build_cache ~pool store List.iter (fun es -> - Ast.Astblock.update_hash astblock + Ast.Block.update_hash astblock (Run_block.ExecutionState.build_hash es)) last; - let block = Block.with_hash block id in + let block = Ast.Block.Raw.with_hash block id in let info_string = (Block.to_info_string block, Cmarkit.Meta.none) in (* TODO: We should be able to continue procressing other blocks if only one fails here, but I would like to restructure the code to support this better and have @@ -461,7 +465,7 @@ let copy ?chown ~src ~dst () = let process_publish_block (Obuilder.Store_spec.Store ((module Store), store)) ast (_code_block, block) = - match Block.kind block with + match Ast.Block.Raw.kind block with | `Publish -> let inputs = input_hashes ast block in Logs.info (fun f -> f "Inputs for publish"); @@ -501,13 +505,13 @@ let process_publish_block (Obuilder.Store_spec.Store ((module Store), store)) | _ -> failwith "Expected Publish Block" let translate_import_block ~uid block = - match Block.kind block with + match Ast.Block.Raw.kind block with | `Import -> - let spec = Block.import_spec block in + let spec = Ast.Block.Raw.import_spec block in Logs.info (fun f -> f "import spec: %a" Obuilder_spec.pp spec); let body = Sexplib.Sexp.to_string_hum (Obuilder_spec.sexp_of_t spec) in let alias = Fmt.str "import-statement-%s" uid in - let block = Block.build_or_run ~alias ~body `Build in + let block = Ast.Block.Raw.build_or_run ~alias ~body `Build in let code_block = Cmarkit.Block.Code_block.make ~info_string:(Fmt.str "shark-build:%s" alias, Cmarkit.Meta.none) diff --git a/src/lib/md.mli b/src/lib/md.mli index 0d61cdccb..18b858e18 100644 --- a/src/lib/md.mli +++ b/src/lib/md.mli @@ -1,3 +1,4 @@ +open Shark_ast (** {1 Sharkdown Utilities} This modules contains utilities for working with Sharkdown (markdown) files. @@ -24,7 +25,7 @@ type builder = val process_build_block : ?src_dir:string -> - ?hb:Ast.Astblock.t -> + ?hb:Ast.Block.t -> builder -> Ast.t -> Cmarkit.Block.Code_block.t * Block.t -> diff --git a/src/lib/md_to_ast.ml b/src/lib/md_to_ast.ml new file mode 100644 index 000000000..329c5f2d6 --- /dev/null +++ b/src/lib/md_to_ast.ml @@ -0,0 +1,282 @@ +open Astring +open Shark_ast + +module Section = struct + type t = { name : string; blocks : Ast.Block.t list } [@@warning "-69"] + + let blocks s = s.blocks +end + +(* ----- front matter parser ----- *) + +let parse_frontmatter frontmatter = + match Metadata.of_yaml (String.trim frontmatter |> Yaml.of_string_exn) with + | Ok frontmatter -> frontmatter + | Error (`Msg m) -> failwith ("Failed to parse frontmatter: " ^ m) + +(* ----- internal tree building code ----- *) + +type superblock = { block : Block.t; commands : Command.t list } + +let block_to_superblock (block : Block.t) : superblock = + { + block; + commands = + Ast.Block.Raw.command_list block |> List.filter_map Command.of_string + (* |> List.filter_map (fun c -> + match Command.file_args c with [] -> None | _ -> Some c); *); + } + +let build_initial_input_map inputs = + let input_map = + List.mapi + (fun i f -> + let df = Datafile.v i f in + (f, df)) + inputs + in + let counter = ref (List.length input_map) in + (input_map, counter) + +let find_matching_datafile datafile_map fpath = + match List.assoc_opt fpath datafile_map with + | Some p -> Some p + | None -> + (* No full match, but can we find a prefix dir *) + List.fold_left + (fun acc (_ipath, df) -> + match acc with + | Some x -> Some x + | None -> ( + match Datafile.is_dir df with + | false -> None + | true -> ( + match Fpath.rem_prefix (Datafile.path df) fpath with + | None -> None + | Some subpath -> + Some + (Datafile.v ~subpath:(Fpath.to_string subpath) + (Datafile.id df) (Datafile.path df))))) + None datafile_map + +let rec pass_one_process_commands_loop counter commands datafile_map = + match commands with + | [] -> (datafile_map, []) + | hd :: tl -> + let file_args = Command.file_args hd in + + (* TODO: dedup *) + let inputs = + List.filter_map + (fun p -> find_matching_datafile datafile_map p) + file_args + in + + let style : Leaf.style = + match List.exists Datafile.is_wildcard inputs with + | true -> Map + | false -> Command + in + + let outputs = + List.filter_map + (fun fpath -> + match find_matching_datafile datafile_map fpath with + | None -> + let id = !counter in + counter := !counter + 1; + Some (Datafile.v id fpath) + | Some _ -> None) + file_args + in + + let x = Leaf.v !counter hd style inputs outputs in + counter := !counter + 1; + let updated_map, rest = + pass_one_process_commands_loop counter tl + (List.concat + [ datafile_map; List.map (fun o -> (Datafile.path o, o)) outputs ]) + in + (updated_map, x :: rest) + +let pass_one_on_list inputs section_list = + let input_map, counter = build_initial_input_map inputs in + let _, processed = + List.fold_left_map + (fun input_map section -> + let name, superblocks = section in + let updated_map, processed_section = + List.fold_left_map + (fun input_map superblock -> + let updated_map, leaves = + pass_one_process_commands_loop counter superblock.commands + input_map + in + (updated_map, Ast.Block.v name superblock.block leaves)) + input_map superblocks + in + (updated_map, Section.{ name; blocks = processed_section })) + input_map section_list + in + processed + +(* ----- markdown body parser ----- *) + +(* In theory this could be a recursive structure that attempts to maintain the + hierarchy of the document, markdown doesn't enforce that the section levels + make any sort of sense, so for now I'm just going to assume a single level. + + I did initially try to implement that but got a lot of complexity for little + initial benefit. *) +type section_group = { name : string; children : Block.t list } + +let synthesize_import_block input_map input_override_map = + let imports = + List.map + (fun (k, p) -> + let dest = List.assoc k input_map in + (p, dest)) + input_override_map + in + let codeblock = + List.fold_left + (fun acc (src, dst) -> + acc + ^ Printf.sprintf "%s %s\n" (Fpath.to_string src) (Fpath.to_string dst)) + "" imports + in + let block = Ast.Block.Raw.import codeblock in + ("imports", [ block_to_superblock block ]) + +let synthesize_unmapped_import_block input_map = + let codeblock = + List.fold_left + (fun acc (src, dst) -> + acc ^ Printf.sprintf "%s %s\n" src (Fpath.to_string dst)) + "" input_map + in + let block = Ast.Block.Raw.import codeblock in + ("imports", [ block_to_superblock block ]) + +let parse_markdown markdown = + let doc = Cmarkit.Doc.of_string markdown in + + let block _ acc = function + | Cmarkit.Block.Heading (node, _meta) -> + let title = + Cmarkit.Block.Heading.inline node + |> Cmarkit.Inline.to_plain_text ~break_on_soft:false + |> List.map (String.concat ~sep:"") + |> String.concat ~sep:" / " + in + Cmarkit.Folder.ret ({ name = title; children = [] } :: acc) + | Cmarkit.Block.Code_block (node, _meta) -> ( + match Block.of_code_block node with + | None -> Cmarkit.Folder.default + | Some b -> ( + match Ast.Block.Raw.kind b with + | _ -> ( + match acc with + | [] -> + Cmarkit.Folder.ret + [ { name = "Top level"; children = [ b ] } ] + | hd :: tl -> + Cmarkit.Folder.ret + ({ + name = hd.name; + children = List.rev (b :: List.rev hd.children); + } + :: tl)))) + | _ -> Cmarkit.Folder.default + in + let folder = Cmarkit.Folder.make ~block () in + List.rev (Cmarkit.Folder.fold_doc folder [] doc) + |> List.filter_map (fun x -> match x.children with [] -> None | _ -> Some x) + +let of_sharkdown ?concrete_paths template_markdown = + let metadata, sections, markdown = + match String.cuts ~sep:"---" template_markdown with + | [ frontmatter; markdown ] | [ ""; frontmatter; markdown ] -> + (parse_frontmatter frontmatter, parse_markdown markdown, markdown) + | [ markdown ] -> (Metadata.empty, parse_markdown markdown, markdown) + | _ -> failwith "Malformed frontmatter/markdown file" + in + + (* Now I have a list of sections with a name and a list of blocks. The list nature has an + implicit dependnacy order, but that's wrong, so we need to turn this into a DAG of blocks. + + 1: Markdown -> list of sections, where a section is a name and a list of blocks. + 2: -> List of sections, where a section is a list a name, and a list of block/commands per block + 3: -> build the data dependnacy graph + *) + let detailed_sections = + List.map + (fun sgroup -> + (sgroup.name, List.map block_to_superblock sgroup.children)) + sections + in + + let input_map = Metadata.input_map metadata in + let synthesized_sections = + match input_map with + | [] -> [] + | _ -> ( + match concrete_paths with + | Some concrete_paths -> + [ synthesize_import_block input_map concrete_paths ] + | None -> [ synthesize_unmapped_import_block input_map ]) + in + + let expanded_markdown = + List.fold_left + (fun acc (name, bs) -> + let title = Printf.sprintf "# %s\n\n" name in + let body = + List.fold_left + (fun acc b -> + Printf.sprintf "```%s\n%s\n```\n\n" + (Block.to_info_string b.block) + (Ast.Block.Raw.body b.block) + ^ acc) + "\n" bs + in + + (title ^ body) ^ acc) + markdown synthesized_sections + in + + let expanded_sections = synthesized_sections @ detailed_sections in + + (* we can only infer the dependancy graph globally, so we need to do this at the top level before + then working out the DAG. *) + let pass1 = pass_one_on_list [] expanded_sections in + + (* Now I have the global graph implicitly, turn the list into a graph of blocks *) + let all_astblocks = List.concat_map Section.blocks pass1 in + let id_all_astblocks = List.mapi (fun i h -> (i, h)) all_astblocks in + + (* All files will have one writer and zero or more readers *) + let writers = + List.concat + (List.map + (fun (hbid, h) -> + let _, outputs = Ast.Block.io h in + List.map (fun o -> (Datafile.id o, (o, hbid))) outputs) + id_all_astblocks) + in + + let edges = + List.concat + (List.map + (fun (hbid, h) -> + let inputs, _ = Ast.Block.io h in + List.filter_map + (fun i -> + match List.assoc_opt (Datafile.id i) writers with + | None -> None + | Some (_, writerid) -> Some (writerid, hbid)) + inputs) + id_all_astblocks) + in + let ast = Ast.v ~nodes:id_all_astblocks ~edges metadata in + (ast, expanded_markdown) diff --git a/src/lib/md_to_ast.mli b/src/lib/md_to_ast.mli new file mode 100644 index 000000000..b73200ef6 --- /dev/null +++ b/src/lib/md_to_ast.mli @@ -0,0 +1,6 @@ +val of_sharkdown : + ?concrete_paths:(string * Fpath.t) list -> string -> Shark_ast.Ast.t * string +(** [of_sharkdown] takes in the sharkdown document and generates and AST. If the frontmatter contains + declarations of external inputs they can be overridden by supplying [concerte_paths] that maps the input + name to a file path. In addition to the AST the sharkdown document is returned, with the body section + being updated for any autogenerated blocks. *) diff --git a/src/lib/run_block.ml b/src/lib/run_block.ml index dcbdaae7b..7f365a0c7 100644 --- a/src/lib/run_block.ml +++ b/src/lib/run_block.ml @@ -1,6 +1,7 @@ open Astring open Sexplib.Conv open Import +open Shark_ast module CommandResult = struct type t = { build_hash : string; output : string option; command : string } diff --git a/src/lib/run_block.mli b/src/lib/run_block.mli index 0c7568819..30ada9c77 100644 --- a/src/lib/run_block.mli +++ b/src/lib/run_block.mli @@ -1,3 +1,5 @@ +open Shark_ast + module CommandResult : sig type t [@@deriving sexp] diff --git a/src/lib/server/shark_server.ml b/src/lib/server/shark_server.ml index aeb83e215..4c664c2e4 100644 --- a/src/lib/server/shark_server.ml +++ b/src/lib/server/shark_server.ml @@ -224,7 +224,7 @@ let custom_document_renderer _ = function Option.bind v (fun (v, _) -> Shark.Block.of_info_string ~body:"" v) in let info_block = - match Option.bind info (fun v -> Shark.Block.hash v) with + match Option.bind info (fun v -> Shark_ast.Ast.Block.Raw.hash v) with | None -> Cmarkit.Block.Thematic_break (Cmarkit.Block.Thematic_break.make (), Cmarkit.Meta.none) @@ -442,7 +442,8 @@ let run_dot proc dot = let serve_dot proc _req body = let template_markdown = Eio.Flow.read_all body in - let txt = Shark.Dotrenderer.render ~template_markdown in + let ast, _ = Shark.Md_to_ast.of_sharkdown template_markdown in + let txt = Fmt.str "%a" Shark_ast.Ast.pp_dot ast in let png = run_dot proc txt |> Base64.encode_string in respond_txt png diff --git a/src/test/ast.ml b/src/test/ast.ml index 8118e4a6d..ece79978e 100644 --- a/src/test/ast.ml +++ b/src/test/ast.ml @@ -1,4 +1,6 @@ -let command = Alcotest.of_pp Shark.Ast.pp +open Shark_ast + +let command = Alcotest.of_pp Ast.pp let test_single_block () = let template_markdown = @@ -8,10 +10,10 @@ $ python3 something.py /data/something.txt ``` |} in - let test, _ = Shark.Ast.of_sharkdown template_markdown in - let astblocks = Shark.Ast.to_list test in + let test, _ = Shark.Md_to_ast.of_sharkdown template_markdown in + let astblocks = Ast.to_list test in Alcotest.(check int) "Single command group expected" 1 (List.length astblocks); - let leaves = Shark.Ast.Astblock.commands (List.nth astblocks 0) in + let leaves = Ast.Block.commands (List.nth astblocks 0) in Alcotest.(check int) "Single command expected" 1 (List.length leaves) let test_multicommand_block () = @@ -23,10 +25,10 @@ $ python3 else.py /data/something.txt ``` |} in - let test, _ = Shark.Ast.of_sharkdown template_markdown in - let astblocks = Shark.Ast.to_list test in + let test, _ = Shark.Md_to_ast.of_sharkdown template_markdown in + let astblocks = Ast.to_list test in Alcotest.(check int) "Single command group expected" 1 (List.length astblocks); - let leaves = Shark.Ast.Astblock.commands (List.nth astblocks 0) in + let leaves = Ast.Block.commands (List.nth astblocks 0) in Alcotest.(check int) "Single command expected" 2 (List.length leaves) let test_single_block_no_obvious_side_effects () = @@ -37,10 +39,10 @@ $ python3 something.py ``` |} in - let test, _ = Shark.Ast.of_sharkdown template_markdown in - let astblocks = Shark.Ast.to_list test in + let test, _ = Shark.Md_to_ast.of_sharkdown template_markdown in + let astblocks = Ast.to_list test in Alcotest.(check int) "Single command group expected" 1 (List.length astblocks); - let leaves = Shark.Ast.Astblock.commands (List.nth astblocks 0) in + let leaves = Ast.Block.commands (List.nth astblocks 0) in Alcotest.(check int) "Single command expected" 1 (List.length leaves) let tests = diff --git a/src/test/block.ml b/src/test/block.ml index 3be5fe801..9c3d00025 100644 --- a/src/test/block.ml +++ b/src/test/block.ml @@ -1,15 +1,17 @@ -let block = Alcotest.of_pp Shark.Block.pp +module Block = Shark_ast.Ast.Block.Raw + +let block = Alcotest.of_pp Block.pp let test_shark_build_block_no_hash () = let build_string_no_hash = "shark-build:gdal-env" in - let expect = Shark.Block.build_or_run ~alias:"gdal-env" ~body:"" `Build in + let expect = Block.build_or_run ~alias:"gdal-env" ~body:"" `Build in let test = Shark.Block.of_info_string ~body:"" build_string_no_hash in Alcotest.(check (option block)) "same block" (Some expect) test let test_shark_build_with_hash_block () = let build_string_hash = "shark-build:gdal-env:abcdefg" in let expect = - Shark.Block.build_or_run ~hash:"abcdefg" ~alias:"gdal-env" ~body:"" `Build + Block.build_or_run ~hash:"abcdefg" ~alias:"gdal-env" ~body:"" `Build in let test = Shark.Block.of_info_string ~body:"" build_string_hash in Alcotest.(check (option block)) "same block hash" (Some expect) test @@ -22,40 +24,40 @@ let test_shark_empty_block () = let test_shark_run_block_no_hash () = let info = "shark-run:somecontainer" in let body = "$ python3 something.py\n$ python3 other.py" in - let expect = Shark.Block.build_or_run ~alias:"somecontainer" ~body `Run in + let expect = Block.build_or_run ~alias:"somecontainer" ~body `Run in let test = Shark.Block.of_info_string ~body info in Alcotest.(check (option block)) "same block" (Some expect) test let test_shark_run_multiple_commands () = let body = "$ python3 something.py\n$ python3 other.py" in - let block = Shark.Block.build_or_run ~alias:"somecontainer" ~body `Run in + let block = Block.build_or_run ~alias:"somecontainer" ~body `Run in let expect = [ "$ python3 something.py"; "$ python3 other.py" ] in - let test = Shark.Block.command_list block in + let test = Block.command_list block in Alcotest.(check (list string)) "Some commands" expect test let test_shark_run_multiline_command () = let body = "$ python3 something.py\\\n\targ1 arg2" in - let block = Shark.Block.build_or_run ~alias:"somecontainer" ~body `Run in + let block = Block.build_or_run ~alias:"somecontainer" ~body `Run in let expect = [ "$ python3 something.py arg1 arg2" ] in - let test = Shark.Block.command_list block in + let test = Block.command_list block in Alcotest.(check (list string)) "Single command" expect test let test_git_import_block () = let body = "https://example.com/quantifyearth/littlejohn.git /data/littlejohn" in - let block = Shark.Block.import body in + let block = Block.import body in let expected = [ ("https://example.com/quantifyearth/littlejohn.git", "/data/littlejohn") ] in let test = List.map (fun (u, p) -> (Uri.to_string u, Fpath.to_string p)) - (Shark.Block.imports block) + (Block.imports block) in Alcotest.(check (list (pair string string))) "Single import" expected test; - let spec = Shark.Block.import_spec block in + let spec = Block.import_spec block in let specbody = Sexplib.Sexp.to_string_hum (Obuilder_spec.sexp_of_t spec) in Alcotest.(check bool) "Found git command" true @@ -63,18 +65,18 @@ let test_git_import_block () = let test_http_import_block () = let body = "https://example.com/data/something.csv /data/src.csv" in - let block = Shark.Block.import body in + let block = Block.import body in let expected = [ ("https://example.com/data/something.csv", "/data/src.csv") ] in let test = List.map (fun (u, p) -> (Uri.to_string u, Fpath.to_string p)) - (Shark.Block.imports block) + (Block.imports block) in Alcotest.(check (list (pair string string))) "Single import" expected test; - let spec = Shark.Block.import_spec block in + let spec = Block.import_spec block in let specbody = Sexplib.Sexp.to_string_hum (Obuilder_spec.sexp_of_t spec) in Alcotest.(check bool) "Found git command" true @@ -82,16 +84,16 @@ let test_http_import_block () = let test_file_import_block_no_schema () = let body = "/home/michael/file.csv /data/file.csv" in - let block = Shark.Block.import body in + let block = Block.import body in let expected = [ ("/home/michael/file.csv", "/data/file.csv") ] in let test = List.map (fun (u, p) -> (Uri.to_string u, Fpath.to_string p)) - (Shark.Block.imports block) + (Block.imports block) in Alcotest.(check (list (pair string string))) "Single import" expected test; - let spec = Shark.Block.import_spec block in + let spec = Block.import_spec block in let specbody = Sexplib.Sexp.to_string_hum (Obuilder_spec.sexp_of_t spec) in Alcotest.(check bool) "Found git command" true @@ -99,16 +101,16 @@ let test_file_import_block_no_schema () = let test_file_import_block_with_schema () = let body = "file:///home/michael/file.csv /data/file.csv" in - let block = Shark.Block.import body in + let block = Block.import body in let expected = [ ("file:///home/michael/file.csv", "/data/file.csv") ] in let test = List.map (fun (u, p) -> (Uri.to_string u, Fpath.to_string p)) - (Shark.Block.imports block) + (Block.imports block) in Alcotest.(check (list (pair string string))) "Single import" expected test; - let spec = Shark.Block.import_spec block in + let spec = Block.import_spec block in let specbody = Sexplib.Sexp.to_string_hum (Obuilder_spec.sexp_of_t spec) in Alcotest.(check bool) "Found git command" true diff --git a/src/test/command.ml b/src/test/command.ml index 3a1ee5498..565d5299f 100644 --- a/src/test/command.ml +++ b/src/test/command.ml @@ -1,48 +1,50 @@ -let command = Alcotest.of_pp Shark.Command.pp +open Shark_ast + +let command = Alcotest.of_pp Command.pp let test_python_command_module () = let testcase = "python3 -m some.module.code arg1 arg2" in let expected = - Shark.Command.v ~name:"some.module.code" + Command.v ~name:"some.module.code" ~args:[ "python3"; "-m"; "some.module.code"; "arg1"; "arg2" ] ~file_args:[] in - let test = Shark.Command.of_string testcase in + let test = Command.of_string testcase in Alcotest.(check (option command)) "Command" (Some expected) test; Alcotest.(check string) "Rebuilt command" testcase - (Shark.Command.to_string (Option.get test)) + (Command.to_string (Option.get test)) let test_python_command_direct () = let testcase = "python3 some/module/code.py arg1 arg2" in let expected = - Shark.Command.v ~name:"code.py" + Command.v ~name:"code.py" ~args:[ "python3"; "some/module/code.py"; "arg1"; "arg2" ] ~file_args:[] in - let test = Shark.Command.of_string testcase in + let test = Command.of_string testcase in Alcotest.(check (option command)) "Command" (Some expected) test; Alcotest.(check string) "Rebuilt command" testcase - (Shark.Command.to_string (Option.get test)) + (Command.to_string (Option.get test)) let test_rscript_command_basic () = let testcase = "Rscript some/module/code.r arg1 arg2" in let expected = - Shark.Command.v ~name:"code.r" + Command.v ~name:"code.r" ~args:[ "Rscript"; "some/module/code.r"; "arg1"; "arg2" ] ~file_args:[] in - let test = Shark.Command.of_string testcase in + let test = Command.of_string testcase in Alcotest.(check (option command)) "Command" (Some expected) test; Alcotest.(check string) "Rebuilt command" testcase - (Shark.Command.to_string (Option.get test)) + (Command.to_string (Option.get test)) let test_rscript_command_options () = let testcase = "Rscript --no-environ --save some/module/code.r arg1 arg2" in let expected = - Shark.Command.v ~name:"code.r" + Command.v ~name:"code.r" ~args: [ "Rscript"; @@ -54,37 +56,33 @@ let test_rscript_command_options () = ] ~file_args:[] in - let test = Shark.Command.of_string testcase in + let test = Command.of_string testcase in Alcotest.(check (option command)) "Command" (Some expected) test; Alcotest.(check string) "Rebuilt command" testcase - (Shark.Command.to_string (Option.get test)) + (Command.to_string (Option.get test)) let test_generic_command_basic () = let testcase = "docker arg1 arg2" in let expected = - Shark.Command.v ~name:"docker" - ~args:[ "docker"; "arg1"; "arg2" ] - ~file_args:[] + Command.v ~name:"docker" ~args:[ "docker"; "arg1"; "arg2" ] ~file_args:[] in - let test = Shark.Command.of_string testcase in + let test = Command.of_string testcase in Alcotest.(check (option command)) "Command" (Some expected) test; Alcotest.(check string) "Rebuilt command" testcase - (Shark.Command.to_string (Option.get test)) + (Command.to_string (Option.get test)) let test_generic_command_basic_with_prefix () = let testcase = "$ docker arg1 arg2" in let expected = - Shark.Command.v ~name:"docker" - ~args:[ "docker"; "arg1"; "arg2" ] - ~file_args:[] + Command.v ~name:"docker" ~args:[ "docker"; "arg1"; "arg2" ] ~file_args:[] in - let test = Shark.Command.of_string testcase in + let test = Command.of_string testcase in Alcotest.(check (option command)) "Command" (Some expected) test; Alcotest.(check string) "Rebuilt command" "docker arg1 arg2" - (Shark.Command.to_string (Option.get test)) + (Command.to_string (Option.get test)) let tests = [ diff --git a/src/test/datafile.ml b/src/test/datafile.ml index d02ab9131..46ec09718 100644 --- a/src/test/datafile.ml +++ b/src/test/datafile.ml @@ -1,50 +1,47 @@ +open Shark_ast + let test_basic_file_path () = let testcase = Fpath.v "/data/test/example.tif" in - let test = Shark.Datafile.v 42 testcase in - Alcotest.(check int) "Same id" 42 (Shark.Datafile.id test); + let test = Datafile.v 42 testcase in + Alcotest.(check int) "Same id" 42 (Datafile.id test); Alcotest.(check string) "Same path" (Fpath.to_string testcase) - (Fpath.to_string (Shark.Datafile.path test)); + (Fpath.to_string (Datafile.path test)); Alcotest.(check string) "Same full path" (Fpath.to_string testcase) - (Fpath.to_string (Shark.Datafile.fullpath test)); - Alcotest.(check (option string)) - "No subpath" None - (Shark.Datafile.subpath test); - Alcotest.(check bool) "Isn't wildcard" false (Shark.Datafile.is_wildcard test); - Alcotest.(check bool) "Isn't dir" false (Shark.Datafile.is_dir test) + (Fpath.to_string (Datafile.fullpath test)); + Alcotest.(check (option string)) "No subpath" None (Datafile.subpath test); + Alcotest.(check bool) "Isn't wildcard" false (Datafile.is_wildcard test); + Alcotest.(check bool) "Isn't dir" false (Datafile.is_dir test) let test_sub_path () = let testcase = Fpath.v "/data/test/" in - let test = Shark.Datafile.v ~subpath:"example.tif" 42 testcase in - Alcotest.(check int) "Same id" 42 (Shark.Datafile.id test); + let test = Datafile.v ~subpath:"example.tif" 42 testcase in + Alcotest.(check int) "Same id" 42 (Datafile.id test); Alcotest.(check string) "Same path" (Fpath.to_string testcase) - (Fpath.to_string (Shark.Datafile.path test)); + (Fpath.to_string (Datafile.path test)); Alcotest.(check string) "Same full path" "/data/test/example.tif" - (Fpath.to_string (Shark.Datafile.fullpath test)); + (Fpath.to_string (Datafile.fullpath test)); Alcotest.(check (option string)) - "No subpath" (Some "example.tif") - (Shark.Datafile.subpath test); - Alcotest.(check bool) "Isn't wildcard" false (Shark.Datafile.is_wildcard test); - Alcotest.(check bool) "Is dir" true (Shark.Datafile.is_dir test) + "No subpath" (Some "example.tif") (Datafile.subpath test); + Alcotest.(check bool) "Isn't wildcard" false (Datafile.is_wildcard test); + Alcotest.(check bool) "Is dir" true (Datafile.is_dir test) let test_basic_dir_with_wildcard () = let testcase = Fpath.v "/data/test/" in - let test = Shark.Datafile.v ~subpath:"*" 42 testcase in - Alcotest.(check int) "Same id" 42 (Shark.Datafile.id test); + let test = Datafile.v ~subpath:"*" 42 testcase in + Alcotest.(check int) "Same id" 42 (Datafile.id test); Alcotest.(check string) "Same path" (Fpath.to_string testcase) - (Fpath.to_string (Shark.Datafile.path test)); + (Fpath.to_string (Datafile.path test)); Alcotest.(check string) "Same full path" (Fpath.to_string testcase) - (Fpath.to_string (Shark.Datafile.fullpath test)); - Alcotest.(check (option string)) - "No subpath" None - (Shark.Datafile.subpath test); - Alcotest.(check bool) "Is wildcard" true (Shark.Datafile.is_wildcard test); - Alcotest.(check bool) "Is dir" true (Shark.Datafile.is_dir test) + (Fpath.to_string (Datafile.fullpath test)); + Alcotest.(check (option string)) "No subpath" None (Datafile.subpath test); + Alcotest.(check bool) "Is wildcard" true (Datafile.is_wildcard test); + Alcotest.(check bool) "Is dir" true (Datafile.is_dir test) let tests = [ diff --git a/src/test/expect/test_dot.ml b/src/test/expect/test_dot.ml index de7da168a..8dc1c8bbb 100644 --- a/src/test/expect/test_dot.ml +++ b/src/test/expect/test_dot.ml @@ -1,4 +1,4 @@ let () = let tmf = In_channel.with_open_bin "./tmf.md" In_channel.input_all in - let dot = Shark.Dotrenderer.render ~template_markdown:tmf in - Fmt.(string stdout) dot + let ast, _ = Shark.Md_to_ast.of_sharkdown tmf in + Shark_ast.Ast.pp_dot Fmt.stdout ast diff --git a/src/test/frontmatter.ml b/src/test/frontmatter.ml index 29cdcb4e4..8d47944f9 100644 --- a/src/test/frontmatter.ml +++ b/src/test/frontmatter.ml @@ -1,12 +1,14 @@ open Import +open Shark_ast -let frontmatter = Alcotest.of_pp Shark.Frontmatter.pp +let frontmatter = Alcotest.of_pp Metadata.pp +let of_string s = Yaml.of_string s |> fun v -> Result.bind v Metadata.of_yaml let test_empty () = let testcase = "" in - let res = Shark.Frontmatter.of_string testcase in + let res = of_string testcase in Alcotest.(check (result frontmatter msg)) - "Empty expected" (Ok Shark.Frontmatter.empty) res + "Empty expected" (Ok Metadata.empty) res let test_parse_inputs () = let testcase = @@ -17,9 +19,9 @@ inputs: p3: /data/file.txt |} in - let res = Shark.Frontmatter.of_string testcase in + let res = of_string testcase in let expected = - Shark.Frontmatter.v [] + Metadata.v [] [ ("p1", Fpath.v "/data/stuff"); ("p2", Fpath.v "/data/other/"); @@ -30,14 +32,14 @@ inputs: let test_inputs_api () = let testcase = - Shark.Frontmatter.v [] + Metadata.v [] [ ("p1", Fpath.v "/data/stuff"); ("p2", Fpath.v "/data/other/"); ("p3", Fpath.v "/data/file.txt"); ] in - let res = List.map Fpath.to_string (Shark.Frontmatter.inputs testcase) in + let res = List.map Fpath.to_string (Metadata.inputs testcase) in let expected = [ "/data/stuff"; "/data/other/"; "/data/file.txt" ] in Alcotest.(check (list string)) "Empty expected" expected res diff --git a/src/test/leaf.ml b/src/test/leaf.ml index 77b433622..8b72090eb 100644 --- a/src/test/leaf.ml +++ b/src/test/leaf.ml @@ -1,69 +1,59 @@ +open Shark_ast + let test_leaf_basics () = - let command = Shark.Command.of_string "test --i /data/arg1 --o /data/arg2" in - let inputs = [ Shark.Datafile.v 0 (Fpath.v "/data/arg1") ] - and outputs = [ Shark.Datafile.v 1 (Fpath.v "/data/arg2") ] in - let leaf = - Shark.Leaf.v 42 (Option.get command) Shark.Leaf.Command inputs outputs - in + let command = Command.of_string "test --i /data/arg1 --o /data/arg2" in + let inputs = [ Datafile.v 0 (Fpath.v "/data/arg1") ] + and outputs = [ Datafile.v 1 (Fpath.v "/data/arg2") ] in + let leaf = Leaf.v 42 (Option.get command) Leaf.Command inputs outputs in - Alcotest.(check int) "Check id" 42 (Shark.Leaf.id leaf) + Alcotest.(check int) "Check id" 42 (Leaf.id leaf) let test_leaf_command_sub_empty () = - let command = Shark.Command.of_string "test --i /data/arg1 --o /data/arg2" in - let inputs = [ Shark.Datafile.v 0 (Fpath.v "/data/arg1") ] - and outputs = [ Shark.Datafile.v 1 (Fpath.v "/data/arg2") ] in - let leaf = - Shark.Leaf.v 42 (Option.get command) Shark.Leaf.Command inputs outputs - in + let command = Command.of_string "test --i /data/arg1 --o /data/arg2" in + let inputs = [ Datafile.v 0 (Fpath.v "/data/arg1") ] + and outputs = [ Datafile.v 1 (Fpath.v "/data/arg2") ] in + let leaf = Leaf.v 42 (Option.get command) Leaf.Command inputs outputs in - let test = Shark.Leaf.to_string_for_inputs leaf [] in + let test = Leaf.to_string_for_inputs leaf [] in let expected = [ "test --i /data/arg1 --o /data/arg2" ] in Alcotest.(check (list string)) "Simple sub" expected test let test_leaf_command_sub_simple () = - let command = Shark.Command.of_string "test --i /data/arg1 --o /data/arg2" in - let inputs = [ Shark.Datafile.v 0 (Fpath.v "/data/arg1") ] - and outputs = [ Shark.Datafile.v 1 (Fpath.v "/data/arg2") ] in - let leaf = - Shark.Leaf.v 42 (Option.get command) Shark.Leaf.Command inputs outputs - in + let command = Command.of_string "test --i /data/arg1 --o /data/arg2" in + let inputs = [ Datafile.v 0 (Fpath.v "/data/arg1") ] + and outputs = [ Datafile.v 1 (Fpath.v "/data/arg2") ] in + let leaf = Leaf.v 42 (Option.get command) Leaf.Command inputs outputs in let sublist = [ ("/data/arg1", [ "/some/path/1" ]); ("/data/arg2", [ "/some/path/2" ]) ] in - let test = Shark.Leaf.to_string_for_inputs leaf sublist in + let test = Leaf.to_string_for_inputs leaf sublist in let expected = [ "test --i /some/path/1 --o /some/path/2" ] in Alcotest.(check (list string)) "Simple sub" expected test let test_leaf_sub_simplewildcard () = - let command = - Shark.Command.of_string "test --i /data/arg1/* --o /data/arg2" - in - let inputs = [ Shark.Datafile.v 0 (Fpath.v "/data/arg1/*") ] - and outputs = [ Shark.Datafile.v 1 (Fpath.v "/data/arg2") ] in - let leaf = - Shark.Leaf.v 42 (Option.get command) Shark.Leaf.Command inputs outputs - in + let command = Command.of_string "test --i /data/arg1/* --o /data/arg2" in + let inputs = [ Datafile.v 0 (Fpath.v "/data/arg1/*") ] + and outputs = [ Datafile.v 1 (Fpath.v "/data/arg2") ] in + let leaf = Leaf.v 42 (Option.get command) Leaf.Command inputs outputs in let sublist = [ ("/data/arg1/", [ "/some/path/1" ]); ("/data/arg2", [ "/some/path/2" ]) ] in - let test = Shark.Leaf.to_string_for_inputs leaf sublist in + let test = Leaf.to_string_for_inputs leaf sublist in let expected = [ "test --i /some/path/1 --o /some/path/2" ] in Alcotest.(check (list string)) "Simple sub" expected test let test_leaf_command_sub_one_multi () = - let command = Shark.Command.of_string "test --i /data/arg1 --o /data/arg2" in - let inputs = [ Shark.Datafile.v 0 (Fpath.v "/data/arg1") ] - and outputs = [ Shark.Datafile.v 1 (Fpath.v "/data/arg2") ] in - let leaf = - Shark.Leaf.v 42 (Option.get command) Shark.Leaf.Command inputs outputs - in + let command = Command.of_string "test --i /data/arg1 --o /data/arg2" in + let inputs = [ Datafile.v 0 (Fpath.v "/data/arg1") ] + and outputs = [ Datafile.v 1 (Fpath.v "/data/arg2") ] in + let leaf = Leaf.v 42 (Option.get command) Leaf.Command inputs outputs in let sublist = [ @@ -72,7 +62,7 @@ let test_leaf_command_sub_one_multi () = ] in - let test = Shark.Leaf.to_string_for_inputs leaf sublist in + let test = Leaf.to_string_for_inputs leaf sublist in let expected = [ "test --i /some/path/1 --o /some/path/2"; @@ -83,12 +73,10 @@ let test_leaf_command_sub_one_multi () = Alcotest.(check (list string)) "Simple sub" expected test let test_leaf_command_sub_multi_multi () = - let command = Shark.Command.of_string "test --i /data/arg1 --o /data/arg2" in - let inputs = [ Shark.Datafile.v 0 (Fpath.v "/data/arg1") ] - and outputs = [ Shark.Datafile.v 1 (Fpath.v "/data/arg2") ] in - let leaf = - Shark.Leaf.v 42 (Option.get command) Shark.Leaf.Command inputs outputs - in + let command = Command.of_string "test --i /data/arg1 --o /data/arg2" in + let inputs = [ Datafile.v 0 (Fpath.v "/data/arg1") ] + and outputs = [ Datafile.v 1 (Fpath.v "/data/arg2") ] in + let leaf = Leaf.v 42 (Option.get command) Leaf.Command inputs outputs in let sublist = [ @@ -97,7 +85,7 @@ let test_leaf_command_sub_multi_multi () = ] in - let test = Shark.Leaf.to_string_for_inputs leaf sublist in + let test = Leaf.to_string_for_inputs leaf sublist in let expected = [ "test --i /some/path/1 --o /some/path/2"; diff --git a/src/test/run_block.ml b/src/test/run_block.ml index ebb125ce1..f6b525488 100644 --- a/src/test/run_block.ml +++ b/src/test/run_block.ml @@ -1,3 +1,5 @@ +open Shark_ast + let test_initial_block () = let es = Shark.Run_block.ExecutionState.init ~build_hash:"id" ~workdir:"/some/path" @@ -17,11 +19,9 @@ let null_runner _es _l _m _s _b = Ok "null runner" let test_simple_change_dir () = let raw_command = "cd /data/arg1" in - let command = Shark.Command.of_string raw_command in - let inputs = [ Shark.Datafile.v 0 (Fpath.v "/data/arg1") ] in - let command_leaf = - Shark.Leaf.v 42 (Option.get command) Shark.Leaf.Command inputs [] - in + let command = Command.of_string raw_command in + let inputs = [ Datafile.v 0 (Fpath.v "/data/arg1") ] in + let command_leaf = Leaf.v 42 (Option.get command) Leaf.Command inputs [] in let es = Shark.Run_block.ExecutionState.init ~build_hash:"id" ~workdir:"/some/path" @@ -48,10 +48,8 @@ let test_simple_change_dir () = let test_simple_env_udpate () = let raw_command = "export SOMEKEY=SOMEVALUE" in - let command = Shark.Command.of_string raw_command in - let command_leaf = - Shark.Leaf.v 42 (Option.get command) Shark.Leaf.Command [] [] - in + let command = Command.of_string raw_command in + let command_leaf = Leaf.v 42 (Option.get command) Leaf.Command [] [] in let es = Shark.Run_block.ExecutionState.init ~build_hash:"id" ~workdir:"/some/path" @@ -79,10 +77,8 @@ let test_simple_env_udpate () = let test_override_env_udpate () = let raw_command = "export SOMEKEY=SOMEVALUE" in - let command = Shark.Command.of_string raw_command in - let command_leaf = - Shark.Leaf.v 42 (Option.get command) Shark.Leaf.Command [] [] - in + let command = Command.of_string raw_command in + let command_leaf = Leaf.v 42 (Option.get command) Leaf.Command [] [] in let es = Shark.Run_block.ExecutionState.init ~build_hash:"id" ~workdir:"/some/path" @@ -112,10 +108,8 @@ let test_override_env_udpate () = let test_simple_command_execute () = let raw_command = "mycommand.exe" in - let command = Shark.Command.of_string raw_command in - let command_leaf = - Shark.Leaf.v 42 (Option.get command) Shark.Leaf.Command [] [] - in + let command = Command.of_string raw_command in + let command_leaf = Leaf.v 42 (Option.get command) Leaf.Command [] [] in let es = Shark.Run_block.ExecutionState.init ~build_hash:"id" ~workdir:"/some/path" @@ -149,10 +143,8 @@ let test_simple_command_execute () = let test_simple_failed_command_execute () = let raw_command = "mycommand.exe" in - let command = Shark.Command.of_string raw_command in - let command_leaf = - Shark.Leaf.v 42 (Option.get command) Shark.Leaf.Command [] [] - in + let command = Command.of_string raw_command in + let command_leaf = Leaf.v 42 (Option.get command) Leaf.Command [] [] in let es = Shark.Run_block.ExecutionState.init ~build_hash:"id" ~workdir:"/some/path"