From be7edb52d2bb87f81baf18f9f6befd0178ae6353 Mon Sep 17 00:00:00 2001 From: Thibaut Mattio Date: Mon, 8 May 2023 15:59:41 +0200 Subject: [PATCH] [WIP] Refactor comment parsing to use odoc-parser and cmarkit Omd 2.X doesn't support printing markdown, so OCaml LSP is blocked to Omd 1.X. This creates conflicts with codebases that have migrated to Omd 2.X (e.g. in ocamlorg). To avoid conflicts for users of Omd 2.X, we migrate to cmarkit, which has a smaller dependency profile and supports printing to markdown. --- dune-project | 4 +- flake.nix | 4 +- ocaml-lsp-server.opam | 4 +- ocaml-lsp-server/src/doc_to_md.ml | 353 ++++++++++++++++++------------ ocaml-lsp-server/src/dune | 4 +- 5 files changed, 220 insertions(+), 149 deletions(-) diff --git a/dune-project b/dune-project index 8420d9c41..8aedba4e3 100644 --- a/dune-project +++ b/dune-project @@ -56,11 +56,11 @@ possible and does not make any assumptions about IO. ordering dune-build-info spawn + cmarkit + odoc-parser (ppx_expect (and (>= v0.15.0) :with-test)) (ocamlformat (and :with-test (= 0.24.1))) (ocamlc-loc (and (>= 3.5.0) (< 3.7.0))) - (omd (and (>= 1.3.2) (< 2.0.0~alpha1))) - (octavius (>= 1.2.2)) (uutf (>= 1.0.2)) (pp (>= 1.1.2)) (csexp (>= 1.5)) diff --git a/flake.nix b/flake.nix index c22d616dc..a92dfbabc 100644 --- a/flake.nix +++ b/flake.nix @@ -76,8 +76,8 @@ duneVersion = "3"; buildInputs = with pkgs.ocamlPackages; [ ocamlc-loc - omd - octavius + odoc-parser + cmarkit dune-build-info re dune-rpc diff --git a/ocaml-lsp-server.opam b/ocaml-lsp-server.opam index 8ef8542f2..a7e1688b9 100644 --- a/ocaml-lsp-server.opam +++ b/ocaml-lsp-server.opam @@ -32,11 +32,11 @@ depends: [ "ordering" "dune-build-info" "spawn" + "cmarkit" + "odoc-parser" "ppx_expect" {>= "v0.15.0" & with-test} "ocamlformat" {with-test & = "0.24.1"} "ocamlc-loc" {>= "3.5.0" & < "3.7.0"} - "omd" {>= "1.3.2" & < "2.0.0~alpha1"} - "octavius" {>= "1.2.2"} "uutf" {>= "1.0.2"} "pp" {>= "1.1.2"} "csexp" {>= "1.5"} diff --git a/ocaml-lsp-server/src/doc_to_md.ml b/ocaml-lsp-server/src/doc_to_md.ml index 421636a34..b25a70145 100644 --- a/ocaml-lsp-server/src/doc_to_md.ml +++ b/ocaml-lsp-server/src/doc_to_md.ml @@ -1,150 +1,221 @@ open Import -module Oct = Octavius +open Cmarkit -let ocaml = "ocaml" - -let to_inline_code code = Omd.Code (ocaml, code) - -let to_code_block code = Omd.Code_block (ocaml, code) - -let space = Omd.Text " " - -let new_line = Omd.NL - -(* [put_in_between elem lst] inserts [elem] between all elements of [lst] *) -let put_in_between elem lst = - let rec loop acc = function - | [] -> acc - | [ hd ] -> hd :: acc - | hd :: tail -> loop (elem :: hd :: acc) tail - in - List.rev (loop [] lst) - -let heading_level level heading : Omd.element = - match level with - | 0 -> H1 heading - | 1 -> H2 heading - | 2 -> H3 heading - | 3 -> H4 heading - | 4 -> H5 heading - | _ -> H6 heading - -let style_markdown (kind : Oct.Types.style_kind) md : Omd.element list = - match kind with - | SK_bold -> [ Bold md ] - | SK_italic | SK_emphasize -> [ Emph md ] - | SK_center | SK_left | SK_right | SK_superscript | SK_subscript | SK_custom _ - -> - (* TODO: implement SK_{center, left, right, superscript, subscript, custom} - using html blocks *) - md - -let rec text_to_markdown doc = List.concat_map ~f:text_element_to_markdown doc +type t = + | Raw of string + | Markdown of string -and text_element_to_markdown (doc_elem : Oct.Types.text_element) = - match doc_elem with - | Raw text -> [ Omd.Text text ] - | Code code -> [ to_inline_code code ] - | PreCode code -> [ to_code_block code ] - | Verbatim text -> [ Raw text ] - | Style (kind, text) -> style_markdown kind @@ text_to_markdown text - | List l -> [ Ul (text_elements_to_markdown l) ] - | Enum l -> [ Ol (text_elements_to_markdown l) ] - | Newline -> [ new_line; new_line ] - | Title (i, _, content) -> - (* TODO: along with cross-references support, add support for labels *) - let heading = text_to_markdown content in - [ heading_level i heading ] - | Ref (RK_link, url, descr) -> - let descr = - Option.map descr ~f:text_to_markdown |> Option.value ~default:[] +let loc_to_meta _loc = Meta.none + +let style_inline ~meta (style : Odoc_parser.Ast.style) inline = + match style with + | `Bold -> Inline.Strong_emphasis (Inline.Emphasis.make inline, meta) + | `Italic -> Inline.Emphasis (Inline.Emphasis.make inline, meta) + | `Emphasis -> Inline.Emphasis (Inline.Emphasis.make inline, meta) + | `Superscript -> inline + | `Subscript -> inline + +let rec inline_element_to_inline + (inline : Odoc_parser.Ast.inline_element Odoc_parser.Loc.with_location) : + Inline.t = + match inline with + | Odoc_parser.Loc.{ value = `Space _; location } -> + let meta = loc_to_meta location in + Inline.Text (" ", meta) + | Odoc_parser.Loc.{ value = `Word w; location } -> + let meta = loc_to_meta location in + Inline.Text (w, meta) + | Odoc_parser.Loc.{ value = `Code_span c; location } -> + let meta = loc_to_meta location in + Inline.Code_span (Inline.Code_span.of_string c, meta) + | Odoc_parser.Loc.{ value = `Raw_markup (Some "html", text); location } -> + let meta = loc_to_meta location in + Inline.Raw_html (Block_line.tight_list_of_string text, meta) + | Odoc_parser.Loc.{ value = `Raw_markup (_, text); location } -> + (* Cmarkit doesn't have constructors for backend other than HTML for inline + raw markups, only for blocks. *) + let meta = loc_to_meta location in + Inline.Text (text, meta) + | Odoc_parser.Loc.{ value = `Styled (style, inlines); location } -> + let text = inline_element_list_to_inlines inlines in + let meta = loc_to_meta location in + style_inline ~meta style text + | Odoc_parser.Loc. + { value = `Reference (_kind, _ref, _inlines); location = _location } -> + (* TODO: add support for references *) + Inline.Break (Inline.Break.make `Hard, Meta.none) + | Odoc_parser.Loc.{ value = `Link (link, inlines); location } -> + let text = inline_element_list_to_inlines inlines in + let ref = + `Inline (Link_definition.make ~dest:(link, Meta.none) (), Meta.none) in - let empty_title = "" in - [ Url (url, descr, empty_title) ] - | Ref (_ref_kind, reference, descr) -> - (* TODO: add support for cross-references *) - Option.map ~f:text_to_markdown descr - |> Option.value ~default:[ to_inline_code reference ] - | Special_ref _ - | Target (_, _) (* TODO: add support for markdown-specific blocks *) -> [] - -and text_elements_to_markdown lst = List.map ~f:text_to_markdown lst - -let rec tags_to_markdown (tags : Oct.Types.tag list) = - List.map ~f:tag_to_markdown tags - |> put_in_between [ new_line; new_line ] - |> List.concat - -and tag_to_markdown tag : Omd.element list = - let format_tag tag = Omd.Bold [ Emph [ Text tag ] ] in - let plain_tag_to_md tag descr = [ format_tag tag; space; Text descr ] in - let tag_with_text_to_md tag text = - format_tag tag :: space :: text_to_markdown text - in - let marked_tag_to_md tag mark = - [ format_tag tag; space; to_inline_code mark ] - in - let marked_tag_with_text_to_md tag mark text = - format_tag tag :: space :: to_inline_code mark :: space - :: text_to_markdown text - in - let see_tag_to_md (see_ref, comment) = - let content = - match (see_ref, comment) with - | Oct.Types.See_url url, text -> - let empty_hover_title = "" in - let link_title = [ Omd.Text "link" ] in - Omd.Url (url, link_title, empty_hover_title) - :: space :: text_to_markdown text - | See_file name, text | See_doc name, text -> - let no_prog_lang = "" in - (* TODO: add support to reference files and documents *) - Code (no_prog_lang, name) :: space :: text_to_markdown text + let link = Inline.Link.make text ref in + let meta = loc_to_meta location in + Inline.Link (link, meta) + | Odoc_parser.Loc.{ value = `Math_span text; location } -> + let meta = loc_to_meta location in + Inline.Ext_math_span + ( Inline.Math_span.make + ~display:false + (Block_line.tight_list_of_string text) + , meta ) + +and inline_element_list_to_inlines inlines = + let inlines = List.map ~f:inline_element_to_inline inlines in + Inline.Inlines (inlines, Meta.none) + +let rec nestable_block_element_to_block + (nestable : + Odoc_parser.Ast.nestable_block_element Odoc_parser.Loc.with_location) = + match nestable with + | Odoc_parser.Loc.{ value = `Paragraph text; location } -> + let inline = inline_element_list_to_inlines text in + let paragraph = Block.Paragraph.make inline in + let meta = loc_to_meta location in + Block.Paragraph (paragraph, meta) + | Odoc_parser.Loc.{ value = `List (kind, style, xs); location } -> + let type' = + match kind with + | `Unordered -> `Unordered '*' + | `Ordered -> `Ordered (1, '*') + in + let tight = + match style with + | `Heavy -> false + | `Light -> true + in + let list_items = + List.map + ~f:(fun n -> + let block = nestable_block_element_list_to_block n in + (Block.List_item.make block, Meta.none)) + xs + in + let l = Block.List'.make ~tight type' list_items in + let meta = loc_to_meta location in + Block.List (l, meta) + | Odoc_parser.Loc.{ value = `Modules modules; location } -> + let type' = `Unordered '*' in + let tight = false in + let list_items = + List.map + ~f:(fun Odoc_parser.Loc.{ value = m; location } -> + let inline = Inline.Text (m, Meta.none) in + let paragraph = Block.Paragraph.make inline in + let block = Block.Paragraph (paragraph, Meta.none) in + let meta = loc_to_meta location in + let marker = Layout.string "!modules:" in + (Block.List_item.make ~marker block, meta)) + modules + in + let l = Block.List'.make ~tight type' list_items in + let meta = loc_to_meta location in + Block.List (l, meta) + | Odoc_parser.Loc. + { value = `Code_block (metadata, { value = code; location = _code_loc }) + ; location + } -> + let info_string = + match metadata with + | None -> None + | Some ({ value = lang; location = lang_log }, _env) -> + Some (lang, loc_to_meta lang_log) in - format_tag "@see" :: space :: content + let block_line = Block_line.list_of_string code in + let code_block = Block.Code_block.make ?info_string block_line in + let meta = loc_to_meta location in + Block.Code_block (code_block, meta) + | Odoc_parser.Loc.{ value = `Verbatim code; location } -> + let info_string = Some ("verb", Meta.none) in + let block_line = Block_line.list_of_string code in + let code_block = Block.Code_block.make ?info_string block_line in + let meta = loc_to_meta location in + Block.Code_block (code_block, meta) + | Odoc_parser.Loc.{ value = `Math_block code; location } -> + let block_line = Block_line.list_of_string code in + let code_block = Block.Code_block.make block_line in + let meta = loc_to_meta location in + Block.Ext_math_block (code_block, meta) + +and nestable_block_element_list_to_block nestables = + let blocks = List.map ~f:nestable_block_element_to_block nestables in + Block.Blocks (blocks, Meta.none) + +let tag_to_paragraph (tag : Odoc_parser.Ast.tag) = + let format_tag tag text = + Inline.Inlines + ( [ Inline.Strong_emphasis + (Inline.Emphasis.make (Inline.Text (tag, Meta.none)), Meta.none) + ; Inline.Text (text, Meta.none) + ] + , Meta.none ) in - match tag with - | Author a -> plain_tag_to_md "@author" a - | Canonical c -> plain_tag_to_md "@canonical" c - | Deprecated text -> tag_with_text_to_md "@deprecated" text - | Return_value text -> tag_with_text_to_md "@return" text - | Version v -> marked_tag_to_md "@version" v - | Since v -> marked_tag_to_md "@since" v - | Before (ver, text) -> ( - match text with - | [] -> marked_tag_to_md "@before" ver - | _ -> marked_tag_with_text_to_md "@before" ver text) - | Param (name, text) -> ( - match text with - | [] -> - tag_with_text_to_md "@param" [ Raw name ] (* in case `id` is missing) *) - | _ -> marked_tag_with_text_to_md "@param" name text) - | Raised_exception (exn, text) -> marked_tag_with_text_to_md "@raise" exn text - | See (r, s) -> see_tag_to_md (r, s) - | Custom (name, text) -> - [ Omd.Emph [ Text ("@" ^ name) ]; space ] @ text_to_markdown text - | Inline -> [ Emph [ Text "@inline" ] ] - -let comment_to_markdown (doc, tags) = - let text = text_to_markdown doc in - let tags = tags_to_markdown tags in - match tags with - | [] -> text - | non_empty_tags -> - let separation = Omd.[ NL; Hr ] in - text @ separation @ non_empty_tags - -type t = - | Raw of string - | Markdown of string + let tag, text = + (* TODO: add support for tags *) + match[@warning "-27"] tag with + | `Author s -> ("@author", s) + | `Deprecated text -> ("@deprecated", "") + | `Param (id, text) -> ("@param", "") + | `Raise (exc, text) -> ("@raise", "") + | `Return text -> ("@return", "") + | `See (`Url, url, text) -> ("@see", "") + | `See (`File, filename, text) -> ("@see", "") + | `See (`Document, document, text) -> ("@see", "") + | `Since s -> ("@since", "") + | `Before (version, text) -> ("@before", "") + | `Version s -> ("@version", "") + | `Canonical s -> ("@canonical", "") + | `Inline -> ("@inline", "") + | `Open -> ("@open", "") + | `Closed -> ("@closed", "") + in + let inline = format_tag tag text in + Block.Paragraph.make inline + +let rec block_element_to_block + (block_element : + Odoc_parser.Ast.block_element Odoc_parser.Loc.with_location) = + match block_element with + | Odoc_parser.Loc.{ value = `Heading (level, _, content); location } -> + let text = inline_element_list_to_inlines content in + let heading = Block.Heading.make ~level text in + let meta = loc_to_meta location in + Block.Heading (heading, meta) + | Odoc_parser.Loc.{ value = `Tag t; location } -> + let paragraph = tag_to_paragraph t in + let meta = loc_to_meta location in + Block.Paragraph (paragraph, meta) + | Odoc_parser.Loc. + { value = + ( `Paragraph _ + | `List _ + | `Modules _ + | `Code_block _ + | `Verbatim _ + | `Math_block _ ) + ; location = _ + } as nestable -> nestable_block_element_to_block nestable + +and block_element_list_to_block l = + let blocks = List.map ~f:block_element_to_block l in + Block.Blocks (blocks, Meta.none) let translate doc : t = - match Oct.parse (Lexing.from_string doc) with - | Error e -> - let msg = Oct.Errors.message e.error in + let location = Lexing.dummy_pos in + let v = Odoc_parser.parse_comment ~location ~text:doc in + match Odoc_parser.warnings v with + | [] -> + let ast = Odoc_parser.ast v in + let block = block_element_list_to_block ast in + let doc = Doc.make block in + let cmark = Cmarkit_commonmark.of_doc doc in + Markdown cmark + | warnings -> + let messages = + List.map + ~f:(fun warn -> ("msg", `String (Odoc_parser.Warning.to_string warn))) + warnings + in Log.log ~section:"debug" (fun () -> - Log.msg "invalid doc comments" [ ("msg", `String msg) ]); - Raw (Omd.to_markdown [ Raw doc ]) - | Ok doc -> - let doc = comment_to_markdown doc in - Markdown (Omd.to_markdown doc) + Log.msg "Invalid documentation comment" messages); + Raw doc diff --git a/ocaml-lsp-server/src/dune b/ocaml-lsp-server/src/dune index 6f0584538..c62a1507e 100644 --- a/ocaml-lsp-server/src/dune +++ b/ocaml-lsp-server/src/dune @@ -24,8 +24,8 @@ merlin-lib.ocaml_utils merlin-lib.utils merlin-lib.extend - octavius - omd + cmarkit + odoc-parser ppx_yojson_conv_lib re stdune