From 68554e4b85528af9f3b33a15a654258750dbd159 Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Tue, 10 Dec 2024 14:53:31 -0500 Subject: [PATCH] Enhance `air format` (#86) * Add `Parse` utility struct * Style changes in latest version * Add multi-file and directory support, add `--check` * Wrap `ParseDiagnostic` in our `ParseError` * Clean up a bit with thiserror! * Add `relativize_path()` * Add a comment about tracing * Remove `air_workspace` for now for simplicity LSP needs `format_node()` since it holds onto the AST, so this isn't as useful yet over there * Add `is_changed()` for ergonomics * Remove outdated comment * Use `u8::MAX` as our catch all error code For the "something has gone wrong" unexpected error case * Various tweaks from code review with @lionel- * Add a TODO to use user supplied line endings in the LSP * Add a TODO about not normalizing line endings in the cli * air_fs -> fs * Ah clippy --- Cargo.lock | 124 ++++++--- Cargo.toml | 4 + crates/air/Cargo.toml | 10 +- crates/air/src/args.rs | 13 +- crates/air/src/commands/format.rs | 318 ++++++++++++++++++++--- crates/air/src/status.rs | 2 +- crates/air_r_formatter/src/lib.rs | 15 +- crates/air_r_formatter/tests/language.rs | 2 +- crates/air_r_parser/Cargo.toml | 1 + crates/air_r_parser/src/error.rs | 35 +++ crates/air_r_parser/src/lib.rs | 3 + crates/air_r_parser/src/parse.rs | 141 ++++++++-- crates/air_r_parser/tests/spec_test.rs | 20 +- crates/fs/Cargo.toml | 23 ++ crates/fs/src/lib.rs | 40 +++ crates/lsp/src/documents.rs | 8 +- 16 files changed, 660 insertions(+), 99 deletions(-) create mode 100644 crates/air_r_parser/src/error.rs create mode 100644 crates/fs/Cargo.toml create mode 100644 crates/fs/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 6d586ac4..8e4a7fc7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -33,12 +33,20 @@ dependencies = [ "air_r_formatter", "air_r_parser", "anyhow", + "biome_console", + "biome_diagnostics", "biome_formatter", + "biome_parser", "clap", + "fs", + "ignore", + "itertools", "line_ending", "lsp", "tempfile", + "thiserror 2.0.5", "tokio", + "tracing", ] [[package]] @@ -179,7 +187,7 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -190,7 +198,7 @@ checksum = "3c87f3f15e7794432337fc718554eaa4dc8f04c9677a950ffe366f20a162ae42" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -470,7 +478,7 @@ checksum = "cf95d9c7e6aba67f8fc07761091e93254677f4db9e27197adecebc7039a58722" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -533,6 +541,7 @@ dependencies = [ "anstyle", "clap_lex", "strsim", + "terminal_size", ] [[package]] @@ -544,7 +553,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -677,7 +686,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -733,7 +742,7 @@ checksum = "de0d48a183585823424a4ce1aa132d174a6a81bd540895822eb4c8373a8e49e8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -777,6 +786,13 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs" +version = "0.0.0" +dependencies = [ + "path-absolutize", +] + [[package]] name = "futures" version = "0.3.31" @@ -833,7 +849,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -1065,7 +1081,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -1375,7 +1391,7 @@ dependencies = [ "serde", "serde_json", "simdutf8", - "thiserror", + "thiserror 1.0.65", "tracing", ] @@ -1402,6 +1418,24 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "path-absolutize" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4af381fe79fa195b4909485d99f73a80792331df0625188e707854f0b3383f5" +dependencies = [ + "path-dedot", +] + +[[package]] +name = "path-dedot" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07ba0ad7e047712414213ff67533e6dd477af0a4e1d14fb52343e53d30ea9397" +dependencies = [ + "once_cell", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -1425,7 +1459,7 @@ checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -1472,9 +1506,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.89" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] @@ -1642,7 +1676,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -1668,7 +1702,7 @@ checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -1679,7 +1713,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -1714,7 +1748,7 @@ checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -1818,7 +1852,7 @@ checksum = "a2dbf8b57f3ce20e4bb171a11822b283bdfab6c4bb0fe64fa729f045f23a0938" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -1840,9 +1874,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.85" +version = "2.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" +checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" dependencies = [ "proc-macro2", "quote", @@ -1857,7 +1891,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -1882,6 +1916,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "terminal_size" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5352447f921fda68cf61b4101566c0bdb5104eff6804d0678e5227580ab6a4e9" +dependencies = [ + "rustix", + "windows-sys 0.59.0", +] + [[package]] name = "tests_macros" version = "0.0.0" @@ -1892,7 +1936,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -1901,7 +1945,16 @@ version = "1.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.65", +] + +[[package]] +name = "thiserror" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643caef17e3128658ff44d85923ef2d28af81bb71e0d67bbfe1d76f19a73e053" +dependencies = [ + "thiserror-impl 2.0.5", ] [[package]] @@ -1912,7 +1965,18 @@ checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "995d0bbc9995d1f19d28b7215a9352b0fc3cd3a2d2ec95c2cadc485cdedbcdde" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", ] [[package]] @@ -1951,7 +2015,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -2040,7 +2104,7 @@ checksum = "84fd902d4e0b9a4b27f2f440108dc034e1758628a9b702f8ec61ad66355422fa" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -2050,7 +2114,7 @@ source = "git+https://github.com/lionel-/tower-lsp?branch=bugfix%2Fpatches#49ef5 dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -2078,7 +2142,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] [[package]] @@ -2401,7 +2465,7 @@ checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", "synstructure", ] @@ -2422,7 +2486,7 @@ checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", "synstructure", ] @@ -2451,5 +2515,5 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.85", + "syn 2.0.90", ] diff --git a/Cargo.toml b/Cargo.toml index bd79472d..ee989634 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ air_r_formatter = { path = "./crates/air_r_formatter" } air_r_parser = { path = "./crates/air_r_parser" } air_r_syntax = { path = "./crates/air_r_syntax" } biome_ungrammar = { path = "./crates/biome_ungrammar" } +fs = { path = "./crates/fs" } line_ending = { path = "./crates/line_ending" } lsp = { path = "./crates/lsp" } lsp_test = { path = "./crates/lsp_test" } @@ -50,15 +51,18 @@ dissimilar = "1.0.9" futures = "0.3.31" futures-util = "0.3.31" httparse = "1.9.5" +ignore = "0.4.23" insta = "1.40.0" itertools = "0.13.0" line-index = "0.1.2" log = "0.4.22" memchr = "2.7.4" +path-absolutize = "3.1.1" proc-macro2 = "1.0.86" serde = { version = "1.0.215", features = ["derive"] } serde_json = "1.0.132" struct-field-names-as-array = "0.3.0" +thiserror = "2.0.5" tokio = { version = "1.41.1" } tokio-util = "0.7.12" # For https://github.com/ebkalderon/tower-lsp/pull/428 diff --git a/crates/air/Cargo.toml b/crates/air/Cargo.toml index 042f4c2e..4df75281 100644 --- a/crates/air/Cargo.toml +++ b/crates/air/Cargo.toml @@ -15,11 +15,19 @@ rust-version.workspace = true air_r_formatter = { workspace = true } air_r_parser = { workspace = true } anyhow = { workspace = true } +biome_console = { workspace = true } +biome_diagnostics = { workspace = true } biome_formatter = { workspace = true } -clap = { workspace = true } +biome_parser = { workspace = true } +clap = { workspace = true, features = ["wrap_help"] } +fs = { workspace = true } +ignore = { workspace = true } +itertools = { workspace = true } line_ending = { workspace = true } lsp = { workspace = true } +thiserror = { workspace = true } tokio = "1.41.1" +tracing = { workspace = true } [dev-dependencies] tempfile = "3.9.0" diff --git a/crates/air/src/args.rs b/crates/air/src/args.rs index c65c5831..0d8ef2a9 100644 --- a/crates/air/src/args.rs +++ b/crates/air/src/args.rs @@ -20,7 +20,7 @@ pub(crate) enum Command { /// Start a language server Lsp(LspCommand), - /// Format a file + /// Format a set of files or directories Format(FormatCommand), } @@ -28,7 +28,14 @@ pub(crate) enum Command { pub(crate) struct LspCommand {} #[derive(Clone, Debug, Parser)] +#[command(arg_required_else_help(true))] pub(crate) struct FormatCommand { - /// The file to format - pub file: PathBuf, + /// The files or directories to format + pub paths: Vec, + + /// If enabled, format results are not written back to the file. Instead, + /// exit with a non-zero status code if any files would have been modified, + /// and zero otherwise. + #[arg(long)] + pub check: bool, } diff --git a/crates/air/src/commands/format.rs b/crates/air/src/commands/format.rs index 165e09ae..46c127a6 100644 --- a/crates/air/src/commands/format.rs +++ b/crates/air/src/commands/format.rs @@ -1,64 +1,312 @@ +use std::fmt::Display; +use std::fmt::Formatter; +use std::io; +use std::io::stdout; +use std::io::Write; use std::path::PathBuf; use air_r_formatter::context::RFormatOptions; use air_r_parser::RParserOptions; +use fs::relativize_path; +use ignore::DirEntry; +use itertools::Either; +use itertools::Itertools; use line_ending::LineEnding; +use thiserror::Error; use crate::args::FormatCommand; use crate::ExitStatus; pub(crate) fn format(command: FormatCommand) -> anyhow::Result { - format_file(&command.file) + let mode = FormatMode::from_command(&command); + let paths = resolve_paths(&command.paths); + + let (actions, errors): (Vec<_>, Vec<_>) = paths + .into_iter() + .map(|path| match path { + Ok(path) => format_file(path, mode), + Err(err) => Err(err.into()), + }) + .partition_map(|result| match result { + Ok(result) => Either::Left(result), + Err(err) => Either::Right(err), + }); + + for error in &errors { + // TODO: Hook up a tracing subscriber! + tracing::error!("{error}"); + } + + match mode { + FormatMode::Write => {} + FormatMode::Check => { + write_changed(&actions, &mut stdout().lock())?; + } + } + + match mode { + FormatMode::Write => { + if errors.is_empty() { + Ok(ExitStatus::Success) + } else { + Ok(ExitStatus::Error) + } + } + FormatMode::Check => { + if errors.is_empty() { + let any_changed = actions.iter().any(FormatFileAction::is_changed); + + if any_changed { + Ok(ExitStatus::Failure) + } else { + Ok(ExitStatus::Success) + } + } else { + Ok(ExitStatus::Error) + } + } + } +} + +#[derive(Copy, Clone, Debug)] +pub enum FormatMode { + Write, + Check, +} + +impl FormatMode { + fn from_command(command: &FormatCommand) -> Self { + if command.check { + FormatMode::Check + } else { + FormatMode::Write + } + } } -// TODO: Should you exit after the first failure? Probably not, probably power through -// and elegantly report failures at the end? Since you've formatted stuff up to -// that point already anyways. -// TODO: Hook this up to a command -// TODO: Ignore anything but R files, of course -fn _format_dir(path: &PathBuf) -> anyhow::Result { - let iter = std::fs::read_dir(path)?; - - for file in iter { - let Ok(file) = file else { - continue; - }; - format_file(&file.path())?; +fn write_changed(actions: &[FormatFileAction], f: &mut impl Write) -> io::Result<()> { + for path in actions + .iter() + .filter_map(|result| match result { + FormatFileAction::Formatted(path) => Some(path), + FormatFileAction::Unchanged => None, + }) + .sorted_unstable() + { + writeln!(f, "Would reformat: {}", path.display())?; } - Ok(ExitStatus::Success) + Ok(()) } -fn format_file(path: &PathBuf) -> anyhow::Result { - let contents = std::fs::read_to_string(path)?; +fn resolve_paths(paths: &[PathBuf]) -> Vec> { + let paths: Vec = paths.iter().map(fs::normalize_path).collect(); - let line_ending = line_ending::infer(&contents); + let (first_path, paths) = paths + .split_first() + .expect("Clap should ensure at least 1 path is supplied."); - // Normalize to Unix line endings - let contents = match line_ending { - LineEnding::Lf => contents, - LineEnding::Crlf => line_ending::normalize(contents), - }; + // TODO: Parallel directory visitor + let mut builder = ignore::WalkBuilder::new(first_path); + + for path in paths { + builder.add(path); + } - let parser_options = RParserOptions::default(); - let parsed = air_r_parser::parse(contents.as_str(), parser_options); + let mut out = Vec::new(); - if parsed.has_errors() { - return Ok(ExitStatus::Error); + for path in builder.build() { + match path { + Ok(entry) => { + if let Some(path) = is_valid_path(entry) { + out.push(Ok(path)); + } + } + Err(err) => { + out.push(Err(err)); + } + } + } + + out +} + +// Decide whether or not to accept an `entry` based on include/exclude rules. +fn is_valid_path(entry: DirEntry) -> Option { + // Ignore directories + if entry.file_type().map_or(true, |ft| ft.is_dir()) { + return None; + } + + // Accept all files that are passed-in directly, even non-R files + if entry.depth() == 0 { + let path = entry.into_path(); + return Some(path); + } + + // Otherwise check if we should accept this entry + // TODO: Many other checks based on user exclude/includes + let path = entry.into_path(); + + if !fs::has_r_extension(&path) { + return None; } - // TODO: Respect user specified `LineEnding` option too, not just inferred line endings - let line_ending = match line_ending { + Some(path) +} + +pub(crate) enum FormatFileAction { + Formatted(PathBuf), + Unchanged, +} + +impl FormatFileAction { + fn is_changed(&self) -> bool { + matches!(self, FormatFileAction::Formatted(_)) + } +} + +// TODO: Take workspace `FormatOptions` that get resolved to `RFormatOptions` +// for the formatter here. Respect user specified `LineEnding` option too, and +// only use inferred endings when `FormatOptions::LineEnding::Auto` is used. +fn format_file(path: PathBuf, mode: FormatMode) -> Result { + let source = std::fs::read_to_string(&path) + .map_err(|err| FormatCommandError::Read(path.clone(), err))?; + + let line_ending = match line_ending::infer(&source) { LineEnding::Lf => biome_formatter::LineEnding::Lf, LineEnding::Crlf => biome_formatter::LineEnding::Crlf, }; + let options = RFormatOptions::default().with_line_ending(line_ending); + + let source = line_ending::normalize(source); + let formatted = match format_source(source.as_str(), options) { + Ok(formatted) => formatted, + Err(err) => return Err(FormatCommandError::Format(path.clone(), err)), + }; + + // TODO: We rarely ever take advantage of this optimization on Windows right + // now. We always normalize on entry but we apply the requested line ending + // on exit (so on Windows we often infer CRLF on entry and normalize to + // LF, but apply CRLF on exit so `source` and `new` always have different + // line endings). We probably need to compare pre-normalized against + // post-formatted output? + // + // Ah, no, the right thing to do is for the cli to not care about normalizing + // line endings. This is mostly useful in the LSP for all the document manipulation + // we are going to do there. In the cli, we want to format a whole bunch of files + // so we really want this optimization, and since the formatter and parser can handle + // windows line endings just fine, we should not normalize here. + match formatted { + FormattedSource::Formatted(new) => { + match mode { + FormatMode::Write => { + std::fs::write(&path, new) + .map_err(|err| FormatCommandError::Write(path.clone(), err))?; + } + FormatMode::Check => {} + } + Ok(FormatFileAction::Formatted(path)) + } + FormattedSource::Unchanged => Ok(FormatFileAction::Unchanged), + } +} - let formatter_options = RFormatOptions::default().with_line_ending(line_ending); - let formatted = air_r_formatter::format_node(formatter_options, &parsed.syntax())?; - let result = formatted.print()?; - let code = result.as_code(); +#[derive(Debug)] +pub(crate) enum FormattedSource { + /// The source was formatted, and the [`String`] contains the transformed source code. + Formatted(String), + /// The source was unchanged. + Unchanged, +} + +#[derive(Error, Debug)] +pub(crate) enum FormatSourceError { + #[error(transparent)] + Parse(#[from] air_r_parser::ParseError), + #[error(transparent)] + Format(#[from] biome_formatter::FormatError), + #[error(transparent)] + Print(#[from] biome_formatter::PrintError), +} + +/// Formats a vector of `source` code +/// +/// Safety: `source` should already be normalized to Unix line endings +pub(crate) fn format_source( + source: &str, + options: RFormatOptions, +) -> std::result::Result { + let parsed = air_r_parser::parse(source, RParserOptions::default()); + + if parsed.has_errors() { + let error = parsed.into_errors().into_iter().next().unwrap(); + return Err(error.into()); + } - std::fs::write(path, code)?; + let formatted = air_r_formatter::format_node(options, &parsed.syntax())?; + let formatted = formatted.print()?; + let formatted = formatted.into_code(); - Ok(ExitStatus::Success) + if source.len() == formatted.len() && source == formatted.as_str() { + Ok(FormattedSource::Unchanged) + } else { + Ok(FormattedSource::Formatted(formatted)) + } +} + +#[derive(Error, Debug)] +pub(crate) enum FormatCommandError { + Ignore(#[from] ignore::Error), + Format(PathBuf, FormatSourceError), + Read(PathBuf, io::Error), + Write(PathBuf, io::Error), +} + +impl Display for FormatCommandError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::Ignore(err) => { + if let ignore::Error::WithPath { path, .. } = err { + write!( + f, + "Failed to format {path}: {err}", + path = relativize_path(path), + err = err + .io_error() + .map_or_else(|| err.to_string(), std::string::ToString::to_string) + ) + } else { + write!( + f, + "Encountered error: {err}", + err = err + .io_error() + .map_or_else(|| err.to_string(), std::string::ToString::to_string) + ) + } + } + Self::Read(path, err) => { + write!( + f, + "Failed to read {path}: {err}", + path = relativize_path(path), + ) + } + Self::Write(path, err) => { + write!( + f, + "Failed to write {path}: {err}", + path = relativize_path(path), + ) + } + Self::Format(path, err) => { + write!( + f, + "Failed to format {path}: {err}", + path = relativize_path(path), + ) + } + } + } } diff --git a/crates/air/src/status.rs b/crates/air/src/status.rs index 55109528..2645a223 100644 --- a/crates/air/src/status.rs +++ b/crates/air/src/status.rs @@ -15,7 +15,7 @@ impl From for ExitCode { match status { ExitStatus::Success => ExitCode::from(0), ExitStatus::Failure => ExitCode::from(1), - ExitStatus::Error => ExitCode::from(2), + ExitStatus::Error => ExitCode::from(u8::MAX), } } } diff --git a/crates/air_r_formatter/src/lib.rs b/crates/air_r_formatter/src/lib.rs index fe5724a0..93fff37f 100644 --- a/crates/air_r_formatter/src/lib.rs +++ b/crates/air_r_formatter/src/lib.rs @@ -44,7 +44,10 @@ impl AsFormat for &T where T: AsFormat, { - type Format<'a> = T::Format<'a> where Self: 'a; + type Format<'a> + = T::Format<'a> + where + Self: 'a; fn format(&self) -> Self::Format<'_> { AsFormat::format(&**self) @@ -58,7 +61,10 @@ impl AsFormat for biome_rowan::SyntaxResult where T: AsFormat, { - type Format<'a> = biome_rowan::SyntaxResult> where Self: 'a; + type Format<'a> + = biome_rowan::SyntaxResult> + where + Self: 'a; fn format(&self) -> Self::Format<'_> { match self { @@ -75,7 +81,10 @@ impl AsFormat for Option where T: AsFormat, { - type Format<'a> = Option> where Self: 'a; + type Format<'a> + = Option> + where + Self: 'a; fn format(&self) -> Self::Format<'_> { self.as_ref().map(|value| value.format()) diff --git a/crates/air_r_formatter/tests/language.rs b/crates/air_r_formatter/tests/language.rs index 27299772..d74c6b8b 100644 --- a/crates/air_r_formatter/tests/language.rs +++ b/crates/air_r_formatter/tests/language.rs @@ -16,7 +16,7 @@ impl TestFormatLanguage for RTestFormatLanguage { type FormatLanguage = RFormatLanguage; fn parse(&self, text: &str) -> AnyParse { - air_r_parser::parse(text, RParserOptions::default()) + air_r_parser::parse(text, RParserOptions::default()).into() } fn to_format_language(&self) -> Self::FormatLanguage { diff --git a/crates/air_r_parser/Cargo.toml b/crates/air_r_parser/Cargo.toml index e2c8f280..6c41d910 100644 --- a/crates/air_r_parser/Cargo.toml +++ b/crates/air_r_parser/Cargo.toml @@ -15,6 +15,7 @@ version = "0.0.0" [dependencies] air_r_factory = { workspace = true } air_r_syntax = { workspace = true } +biome_diagnostics = { workspace = true } biome_parser = { workspace = true } biome_rowan = { workspace = true } biome_unicode_table = { workspace = true } diff --git a/crates/air_r_parser/src/error.rs b/crates/air_r_parser/src/error.rs new file mode 100644 index 00000000..c15ee158 --- /dev/null +++ b/crates/air_r_parser/src/error.rs @@ -0,0 +1,35 @@ +use biome_diagnostics::Diagnostic; +use biome_parser::prelude::ParseDiagnostic; + +/// An error that occurs during parsing +/// +/// Simply wraps a `ParseDiagnostic`, mainly so we can implement +/// `std::error::Error` for it, which it oddly does not implement. +#[derive(Debug, Clone)] +pub struct ParseError { + inner: ParseDiagnostic, +} + +impl std::error::Error for ParseError {} + +impl std::fmt::Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + self.diagnostic().description(f) + } +} + +impl ParseError { + pub fn diagnostic(&self) -> &ParseDiagnostic { + &self.inner + } + + pub fn into_diagnostic(self) -> ParseDiagnostic { + self.inner + } +} + +impl From for ParseError { + fn from(diagnostic: ParseDiagnostic) -> Self { + Self { inner: diagnostic } + } +} diff --git a/crates/air_r_parser/src/lib.rs b/crates/air_r_parser/src/lib.rs index 09290c3b..cf3c4acf 100644 --- a/crates/air_r_parser/src/lib.rs +++ b/crates/air_r_parser/src/lib.rs @@ -1,3 +1,4 @@ +mod error; mod options; mod parse; @@ -5,9 +6,11 @@ mod parse; mod treesitter; use air_r_factory::RSyntaxFactory; +pub use error::ParseError; pub use options::RParserOptions; pub use parse::parse; pub use parse::parse_r_with_cache; +pub use parse::Parse; use air_r_syntax::RLanguage; use biome_parser::tree_sink::LosslessTreeSink; diff --git a/crates/air_r_parser/src/parse.rs b/crates/air_r_parser/src/parse.rs index 331e9140..df668c43 100644 --- a/crates/air_r_parser/src/parse.rs +++ b/crates/air_r_parser/src/parse.rs @@ -1,8 +1,14 @@ +use std::marker::PhantomData; + +use air_r_syntax::RLanguage; +use air_r_syntax::RRoot; use air_r_syntax::RSyntaxKind; +use air_r_syntax::RSyntaxNode; use biome_parser::event::Event; use biome_parser::prelude::ParseDiagnostic; use biome_parser::prelude::Trivia; use biome_parser::AnyParse; +use biome_rowan::AstNode; use biome_rowan::NodeCache; use biome_rowan::TextRange; use biome_rowan::TextSize; @@ -13,30 +19,133 @@ use tree_sitter::Tree; use crate::treesitter::NodeTypeExt; use crate::treesitter::Preorder; use crate::treesitter::WalkEvent; +use crate::ParseError; use crate::RLosslessTreeSink; use crate::RParserOptions; -// TODO(r): These should really return an intermediate `Parse` type which -// can `.into()` an `AnyParse`, see `biome_js_parser`'s `Parse` type -pub fn parse(text: &str, options: RParserOptions) -> AnyParse { +/// A utility struct for managing the result of a parser job +#[derive(Debug, Clone)] +pub struct Parse { + root: RSyntaxNode, + errors: Vec, + _ty: PhantomData, +} + +impl Parse { + pub fn new(root: RSyntaxNode, errors: Vec) -> Parse { + Parse { + root, + errors, + _ty: PhantomData, + } + } + + pub fn cast>(self) -> Option> { + if N::can_cast(self.syntax().kind()) { + Some(Parse::new(self.root, self.errors)) + } else { + None + } + } + + /// The syntax node represented by this Parse result + pub fn syntax(&self) -> RSyntaxNode { + self.root.clone() + } + + /// Get the errors which occurred when parsing + pub fn errors(&self) -> &[ParseError] { + self.errors.as_slice() + } + + /// Get the errors which occurred when parsing + pub fn into_errors(self) -> Vec { + self.errors + } + + /// Returns [true] if the parser encountered some errors during the parsing. + pub fn has_errors(&self) -> bool { + !self.errors.is_empty() + } +} + +impl> Parse { + /// Convert this parse result into a typed AST node. + /// + /// # Panics + /// Panics if the node represented by this parse result mismatches. + pub fn tree(&self) -> T { + self.try_tree().unwrap_or_else(|| { + panic!( + "Expected tree to be a {} but root is:\n{:#?}", + std::any::type_name::(), + self.syntax() + ) + }) + } + + /// Try to convert this parse's untyped syntax node into an AST node. + pub fn try_tree(&self) -> Option { + T::cast(self.syntax()) + } + + /// Convert this parse into a result + pub fn into_result(self) -> Result> { + if !self.has_errors() { + Ok(self.tree()) + } else { + Err(self.errors) + } + } +} + +impl From> for AnyParse { + fn from(parse: Parse) -> Self { + let root = parse.syntax(); + let errors = parse.into_errors(); + let diagnostics = errors + .into_iter() + .map(ParseError::into_diagnostic) + .collect(); + Self::new( + // SAFETY: the parser should always return a root node + root.as_send().unwrap(), + diagnostics, + ) + } +} + +pub fn parse(text: &str, options: RParserOptions) -> Parse { let mut cache = NodeCache::default(); parse_r_with_cache(text, options, &mut cache) } -pub fn parse_r_with_cache(text: &str, options: RParserOptions, cache: &mut NodeCache) -> AnyParse { +pub fn parse_r_with_cache( + text: &str, + options: RParserOptions, + cache: &mut NodeCache, +) -> Parse { tracing::debug_span!("parse").in_scope(move || { let (events, tokens, errors) = parse_text(text, options); + + // We've determined that passing diagnostics through does nothing. + // They go into the tree-sink but come right back out. We think they + // are a holdover from rust-analyzer that can be removed now. The real + // errors are in `errors`. + let _diagnostics = vec![]; + let mut tree_sink = RLosslessTreeSink::with_cache(text, &tokens, cache); - biome_parser::event::process(&mut tree_sink, events, errors); - let (green, parse_errors) = tree_sink.finish(); - AnyParse::new(green.as_send().unwrap(), parse_errors) + biome_parser::event::process(&mut tree_sink, events, _diagnostics); + let (green, _diagnostics) = tree_sink.finish(); + + Parse::new(green, errors) }) } pub fn parse_text( text: &str, _options: RParserOptions, -) -> (Vec>, Vec, Vec) { +) -> (Vec>, Vec, Vec) { let mut parser = tree_sitter::Parser::new(); parser .set_language(&tree_sitter_r::LANGUAGE.into()) @@ -54,7 +163,7 @@ pub fn parse_text( parse_tree(ast, text) } -fn parse_failure() -> (Vec>, Vec, Vec) { +fn parse_failure() -> (Vec>, Vec, Vec) { // Must provide a root node on failures, otherwise `tree_sink.finish()` fails let events = vec![ Event::Start { @@ -67,18 +176,16 @@ fn parse_failure() -> (Vec>, Vec, Vec = None; - let error = ParseDiagnostic::new("Tree-sitter failed", span); + let diagnostic = ParseDiagnostic::new("Failed to parse", span); + let error = ParseError::from(diagnostic); let errors = vec![error]; (events, trivia, errors) } -fn parse_tree( - ast: Tree, - text: &str, -) -> (Vec>, Vec, Vec) { +fn parse_tree(ast: Tree, text: &str) -> (Vec>, Vec, Vec) { let mut walker = RWalk::new(text); let root = ast.root_node(); @@ -958,7 +1065,7 @@ impl<'src> RWalk<'src> { struct RParse { events: Vec>, trivia: Vec, - errors: Vec, + errors: Vec, } impl RParse { @@ -993,7 +1100,7 @@ impl RParse { self.trivia.push(trivia); } - fn drain(self) -> (Vec>, Vec, Vec) { + fn drain(self) -> (Vec>, Vec, Vec) { (self.events, self.trivia, self.errors) } diff --git a/crates/air_r_parser/tests/spec_test.rs b/crates/air_r_parser/tests/spec_test.rs index 951e5471..d22860a9 100644 --- a/crates/air_r_parser/tests/spec_test.rs +++ b/crates/air_r_parser/tests/spec_test.rs @@ -1,10 +1,12 @@ +use air_r_parser::ParseError; use air_r_parser::{parse, RParserOptions}; -use air_r_syntax::{RLanguage, RRoot, RSyntaxNode}; +use air_r_syntax::{RRoot, RSyntaxNode}; use biome_console::fmt::{Formatter, Termcolor}; use biome_console::markup; use biome_diagnostics::display::PrintDiagnostic; use biome_diagnostics::termcolor; use biome_diagnostics::DiagnosticExt; +use biome_parser::prelude::ParseDiagnostic; use biome_rowan::SyntaxNode; use biome_rowan::SyntaxSlot; use biome_rowan::{AstNode, SyntaxKind}; @@ -75,12 +77,18 @@ pub fn run(test_case: &str, _snapshot_name: &str, test_directory: &str, outcome_ {:#?} ``` "#, - parsed.syntax::() + parsed.syntax() ) .unwrap(); - let diagnostics = parsed.diagnostics(); - if !diagnostics.is_empty() { + if parsed.has_errors() { + let diagnostics: Vec = parsed + .errors() + .iter() + .map(ParseError::diagnostic) + .map(ParseDiagnostic::clone) + .collect(); + let mut diagnostics_buffer = termcolor::Buffer::no_color(); let termcolor = &mut Termcolor(&mut diagnostics_buffer); @@ -124,13 +132,13 @@ pub fn run(test_case: &str, _snapshot_name: &str, test_directory: &str, outcome_ panic!("Parsed tree of a 'OK' test case should not contain any missing required children or bogus nodes: \n {formatted_ast:#?} \n\n {formatted_ast}"); } - let syntax = parsed.syntax::(); + let syntax = parsed.syntax(); if has_bogus_nodes_or_empty_slots(&syntax) { panic!("modified tree has bogus nodes or empty slots:\n{syntax:#?} \n\n {syntax}") } } ExpectedOutcome::Fail => { - if parsed.diagnostics().is_empty() { + if !parsed.has_errors() { panic!("Failing test must have diagnostics"); } } diff --git a/crates/fs/Cargo.toml b/crates/fs/Cargo.toml new file mode 100644 index 00000000..aa564ed0 --- /dev/null +++ b/crates/fs/Cargo.toml @@ -0,0 +1,23 @@ + +[package] +authors.workspace = true +categories.workspace = true +description = "Air filesystem utilities" +edition.workspace = true +homepage.workspace = true +keywords.workspace = true +license.workspace = true +name = "fs" +publish = false +repository.workspace = true +rust-version.workspace = true +version = "0.0.0" + +[dependencies] +path-absolutize = { workspace = true, features = [ + "once_cell_cache", + "use_unix_paths_on_wasm", +] } + +[lints] +workspace = true diff --git a/crates/fs/src/lib.rs b/crates/fs/src/lib.rs new file mode 100644 index 00000000..3467597a --- /dev/null +++ b/crates/fs/src/lib.rs @@ -0,0 +1,40 @@ +use path_absolutize::Absolutize; +use std::ffi::OsStr; +use std::path::Path; +use std::path::PathBuf; + +pub fn has_r_extension(path: &Path) -> bool { + path.extension() + .and_then(OsStr::to_str) + .map_or(false, is_r_extension) +} + +pub fn is_r_extension(extension: &str) -> bool { + matches!(extension, "r" | "R") +} + +/// Convert any path to an absolute path (based on the current working +/// directory). +pub fn normalize_path>(path: P) -> PathBuf { + let path = path.as_ref(); + if let Ok(path) = path.absolutize() { + path.to_path_buf() + } else { + path.to_path_buf() + } +} + +/// Convert an absolute path to be relative to the current working directory. +pub fn relativize_path>(path: P) -> String { + let path = path.as_ref(); + + #[cfg(target_arch = "wasm32")] + let cwd = Path::new("."); + #[cfg(not(target_arch = "wasm32"))] + let cwd = path_absolutize::path_dedot::CWD.as_path(); + + if let Ok(path) = path.strip_prefix(cwd) { + return format!("{}", path.display()); + } + format!("{}", path.display()) +} diff --git a/crates/lsp/src/documents.rs b/crates/lsp/src/documents.rs index edb9a871..56ed960e 100644 --- a/crates/lsp/src/documents.rs +++ b/crates/lsp/src/documents.rs @@ -61,6 +61,10 @@ impl Document { LineEnding::Crlf => line_ending::normalize(contents), }; + // TODO: Handle user requested line ending preference here + // by potentially overwriting `endings` if the user didn't + // select `LineEndings::Auto`, and then pass that to `LineIndex`. + // Create line index to keep track of newline offsets let line_index = LineIndex { index: triomphe::Arc::new(line_index::LineIndex::new(&contents)), @@ -69,7 +73,7 @@ impl Document { }; // Parse document immediately for now - let parse = air_r_parser::parse(&contents, Default::default()); + let parse = air_r_parser::parse(&contents, Default::default()).into(); Self { contents, @@ -117,7 +121,7 @@ impl Document { ); // No incrementality for now - let parse = air_r_parser::parse(&contents, Default::default()); + let parse = air_r_parser::parse(&contents, Default::default()).into(); self.parse = parse; self.contents = contents;