From 744761b2a5da26b1cd5f6aece2f4fbefd3a94800 Mon Sep 17 00:00:00 2001 From: Miles Johnson Date: Tue, 24 May 2022 17:42:53 -0700 Subject: [PATCH] new: Migrate to `wax` for better glob support. (#105) * Move wax to utils. * Move utils. * Remove glob walk. * Remove globset. * Fix deps. * More globset work. * Add file group docs. * Add file pattern docs. * Bump versions. * Test windows. * Better git handling. * Fix windows prefix strip. * Use standardize. * Move wax to utils. * Move utils. * Remove glob walk. * Remove globset. * Fix deps. * More globset work. * Add file group docs. * Add file pattern docs. * Bump versions. * Test windows. * Better git handling. * Fix windows prefix strip. * Use standardize. * Bubble errors. * Debug hash. * Keep drive around. * Revert "Keep drive around." This reverts commit 55c383894352fc2c0a37a4c05573b343bf9636be. * Use double star. * Update test. --- .yarn/versions/dee9bb70.yml | 2 + Cargo.lock | 5 +- ROADMAP.md | 82 ++++---- crates/cli/Cargo.toml | 1 - crates/cli/src/commands/init.rs | 36 +--- crates/cli/tests/run_test.rs | 7 + ...est__caching__creates_run_state_cache.snap | 38 ++++ crates/project/Cargo.toml | 2 - crates/project/src/errors.rs | 8 +- crates/project/src/file_group.rs | 42 ++-- crates/project/src/task.rs | 21 +- crates/project/src/token.rs | 5 +- crates/project/tests/project_test.rs | 17 +- crates/utils/Cargo.toml | 2 +- crates/utils/src/fs.rs | 15 -- crates/utils/src/glob.rs | 188 ++++++++++++++++++ crates/utils/src/lib.rs | 1 + crates/utils/src/path.rs | 97 --------- crates/utils/src/test.rs | 78 ++++---- .../workspace/src/actions/hashing/target.rs | 8 +- website/docs/concepts/file-group.mdx | 46 +++++ website/docs/concepts/file-pattern.mdx | 70 +++++++ website/docs/config/global-project.mdx | 21 +- website/docs/config/project.mdx | 51 +---- website/docs/create-task.mdx | 3 +- website/docs/install.mdx | 3 +- website/sidebars.js | 10 +- 27 files changed, 510 insertions(+), 349 deletions(-) create mode 100644 
.yarn/versions/dee9bb70.yml create mode 100644 crates/cli/tests/snapshots/run_test__caching__creates_run_state_cache.snap create mode 100644 crates/utils/src/glob.rs create mode 100644 website/docs/concepts/file-pattern.mdx diff --git a/.yarn/versions/dee9bb70.yml b/.yarn/versions/dee9bb70.yml new file mode 100644 index 00000000000..0e04d04e9ee --- /dev/null +++ b/.yarn/versions/dee9bb70.yml @@ -0,0 +1,2 @@ +releases: + website: patch diff --git a/Cargo.lock b/Cargo.lock index 65aa7e9b6be..b77fa18742a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1345,7 +1345,6 @@ dependencies = [ "strum_macros", "tera", "tokio", - "wax", ] [[package]] @@ -1407,8 +1406,6 @@ name = "moon_project" version = "0.1.0" dependencies = [ "common-path", - "globset", - "globwalk", "insta", "itertools", "moon_config", @@ -1469,7 +1466,6 @@ dependencies = [ "chrono", "chrono-humanize", "dirs", - "globset", "json_comments", "lazy_static", "moon_error", @@ -1479,6 +1475,7 @@ dependencies = [ "serde", "serde_json", "tokio", + "wax", ] [[package]] diff --git a/ROADMAP.md b/ROADMAP.md index 39afa153d35..c80739ef8ce 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,3 +1,43 @@ +# Backlog + +## Tasks + +- [ ] Add `@cache` token + +## Action runner + +- [ ] Add a debug layer so that the node processes can be inspected +- [ ] Write output logs for every action + +## CLI + +- [ ] `run-many` +- [ ] `graph` + - [ ] Spin up an interactive website with full project/task data + +## Node.js + +- [ ] Add chrome profiling support to spawned processes +- [ ] Publish npm packages + +# 0.2.0 + +## Cache + +- [ ] hashing + - [ ] ignore hashes for files that are gitignored +- [ ] add docs on caching options + +## Tests + +- [ ] add code coverage reports in CI +- [ ] increase code coverage and add more integration tests + +## Targets + +- [ ] macos arm/m1 +- [ ] linux x64 (musl) + # 0.1.0 - [x] website @@ -92,44 +132,4 @@ - [x] use `stdin` for commands that take long arguments - [x] dont load 
`package.json`/`tsconfig.json` so much - [x] delete old hashes when the hash changes - - [x] include local file changes in hash - -# 0.2.0 - -## Cache - -- [ ] hashing - - [ ] ignore hashes for files that are gitignored -- [ ] add docs on caching options - -## Tests - -- [ ] add code coverage reports in CI -- [ ] increase code coverage and add more integration tests - -## Targets - -- [ ] macos arm/m1 -- [ ] linux x64 (musl) - -# Backlog - -## Tasks - -- [ ] Add `@cache` token - -## Action runner - -- [ ] Add a debug layer so that the node processes can be inspected -- [ ] Write output logs for every action - -## CLI - -- [ ] `run-many` -- [ ] `graph` - - [ ] Spin up an interactive website with full project/task data - -## Node.js - -- [ ] Add chrome profiling support to spawned processes -- [ ] Publish npm packages + - [x] include local file changes in hash \ No newline at end of file diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 45c96e7ae63..9a8c4330e92 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -31,7 +31,6 @@ strum = "0.24.0" strum_macros = "0.24.0" tera = "1.15.0" tokio = { version = "1.18.2", features = ["full"] } -wax = "0.4.0" [dev-dependencies] moon_cache = { path = "../cache" } diff --git a/crates/cli/src/commands/init.rs b/crates/cli/src/commands/init.rs index 5c9bac281b6..e3dc75b1f51 100644 --- a/crates/cli/src/commands/init.rs +++ b/crates/cli/src/commands/init.rs @@ -9,15 +9,13 @@ use moon_lang::is_using_package_manager; use moon_lang_node::{NODENV, NPM, NVMRC, PNPM, YARN}; use moon_logger::color; use moon_terminal::create_theme; -use moon_utils::fs; -use moon_utils::path; +use moon_utils::{fs, glob, path}; use std::collections::BTreeMap; use std::env; use std::fs::{read_to_string, OpenOptions}; use std::io::prelude::*; use std::path::{Path, PathBuf}; use tera::{Context, Tera}; -use wax::Glob; type AnyError = Box; @@ -150,33 +148,11 @@ fn inherit_projects_from_workspaces( workspaces: Vec, projects: &mut 
BTreeMap, ) -> Result<(), AnyError> { - for pattern in workspaces { - if path::is_glob(&pattern) { - let glob = Glob::new(&pattern).unwrap(); - - for entry in glob.walk(dest_dir, usize::MAX) { - let entry = match entry { - Ok(e) => e, - // Will crash if the dir doesnt exist - Err(_) => { - continue; - } - }; - - if entry.file_type().is_dir() { - let (id, source) = infer_project_name_and_source( - &entry - .path() - .strip_prefix(dest_dir) - .unwrap() - .to_string_lossy(), - ); - - projects.insert(id, source); - } - } - } else { - let (id, source) = infer_project_name_and_source(&pattern); + for path in glob::walk(dest_dir, &workspaces)? { + if path.is_dir() { + let (id, source) = infer_project_name_and_source( + &path.strip_prefix(dest_dir).unwrap().to_string_lossy(), + ); projects.insert(id, source); } diff --git a/crates/cli/tests/run_test.rs b/crates/cli/tests/run_test.rs index d50c7d9f368..9ec28982914 100644 --- a/crates/cli/tests/run_test.rs +++ b/crates/cli/tests/run_test.rs @@ -116,6 +116,13 @@ mod caching { .await .unwrap(); + assert_snapshot!(read_to_string( + fixture + .path() + .join(format!(".moon/cache/hashes/{}.json", state.item.hash)) + ) + .unwrap()); + assert_eq!(state.item.exit_code, 0); assert_eq!(state.item.stdout, "stdout"); assert_eq!(state.item.stderr, "stderr"); diff --git a/crates/cli/tests/snapshots/run_test__caching__creates_run_state_cache.snap b/crates/cli/tests/snapshots/run_test__caching__creates_run_state_cache.snap new file mode 100644 index 00000000000..797f222c136 --- /dev/null +++ b/crates/cli/tests/snapshots/run_test__caching__creates_run_state_cache.snap @@ -0,0 +1,38 @@ +--- +source: crates/cli/tests/run_test.rs +assertion_line: 119 +expression: "read_to_string(fixture.path().join(format!(\".moon/cache/hashes/{}.json\",\n state.item.hash))).unwrap()" +--- +{ + "command": "node", + "args": [ + "./standard.js" + ], + "deps": [], + "envVars": {}, + "inputHashes": { + "node/cjsFile.cjs": "91382f667258361b9397214d0aec54d8d576ae19", 
+ "node/cwd.js": "47d4aa44cd6363251821ab9c59f4c68df455445c", + "node/envVars.js": "44b0fda6bce364122223fbfc67b65ecbc52aec91", + "node/envVarsMoon.js": "69357e53fddeb9270d92a076e7edab3ebf004957", + "node/exitCodeNonZero.js": "1cea9d1c8c3776318c20d54a86c1b492df443397", + "node/exitCodeZero.js": "887c40bee2815c60cf4efde58a52716e4c393a24", + "node/mjsFile.mjs": "a5b6817de5cfa541e430072598de3f42d83bd6cf", + "node/passthroughArgs.js": "83cf03c37aa75b33592735611860e32c6f338693", + "node/processExitNonZero.js": "2fe41b280bb08f250ca7bf2263ec0b17fa7010f1", + "node/processExitZero.js": "eb885b8c763ab8b5d4e1da46494d965078c98e8b", + "node/project.yml": "7b3a3626af11cdc005b7042f3d28171f965e4545", + "node/standard.js": "8d4f87187c238d808f0505268febb619f2978c47", + "node/throwError.js": "9c5b8d2297fe4a6c23e8c18f1efb6871bec6faf4", + "node/topLevelAwait.mjs": "50945a9865eddbd52f7e648743d627749623da22", + "node/unhandledPromise.js": "909d23eeb442f4daff733fe3b601a1a1613c1282" + }, + "nodeVersion": "16.0.0", + "packageDependencies": {}, + "packageDevDependencies": {}, + "packagePeerDependencies": {}, + "projectDeps": [], + "target": "node:standard", + "tsconfigCompilerOptions": {}, + "version": "1" +} diff --git a/crates/project/Cargo.toml b/crates/project/Cargo.toml index cbff3749ac7..7800d4308c7 100644 --- a/crates/project/Cargo.toml +++ b/crates/project/Cargo.toml @@ -9,8 +9,6 @@ moon_error = { path = "../error" } moon_logger = { path = "../logger" } moon_utils = { path = "../utils" } common-path = "1.0.0" -globset = "0.4.8" -globwalk = "0.8.1" itertools = "0.10.3" petgraph = "0.6.0" serde = { version = "1.0.137", features = ["derive"] } diff --git a/crates/project/src/errors.rs b/crates/project/src/errors.rs index 781478058e4..37257269520 100644 --- a/crates/project/src/errors.rs +++ b/crates/project/src/errors.rs @@ -1,5 +1,6 @@ use moon_config::{constants, ValidationErrors}; use moon_error::MoonError; +use moon_utils::glob::GlobError; use std::path::PathBuf; use thiserror::Error; 
@@ -35,13 +36,10 @@ pub enum ProjectError { UnconfiguredTask(String, String), #[error(transparent)] - Moon(#[from] MoonError), - - #[error(transparent)] - GlobWalk(#[from] globwalk::GlobError), + Glob(#[from] GlobError), #[error(transparent)] - GlobSet(#[from] globset::Error), + Moon(#[from] MoonError), #[error(transparent)] Target(#[from] TargetError), diff --git a/crates/project/src/file_group.rs b/crates/project/src/file_group.rs index f6c8f40f328..f97c1bdd74c 100644 --- a/crates/project/src/file_group.rs +++ b/crates/project/src/file_group.rs @@ -1,9 +1,8 @@ use crate::errors::{ProjectError, TokenError}; use common_path::common_path_all; -use globwalk::GlobWalkerBuilder; -use moon_utils::path::{expand_root_path, is_glob}; +use moon_utils::glob; +use moon_utils::path::expand_root_path; use serde::{Deserialize, Serialize}; -use std::fs; use std::path::{Path, PathBuf}; #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] @@ -56,7 +55,7 @@ impl FileGroup { let mut globs = vec![]; for file in &self.files { - if is_glob(file) { + if glob::is_glob(file) { globs.push(expand_root_path(file, workspace_root, project_root)); } } @@ -99,46 +98,35 @@ impl FileGroup { let mut list = vec![]; for file in &self.files { - if is_glob(file) { + if glob::is_glob(file) { let root = if file.starts_with('/') { workspace_root } else { project_root }; - let walker = GlobWalkerBuilder::from_patterns(&root, &[file]) - .follow_links(false) - .build()?; - - for entry in walker { - let entry_path = entry.unwrap(); // Handle error? + for path in glob::walk(root, &[file.clone()])? 
{ let allowed = if is_dir { - entry_path.file_type().is_dir() + path.is_dir() } else { - entry_path.file_type().is_file() + path.is_file() }; if allowed { - list.push(entry_path.into_path()); + list.push(path); } } } else { - let file_path = expand_root_path(file, workspace_root, project_root); - - let allowed = match fs::metadata(&file_path) { - Ok(meta) => { - if is_dir { - meta.is_dir() - } else { - meta.is_file() - } - } - // Branch exists for logging - Err(_) => false, + let path = expand_root_path(file, workspace_root, project_root); + + let allowed = if is_dir { + path.is_dir() + } else { + path.is_file() }; if allowed { - list.push(file_path.to_owned()); + list.push(path.to_owned()); } } } diff --git a/crates/project/src/task.rs b/crates/project/src/task.rs index 0d43f7c0759..c37bad4f0ac 100644 --- a/crates/project/src/task.rs +++ b/crates/project/src/task.rs @@ -2,12 +2,11 @@ use crate::errors::{ProjectError, TargetError}; use crate::target::{Target, TargetProject}; use crate::token::TokenResolver; use crate::types::{EnvVars, ExpandedFiles, TouchedFilePaths}; -use globset::{Glob, GlobSet, GlobSetBuilder}; use moon_config::{ FilePath, FilePathOrGlob, TargetID, TaskConfig, TaskMergeStrategy, TaskOptionsConfig, TaskType, }; use moon_logger::{color, debug, trace}; -use moon_utils::{fs, path, string_vec}; +use moon_utils::{glob, path, string_vec}; use serde::{Deserialize, Serialize}; use std::collections::HashSet; @@ -138,14 +137,8 @@ impl Task { } /// Create a globset of all input globs to match with. - pub fn create_globset(&self) -> Result { - let mut glob_builder = GlobSetBuilder::new(); - - for glob in &self.input_globs { - glob_builder.add(Glob::new(glob)?); - } - - Ok(glob_builder.build()?) + pub fn create_globset(&self) -> Result { + Ok(glob::GlobSet::new(&self.input_globs)?) } /// Expand the args list to resolve tokens, relative to the project root. @@ -267,8 +260,8 @@ impl Task { for input in &token_resolver.resolve(&self.inputs, None)? 
{ // We cant canonicalize here as these inputs may not exist! - if path::is_path_glob(input) { - self.input_globs.push(path::normalize_glob(input)?); + if glob::is_path_glob(input) { + self.input_globs.push(glob::normalize(input)?); } else { self.input_paths.insert(path::normalize(input)); } @@ -290,7 +283,7 @@ impl Task { ); for output in &token_resolver.resolve(&self.outputs, None)? { - if path::is_path_glob(output) { + if glob::is_path_glob(output) { return Err(ProjectError::NoOutputGlob( output.to_owned(), self.target.clone(), @@ -335,7 +328,7 @@ impl Task { let mut affected = self.input_paths.contains(file); if !affected && has_globs { - affected = fs::matches_globset(&globset, file)?; + affected = globset.matches(file)?; } trace!( diff --git a/crates/project/src/token.rs b/crates/project/src/token.rs index dfa01c418f3..fbb483f386d 100644 --- a/crates/project/src/token.rs +++ b/crates/project/src/token.rs @@ -3,7 +3,8 @@ use crate::file_group::FileGroup; use crate::target::Target; use crate::task::Task; use moon_logger::{color, trace, warn}; -use moon_utils::path::{expand_root_path, is_glob}; +use moon_utils::glob; +use moon_utils::path::expand_root_path; use moon_utils::regex::{ matches_token_func, matches_token_var, TOKEN_FUNC_ANYWHERE_PATTERN, TOKEN_FUNC_PATTERN, TOKEN_VAR_PATTERN, @@ -358,7 +359,7 @@ impl<'a> TokenResolver<'a> { } }; - if is_glob(input) { + if glob::is_glob(input) { match task.input_globs.iter().find(|g| g.ends_with(input)) { Some(g) => { results.push(PathBuf::from(g)); diff --git a/crates/project/tests/project_test.rs b/crates/project/tests/project_test.rs index bc2d83d4e16..2966bc18945 100644 --- a/crates/project/tests/project_test.rs +++ b/crates/project/tests/project_test.rs @@ -217,7 +217,7 @@ mod tasks { use moon_project::test::{ create_expanded_task as create_expanded_task_internal, create_file_groups_config, }; - use moon_utils::path; + use moon_utils::glob; use moon_utils::test::wrap_glob; use pretty_assertions::assert_eq; @@ 
-311,7 +311,7 @@ mod tasks { // Expanded task.input_globs - .push(path::normalize_glob(&workspace_root.join("tasks/no-tasks/**/*")).unwrap()); + .push(glob::normalize(&workspace_root.join("tasks/no-tasks/**/*")).unwrap()); assert_eq!( project, @@ -367,15 +367,10 @@ mod tasks { // Expanded let wild_glob = workspace_root.join("tasks/basic/**/*"); - build - .input_globs - .push(path::normalize_glob(&wild_glob).unwrap()); - std.input_globs - .push(path::normalize_glob(&wild_glob).unwrap()); - test.input_globs - .push(path::normalize_glob(&wild_glob).unwrap()); - lint.input_globs - .push(path::normalize_glob(&wild_glob).unwrap()); + build.input_globs.push(glob::normalize(&wild_glob).unwrap()); + std.input_globs.push(glob::normalize(&wild_glob).unwrap()); + test.input_globs.push(glob::normalize(&wild_glob).unwrap()); + lint.input_globs.push(glob::normalize(&wild_glob).unwrap()); assert_eq!( project, diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index 166bcba9894..617cbac4f43 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -13,7 +13,6 @@ cached = "0.34.0" chrono = "0.4.19" chrono-humanize = "0.2.1" dirs = "4.0.0" -globset = "0.4.8" json_comments = "0.2.1" lazy_static = "1.4.0" path-clean = "0.1.0" @@ -21,4 +20,5 @@ regex = "1.5.6" serde = "1.0.137" serde_json = { version = "1.0.81", features = ["preserve_order"] } tokio = { version = "1.18.2", features = ["full"] } +wax = "0.4.0" diff --git a/crates/utils/src/fs.rs b/crates/utils/src/fs.rs index a499e5f0d18..88fb2ce91a8 100644 --- a/crates/utils/src/fs.rs +++ b/crates/utils/src/fs.rs @@ -1,5 +1,4 @@ use async_recursion::async_recursion; -use globset::GlobSet; use json_comments::StripComments; use moon_error::{map_io_to_fs_error, map_json_to_error, MoonError}; use regex::Regex; @@ -93,20 +92,6 @@ pub async fn link_dir(from_root: &Path, from: &Path, to_root: &Path) -> Result<( Ok(()) } -#[cfg(not(windows))] -pub fn matches_globset(globset: &GlobSet, path: &Path) -> Result { - 
Ok(globset.is_match(path)) -} - -// globset doesnt match against inputs that use backwards slashes -// https://github.com/BurntSushi/ripgrep/issues/2001 -#[cfg(windows)] -pub fn matches_globset(globset: &GlobSet, path: &Path) -> Result { - use crate::path::normalize_glob; - - Ok(globset.is_match(&PathBuf::from(normalize_glob(path)?))) -} - pub async fn metadata(path: &Path) -> Result { fs::metadata(path) .await diff --git a/crates/utils/src/glob.rs b/crates/utils/src/glob.rs new file mode 100644 index 00000000000..fd09b3adda2 --- /dev/null +++ b/crates/utils/src/glob.rs @@ -0,0 +1,188 @@ +use crate::path::{path_to_string, standardize_separators}; +use lazy_static::lazy_static; +use moon_error::MoonError; +use regex::Regex; +use std::path::{Path, PathBuf}; +pub use wax::Glob; +use wax::{Any, GlobError as WaxGlobError, Pattern}; + +lazy_static! { + pub static ref WINDOWS_PREFIX: Regex = Regex::new(r"(//\?/)?[A-Z]:").unwrap(); +} + +pub type GlobError = WaxGlobError<'static>; + +pub struct GlobSet<'t> { + any: Any<'t>, +} + +impl<'t> GlobSet<'t> { + pub fn new(patterns: &'t [String]) -> Result { + let mut globs = vec![]; + + for pattern in patterns { + globs.push(Glob::new(pattern).map_err(WaxGlobError::into_owned)?); + } + + Ok(GlobSet { + any: wax::any::(globs).unwrap(), + }) + } + + pub fn matches(&self, path: &Path) -> Result { + Ok(self.any.is_match(path)) + } +} + +// This is not very exhaustive and may be inaccurate. 
+pub fn is_glob(value: &str) -> bool { + let single_values = vec!['*', '?', '!']; + let paired_values = vec![('{', '}'), ('[', ']')]; + let mut bytes = value.bytes(); + let mut is_escaped = |index: usize| { + if index == 0 { + return false; + } + + bytes.nth(index - 1).unwrap_or(b' ') == b'\\' + }; + + if value.contains("**") { + return true; + } + + for single in single_values { + if !value.contains(single) { + continue; + } + + if let Some(index) = value.find(single) { + if !is_escaped(index) { + return true; + } + } + } + + for (open, close) in paired_values { + if !value.contains(open) || !value.contains(close) { + continue; + } + + if let Some(index) = value.find(open) { + if !is_escaped(index) { + return true; + } + } + } + + false +} + +pub fn is_path_glob(path: &Path) -> bool { + is_glob(&path.to_string_lossy()) +} + +pub fn normalize(path: &Path) -> Result { + // Always use forward slashes for globs + let glob = standardize_separators(&path_to_string(path)?); + + // Remove UNC and drive prefix as it breaks glob matching + if cfg!(windows) { + return Ok(WINDOWS_PREFIX.replace_all(&glob, "**").to_string()); + } + + Ok(glob) +} + +/// Wax currently doesn't support negated globs (starts with !), +/// so we must extract them manually. 
+pub fn split_patterns(patterns: &[String]) -> (Vec, Vec) { + let mut expressions = vec![]; + let mut negations = vec![]; + + for pattern in patterns { + if pattern.starts_with('!') { + negations.push(pattern.strip_prefix('!').unwrap().to_owned()); + } else if pattern.starts_with('/') { + expressions.push(pattern.strip_prefix('/').unwrap().to_owned()); + } else { + expressions.push(pattern.clone()); + } + } + + (expressions, negations) +} + +pub fn walk(base_dir: &Path, patterns: &[String]) -> Result, GlobError> { + let (expressions, _negations) = split_patterns(patterns); + let mut paths = vec![]; + + for expression in expressions { + let glob = Glob::new(&expression).map_err(WaxGlobError::into_owned)?; + + for entry in glob.walk(base_dir, usize::MAX) + // .not(&negations) + { + match entry { + Ok(e) => paths.push(e.into_path()), + Err(_) => { + // Will crash if the file doesnt exist + continue; + } + }; + } + } + + Ok(paths) +} + +#[cfg(test)] +mod tests { + use super::*; + + mod is_glob { + use super::*; + + #[test] + fn returns_true_when_a_glob() { + assert!(is_glob("**")); + assert!(is_glob("**/src/*")); + assert!(is_glob("src/**")); + assert!(is_glob("*.ts")); + assert!(is_glob("file.*")); + assert!(is_glob("file.{js,ts}")); + assert!(is_glob("file.[jstx]")); + assert!(is_glob("file.tsx?")); + } + + #[test] + fn returns_false_when_not_glob() { + assert!(!is_glob("dir")); + assert!(!is_glob("file.rs")); + assert!(!is_glob("dir/file.ts")); + assert!(!is_glob("dir/dir/file_test.rs")); + assert!(!is_glob("dir/dirDir/file-ts.js")); + } + + #[test] + fn returns_false_when_escaped_glob() { + assert!(!is_glob("\\*.rs")); + assert!(!is_glob("file\\?.js")); + assert!(!is_glob("folder-\\[id\\]")); + } + } + + mod windows_prefix { + use super::*; + + #[test] + fn removes_unc_and_drive_prefix() { + assert_eq!( + WINDOWS_PREFIX + .replace_all("//?/D:/Projects/moon", "**") + .to_string(), + String::from("**/Projects/moon") + ); + } + } +} diff --git 
a/crates/utils/src/lib.rs b/crates/utils/src/lib.rs index b7e81c578a6..18dee9600c3 100644 --- a/crates/utils/src/lib.rs +++ b/crates/utils/src/lib.rs @@ -1,4 +1,5 @@ pub mod fs; +pub mod glob; pub mod path; pub mod process; pub mod regex; diff --git a/crates/utils/src/path.rs b/crates/utils/src/path.rs index 79f2dc9fc51..20ddcdd8e34 100644 --- a/crates/utils/src/path.rs +++ b/crates/utils/src/path.rs @@ -12,70 +12,10 @@ pub fn expand_root_path(file: &str, workspace_root: &Path, project_root: &Path) } } -// This is not very exhaustive and may be inaccurate. -pub fn is_glob(value: &str) -> bool { - let single_values = vec!['*', '?', '1']; - let paired_values = vec![('{', '}'), ('[', ']')]; - let mut bytes = value.bytes(); - let mut is_escaped = |index: usize| { - if index == 0 { - return false; - } - - bytes.nth(index - 1).unwrap_or(b' ') == b'\\' - }; - - if value.contains("**") { - return true; - } - - for single in single_values { - if !value.contains(single) { - continue; - } - - if let Some(index) = value.find(single) { - if !is_escaped(index) { - return true; - } - } - } - - for (open, close) in paired_values { - if !value.contains(open) || !value.contains(close) { - continue; - } - - if let Some(index) = value.find(open) { - if !is_escaped(index) { - return true; - } - } - } - - false -} - -pub fn is_path_glob(path: &Path) -> bool { - is_glob(&path.to_string_lossy()) -} - pub fn normalize(path: &Path) -> PathBuf { path.to_path_buf().clean() } -pub fn normalize_glob(path: &Path) -> Result { - // Always use forward slashes for globs - let glob = standardize_separators(&path_to_string(path)?); - - // Remove UNC prefix as it breaks glob matching - if cfg!(windows) { - return Ok(glob.replace("//?/", "")); - } - - Ok(glob) -} - #[cfg(not(windows))] pub fn normalize_separators(path: &str) -> String { path.replace('\\', "/") @@ -109,40 +49,3 @@ pub fn replace_home_dir(value: &str) -> String { pub fn standardize_separators(path: &str) -> String { path.replace('\\', 
"/") } - -#[cfg(test)] -mod tests { - use super::*; - - mod is_glob { - use super::*; - - #[test] - fn returns_true_when_a_glob() { - assert!(is_glob("**")); - assert!(is_glob("**/src/*")); - assert!(is_glob("src/**")); - assert!(is_glob("*.ts")); - assert!(is_glob("file.*")); - assert!(is_glob("file.{js,ts}")); - assert!(is_glob("file.[jstx]")); - assert!(is_glob("file.tsx?")); - } - - #[test] - fn returns_false_when_not_glob() { - assert!(!is_glob("dir")); - assert!(!is_glob("file.rs")); - assert!(!is_glob("dir/file.ts")); - assert!(!is_glob("dir/dir/file_test.rs")); - assert!(!is_glob("dir/dirDir/file-ts.js")); - } - - #[test] - fn returns_false_when_escaped_glob() { - assert!(!is_glob("\\*.rs")); - assert!(!is_glob("file\\?.js")); - assert!(!is_glob("folder-\\[id\\]")); - } - } -} diff --git a/crates/utils/src/test.rs b/crates/utils/src/test.rs index 49552035320..a24e34a3b6d 100644 --- a/crates/utils/src/test.rs +++ b/crates/utils/src/test.rs @@ -1,55 +1,65 @@ +use crate::glob; use crate::path; use crate::process::output_to_string; use std::env; use std::path::{Path, PathBuf}; -use std::process::Command; +use std::process::{Command, Output}; + +fn handle_command(dir: &str, msg: &str, out: std::io::Result) { + let out = out.unwrap_or_else(|e| { + println!("{:#?}", e); + panic!("{}: {}", msg, dir); + }); + + if !out.status.success() { + eprintln!("{}", output_to_string(&out.stdout)); + eprintln!("{}", output_to_string(&out.stderr)); + } +} pub fn create_fixtures_sandbox(dir: &str) -> assert_fs::fixture::TempDir { use assert_fs::prelude::*; let temp_dir = assert_fs::fixture::TempDir::new().unwrap(); + let git_bin = if cfg!(windows) { "git.exe" } else { "git" }; temp_dir .copy_from(get_fixtures_dir(dir), &["**/*"]) .unwrap(); // Initialize a git repo so that VCS commands work - Command::new("git") - .args(["init", "--initial-branch", "master"]) - .current_dir(temp_dir.path()) - .output() - .unwrap_or_else(|_| panic!("Failed to initialize git for fixtures sandbox: 
{}", dir)); + handle_command( + dir, + "Failed to initialize git for fixtures sandbox", + Command::new(git_bin) + .args(["init", "--initial-branch", "master"]) + .current_dir(temp_dir.path()) + .output(), + ); // We must also add the files to the index - let out = Command::new("git") - .args(["add", "--all", "."]) - .current_dir(temp_dir.path()) - .output() - .unwrap_or_else(|_| { - panic!( - "Failed to add files to git index for fixtures sandbox: {}", - dir - ) - }); - - if !out.status.success() { - eprintln!("{}", output_to_string(&out.stderr)); - } + handle_command( + dir, + "Failed to add files to git index for fixtures sandbox", + Command::new(git_bin) + .args(["add", "--all", "."]) + .current_dir(temp_dir.path()) + .output(), + ); // And commit them... this seems like a lot of overhead? - let out = Command::new("git") - .args(["commit", "-m", "'Fixtures'"]) - .env("GIT_AUTHOR_NAME", "moon tests") - .env("GIT_AUTHOR_EMAIL", "fakeemail@moonrepo.dev") - .env("GIT_COMMITTER_NAME", "moon tests") - .env("GIT_COMMITTER_EMAIL", "fakeemail@moonrepo.dev") - .current_dir(temp_dir.path()) - .output() - .unwrap_or_else(|_| panic!("Failed to commit files for fixtures sandbox: {}", dir)); - - if !out.status.success() { - eprintln!("{}", output_to_string(&out.stderr)); - } + handle_command( + dir, + "Failed to commit files for fixtures sandbox", + Command::new(git_bin) + .args(["commit", "-m", "'Fixtures'"]) + .env("GIT_AUTHOR_NAME", "moon tests") + .env("GIT_AUTHOR_EMAIL", "fakeemail@moonrepo.dev") + .env("GIT_COMMITTER_NAME", "moon tests") + .env("GIT_COMMITTER_EMAIL", "fakeemail@moonrepo.dev") + .current_dir(temp_dir.path()) + .output(), + ); temp_dir } @@ -76,7 +86,7 @@ pub fn replace_fixtures_dir(value: &str, dir: &Path) -> String { // We need to do this so slashes are accurate and always forward pub fn wrap_glob(path: &Path) -> PathBuf { - PathBuf::from(path::normalize_glob(path).unwrap()) + PathBuf::from(glob::normalize(path).unwrap()) } pub fn 
create_moon_command(fixture: &str) -> assert_cmd::Command { diff --git a/crates/workspace/src/actions/hashing/target.rs b/crates/workspace/src/actions/hashing/target.rs index 220fc7be750..dbb4ff8d6f1 100644 --- a/crates/workspace/src/actions/hashing/target.rs +++ b/crates/workspace/src/actions/hashing/target.rs @@ -1,7 +1,6 @@ use crate::{Workspace, WorkspaceError}; use moon_cache::Hasher; use moon_project::{ExpandedFiles, Project, Task}; -use moon_utils::fs; use moon_utils::path::path_to_string; use std::path::Path; @@ -78,8 +77,7 @@ pub async fn create_target_hasher( let mut hashed_file_tree = vcs.get_file_tree_hashes(&project.source).await?; // Input globs are absolute paths, so we must do the same - hashed_file_tree - .retain(|k, _| fs::matches_globset(&globset, &workspace.root.join(k)).unwrap()); + hashed_file_tree.retain(|k, _| globset.matches(&workspace.root.join(k)).unwrap_or(false)); hasher.hash_inputs(hashed_file_tree); } @@ -94,9 +92,9 @@ pub async fn create_target_hasher( .all .into_iter() .filter(|f| { - // Delete files will crash `git hash-object` + // Deleted files will crash `git hash-object` !local_files.deleted.contains(f) - && fs::matches_globset(&globset, &workspace.root.join(f)).unwrap() + && globset.matches(&workspace.root.join(f)).unwrap_or(false) }) .collect::>(); diff --git a/website/docs/concepts/file-group.mdx b/website/docs/concepts/file-group.mdx index e69de29bb2d..26c8c6ac56d 100644 --- a/website/docs/concepts/file-group.mdx +++ b/website/docs/concepts/file-group.mdx @@ -0,0 +1,46 @@ +--- +title: File groups +--- + +File groups are a mechanism for grouping similar types of files within a project using +[file glob patterns or literal file paths](./file-pattern). These groups are then used by +[tasks](./task) to calculate functionality like cache computation, affected files since last change, +deterministic builds, and more. 
+ +## Configuration + +File groups can be configured per project through [`project.yml`](../config/project), or for all +projects through [`.moon/project.yml`](../config/global-project). + +## Inheritance and merging + +When a file group of the same name exists in both [configuration files](#configuration), the +project-level group will override the workspace-level group, and all other workspace-level groups +will be inherited as-is. + +A primary scenario in which to define file groups at the project-level is when you want to +_override_ file groups defined at the workspace-level. For example, say we want to override the +`sources` file group because our source folder is named "lib" and not "src", we would define our +file groups as follows. + +```yaml title=".moon/project.yml" +fileGroups: + sources: + - 'src/**/*' + - 'types/**/*' + tests: + - 'tests/**/*.test.*' + - '**/__tests__/**/*' +``` + +```yaml title="project.yml" +fileGroups: + # Overrides global + sources: + - 'lib/**/*' + - 'types/**/*' + # Inherited as-is + tests: + - 'tests/**/*.test.*' + - '**/__tests__/**/*' +``` diff --git a/website/docs/concepts/file-pattern.mdx b/website/docs/concepts/file-pattern.mdx new file mode 100644 index 00000000000..fe63e90a9ab --- /dev/null +++ b/website/docs/concepts/file-pattern.mdx @@ -0,0 +1,70 @@ +--- +title: File patterns +--- + +## Globs + +Globs in moon are [Rust-based globs](https://github.com/olson-sean-k/wax), _not_ JavaScript-based. +This may result in different or unexpected results. The following guidelines must be met when using +globs: + +- Must use forward slashes (`/`) for path separators, even on Windows. +- Must _not_ start with or use any relative path parts, `.` or `..`. + +### Supported syntax + +- `*` - Matches zero or more characters, but does not match the `/` character. Will attempt to match + the longest possible text (eager). +- `$` - Like `*`, but will attempt to match the shortest possible text (lazy). 
+- `**` - Matches zero or more directories. +- `?` - Matches exactly one character, but not `/`. +- `[abc]` - Matches one case-sensitive character listed in the brackets. +- `[!xyz]` - Like the above, but will match any character _not_ listed. +- `[a-z]` - Matches one case-sensitive character in range in the brackets. +- `[!x-z]` - Like the above, but will match any character _not_ in range. +- `{glob,glob}` - Matches any one of a comma-separated list of sub-glob patterns. +- `<glob:#,#>` - Matches a sub-glob within defined bounds (represented by `#`). +- `!` - At the start of a pattern, will negate previous positive patterns. + +> The biggest difference between JavaScript and Rust globs is `?`. In JavaScript, this marks the +> preceding character as optional, while in Rust it matches exactly 1 character. + +### Examples + +```bash +README.{md,mdx,txt} +src/**/* +tests/**/*.?js +!**/__tests__/**/* +logs/<[0-9]:4>-<[0-9]:2>-<[0-9]:2>.log +``` + +## Project relative + +When configuring [`fileGroups`](../config/project#filegroups), [`inputs`](../config/project#inputs), +and [`outputs`](../config/project#outputs), all listed file paths and globs are relative from the +project root they will be run in. They _must not_ start with `./`, or traverse upwards with `..`. + +```bash +# Valid +src/**/* +package.json + +# Invalid +./src/**/* +../utils +``` + +## Workspace relative + +When configuring [`fileGroups`](../config/project#filegroups), [`inputs`](../config/project#inputs), +and [`outputs`](../config/project#outputs), a listed file path or glob can be prefixed with `/` to +resolve relative from the workspace root, and _not_ the project root. + +```bash +# In project +package.json + +# In workspace +/package.json +``` diff --git a/website/docs/config/global-project.mdx b/website/docs/config/global-project.mdx index d56923e5348..f45c91750cf 100644 --- a/website/docs/config/global-project.mdx +++ b/website/docs/config/global-project.mdx @@ -13,33 +13,30 @@ in the workspace.
Projects can override or merge with these settings within thei > For more information on file group configuration, refer to the > [`fileGroups`](./project#filegroups) section in the [`project.yml`](./project) doc. -As mentioned in the link above, file groups are a mechanism for grouping similar types of files -within a project using file glob patterns or literal file paths. File groups defined here enables -enforcement of organizational patterns and file locations. - -For example, encourage all projects to place source files in a `src` folder, and all test files in -`tests`. +Defines [file groups](../concepts/file-group) that will be inherited by all projects, and also +enables enforcement of organizational patterns and file locations. For example, encourage all +projects to place source files in a `src` folder, and all test files in `tests`. ```yaml title=".moon/project.yml" fileGroups: configs: - - '*.{js,json}' + - '*.config.{js,cjs,mjs}' + - '*.json' sources: - 'src/**/*' - 'types/**/*' tests: - - 'tests/**/*.test.*' + - 'tests/**/*' - '**/__tests__/**/*' assets: - 'assets/**/*' - 'images/**/*' - 'static/**/*' - '**/*.{scss,css}' - - '**/*' ``` -> Relative file paths and globs used within a file group are relative from the inherited project's -> root, and not the workspace. +> File paths and globs used within a file group are relative from the inherited project's root, and +> not the workspace. ## `tasks` @@ -67,7 +64,7 @@ tasks: test: command: 'jest' - args: '--passWithNoTests .' + args: '--passWithNoTests' typecheck: command: 'tsc' diff --git a/website/docs/config/project.mdx b/website/docs/config/project.mdx index bf81c50f68b..15f155bd8f6 100644 --- a/website/docs/config/project.mdx +++ b/website/docs/config/project.mdx @@ -31,68 +31,39 @@ dependsOn: > `Record` -File groups are a mechanism for grouping similar types of files within a project using file glob -patterns or literal file paths. 
These groups are then used by [tasks](#tasks) to calculate -functionality like cache computation, affected files since last change, deterministic builds, and -more. By default, this setting _is not required_ for the following reasons: +Defines [file groups](../concepts/file-group) to be used by local tasks. By default, this setting +_is not required_ for the following reasons: - File groups are an optional feature, and are designed for advanced use cases. - File groups defined in [`.moon/project.yml`](./global-project) will be inherited by all projects. When defined this setting requires a map, where the key is the file group name, and the value is a -list of globs or paths. Globs and paths are relative to a project (even when defined +list of [globs or paths](../concepts/file-pattern). Globs and paths are +[relative to a project](../concepts/file-pattern#project-relative) (even when defined [globally](./global-project)). ```yaml title="project.yml" fileGroups: configs: - - '*.{js,json}' + - '*.config.{js,cjs,mjs}' + - '*.json' sources: - 'src/**/*' - 'types/**/*' tests: - - 'tests/**/*.test.*' + - 'tests/**/*' - '**/__tests__/**/*' assets: - 'assets/**/*' - 'images/**/*' - 'static/**/*' - '**/*.{scss,css}' - - '**/*' ``` > The code snippet above is merely an example of file groups. Feel free to use those groups as-is, > modify the glob lists, add and remove groups, or implement completely new groups. The choice is > yours! -### Inherited file groups - -A primary scenario in which to define file groups at the project-level is when you want to -_override_ file groups defined at the workspace-level. For example, say we want to override the -`sources` file group because our source folder is named "lib" and not "src", we would define our -file groups as followed. 
- -```yaml title=".moon/project.yml" -fileGroups: - sources: - - 'src/**/*' - - 'types/**/*' - tests: - - 'tests/**/*.test.*' - - '**/__tests__/**/*' -``` - -```yaml title="project.yml" -fileGroups: - sources: - - 'lib/**/*' - - 'types/**/*' - # Inherit `tests` as-is -``` - -File groups defined in `project.yml` will override file groups defined in `.moon/project.yml` of the -same name, and _will not_ merge the value arrays. - ## `project` > `ProjectMetadataConfig` @@ -279,9 +250,8 @@ The `inputs` field is a list of file paths/globs that are used to calculate whet task based on files that have been touched since the last time the task has been ran. If _not_ defined, then all files within a project are considered an input (`**/*`). -By default inputs are relative from the _project root_, and can reference -[file groups](#filegroups). To reference files from the workspace root (for example, config files), -prefix the path with a "/". +Inputs support +[project and workspace relative file patterns](../concepts/file-pattern#project-relative). ```yaml title="project.yml" {4-10} tasks: @@ -304,8 +274,7 @@ The `outputs` field is a list of files and folders that are _created_ as a resul task, excluding internal cache files that are created from the underlying command (for example, `.eslintcache`). -By default outputs are relative from the _project root_. To output files to the workspace root -(should rarely be used), prefix the path with a "/". +Outputs require [project relative file patterns](../concepts/file-pattern#project-relative). ```yaml title="project.yml" {4-6} tasks: diff --git a/website/docs/create-task.mdx b/website/docs/create-task.mdx index 8c19d0a5d90..81b4cf8bccd 100644 --- a/website/docs/create-task.mdx +++ b/website/docs/create-task.mdx @@ -140,7 +140,8 @@ tasks: Once you're familiar with configuring tasks, you may notice certain inputs being repeated constantly, like source files, test files, and configuration. 
To reduce the amount of boilerplate required, moon provides a feature known as [file groups](./concepts/file-group), which enables -grouping of similar file types within a project using file glob patterns or literal file paths. +grouping of similar file types within a project using +[file glob patterns or literal file paths](./concepts/file-pattern). File groups are defined with the [`fileGroups`](./config/project#filegroups) setting, which maps a list of file paths/globs to a group, like so. diff --git a/website/docs/install.mdx b/website/docs/install.mdx index ccbf7994044..df098c6f7db 100644 --- a/website/docs/install.mdx +++ b/website/docs/install.mdx @@ -36,8 +36,7 @@ npm init -If you already have a root `package.json`, jump to the -[installation instructions](#installing-the-cli), otherwise... +If you already have a root `package.json`, continue to the next section, otherwise... ```bash npm init diff --git a/website/sidebars.js b/website/sidebars.js index 99e23e96301..94689037bfe 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -35,13 +35,15 @@ const sidebars = { type: 'category', label: 'Concepts', items: [ - 'concepts/workspace', - 'concepts/toolchain', + 'concepts/cache', + 'concepts/file-group', + 'concepts/file-pattern', 'concepts/project', - 'concepts/task', 'concepts/target', + 'concepts/task', 'concepts/token', - 'concepts/cache', + 'concepts/toolchain', + 'concepts/workspace', ], link: { type: 'generated-index',