diff --git a/kclvm/Cargo.lock b/kclvm/Cargo.lock index 53d2192ed..a5a28f187 100644 --- a/kclvm/Cargo.lock +++ b/kclvm/Cargo.lock @@ -2043,6 +2043,7 @@ dependencies = [ "serde_json", "tracing", "unicode_names2", + "walkdir", ] [[package]] diff --git a/kclvm/driver/src/lib.rs b/kclvm/driver/src/lib.rs index 423685513..bbb4c896a 100644 --- a/kclvm/driver/src/lib.rs +++ b/kclvm/driver/src/lib.rs @@ -8,14 +8,11 @@ mod tests; use anyhow::Result; use kclvm_config::{ - modfile::{ - get_pkg_root, load_mod_file, KCL_FILE_EXTENSION, KCL_FILE_SUFFIX, KCL_MOD_FILE, - KCL_WORK_FILE, - }, + modfile::{get_pkg_root, load_mod_file, KCL_FILE_EXTENSION, KCL_MOD_FILE, KCL_WORK_FILE}, settings::{build_settings_pathbuf, DEFAULT_SETTING_FILE}, workfile::load_work_file, }; -use kclvm_parser::LoadProgramOptions; +use kclvm_parser::{get_kcl_files, LoadProgramOptions}; use kclvm_utils::path::PathPrefix; use std::iter; use std::{collections::HashMap, env}; @@ -325,27 +322,6 @@ pub fn lookup_workspace(path: &str) -> io::Result { Ok(WorkSpaceKind::NotFound) } -/// Get kcl files from path. -pub fn get_kcl_files>(path: P, recursively: bool) -> Result> { - let mut files = vec![]; - let walkdir = if recursively { - WalkDir::new(path) - } else { - WalkDir::new(path).max_depth(1) - }; - for entry in walkdir.into_iter().filter_map(|e| e.ok()) { - let path = entry.path(); - if path.is_file() { - let file = path.to_str().unwrap(); - if file.ends_with(KCL_FILE_SUFFIX) { - files.push(file.to_string()) - } - } - } - files.sort(); - Ok(files) -} - /// Get the package string list form the package path. pub fn get_pkg_list(pkgpath: &str) -> Result> { let mut dir_list: Vec = Vec::new(); diff --git a/kclvm/parser/Cargo.toml b/kclvm/parser/Cargo.toml index f4f63b654..29aa4c1f1 100644 --- a/kclvm/parser/Cargo.toml +++ b/kclvm/parser/Cargo.toml @@ -33,6 +33,7 @@ kclvm-error = {path = "../error"} kclvm-config = {path = "../config"} kclvm-sema = {path = "../sema"} kclvm-utils = {path = "../utils"} +walkdir = "2" [dev-dependencies] expect-test = "1.0" diff --git a/kclvm/parser/src/lib.rs b/kclvm/parser/src/lib.rs index 16b133c22..ab1666241 100644 --- a/kclvm/parser/src/lib.rs +++ b/kclvm/parser/src/lib.rs @@ -32,7 +32,7 @@ use anyhow::Result; use lexer::parse_token_streams; use parser::Parser; use std::collections::{HashMap, HashSet, VecDeque}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::{Arc, RwLock}; use kclvm_span::create_session_globals_then; @@ -340,6 +340,8 @@ struct Loader { opts: LoadProgramOptions, module_cache: KCLModuleCache, file_graph: FileGraphCache, + pkgmap: PkgMap, + parsed_file: HashSet, } impl Loader { @@ -358,6 +360,8 @@ impl Loader { opts: opts.unwrap_or_default(), module_cache: module_cache.unwrap_or_default(), file_graph: FileGraphCache::default(), + pkgmap: PkgMap::new(), + parsed_file: HashSet::new(), } } @@ -372,6 +376,8 @@ impl Loader { self.paths.clone(), self.module_cache.clone(), self.file_graph.clone(), + &mut self.pkgmap, + &mut self.parsed_file, &self.opts, ) } @@ -729,8 +735,8 @@ pub fn parse_file( pub fn get_deps( file: &PkgFile, m: &Module, - modules: &mut HashMap>, - pkgmap: &mut PkgMap, + pkgs: &mut HashMap>, + pkgmap: &PkgMap, opts: &LoadProgramOptions, sess: ParseSessionRef, ) -> Result { @@ -756,7 +762,7 @@ pub fn get_deps( // If k_files is empty, the pkg information will not be found in the file graph. // Record the empty pkg to prevent loss. After the parse file is completed, fill in the modules if pkg_info.k_files.is_empty() { - modules.insert(pkg_info.pkg_path.clone(), vec![]); + pkgs.insert(pkg_info.pkg_path.clone(), vec![]); } pkg_info.k_files.iter().for_each(|p| { @@ -837,6 +843,8 @@ pub fn parse_entry( opts, )?; let mut unparsed_file: VecDeque = dependent_paths.into(); + + // Bfs unparsed and import files while let Some(file) = unparsed_file.pop_front() { match &mut module_cache.write() { Ok(m_cache) => match m_cache.file_pkg.get_mut(file.get_path()) { @@ -913,24 +921,24 @@ pub fn parse_program( paths: Vec, module_cache: KCLModuleCache, file_graph: FileGraphCache, + pkgmap: &mut PkgMap, + parsed_file: &mut HashSet, opts: &LoadProgramOptions, ) -> Result { let compile_entries = get_compile_entries_from_paths(&paths, &opts)?; let workdir = compile_entries.get_root_path().to_string(); let mut pkgs: HashMap> = HashMap::new(); - let mut pkgmap = PkgMap::new(); let mut new_files = HashSet::new(); - let mut parsed_file: HashSet = HashSet::new(); for entry in compile_entries.iter() { new_files.extend(parse_entry( sess.clone(), entry, module_cache.clone(), &mut pkgs, - &mut pkgmap, + pkgmap, file_graph.clone(), &opts, - &mut parsed_file, + parsed_file, )?); } @@ -1009,3 +1017,244 @@ pub fn parse_program( paths: files.iter().map(|file| file.get_path().clone()).collect(), }) } + +/// Parse all kcl files under path and dependencies from opts. +/// Different from `load_program`, this function will compile files that are not imported. +pub fn load_all_files_under_paths( + sess: ParseSessionRef, + paths: &[&str], + opts: Option, + module_cache: Option, +) -> Result { + let mut loader = Loader::new(sess.clone(), paths, opts.clone(), module_cache.clone()); + create_session_globals_then(move || { + match parse_program( + loader.sess.clone(), + loader.paths.clone(), + loader.module_cache.clone(), + loader.file_graph.clone(), + &mut loader.pkgmap, + &mut loader.parsed_file, + &loader.opts, + ) { + Ok(res) => { + let mut res = res.clone(); + let k_files_from_import = res.paths.clone(); + let (k_files_under_path, pkgmap) = get_files_from_path(paths, opts)?; + loader.pkgmap.extend(pkgmap); + + // Filter unparsed file + let mut unparsed_file: VecDeque = VecDeque::new(); + for (pkg, paths) in &k_files_under_path { + for p in paths { + if !k_files_from_import.contains(p) { + let pkgfile = PkgFile::new(p.clone(), pkg.clone()); + unparsed_file.push_back(pkgfile); + } + } + } + + let module_cache = module_cache.unwrap_or_default(); + let pkgs = &mut res.program.pkgs; + + let mut new_files = HashSet::new(); + + // Bfs unparsed and import files + while let Some(file) = unparsed_file.pop_front() { + new_files.insert(file.clone()); + let deps = parse_file( + sess.clone(), + file, + None, + module_cache.clone(), + pkgs, + &mut loader.pkgmap, + loader.file_graph.clone(), + &loader.opts, + )?; + for dep in deps { + if loader.parsed_file.insert(dep.clone()) { + unparsed_file.push_back(dep.clone()); + } + } + } + + // Merge unparsed module into res + let modules = &mut res.program.modules; + for file in &new_files { + let filename = file.get_path().to_str().unwrap().to_string(); + let m_ref = match module_cache.read() { + Ok(module_cache) => module_cache + .ast_cache + .get(file.get_path()) + .expect(&format!( + "Module not found in module: {:?}", + file.get_path() + )) + .clone(), + Err(e) => return Err(anyhow::anyhow!("Parse program failed: {e}")), + }; + modules.insert(filename.clone(), m_ref); + match pkgs.get_mut(&file.pkg_path) { + Some(pkg_modules) => { + pkg_modules.push(filename.clone()); + } + None => { + pkgs.insert(file.pkg_path.clone(), vec![filename]); + } + } + } + + // Generate new paths + let files = match loader.file_graph.read() { + Ok(file_graph) => { + let files = match file_graph.toposort() { + Ok(files) => files, + Err(_) => file_graph.paths(), + }; + + let file_path_graph = file_graph.file_path_graph().0; + if let Err(cycle) = toposort(&file_path_graph) { + let formatted_cycle = cycle + .iter() + .map(|file| format!("- {}\n", file.to_string_lossy())) + .collect::(); + + sess.1.write().add_error( + ErrorKind::RecursiveLoad, + &[Message { + range: (Position::dummy_pos(), Position::dummy_pos()), + style: Style::Line, + message: format!( + "Could not compiles due to cyclic import statements\n{}", + formatted_cycle.trim_end() + ), + note: None, + suggested_replacement: None, + }], + ); + } + files + } + Err(e) => return Err(anyhow::anyhow!("Parse program failed: {e}")), + }; + + res.paths = files.iter().map(|file| file.get_path().clone()).collect(); + return Ok(res); + } + e => return e, + } + }) +} + +/// Get all kcl files under path and dependencies from opts, regardless of whether they are imported or not +pub fn get_files_from_path( + paths: &[&str], + opts: Option, +) -> Result<(HashMap>, HashMap)> { + let mut k_files_under_path = HashMap::new(); + let mut pkgmap = HashMap::new(); + + // get files from config + if let Some(opt) = &opts { + for (name, path) in &opt.package_maps { + let path_buf = PathBuf::from(path.clone()); + if path_buf.is_dir() { + let all_k_files_under_path = get_kcl_files(path.clone(), true)?; + for f in &all_k_files_under_path { + let p = PathBuf::from(f); + let fix_path = { + match p.parent().unwrap().strip_prefix(Path::new(&path)) { + Ok(p) => Path::new(&name).join(p), + Err(_) => match p.parent().unwrap().strip_prefix(Path::new(&path)) { + Ok(p) => Path::new(&name).join(p), + Err(_) => Path::new(&name).to_path_buf(), + }, + } + } + .to_str() + .unwrap() + .to_string(); + let fix_path = fix_path + .replace(['/', '\\'], ".") + .trim_end_matches('.') + .to_string(); + + let pkgfile = PkgFile::new(p.clone(), fix_path.clone()); + pkgmap.insert( + pkgfile, + Pkg { + pkg_name: name.clone(), + pkg_root: path.clone(), + }, + ); + k_files_under_path + .entry(fix_path) + .or_insert(Vec::new()) + .push(p); + } + } + } + } + + // get files from input paths + for path in paths { + let path_buf = PathBuf::from(path); + if path_buf.is_dir() { + let all_k_files_under_path = get_kcl_files(path, true)?; + for f in &all_k_files_under_path { + let p = PathBuf::from(f); + + let fix_path = p + .parent() + .unwrap() + .strip_prefix(path_buf.clone()) + .unwrap() + .to_str() + .unwrap() + .to_string(); + + let fix_path = fix_path + .replace(['/', '\\'], ".") + .trim_end_matches('.') + .to_string(); + + let pkgfile = PkgFile::new(p.clone(), fix_path.clone()); + pkgmap.insert( + pkgfile, + Pkg { + pkg_name: MAIN_PKG.to_owned(), + pkg_root: path.to_string(), + }, + ); + k_files_under_path + .entry(fix_path) + .or_insert(Vec::new()) + .push(p); + } + } + } + + Ok((k_files_under_path, pkgmap)) +} + +/// Get kcl files from path. +pub fn get_kcl_files>(path: P, recursively: bool) -> Result> { + let mut files = vec![]; + let walkdir = if recursively { + walkdir::WalkDir::new(path) + } else { + walkdir::WalkDir::new(path).max_depth(1) + }; + for entry in walkdir.into_iter().filter_map(|e| e.ok()) { + let path = entry.path(); + if path.is_file() { + let file = path.to_str().unwrap(); + if file.ends_with(KCL_FILE_SUFFIX) { + files.push(file.to_string()) + } + } + } + files.sort(); + Ok(files) +} diff --git a/kclvm/parser/src/tests.rs b/kclvm/parser/src/tests.rs index ad9905d2e..572014adf 100644 --- a/kclvm/parser/src/tests.rs +++ b/kclvm/parser/src/tests.rs @@ -841,3 +841,30 @@ fn test_expand_input_files() { input.sort(); assert_eq!(input, expected_files); } + +#[test] +fn parse_all_file_under_path() { + let testpath = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("testdata") + .join("parse_all_modules"); + + let main = testpath.join("a").join("main.k"); + let main = main.to_str().unwrap(); + let helloworld = testpath.join("helloworld_0.0.1"); + let b = testpath.join("b"); + + let sess = ParseSessionRef::default(); + let mut opt = LoadProgramOptions::default(); + opt.vendor_dirs = vec![get_vendor_home()]; + opt.package_maps + .insert("b".to_string(), b.to_str().unwrap().to_string()); + opt.package_maps.insert( + "helloworld".to_string(), + helloworld.to_str().unwrap().to_string(), + ); + + let res = load_all_files_under_paths(sess.clone(), &[main], Some(opt), None).unwrap(); + + assert_eq!(res.program.pkgs.keys().len(), 3); + assert_eq!(res.paths.len(), 3); +} diff --git a/kclvm/parser/testdata/parse_all_modules/a/kcl.mod b/kclvm/parser/testdata/parse_all_modules/a/kcl.mod new file mode 100644 index 000000000..31e3a3003 --- /dev/null +++ b/kclvm/parser/testdata/parse_all_modules/a/kcl.mod @@ -0,0 +1,9 @@ +[package] +name = "a" +edition = "v0.9.0" +version = "0.0.1" + +[dependencies] +b = { path = "../b" } +helloworld = "0.0.1" + diff --git a/kclvm/parser/testdata/parse_all_modules/a/main.k b/kclvm/parser/testdata/parse_all_modules/a/main.k new file mode 100644 index 000000000..fa7048e63 --- /dev/null +++ b/kclvm/parser/testdata/parse_all_modules/a/main.k @@ -0,0 +1 @@ +The_first_kcl_program = 'Hello World!' \ No newline at end of file diff --git a/kclvm/parser/testdata/parse_all_modules/a/sub/sub.k b/kclvm/parser/testdata/parse_all_modules/a/sub/sub.k new file mode 100644 index 000000000..e69de29bb diff --git a/kclvm/parser/testdata/parse_all_modules/b/kcl.mod b/kclvm/parser/testdata/parse_all_modules/b/kcl.mod new file mode 100644 index 000000000..f1a67ab29 --- /dev/null +++ b/kclvm/parser/testdata/parse_all_modules/b/kcl.mod @@ -0,0 +1,6 @@ +[package] +name = "b" +edition = "v0.9.0" +version = "0.0.1" + + diff --git a/kclvm/parser/testdata/parse_all_modules/b/main.k b/kclvm/parser/testdata/parse_all_modules/b/main.k new file mode 100644 index 000000000..fa7048e63 --- /dev/null +++ b/kclvm/parser/testdata/parse_all_modules/b/main.k @@ -0,0 +1 @@ +The_first_kcl_program = 'Hello World!' \ No newline at end of file diff --git a/kclvm/parser/testdata/parse_all_modules/helloworld_0.0.1/README.md b/kclvm/parser/testdata/parse_all_modules/helloworld_0.0.1/README.md new file mode 100644 index 000000000..4d63fef38 --- /dev/null +++ b/kclvm/parser/testdata/parse_all_modules/helloworld_0.0.1/README.md @@ -0,0 +1,2 @@ +## Introduction +This is a kcl package named helloworld. diff --git a/kclvm/parser/testdata/parse_all_modules/helloworld_0.0.1/kcl.mod b/kclvm/parser/testdata/parse_all_modules/helloworld_0.0.1/kcl.mod new file mode 100644 index 000000000..bef7e7f76 --- /dev/null +++ b/kclvm/parser/testdata/parse_all_modules/helloworld_0.0.1/kcl.mod @@ -0,0 +1,5 @@ +[package] +name = "helloworld" +edition = "0.0.1" +version = "0.0.1" + diff --git a/kclvm/parser/testdata/parse_all_modules/helloworld_0.0.1/main.k b/kclvm/parser/testdata/parse_all_modules/helloworld_0.0.1/main.k new file mode 100644 index 000000000..fa7048e63 --- /dev/null +++ b/kclvm/parser/testdata/parse_all_modules/helloworld_0.0.1/main.k @@ -0,0 +1 @@ +The_first_kcl_program = 'Hello World!' \ No newline at end of file diff --git a/kclvm/tools/src/LSP/src/completion.rs b/kclvm/tools/src/LSP/src/completion.rs index 6fbabc1a0..0492e39a4 100644 --- a/kclvm/tools/src/LSP/src/completion.rs +++ b/kclvm/tools/src/LSP/src/completion.rs @@ -20,8 +20,8 @@ use indexmap::IndexSet; use kclvm_ast::ast::{self, ImportStmt, Program, Stmt}; use kclvm_ast::MAIN_PKG; use kclvm_config::modfile::KCL_FILE_EXTENSION; -use kclvm_driver::get_kcl_files; use kclvm_driver::toolchain::{get_real_path_from_external, Metadata, Toolchain}; +use kclvm_parser::get_kcl_files; use kclvm_sema::core::global_state::GlobalState; use std::io; use std::{fs, path::Path}; diff --git a/kclvm/tools/src/format/mod.rs b/kclvm/tools/src/format/mod.rs index 5ca091e29..d716e424b 100644 --- a/kclvm/tools/src/format/mod.rs +++ b/kclvm/tools/src/format/mod.rs @@ -7,7 +7,7 @@ //! to print it as source code string. use anyhow::Result; use kclvm_ast_pretty::print_ast_module; -use kclvm_driver::get_kcl_files; +use kclvm_parser::get_kcl_files; use std::path::Path; use kclvm_parser::{parse_file_force_errors, parse_single_file}; diff --git a/kclvm/tools/src/testing/suite.rs b/kclvm/tools/src/testing/suite.rs index 6af3667dc..2d07d66e9 100644 --- a/kclvm/tools/src/testing/suite.rs +++ b/kclvm/tools/src/testing/suite.rs @@ -4,7 +4,8 @@ use crate::testing::{TestCaseInfo, TestOptions, TestResult, TestRun}; use anyhow::{anyhow, Result}; use indexmap::IndexMap; use kclvm_ast::ast; -use kclvm_driver::{get_kcl_files, get_pkg_list}; +use kclvm_driver::get_pkg_list; +use kclvm_parser::get_kcl_files; use kclvm_parser::{parse_file_force_errors, ParseSessionRef}; #[cfg(feature = "llvm")] use kclvm_runner::build_program;