Skip to content

Commit

Permalink
feat: Add a function to parse all kcl files in the given path and opts (
Browse files Browse the repository at this point in the history
#1749)

feat: Add a function to parse all kcl files in the given path and opts. Different from load_program, this function will compile files that are not imported.

Signed-off-by: he1pa <[email protected]>
  • Loading branch information
He1pa authored Nov 13, 2024
1 parent 1457674 commit 9016255
Show file tree
Hide file tree
Showing 16 changed files with 317 additions and 37 deletions.
1 change: 1 addition & 0 deletions kclvm/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 2 additions & 26 deletions kclvm/driver/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,11 @@ mod tests;

use anyhow::Result;
use kclvm_config::{
modfile::{
get_pkg_root, load_mod_file, KCL_FILE_EXTENSION, KCL_FILE_SUFFIX, KCL_MOD_FILE,
KCL_WORK_FILE,
},
modfile::{get_pkg_root, load_mod_file, KCL_FILE_EXTENSION, KCL_MOD_FILE, KCL_WORK_FILE},
settings::{build_settings_pathbuf, DEFAULT_SETTING_FILE},
workfile::load_work_file,
};
use kclvm_parser::LoadProgramOptions;
use kclvm_parser::{get_kcl_files, LoadProgramOptions};
use kclvm_utils::path::PathPrefix;
use std::iter;
use std::{collections::HashMap, env};
Expand Down Expand Up @@ -325,27 +322,6 @@ pub fn lookup_workspace(path: &str) -> io::Result<WorkSpaceKind> {
Ok(WorkSpaceKind::NotFound)
}

/// Collect the KCL source files found under `path`.
///
/// When `recursively` is `true` the whole directory tree rooted at `path` is walked;
/// otherwise the walk is limited to a depth of one (`path` and its direct children).
///
/// Directory entries that cannot be read are skipped. Paths that are not valid UTF-8
/// are skipped as well, because they cannot be represented in the returned
/// `Vec<String>`; previously such a path aborted the whole walk with a panic.
///
/// Returns the paths ending in `KCL_FILE_SUFFIX`, sorted in ascending order.
pub fn get_kcl_files<P: AsRef<Path>>(path: P, recursively: bool) -> Result<Vec<String>> {
    let walkdir = if recursively {
        WalkDir::new(path)
    } else {
        WalkDir::new(path).max_depth(1)
    };
    let mut files: Vec<String> = walkdir
        .into_iter()
        // Best effort: ignore entries the walker failed to read.
        .filter_map(|entry| entry.ok())
        .filter(|entry| entry.path().is_file())
        // Non-UTF-8 names cannot be returned as `String`; skip them instead of panicking.
        .filter_map(|entry| entry.path().to_str().map(str::to_owned))
        .filter(|file| file.ends_with(KCL_FILE_SUFFIX))
        .collect();
    files.sort();
    Ok(files)
}

/// Get the package string list from the package path.
pub fn get_pkg_list(pkgpath: &str) -> Result<Vec<String>> {
let mut dir_list: Vec<String> = Vec::new();
Expand Down
1 change: 1 addition & 0 deletions kclvm/parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ kclvm-error = {path = "../error"}
kclvm-config = {path = "../config"}
kclvm-sema = {path = "../sema"}
kclvm-utils = {path = "../utils"}
walkdir = "2"

[dev-dependencies]
expect-test = "1.0"
Expand Down
265 changes: 257 additions & 8 deletions kclvm/parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use anyhow::Result;
use lexer::parse_token_streams;
use parser::Parser;
use std::collections::{HashMap, HashSet, VecDeque};
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock};

use kclvm_span::create_session_globals_then;
Expand Down Expand Up @@ -340,6 +340,8 @@ struct Loader {
opts: LoadProgramOptions,
module_cache: KCLModuleCache,
file_graph: FileGraphCache,
pkgmap: PkgMap,
parsed_file: HashSet<PkgFile>,
}

impl Loader {
Expand All @@ -358,6 +360,8 @@ impl Loader {
opts: opts.unwrap_or_default(),
module_cache: module_cache.unwrap_or_default(),
file_graph: FileGraphCache::default(),
pkgmap: PkgMap::new(),
parsed_file: HashSet::new(),
}
}

Expand All @@ -372,6 +376,8 @@ impl Loader {
self.paths.clone(),
self.module_cache.clone(),
self.file_graph.clone(),
&mut self.pkgmap,
&mut self.parsed_file,
&self.opts,
)
}
Expand Down Expand Up @@ -729,8 +735,8 @@ pub fn parse_file(
pub fn get_deps(
file: &PkgFile,
m: &Module,
modules: &mut HashMap<String, Vec<String>>,
pkgmap: &mut PkgMap,
pkgs: &mut HashMap<String, Vec<String>>,
pkgmap: &PkgMap,
opts: &LoadProgramOptions,
sess: ParseSessionRef,
) -> Result<PkgMap> {
Expand All @@ -756,7 +762,7 @@ pub fn get_deps(
// If `k_files` is empty, the package information will not be found in the file graph.
// Record the empty package here so it is not lost; its modules are filled in once parsing has completed.
if pkg_info.k_files.is_empty() {
modules.insert(pkg_info.pkg_path.clone(), vec![]);
pkgs.insert(pkg_info.pkg_path.clone(), vec![]);
}

pkg_info.k_files.iter().for_each(|p| {
Expand Down Expand Up @@ -837,6 +843,8 @@ pub fn parse_entry(
opts,
)?;
let mut unparsed_file: VecDeque<PkgFile> = dependent_paths.into();

// Breadth-first traversal over the unparsed files and the files they import
while let Some(file) = unparsed_file.pop_front() {
match &mut module_cache.write() {
Ok(m_cache) => match m_cache.file_pkg.get_mut(file.get_path()) {
Expand Down Expand Up @@ -913,24 +921,24 @@ pub fn parse_program(
paths: Vec<String>,
module_cache: KCLModuleCache,
file_graph: FileGraphCache,
pkgmap: &mut PkgMap,
parsed_file: &mut HashSet<PkgFile>,
opts: &LoadProgramOptions,
) -> Result<LoadProgramResult> {
let compile_entries = get_compile_entries_from_paths(&paths, &opts)?;
let workdir = compile_entries.get_root_path().to_string();
let mut pkgs: HashMap<String, Vec<String>> = HashMap::new();
let mut pkgmap = PkgMap::new();
let mut new_files = HashSet::new();
let mut parsed_file: HashSet<PkgFile> = HashSet::new();
for entry in compile_entries.iter() {
new_files.extend(parse_entry(
sess.clone(),
entry,
module_cache.clone(),
&mut pkgs,
&mut pkgmap,
pkgmap,
file_graph.clone(),
&opts,
&mut parsed_file,
parsed_file,
)?);
}

Expand Down Expand Up @@ -1009,3 +1017,244 @@ pub fn parse_program(
paths: files.iter().map(|file| file.get_path().clone()).collect(),
})
}

/// Parse all kcl files under path and dependencies from opts.
/// Different from `load_program`, this function will compile files that are not imported.
///
/// The work is done in two phases:
/// 1. `parse_program` is run on `paths`, which parses the entry files and everything
///    reachable from them through imports.
/// 2. Every KCL file reported by `get_files_from_path` that is not already part of the
///    phase-1 result is parsed too, together with its own dependencies, and merged into
///    the returned program (`program.modules`, `program.pkgs`, and `paths`).
///
/// Both phases share the same module cache: the one supplied by the caller, or the
/// default cache created by the loader when `module_cache` is `None`.
///
/// # Errors
///
/// Returns an error when `parse_program`, `get_files_from_path` or `parse_file` fails,
/// when a lock on the module cache or file graph is poisoned, or when a freshly parsed
/// file is unexpectedly missing from the module cache. Cyclic imports do not produce an
/// `Err`; they are reported on the parse session as an `ErrorKind::RecursiveLoad` error.
pub fn load_all_files_under_paths(
    sess: ParseSessionRef,
    paths: &[&str],
    opts: Option<LoadProgramOptions>,
    module_cache: Option<KCLModuleCache>,
) -> Result<LoadProgramResult> {
    let mut loader = Loader::new(sess.clone(), paths, opts.clone(), module_cache);
    create_session_globals_then(move || {
        // Phase 1: parse the entries and everything they import.
        let mut res = parse_program(
            loader.sess.clone(),
            loader.paths.clone(),
            loader.module_cache.clone(),
            loader.file_graph.clone(),
            &mut loader.pkgmap,
            &mut loader.parsed_file,
            &loader.opts,
        )?;

        let (k_files_under_path, pkgmap) = get_files_from_path(paths, opts)?;
        loader.pkgmap.extend(pkgmap);

        // Hash the already-loaded paths once so the filter below is a constant-time lookup
        // per file instead of a linear scan.
        let k_files_from_import: HashSet<PathBuf> = res.paths.iter().cloned().collect();

        // Queue every file under the paths that phase 1 did not reach.
        let mut unparsed_file: VecDeque<PkgFile> = VecDeque::new();
        for (pkg, files) in &k_files_under_path {
            for p in files {
                if !k_files_from_import.contains(p) {
                    let pkgfile = PkgFile::new(p.clone(), pkg.clone());
                    // Mark the file as seen so it is not queued a second time when
                    // another queued file turns out to import it.
                    loader.parsed_file.insert(pkgfile.clone());
                    unparsed_file.push_back(pkgfile);
                }
            }
        }

        // Reuse the loader's cache so both phases see the same state even when the
        // caller did not supply a cache.
        let module_cache = loader.module_cache.clone();
        let pkgs = &mut res.program.pkgs;
        let mut new_files = HashSet::new();

        // Phase 2: breadth-first traversal over the unparsed files and their imports.
        while let Some(file) = unparsed_file.pop_front() {
            new_files.insert(file.clone());
            let deps = parse_file(
                sess.clone(),
                file,
                None,
                module_cache.clone(),
                pkgs,
                &mut loader.pkgmap,
                loader.file_graph.clone(),
                &loader.opts,
            )?;
            for dep in deps {
                if loader.parsed_file.insert(dep.clone()) {
                    unparsed_file.push_back(dep);
                }
            }
        }

        // Merge the newly parsed modules into the result.
        let modules = &mut res.program.modules;
        {
            let cache = module_cache
                .read()
                .map_err(|e| anyhow::anyhow!("Parse program failed: {e}"))?;
            for file in &new_files {
                let filename = file.get_path().to_string_lossy().to_string();
                let m_ref = cache
                    .ast_cache
                    .get(file.get_path())
                    .cloned()
                    .ok_or_else(|| {
                        anyhow::anyhow!("Module not found in module: {:?}", file.get_path())
                    })?;
                modules.insert(filename.clone(), m_ref);
                pkgs.entry(file.pkg_path.clone())
                    .or_default()
                    .push(filename);
            }
        }

        // Recompute the path list from the file graph, which now also contains the
        // files parsed in phase 2.
        let files = {
            let file_graph = loader
                .file_graph
                .read()
                .map_err(|e| anyhow::anyhow!("Parse program failed: {e}"))?;
            // Fall back to the unordered path list when no topological order exists.
            let files = match file_graph.toposort() {
                Ok(files) => files,
                Err(_) => file_graph.paths(),
            };

            let file_path_graph = file_graph.file_path_graph().0;
            if let Err(cycle) = toposort(&file_path_graph) {
                let formatted_cycle = cycle
                    .iter()
                    .map(|file| format!("- {}\n", file.to_string_lossy()))
                    .collect::<String>();

                sess.1.write().add_error(
                    ErrorKind::RecursiveLoad,
                    &[Message {
                        range: (Position::dummy_pos(), Position::dummy_pos()),
                        style: Style::Line,
                        message: format!(
                            "Could not compiles due to cyclic import statements\n{}",
                            formatted_cycle.trim_end()
                        ),
                        note: None,
                        suggested_replacement: None,
                    }],
                );
            }
            files
        };

        res.paths = files.iter().map(|file| file.get_path().clone()).collect();
        Ok(res)
    })
}

/// Get all kcl files under path and dependencies from opts, regardless of whether they are imported or not
pub fn get_files_from_path(
paths: &[&str],
opts: Option<LoadProgramOptions>,
) -> Result<(HashMap<String, Vec<PathBuf>>, HashMap<PkgFile, Pkg>)> {
let mut k_files_under_path = HashMap::new();
let mut pkgmap = HashMap::new();

// get files from config
if let Some(opt) = &opts {
for (name, path) in &opt.package_maps {
let path_buf = PathBuf::from(path.clone());
if path_buf.is_dir() {
let all_k_files_under_path = get_kcl_files(path.clone(), true)?;
for f in &all_k_files_under_path {
let p = PathBuf::from(f);
let fix_path = {
match p.parent().unwrap().strip_prefix(Path::new(&path)) {
Ok(p) => Path::new(&name).join(p),
Err(_) => match p.parent().unwrap().strip_prefix(Path::new(&path)) {
Ok(p) => Path::new(&name).join(p),
Err(_) => Path::new(&name).to_path_buf(),
},
}
}
.to_str()
.unwrap()
.to_string();
let fix_path = fix_path
.replace(['/', '\\'], ".")
.trim_end_matches('.')
.to_string();

let pkgfile = PkgFile::new(p.clone(), fix_path.clone());
pkgmap.insert(
pkgfile,
Pkg {
pkg_name: name.clone(),
pkg_root: path.clone(),
},
);
k_files_under_path
.entry(fix_path)
.or_insert(Vec::new())
.push(p);
}
}
}
}

// get files from input paths
for path in paths {
let path_buf = PathBuf::from(path);
if path_buf.is_dir() {
let all_k_files_under_path = get_kcl_files(path, true)?;
for f in &all_k_files_under_path {
let p = PathBuf::from(f);

let fix_path = p
.parent()
.unwrap()
.strip_prefix(path_buf.clone())
.unwrap()
.to_str()
.unwrap()
.to_string();

let fix_path = fix_path
.replace(['/', '\\'], ".")
.trim_end_matches('.')
.to_string();

let pkgfile = PkgFile::new(p.clone(), fix_path.clone());
pkgmap.insert(
pkgfile,
Pkg {
pkg_name: MAIN_PKG.to_owned(),
pkg_root: path.to_string(),
},
);
k_files_under_path
.entry(fix_path)
.or_insert(Vec::new())
.push(p);
}
}
}

Ok((k_files_under_path, pkgmap))
}

/// Collect the KCL source files found under `path`.
///
/// When `recursively` is `true` the whole directory tree rooted at `path` is walked;
/// otherwise the walk is limited to a depth of one (`path` and its direct children).
///
/// Directory entries that cannot be read are skipped. Paths that are not valid UTF-8
/// are skipped as well, because they cannot be represented in the returned
/// `Vec<String>`; previously such a path aborted the whole walk with a panic.
///
/// Returns the paths ending in `KCL_FILE_SUFFIX`, sorted in ascending order.
pub fn get_kcl_files<P: AsRef<std::path::Path>>(path: P, recursively: bool) -> Result<Vec<String>> {
    let walkdir = if recursively {
        walkdir::WalkDir::new(path)
    } else {
        walkdir::WalkDir::new(path).max_depth(1)
    };
    let mut files: Vec<String> = walkdir
        .into_iter()
        // Best effort: ignore entries the walker failed to read.
        .filter_map(|entry| entry.ok())
        .filter(|entry| entry.path().is_file())
        // Non-UTF-8 names cannot be returned as `String`; skip them instead of panicking.
        .filter_map(|entry| entry.path().to_str().map(str::to_owned))
        .filter(|file| file.ends_with(KCL_FILE_SUFFIX))
        .collect();
    files.sort();
    Ok(files)
}
Loading

0 comments on commit 9016255

Please sign in to comment.