diff --git a/kclvm/Cargo.lock b/kclvm/Cargo.lock index 8583c784c..68eafc026 100644 --- a/kclvm/Cargo.lock +++ b/kclvm/Cargo.lock @@ -1413,6 +1413,7 @@ dependencies = [ "kclvm-parser", "kclvm-query", "kclvm-sema", + "kclvm-span", "kclvm-tools", "kclvm-utils", "kclvm-version", diff --git a/kclvm/macros/src/symbols.rs b/kclvm/macros/src/symbols.rs index f218f9f60..144f33b80 100644 --- a/kclvm/macros/src/symbols.rs +++ b/kclvm/macros/src/symbols.rs @@ -115,6 +115,7 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { let mut keyword_stream = quote! {}; let mut symbols_stream = quote! {}; let mut prefill_stream = quote! {}; + let mut reserved_word_stream = quote! {}; let mut counter = 0u32; let mut keys = HashMap::::with_capacity(input.keywords.len() + input.symbols.len() + 10); @@ -151,6 +152,7 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { let value = &keyword.value; let value_string = value.value(); check_dup(keyword.name.span(), &value_string, &mut errors); + reserved_word_stream.extend(quote! {#value_string,}); prefill_stream.extend(quote! { #value, }); @@ -170,6 +172,7 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { check_dup(symbol.name.span(), &value, &mut errors); check_order(symbol.name.span(), &name.to_string(), &mut errors); + reserved_word_stream.extend(quote! {#value,}); prefill_stream.extend(quote! { #value, }); @@ -178,6 +181,7 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { }); counter += 1; } + let reserved_count = counter as usize; // Generate symbols for the strings "0", "1", ..., "9". let digits_base = counter; @@ -208,6 +212,12 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { #symbols_stream } + #[doc(hidden)] + #[allow(non_upper_case_globals)] + pub mod reserved_word { + pub const reserved_words : [&str; #reserved_count] = [#reserved_word_stream]; + } + impl Interner { pub(crate) fn fresh() -> Self { Interner::prefill(&[ diff --git a/kclvm/span/src/symbol.rs b/kclvm/span/src/symbol.rs index 8b783f87b..d57abd990 100644 --- a/kclvm/span/src/symbol.rs +++ b/kclvm/span/src/symbol.rs @@ -228,6 +228,15 @@ pub mod sym { } } +pub mod reserved { + + pub use super::reserved_word; + + pub fn is_reserved_word(word: &str) -> bool { + reserved_word::reserved_words.contains(&word) + } +} + /// Special symbols related to KCL keywords. impl Symbol { /// Returns `true` if the symbol is `true` or `false`. diff --git a/kclvm/tools/src/LSP/Cargo.toml b/kclvm/tools/src/LSP/Cargo.toml index 59e142a02..1d0b228b1 100644 --- a/kclvm/tools/src/LSP/Cargo.toml +++ b/kclvm/tools/src/LSP/Cargo.toml @@ -30,7 +30,8 @@ kclvm-ast = { path = "../../../ast" } kclvm-utils = { path = "../../../utils" } kclvm-version = { path = "../../../version" } compiler_base_session = { path = "../../../../compiler_base/session" } -kclvm-query = {path = "../../../query"} +kclvm-query = { path = "../../../query" } +kclvm-span = { path = "../../../span" } lsp-server = { version = "0.6.0", default-features = false } anyhow = { version = "1.0", default-features = false, features = ["std"] } diff --git a/kclvm/tools/src/LSP/src/find_refs.rs b/kclvm/tools/src/LSP/src/find_refs.rs index 1145dadbc..2a35fab43 100644 --- a/kclvm/tools/src/LSP/src/find_refs.rs +++ b/kclvm/tools/src/LSP/src/find_refs.rs @@ -130,7 +130,7 @@ mod tests { fn setup_word_index_map(root: &str) -> HashMap>> { HashMap::from([( Url::from_file_path(root).unwrap(), - build_word_index(root.to_string()).unwrap(), + build_word_index(root.to_string(), true).unwrap(), )]) } diff --git a/kclvm/tools/src/LSP/src/notification.rs b/kclvm/tools/src/LSP/src/notification.rs index bbb044e99..c4313e892 100644 --- a/kclvm/tools/src/LSP/src/notification.rs +++ b/kclvm/tools/src/LSP/src/notification.rs @@ -97,8 +97,9 @@ impl LanguageServerState { vfs.set_file_contents(path.into(), Some(text.clone().into_bytes())); // update word index - let old_word_index = build_word_index_for_file_content(old_text, &text_document.uri); - let new_word_index = build_word_index_for_file_content(text.clone(), &text_document.uri); + let old_word_index = build_word_index_for_file_content(old_text, &text_document.uri, true); + let new_word_index = + build_word_index_for_file_content(text.clone(), &text_document.uri, true); let binding = text_document.uri.path(); let file_path = Path::new(binding); //todo rename let word_index_map = &mut *self.word_index_map.write(); diff --git a/kclvm/tools/src/LSP/src/rename.rs b/kclvm/tools/src/LSP/src/rename.rs index c051c9272..57cddc016 100644 --- a/kclvm/tools/src/LSP/src/rename.rs +++ b/kclvm/tools/src/LSP/src/rename.rs @@ -30,7 +30,7 @@ pub fn rename_symbol( match select_symbol(&symbol_spec) { Some((name, range)) => { // 3. build word index on file_paths, find refs within file_paths scope - let word_index = build_word_index_for_file_paths(file_paths)?; + let word_index = build_word_index_for_file_paths(file_paths, true)?; if let Some(locations) = word_index.get(&name) { // 4. filter out the matched refs // 4.1 collect matched words(names) and remove Duplicates of the file paths diff --git a/kclvm/tools/src/LSP/src/state.rs b/kclvm/tools/src/LSP/src/state.rs index 80e07f2fe..1b9651aa2 100644 --- a/kclvm/tools/src/LSP/src/state.rs +++ b/kclvm/tools/src/LSP/src/state.rs @@ -125,7 +125,7 @@ impl LanguageServerState { let word_index_map = state.word_index_map.clone(); state .thread_pool - .execute(move || build_word_index_map(word_index_map, initialize_params)); + .execute(move || build_word_index_map(word_index_map, initialize_params, true)); state } @@ -334,17 +334,18 @@ pub(crate) fn log_message(message: String, sender: &Sender) -> anyhow::Res fn build_word_index_map( word_index_map: Arc>>>>, initialize_params: InitializeParams, + prune: bool, ) { if let Some(workspace_folders) = initialize_params.workspace_folders { for folder in workspace_folders { let path = folder.uri.path(); - if let Ok(word_index) = build_word_index(path.to_string()) { + if let Ok(word_index) = build_word_index(path.to_string(), prune) { word_index_map.write().insert(folder.uri, word_index); } } } else if let Some(root_uri) = initialize_params.root_uri { let path = root_uri.path(); - if let Ok(word_index) = build_word_index(path.to_string()) { + if let Ok(word_index) = build_word_index(path.to_string(), prune) { word_index_map.write().insert(root_uri, word_index); } } diff --git a/kclvm/tools/src/LSP/src/util.rs b/kclvm/tools/src/LSP/src/util.rs index 2ae39c392..d6f9e5c40 100644 --- a/kclvm/tools/src/LSP/src/util.rs +++ b/kclvm/tools/src/LSP/src/util.rs @@ -18,6 +18,7 @@ use kclvm_sema::pkgpath_without_prefix; use kclvm_sema::resolver::resolve_program_with_opts; use kclvm_sema::resolver::scope::ProgramScope; use kclvm_sema::resolver::scope::Scope; +use kclvm_span::symbol::reserved; use kclvm_utils::pkgpath::rm_external_pkg_name; use lsp_types::{Location, Position, Range, Url}; use parking_lot::{RwLock, RwLockReadGuard}; @@ -784,6 +785,7 @@ pub(crate) fn get_pkg_scope( pub(crate) fn build_word_index_for_file_paths( paths: &[String], + prune: bool, ) -> anyhow::Result>> { let mut index: HashMap> = HashMap::new(); for p in paths { @@ -791,7 +793,7 @@ pub(crate) fn build_word_index_for_file_paths( if let Ok(url) = Url::from_file_path(p) { // read file content and save the word to word index let text = read_file(p)?; - for (key, values) in build_word_index_for_file_content(text, &url) { + for (key, values) in build_word_index_for_file_content(text, &url, prune) { index.entry(key).or_insert_with(Vec::new).extend(values); } } @@ -800,9 +802,12 @@ pub(crate) fn build_word_index_for_file_paths( } /// scan and build a word -> Locations index map -pub(crate) fn build_word_index(path: String) -> anyhow::Result>> { +pub(crate) fn build_word_index( + path: String, + prune: bool, +) -> anyhow::Result>> { if let Ok(files) = get_kcl_files(path.clone(), true) { - return build_word_index_for_file_paths(&files); + return build_word_index_for_file_paths(&files, prune); } Ok(HashMap::new()) } @@ -810,11 +815,25 @@ pub(crate) fn build_word_index(path: String) -> anyhow::Result HashMap> { let mut index: HashMap> = HashMap::new(); let lines: Vec<&str> = content.lines().collect(); + let mut in_docstring = false; for (li, line) in lines.into_iter().enumerate() { - let words = line_to_words(line.to_string()); + if prune && !in_docstring { + if line.trim_start().starts_with("\"\"\"") { + in_docstring = true; + continue; + } + } + if prune && in_docstring { + if line.trim_end().ends_with("\"\"\"") { + in_docstring = false; + } + continue; + } + let words = line_to_words(line.to_string(), prune); for (key, values) in words { index .entry(key) @@ -878,7 +897,7 @@ fn read_file(path: &String) -> anyhow::Result { } // Split one line into identifier words. -fn line_to_words(text: String) -> HashMap> { +fn line_to_words(text: String, prune: bool) -> HashMap> { let mut result = HashMap::new(); let mut chars: Vec = text.chars().collect(); chars.push('\n'); @@ -887,6 +906,9 @@ fn line_to_words(text: String) -> HashMap> { let mut prev_word = false; let mut words: Vec = vec![]; for (i, ch) in chars.iter().enumerate() { + if prune && *ch == '#' { + break; + } let is_id_start = rustc_lexer::is_id_start(*ch); let is_id_continue = rustc_lexer::is_id_continue(*ch); // If the character is valid identfier start and the previous character is not valid identifier continue, mark the start position. @@ -901,11 +923,11 @@ fn line_to_words(text: String) -> HashMap> { } else { // Find out the end position. if continue_pos + 1 == i { - words.push(Word::new( - start_pos as u32, - i as u32, - chars[start_pos..i].iter().collect::().clone(), - )); + let word = chars[start_pos..i].iter().collect::().clone(); + // skip word if it should be pruned + if !prune || !reserved::is_reserved_word(&word) { + words.push(Word::new(start_pos as u32, i as u32, word)); + } } // Reset the start position. start_pos = usize::MAX; @@ -921,7 +943,10 @@ fn line_to_words(text: String) -> HashMap> { #[cfg(test)] mod tests { - use super::{build_word_index, line_to_words, word_index_add, word_index_subtract, Word}; + use super::{ + build_word_index, build_word_index_for_file_content, line_to_words, word_index_add, + word_index_subtract, Word, + }; use lsp_types::{Location, Position, Range, Url}; use std::collections::HashMap; use std::path::PathBuf; @@ -1006,25 +1031,6 @@ mod tests { }, ], ), - ( - "schema".to_string(), - vec![ - Location { - uri: url.clone(), - range: Range { - start: Position::new(4, 0), - end: Position::new(4, 6), - }, - }, - Location { - uri: url.clone(), - range: Range { - start: Position::new(7, 0), - end: Position::new(7, 6), - }, - }, - ], - ), ( "b".to_string(), vec![Location { @@ -1090,16 +1096,6 @@ mod tests { }, }], ), - ( - "str".to_string(), - vec![Location { - uri: url.clone(), - range: Range { - start: Position::new(5, 10), - end: Position::new(5, 13), - }, - }], - ), ( "Person".to_string(), vec![ @@ -1132,7 +1128,7 @@ mod tests { ] .into_iter() .collect(); - match build_word_index(path.to_string()) { + match build_word_index(path.to_string(), true) { Ok(actual) => { assert_eq!(expect, actual) } @@ -1192,27 +1188,23 @@ mod tests { #[test] fn test_line_to_words() { - let lines = ["schema Person:", "name. name again", "some_word word !word"]; + let lines = [ + "schema Person:", + "name. name again", + "some_word word !word", + "# this line is a single-line comment", + "name # end of line comment", + ]; let expects: Vec>> = vec![ - vec![ - ( - "schema".to_string(), - vec![Word { - start_col: 0, - end_col: 6, - word: "schema".to_string(), - }], - ), - ( - "Person".to_string(), - vec![Word { - start_col: 7, - end_col: 13, - word: "Person".to_string(), - }], - ), - ] + vec![( + "Person".to_string(), + vec![Word { + start_col: 7, + end_col: 13, + word: "Person".to_string(), + }], + )] .into_iter() .collect(), vec![ @@ -1269,10 +1261,72 @@ mod tests { ] .into_iter() .collect(), + HashMap::new(), + vec![( + "name".to_string(), + vec![Word { + start_col: 0, + end_col: 4, + word: "name".to_string(), + }], + )] + .into_iter() + .collect(), ]; for i in 0..lines.len() { - let got = line_to_words(lines[i].to_string()); + let got = line_to_words(lines[i].to_string(), true); assert_eq!(expects[i], got) } } + + #[test] + fn test_build_word_index_for_file_content() { + let content = r#"schema Person: + """ + This is a docstring. + Person is a schema which defines a person's name and age. + """ + name: str # name must not be empty + # age is a positive integer + age: int +"#; + let mock_url = Url::parse("file:///path/to/file.k").unwrap(); + let expects: HashMap> = vec![ + ( + "Person".to_string(), + vec![Location { + uri: mock_url.clone(), + range: Range { + start: Position::new(0, 7), + end: Position::new(0, 13), + }, + }], + ), + ( + "name".to_string(), + vec![Location { + uri: mock_url.clone(), + range: Range { + start: Position::new(5, 4), + end: Position::new(5, 8), + }, + }], + ), + ( + "age".to_string(), + vec![Location { + uri: mock_url.clone(), + range: Range { + start: Position::new(7, 4), + end: Position::new(7, 7), + }, + }], + ), + ] + .into_iter() + .collect(); + + let got = build_word_index_for_file_content(content.to_string(), &mock_url.clone(), true); + assert_eq!(expects, got) + } }