diff --git a/kclvm/Cargo.lock b/kclvm/Cargo.lock index ad9274ab2..502e1d015 100644 --- a/kclvm/Cargo.lock +++ b/kclvm/Cargo.lock @@ -1531,6 +1531,7 @@ dependencies = [ "kclvm-span", "serde", "serde_json", + "thread_local", "uuid", ] @@ -2198,9 +2199,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oorandom" diff --git a/kclvm/api/src/service/into.rs b/kclvm/api/src/service/into.rs index 4790a6f0a..36f5d8d18 100644 --- a/kclvm/api/src/service/into.rs +++ b/kclvm/api/src/service/into.rs @@ -1,10 +1,11 @@ use crate::gpyrpc::{ - CliConfig, Error, KeyValuePair, LoadSettingsFilesResult, Message, Position, Symbol, SymbolIndex, + CliConfig, Error, KeyValuePair, LoadSettingsFilesResult, Message, Position, Scope, ScopeIndex, + Symbol, SymbolIndex, }; use kclvm_config::settings::SettingsFile; use kclvm_error::Diagnostic; -use kclvm_loader::SymbolInfo; -use kclvm_sema::core::symbol::SymbolRef; +use kclvm_loader::{ScopeInfo, SymbolInfo}; +use kclvm_sema::core::{scope::ScopeRef, symbol::SymbolRef}; pub(crate) trait IntoLoadSettingsFiles { /// Convert self into the LoadSettingsFiles structure. @@ -23,6 +24,14 @@ pub(crate) trait IntoSymbol { fn into_symbol(self) -> Symbol; } +pub(crate) trait IntoScope { + fn into_scope(self) -> Scope; +} + +pub(crate) trait IntoScopeIndex { + fn into_scope_index(self) -> ScopeIndex; +} + impl IntoLoadSettingsFiles for SettingsFile { fn into_load_settings_files(self, files: &[String]) -> LoadSettingsFilesResult { LoadSettingsFilesResult { @@ -89,6 +98,17 @@ impl IntoSymbolIndex for SymbolRef { } } +impl IntoScopeIndex for ScopeRef { + fn into_scope_index(self) -> ScopeIndex { + let (index, generation) = self.get_id().into_raw_parts(); + ScopeIndex { + i: index as u64, + g: generation as u64, + kind: format!("{:?}", self.get_kind()), + } + } +} + impl IntoSymbol for SymbolInfo { fn into_symbol(self) -> Symbol { Symbol { @@ -101,3 +121,15 @@ impl IntoSymbol for SymbolInfo { } } } + +impl IntoScope for ScopeInfo { + fn into_scope(self) -> Scope { + Scope { + kind: format!("{:?}", self.kind), + parent: self.parent.map(|o| o.into_scope_index()), + owner: self.owner.map(|o| o.into_symbol_index()), + children: self.children.iter().map(|a| a.into_scope_index()).collect(), + defs: self.defs.iter().map(|a| a.into_symbol_index()).collect(), + } + } +} diff --git a/kclvm/api/src/service/service_impl.rs b/kclvm/api/src/service/service_impl.rs index 829c0d944..04664974c 100644 --- a/kclvm/api/src/service/service_impl.rs +++ b/kclvm/api/src/service/service_impl.rs @@ -173,6 +173,8 @@ impl KclvmServiceImpl { /// assert_eq!(result.type_errors.len(), 0); /// assert_eq!(result.node_symbol_map.len(), 159); /// assert_eq!(result.symbols.len(), 12); + /// assert_eq!(result.scopes.len(), 3); + /// assert_eq!(result.pkg_scope_map.len(), 3); /// ``` pub fn load_package(&self, args: &LoadPackageArgs) -> anyhow::Result { let mut package_maps = HashMap::new(); @@ -191,16 +193,29 @@ impl KclvmServiceImpl { resolve_ast: args.resolve_ast, load_builtin: args.load_builtin, })?; + if args.with_ast_index { + // Thread local options + kclvm_ast::ast::set_should_serialize_id(true); + } let program_json = serde_json::to_string(&packages.program)?; let mut node_symbol_map = HashMap::new(); + let mut pkg_scope_map = HashMap::new(); let mut symbols = HashMap::new(); + let mut scopes = HashMap::new(); for (k, s) in packages.node_symbol_map { node_symbol_map.insert(k.id.to_string(), s.into_symbol_index()); } + for (k, s) in packages.pkg_scope_map { + pkg_scope_map.insert(k, s.into_scope_index()); + } for (k, s) in packages.symbols { let symbol_index_string = serde_json::to_string(&k)?; symbols.insert(symbol_index_string, s.into_symbol()); } + for (k, s) in packages.scopes { + let scope_index_string = serde_json::to_string(&k)?; + scopes.insert(scope_index_string, s.into_scope()); + } Ok(LoadPackageResult { program: program_json, paths: packages @@ -209,7 +224,9 @@ impl KclvmServiceImpl { .map(|p| p.to_str().unwrap().to_string()) .collect(), node_symbol_map, + pkg_scope_map, symbols, + scopes, parse_errors: packages .parse_errors .into_iter() diff --git a/kclvm/ast/Cargo.toml b/kclvm/ast/Cargo.toml index 0ef4fc611..0075941c0 100644 --- a/kclvm/ast/Cargo.toml +++ b/kclvm/ast/Cargo.toml @@ -13,6 +13,7 @@ serde_json = "1.0" kclvm-span = { path = "../span" } kclvm-error = { path = "../error" } +thread_local = "1.1.7" [dev-dependencies] kclvm-parser = { path = "../parser" } diff --git a/kclvm/ast/src/ast.rs b/kclvm/ast/src/ast.rs index 8b628761f..79a6971de 100644 --- a/kclvm/ast/src/ast.rs +++ b/kclvm/ast/src/ast.rs @@ -33,7 +33,7 @@ //! in the compiler and regenerate the walker code. //! :copyright: Copyright The KCL Authors. All rights reserved. -use serde::{Deserialize, Serialize}; +use serde::{ser::SerializeStruct, Deserialize, Serialize, Serializer}; use std::collections::HashMap; use compiler_base_span::{Loc, Span}; @@ -43,6 +43,11 @@ use uuid; use super::token; use crate::{node_ref, pos::ContainsPos}; use kclvm_error::{diagnostic::Range, Position}; +use std::cell::RefCell; + +thread_local! { + static SHOULD_SERIALIZE_ID: RefCell = RefCell::new(false); +} /// PosTuple denotes the tuple `(filename, line, column, end_line, end_column)`. pub type PosTuple = (String, u64, u64, u64, u64); @@ -87,7 +92,7 @@ impl Serialize for AstIndex { where S: serde::Serializer, { - serializer.serialize_bytes(self.0.as_bytes()) + serializer.serialize_str(&self.to_string()) } } @@ -107,9 +112,9 @@ impl ToString for AstIndex { /// that all AST nodes need to contain. /// In fact, column and end_column are the counts of character, /// For example, `\t` is counted as 1 character, so it is recorded as 1 here, but generally col is 4. -#[derive(Serialize, Deserialize, Clone, PartialEq)] +#[derive(Deserialize, Clone, PartialEq)] pub struct Node { - #[serde(skip_serializing, skip_deserializing, default)] + #[serde(serialize_with = "serialize_id", skip_deserializing, default)] pub id: AstIndex, pub node: T, pub filename: String, @@ -119,6 +124,33 @@ pub struct Node { pub end_column: u64, } +impl Serialize for Node { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let should_serialize_id = SHOULD_SERIALIZE_ID.with(|f| *f.borrow()); + let mut state = + serializer.serialize_struct("Node", if should_serialize_id { 7 } else { 6 })?; + if should_serialize_id { + state.serialize_field("id", &self.id)?; + } + state.serialize_field("node", &self.node)?; + state.serialize_field("filename", &self.filename)?; + state.serialize_field("line", &self.line)?; + state.serialize_field("column", &self.column)?; + state.serialize_field("end_line", &self.end_line)?; + state.serialize_field("end_column", &self.end_column)?; + state.end() + } +} + +pub fn set_should_serialize_id(value: bool) { + SHOULD_SERIALIZE_ID.with(|f| { + *f.borrow_mut() = value; + }); +} + impl Debug for Node { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Node") diff --git a/kclvm/loader/src/lib.rs b/kclvm/loader/src/lib.rs index d67d93162..7e39086a3 100644 --- a/kclvm/loader/src/lib.rs +++ b/kclvm/loader/src/lib.rs @@ -7,7 +7,11 @@ use kclvm_error::{diagnostic::Range, Diagnostic}; use kclvm_parser::{load_program, KCLModuleCache, LoadProgramOptions, ParseSessionRef}; use kclvm_sema::{ advanced_resolver::AdvancedResolver, - core::{global_state::GlobalState, symbol::SymbolRef}, + core::{ + global_state::GlobalState, + scope::{LocalSymbolScopeKind, ScopeData, ScopeRef}, + symbol::{SymbolData, SymbolRef}, + }, namer::Namer, resolver::{resolve_program_with_opts, scope::NodeKey}, ty::{Type, TypeRef}, @@ -42,12 +46,16 @@ pub struct Packages { pub paths: Vec, /// All Parse errors pub parse_errors: Errors, - // Type errors + /// Type errors pub type_errors: Errors, - // Symbol-Type mapping + /// Symbol information pub symbols: IndexMap, + /// Scope information + pub scopes: IndexMap, // AST Node-Symbol mapping pub node_symbol_map: IndexMap, + // - mapping + pub pkg_scope_map: IndexMap, } #[derive(Debug, Clone)] @@ -61,6 +69,33 @@ pub struct SymbolInfo { pub is_global: bool, } +#[derive(Debug, Clone)] +pub struct ScopeInfo { + /// Scope kind + pub kind: ScopeKind, + /// Scope parent + pub parent: Option, + /// Scope owner + pub owner: Option, + /// Children scopes + pub children: Vec, + /// Definitions + pub defs: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ScopeKind { + Package, + Module, + List, + Dict, + Quant, + Lambda, + SchemaDef, + SchemaConfig, + Value, +} + /// load_package provides users with the ability to parse kcl program and sematic model /// information including symbols, types, definitions, etc. pub fn load_packages(opts: &LoadPackageOptions) -> Result { @@ -101,8 +136,7 @@ pub fn load_packages(opts: &LoadPackageOptions) -> Result { paths: parse_result.paths, parse_errors, type_errors, - symbols: IndexMap::new(), - node_symbol_map: IndexMap::new(), + ..Default::default() }; if !opts.resolve_ast { return Ok(packages); @@ -143,5 +177,66 @@ pub fn load_packages(opts: &LoadPackageOptions) -> Result { } } } + let scopes = gs.get_scopes(); + for (path, scope_ref) in scopes.get_root_scope_map() { + packages.pkg_scope_map.insert(path.clone(), *scope_ref); + // Root scopes + if let Some(scope_ref) = scopes.get_root_scope(path.clone()) { + collect_scope_info( + &mut packages.scopes, + &scope_ref, + scopes, + symbols, + ScopeKind::Package, + ); + } + } Ok(packages) } + +impl From for ScopeKind { + fn from(value: LocalSymbolScopeKind) -> Self { + match value { + LocalSymbolScopeKind::List => ScopeKind::List, + LocalSymbolScopeKind::Dict => ScopeKind::Dict, + LocalSymbolScopeKind::Quant => ScopeKind::Quant, + LocalSymbolScopeKind::Lambda => ScopeKind::Lambda, + LocalSymbolScopeKind::SchemaDef => ScopeKind::SchemaDef, + LocalSymbolScopeKind::SchemaConfig => ScopeKind::SchemaConfig, + LocalSymbolScopeKind::Value => ScopeKind::Value, + } + } +} + +fn collect_scope_info( + scopes: &mut IndexMap, + scope_ref: &ScopeRef, + scope_data: &ScopeData, + symbol_data: &SymbolData, + kind: ScopeKind, +) { + if let Some(scope) = scope_data.get_scope(scope_ref) { + let kind = if let Some(scope) = scope_data.try_get_local_scope(scope_ref) { + scope.get_kind().clone().into() + } else { + kind + }; + scopes.insert( + scope_ref.clone(), + ScopeInfo { + kind, + parent: scope.get_parent(), + owner: scope.get_owner(), + children: scope.get_children(), + defs: scope + .get_all_defs(scope_data, symbol_data, None, false) + .values() + .copied() + .collect::>(), + }, + ); + for s in scope.get_children() { + collect_scope_info(scopes, &s, scope_data, symbol_data, ScopeKind::Module); + } + } +} diff --git a/kclvm/sema/src/advanced_resolver/mod.rs b/kclvm/sema/src/advanced_resolver/mod.rs index 13ffdcb2a..0f8dbc354 100644 --- a/kclvm/sema/src/advanced_resolver/mod.rs +++ b/kclvm/sema/src/advanced_resolver/mod.rs @@ -75,9 +75,9 @@ pub struct Context<'ctx> { cur_node: AstIndex, // whether the identifier currently being visited may be a definition - // it will only be true when visiting a lvalue or parameter, + // it will only be true when visiting a l-value or parameter, // which means advanced resolver will will create the corresponding - // ValueSymbol instead of an UnresolveSymbol + // ValueSymbol instead of an UnresolvedSymbol maybe_def: bool, } diff --git a/kclvm/sema/src/advanced_resolver/node.rs b/kclvm/sema/src/advanced_resolver/node.rs index df66bd6c5..dca600948 100644 --- a/kclvm/sema/src/advanced_resolver/node.rs +++ b/kclvm/sema/src/advanced_resolver/node.rs @@ -413,7 +413,7 @@ impl<'ctx> MutSelfTypedResultWalker<'ctx> for AdvancedResolver<'ctx> { .node_symbol_map .get(&self.ctx.get_node_key(&schema_attr.name.id))?; let parent_scope = *self.ctx.scopes.last().unwrap(); - let parent_scope = self.gs.get_scopes().get_scope(parent_scope).unwrap(); + let parent_scope = self.gs.get_scopes().get_scope(&parent_scope).unwrap(); let mut doc = None; if let Some(schema_symbol) = parent_scope.get_owner() { let schema_symbol = self.gs.get_symbols().get_symbol(schema_symbol).unwrap(); diff --git a/kclvm/sema/src/core/global_state.rs b/kclvm/sema/src/core/global_state.rs index 353e96be3..75655f4b9 100644 --- a/kclvm/sema/src/core/global_state.rs +++ b/kclvm/sema/src/core/global_state.rs @@ -76,7 +76,7 @@ impl GlobalState { scope_ref: ScopeRef, module_info: Option<&ModuleInfo>, ) -> Option { - match self.scopes.get_scope(scope_ref)?.look_up_def( + match self.scopes.get_scope(&scope_ref)?.look_up_def( name, &self.scopes, &self.symbols, @@ -107,7 +107,7 @@ impl GlobalState { pub fn look_up_scope(&self, pos: &Position) -> Option { let scopes = &self.scopes; for root_ref in scopes.root_map.values() { - if let Some(root) = scopes.get_scope(*root_ref) { + if let Some(root) = scopes.get_scope(root_ref) { if root.contains_pos(pos) { if let Some(inner_ref) = self.look_up_into_scope(*root_ref, pos) { return Some(inner_ref); @@ -169,7 +169,7 @@ impl GlobalState { /// all definition symbols in the scope pub fn get_all_defs_in_scope(&self, scope: ScopeRef) -> Option> { let scopes = &self.scopes; - let scope = scopes.get_scope(scope)?; + let scope = scopes.get_scope(&scope)?; let all_defs: Vec = scope .get_all_defs( scopes, @@ -208,7 +208,7 @@ impl GlobalState { Some(parent_scope_ref) => { let candidate_symbol = self.symbols.get_symbol(candidate?)?; let (start, _) = candidate_symbol.get_range(); - let parent_scope = self.scopes.get_scope(parent_scope_ref)?; + let parent_scope = self.scopes.get_scope(&parent_scope_ref)?; if parent_scope.contains_pos(&start) { let barrier_scope = self.look_up_closest_sub_scope(parent_scope_ref, pos); match barrier_scope { @@ -271,7 +271,7 @@ impl GlobalState { fn look_up_into_scope(&self, parent: ScopeRef, pos: &Position) -> Option { let candidate_ref = self.look_up_closest_sub_scope(parent, pos)?; - let candidate = self.scopes.get_scope(candidate_ref)?; + let candidate = self.scopes.get_scope(&candidate_ref)?; if candidate.contains_pos(pos) { if let Some(inner_ref) = self.look_up_into_scope(candidate_ref, pos) { return Some(inner_ref); diff --git a/kclvm/sema/src/core/scope.rs b/kclvm/sema/src/core/scope.rs index 360a79237..ac5010d04 100644 --- a/kclvm/sema/src/core/scope.rs +++ b/kclvm/sema/src/core/scope.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use indexmap::{IndexMap, IndexSet}; use kclvm_error::Position; +use serde::Serialize; use crate::core::symbol::SymbolRef; @@ -36,7 +37,7 @@ pub trait Scope { fn dump(&self, scope_data: &ScopeData, symbol_data: &Self::SymbolData) -> Option; } -#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] +#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, Serialize)] pub enum ScopeKind { Local, Root, @@ -48,6 +49,29 @@ pub struct ScopeRef { pub(crate) kind: ScopeKind, } +impl Serialize for ScopeRef { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let (index, generation) = self.id.into_raw_parts(); + let data = SerializableScopeRef { + i: index as u64, + g: generation, + kind: self.kind.clone(), + }; + data.serialize(serializer) + } +} + +#[derive(Debug, Clone, Serialize)] + +struct SerializableScopeRef { + i: u64, + g: u64, + kind: ScopeKind, +} + impl ScopeRef { pub fn get_id(&self) -> generational_arena::Index { self.id @@ -67,7 +91,12 @@ pub struct ScopeData { } impl ScopeData { - pub fn get_scope(&self, scope: ScopeRef) -> Option<&dyn Scope> { + #[inline] + pub fn get_root_scope_map(&self) -> &IndexMap { + &self.root_map + } + + pub fn get_scope(&self, scope: &ScopeRef) -> Option<&dyn Scope> { match scope.get_kind() { ScopeKind::Local => { Some(self.locals.get(scope.get_id())? as &dyn Scope) @@ -78,6 +107,13 @@ impl ScopeData { } } + pub fn try_get_local_scope(&self, scope: &ScopeRef) -> Option<&LocalSymbolScope> { + match scope.get_kind() { + ScopeKind::Local => Some(self.locals.get(scope.get_id())?), + ScopeKind::Root => None, + } + } + pub fn get_root_scope(&self, name: String) -> Option { self.root_map.get(&name).copied() } @@ -241,7 +277,7 @@ impl Scope for RootSymbolScope { for (index, (key, scopes)) in self.children.iter().enumerate() { output.push_str(&format!("\"{}\": [\n", key)); for (index, scope) in scopes.iter().enumerate() { - let scope = scope_data.get_scope(*scope)?; + let scope = scope_data.get_scope(scope)?; output.push_str(&format!("{}", scope.dump(scope_data, symbol_data)?)); if index + 1 < scopes.len() { output.push_str(",\n"); @@ -289,7 +325,6 @@ impl RootSymbolScope { } } -#[allow(unused)] #[derive(Debug, Clone)] pub struct LocalSymbolScope { pub(crate) parent: ScopeRef, @@ -303,7 +338,6 @@ pub struct LocalSymbolScope { pub(crate) kind: LocalSymbolScopeKind, } -#[allow(unused)] #[derive(Debug, Clone, PartialEq, Eq)] pub enum LocalSymbolScopeKind { List, @@ -358,7 +392,7 @@ impl Scope for LocalSymbolScope { return Some(symbol_ref); } }; - let parent = scope_data.get_scope(self.parent)?; + let parent = scope_data.get_scope(&self.parent)?; parent.look_up_def(name, scope_data, symbol_data, module_info) } } @@ -412,7 +446,7 @@ impl Scope for LocalSymbolScope { } } - if let Some(parent) = scope_data.get_scope(self.parent) { + if let Some(parent) = scope_data.get_scope(&self.parent) { for (name, def_ref) in parent.get_all_defs(scope_data, symbol_data, module_info, true) { @@ -468,7 +502,7 @@ impl Scope for LocalSymbolScope { output.push_str("\n],"); output.push_str("\n\"children\": [\n"); for (index, scope) in self.children.iter().enumerate() { - let scope = scope_data.get_scope(*scope)?; + let scope = scope_data.get_scope(scope)?; output.push_str(&format!("{}", scope.dump(scope_data, symbol_data)?)); if index + 1 < self.children.len() { output.push_str(",\n") @@ -502,10 +536,17 @@ impl LocalSymbolScope { } } + #[inline] + pub fn get_kind(&self) -> &LocalSymbolScopeKind { + &self.kind + } + + #[inline] pub fn add_child(&mut self, child: ScopeRef) { self.children.push(child) } + #[inline] pub fn set_owner(&mut self, owner: SymbolRef) { self.owner = Some(owner) } diff --git a/kclvm/spec/gpyrpc/gpyrpc.proto b/kclvm/spec/gpyrpc/gpyrpc.proto index bf50f2b98..0d9c17eb9 100644 --- a/kclvm/spec/gpyrpc/gpyrpc.proto +++ b/kclvm/spec/gpyrpc/gpyrpc.proto @@ -1,4 +1,4 @@ -// Copyright 2023 The KCL Authors. All rights reserved. +// Copyright The KCL Authors. All rights reserved. // // This file defines the request parameters and return structure of the KCL RPC server. @@ -122,6 +122,7 @@ message LoadPackage_Args { ParseProgram_Args parse_args = 1; bool resolve_ast = 2; bool load_builtin = 3; + bool with_ast_index = 4; } message LoadPackage_Result { @@ -129,8 +130,10 @@ message LoadPackage_Result { repeated string paths = 2; // Returns the files in the order they should be compiled map node_symbol_map = 3; // Map key is the AST index UUID string. map symbols = 4; // Map key is the SymbolIndex json string. - repeated Error parse_errors = 5; // Parse errors - repeated Error type_errors = 6; // Parse errors + map pkg_scope_map = 5; // Map key is the package path. + map scopes = 6; // Map key is the ScopeIndex json string. + repeated Error parse_errors = 7; // Parse errors + repeated Error type_errors = 8; // Type errors } message Symbol { @@ -142,12 +145,26 @@ message Symbol { bool is_global = 6; } +message Scope { + string kind = 1; + ScopeIndex parent = 2; + SymbolIndex owner = 3; + repeated ScopeIndex children = 4; + repeated SymbolIndex defs = 5; +} + message SymbolIndex { uint64 i = 1; uint64 g = 2; string kind = 3; } +message ScopeIndex { + uint64 i = 1; + uint64 g = 2; + string kind = 3; +} + message ExecProgram_Args { string work_dir = 1;