From 8c6038181585cbad5b06480b735056db7c09e1c3 Mon Sep 17 00:00:00 2001 From: Peefy Date: Wed, 17 Jan 2024 10:17:37 +0800 Subject: [PATCH] feat: semantic API for multiple language SDKs (#989) Signed-off-by: peefy --- kclvm/Cargo.lock | 36 +++++ kclvm/Cargo.toml | 1 + kclvm/api/Cargo.toml | 1 + kclvm/api/src/service/capi.rs | 22 +++ kclvm/api/src/service/into.rs | 38 ++++- kclvm/api/src/service/jsonrpc.rs | 8 ++ kclvm/api/src/service/service_impl.rs | 85 ++++++++++- kclvm/ast/src/ast.rs | 16 +++ kclvm/loader/Cargo.toml | 39 ++++++ kclvm/loader/src/lib.rs | 147 ++++++++++++++++++++ kclvm/parser/src/lib.rs | 12 +- kclvm/parser/src/session/mod.rs | 3 + kclvm/runner/src/lib.rs | 24 ++-- kclvm/sema/Cargo.toml | 1 + kclvm/sema/src/core/global_state.rs | 10 +- kclvm/sema/src/core/scope.rs | 12 +- kclvm/sema/src/core/semantic_information.rs | 2 +- kclvm/sema/src/core/symbol.rs | 62 ++++++--- kclvm/sema/src/namer/mod.rs | 8 +- kclvm/sema/src/resolver/scope.rs | 3 +- kclvm/spec/gpyrpc/gpyrpc.proto | 61 ++++++-- kclvm/tools/src/LSP/src/rename.rs | 4 +- kclvm/tools/src/LSP/src/util.rs | 2 +- 23 files changed, 528 insertions(+), 69 deletions(-) create mode 100644 kclvm/loader/Cargo.toml create mode 100644 kclvm/loader/src/lib.rs diff --git a/kclvm/Cargo.lock b/kclvm/Cargo.lock index 44cca7330..ad9274ab2 100644 --- a/kclvm/Cargo.lock +++ b/kclvm/Cargo.lock @@ -1465,6 +1465,7 @@ dependencies = [ "kclvm-config", "kclvm-driver", "kclvm-error", + "kclvm-loader", "kclvm-parser", "kclvm-query", "kclvm-runner", @@ -1495,6 +1496,7 @@ dependencies = [ "kclvm-config", "kclvm-driver", "kclvm-error", + "kclvm-loader", "kclvm-parser", "kclvm-query", "kclvm-runner", @@ -1652,6 +1654,39 @@ dependencies = [ "unic-emoji-char", ] +[[package]] +name = "kclvm-loader" +version = "0.7.4" +dependencies = [ + "anyhow", + "cc", + "chrono", + "compiler_base_macros", + "compiler_base_session", + "fslock", + "glob", + "indexmap 1.9.3", + "kclvm-ast", + "kclvm-compiler", + "kclvm-config", + "kclvm-driver", + 
"kclvm-error", + "kclvm-parser", + "kclvm-query", + "kclvm-runtime", + "kclvm-sema", + "kclvm-utils", + "kclvm-version", + "libc", + "libloading", + "once_cell", + "serde", + "serde_json", + "tempfile", + "threadpool", + "walkdir", +] + [[package]] name = "kclvm-macros" version = "0.7.4" @@ -1803,6 +1838,7 @@ dependencies = [ "petgraph", "phf", "regex", + "serde", "serde_json", "suggestions", "unicode_names2", diff --git a/kclvm/Cargo.toml b/kclvm/Cargo.toml index 5e7e67c21..9e815b1e0 100644 --- a/kclvm/Cargo.toml +++ b/kclvm/Cargo.toml @@ -37,6 +37,7 @@ kclvm-runner = {path = "./runner"} kclvm-parser = {path = "./parser"} kclvm-compiler = {path = "./compiler"} kclvm-config = {path = "./config"} +kclvm-loader = {path = "./loader"} kclvm-runtime = {path = "./runtime"} kclvm-sema = {path = "./sema"} kclvm-tools = {path = "./tools"} diff --git a/kclvm/api/Cargo.toml b/kclvm/api/Cargo.toml index 8f99ea1cd..b47f2c140 100644 --- a/kclvm/api/Cargo.toml +++ b/kclvm/api/Cargo.toml @@ -28,6 +28,7 @@ kclvm-config = {path = "../config"} kclvm-driver = {path = "../driver"} kclvm-error = {path = "../error"} kclvm-parser = {path = "../parser"} +kclvm-loader = {path = "../loader"} kclvm-sema = {path = "../sema"} kclvm-ast = {path = "../ast"} kclvm-ast-pretty = {path = "../ast_pretty"} diff --git a/kclvm/api/src/service/capi.rs b/kclvm/api/src/service/capi.rs index 98b2785eb..b4cd75d1d 100644 --- a/kclvm/api/src/service/capi.rs +++ b/kclvm/api/src/service/capi.rs @@ -107,6 +107,7 @@ pub(crate) fn kclvm_get_service_fn_ptr_by_name(name: &str) -> u64 { "KclvmService.Ping" => ping as *const () as u64, "KclvmService.ParseFile" => parse_file as *const () as u64, "KclvmService.ParseProgram" => parse_program as *const () as u64, + "KclvmService.LoadPackage" => load_package as *const () as u64, "KclvmService.ExecProgram" => exec_program as *const () as u64, "KclvmService.OverrideFile" => override_file as *const () as u64, "KclvmService.GetSchemaType" => get_schema_type as *const () as u64, 
@@ -170,6 +171,27 @@ pub(crate) fn parse_program(serv: *mut kclvm_service, args: *const c_char) -> *c call!(serv, args, ParseProgramArgs, parse_program) } +/// load_package provides users with the ability to parse kcl program and sematic model +/// information including symbols, types, definitions, etc, +/// +/// # Parameters +/// +/// `serv`: [*mut kclvm_service] +/// The pointer of &\[[KclvmServiceImpl]] +/// +/// +/// `args`: [*const c_char] +/// the items and compile parameters selected by the user in the KCL CLI +/// serialized as protobuf byte sequence +/// +/// # Returns +/// +/// result: [*const c_char] +/// Result of the call serialized as protobuf byte sequence +pub(crate) fn load_package(serv: *mut kclvm_service, args: *const c_char) -> *const c_char { + call!(serv, args, LoadPackageArgs, load_package) +} + /// exec_program provides users with the ability to execute KCL code /// /// # Parameters diff --git a/kclvm/api/src/service/into.rs b/kclvm/api/src/service/into.rs index d4f263fe0..4790a6f0a 100644 --- a/kclvm/api/src/service/into.rs +++ b/kclvm/api/src/service/into.rs @@ -1,6 +1,10 @@ -use crate::gpyrpc::{CliConfig, Error, KeyValuePair, LoadSettingsFilesResult, Message, Position}; +use crate::gpyrpc::{ + CliConfig, Error, KeyValuePair, LoadSettingsFilesResult, Message, Position, Symbol, SymbolIndex, +}; use kclvm_config::settings::SettingsFile; use kclvm_error::Diagnostic; +use kclvm_loader::SymbolInfo; +use kclvm_sema::core::symbol::SymbolRef; pub(crate) trait IntoLoadSettingsFiles { /// Convert self into the LoadSettingsFiles structure. 
@@ -11,6 +15,14 @@ pub(crate) trait IntoError { fn into_error(self) -> Error; } +pub(crate) trait IntoSymbolIndex { + fn into_symbol_index(self) -> SymbolIndex; +} + +pub(crate) trait IntoSymbol { + fn into_symbol(self) -> Symbol; +} + impl IntoLoadSettingsFiles for SettingsFile { fn into_load_settings_files(self, files: &[String]) -> LoadSettingsFilesResult { LoadSettingsFilesResult { @@ -65,3 +77,27 @@ impl IntoError for Diagnostic { } } } + +impl IntoSymbolIndex for SymbolRef { + fn into_symbol_index(self) -> SymbolIndex { + let (index, generation) = self.get_id().into_raw_parts(); + SymbolIndex { + i: index as u64, + g: generation as u64, + kind: format!("{:?}", self.get_kind()), + } + } +} + +impl IntoSymbol for SymbolInfo { + fn into_symbol(self) -> Symbol { + Symbol { + ty: self.ty.ty_str(), + name: self.name, + owner: self.owner.map(|o| o.into_symbol_index()), + def: self.def.map(|d| d.into_symbol_index()), + attrs: self.attrs.iter().map(|a| a.into_symbol_index()).collect(), + is_global: self.is_global, + } + } +} diff --git a/kclvm/api/src/service/jsonrpc.rs b/kclvm/api/src/service/jsonrpc.rs index 2dd5b1e7d..d86769bd7 100644 --- a/kclvm/api/src/service/jsonrpc.rs +++ b/kclvm/api/src/service/jsonrpc.rs @@ -83,6 +83,14 @@ fn register_kclvm_service(io: &mut IoHandler) { }; futures::future::ready(catch!(kclvm_service_impl, args, parse_program)) }); + io.add_method("KclvmService.LoadPackage", |params: Params| { + let kclvm_service_impl = KclvmServiceImpl::default(); + let args: LoadPackageArgs = match params.parse() { + Ok(val) => val, + Err(err) => return futures::future::ready(Err(err)), + }; + futures::future::ready(catch!(kclvm_service_impl, args, load_package)) + }); io.add_method("KclvmService.ExecProgram", |params: Params| { let kclvm_service_impl = KclvmServiceImpl::default(); let args: ExecProgramArgs = match params.parse() { diff --git a/kclvm/api/src/service/service_impl.rs b/kclvm/api/src/service/service_impl.rs index 13872800e..829c0d944 100644 
--- a/kclvm/api/src/service/service_impl.rs +++ b/kclvm/api/src/service/service_impl.rs @@ -10,8 +10,10 @@ use anyhow::anyhow; use kcl_language_server::rename; use kclvm_config::settings::build_settings_pathbuf; use kclvm_driver::canonicalize_input_files; +use kclvm_loader::{load_packages, LoadPackageOptions}; use kclvm_parser::load_program; use kclvm_parser::parse_file; +use kclvm_parser::KCLModuleCache; use kclvm_parser::LoadProgramOptions; use kclvm_parser::ParseSession; use kclvm_query::get_schema_type; @@ -30,8 +32,7 @@ use kclvm_tools::vet::validator::LoaderKind; use kclvm_tools::vet::validator::ValidateOption; use tempfile::NamedTempFile; -use super::into::IntoError; -use super::into::IntoLoadSettingsFiles; +use super::into::*; use super::ty::kcl_schema_ty_to_pb_ty; use super::util::transform_str_para; @@ -75,7 +76,7 @@ impl KclvmServiceImpl { /// // File case /// let serv = KclvmServiceImpl::default(); /// let args = &ParseProgramArgs { - /// paths: vec![Path::new(".").join("src").join("testdata").join("test.k").canonicalize().unwrap().display().to_string(),], + /// paths: vec![Path::new(".").join("src").join("testdata").join("test.k").canonicalize().unwrap().display().to_string()], /// ..Default::default() /// }; /// let result = serv.parse_program(args).unwrap(); @@ -95,9 +96,10 @@ impl KclvmServiceImpl { Some(LoadProgramOptions { k_code_list: args.sources.clone(), package_maps, + load_plugins: true, ..Default::default() }), - None, + Some(KCLModuleCache::default()), )?; let ast_json = serde_json::to_string(&result.program)?; @@ -146,6 +148,81 @@ impl KclvmServiceImpl { }) } + /// load_package provides users with the ability to parse kcl program and sematic model + /// information including symbols, types, definitions, etc. 
+ /// + /// # Examples + /// + /// ``` + /// use kclvm_api::service::service_impl::KclvmServiceImpl; + /// use kclvm_api::gpyrpc::*; + /// use std::path::Path; + /// + /// let serv = KclvmServiceImpl::default(); + /// let args = &LoadPackageArgs { + /// parse_args: Some(ParseProgramArgs { + /// paths: vec![Path::new(".").join("src").join("testdata").join("parse").join("main.k").canonicalize().unwrap().display().to_string()], + /// ..Default::default() + /// }), + /// resolve_ast: true, + /// ..Default::default() + /// }; + /// let result = serv.load_package(args).unwrap(); + /// assert_eq!(result.paths.len(), 3); + /// assert_eq!(result.parse_errors.len(), 0); + /// assert_eq!(result.type_errors.len(), 0); + /// assert_eq!(result.node_symbol_map.len(), 159); + /// assert_eq!(result.symbols.len(), 12); + /// ``` + pub fn load_package(&self, args: &LoadPackageArgs) -> anyhow::Result { + let mut package_maps = HashMap::new(); + let parse_args = args.parse_args.clone().unwrap_or_default(); + for p in &parse_args.external_pkgs { + package_maps.insert(p.pkg_name.to_string(), p.pkg_path.to_string()); + } + let packages = load_packages(&LoadPackageOptions { + paths: parse_args.paths, + load_opts: Some(LoadProgramOptions { + k_code_list: parse_args.sources.clone(), + package_maps, + load_plugins: true, + ..Default::default() + }), + resolve_ast: args.resolve_ast, + load_builtin: args.load_builtin, + })?; + let program_json = serde_json::to_string(&packages.program)?; + let mut node_symbol_map = HashMap::new(); + let mut symbols = HashMap::new(); + for (k, s) in packages.node_symbol_map { + node_symbol_map.insert(k.id.to_string(), s.into_symbol_index()); + } + for (k, s) in packages.symbols { + let symbol_index_string = serde_json::to_string(&k)?; + symbols.insert(symbol_index_string, s.into_symbol()); + } + Ok(LoadPackageResult { + program: program_json, + paths: packages + .paths + .iter() + .map(|p| p.to_str().unwrap().to_string()) + .collect(), + node_symbol_map, + 
symbols, + parse_errors: packages + .parse_errors + .into_iter() + .map(|e| e.into_error()) + .collect(), + type_errors: packages + .type_errors + .into_iter() + .map(|e| e.into_error()) + .collect(), + }) + } + /// Execute KCL file with args. **Note that it is not thread safe.** /// /// # Examples diff --git a/kclvm/ast/src/ast.rs b/kclvm/ast/src/ast.rs index e0579b427..d205e8fb0 100644 --- a/kclvm/ast/src/ast.rs +++ b/kclvm/ast/src/ast.rs @@ -82,11 +82,27 @@ impl Into for Pos { #[derive(Debug, PartialEq, Eq, Hash, Clone)] pub struct AstIndex(uuid::Uuid); +impl Serialize for AstIndex { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.serialize_bytes(self.0.as_bytes()) + } +} + impl Default for AstIndex { fn default() -> Self { Self(uuid::Uuid::new_v4()) } } + +impl ToString for AstIndex { + fn to_string(&self) -> String { + self.0.to_string() + } +} + /// Node is the file, line and column number information /// that all AST nodes need to contain. 
/// In fact, column and end_column are the counts of character, diff --git a/kclvm/loader/Cargo.toml b/kclvm/loader/Cargo.toml new file mode 100644 index 000000000..e4777da4a --- /dev/null +++ b/kclvm/loader/Cargo.toml @@ -0,0 +1,39 @@ +[package] +name = "kclvm-loader" +version = "0.7.4" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[build-dependencies] +cc = "1.0" + +[dependencies] +serde_json = "1.0" +serde = { version = "1", features = ["derive"] } +glob = "0.3.0" +walkdir = "2" +libc = "0.2.112" +indexmap = "1.0" +fslock = "0.2.1" +libloading = "0.7.3" +threadpool = "1.0" +chrono = "0.4.19" +tempfile = "3.5.0" +anyhow = "1.0" +once_cell = "1.10" +cc = "1.0" +compiler_base_session = {path = "../../compiler_base/session"} +compiler_base_macros = "0.0.1" + +kclvm-ast = {path = "../ast"} +kclvm-parser = {path = "../parser"} +kclvm-compiler = {path = "../compiler"} +kclvm-config = {path = "../config"} +kclvm-runtime = {path = "../runtime"} +kclvm-sema = {path = "../sema"} +kclvm-version = {path = "../version"} +kclvm-error = {path = "../error"} +kclvm-query = {path = "../query"} +kclvm-utils = {path = "../utils"} +kclvm-driver = {path = "../driver"} diff --git a/kclvm/loader/src/lib.rs b/kclvm/loader/src/lib.rs new file mode 100644 index 000000000..d67d93162 --- /dev/null +++ b/kclvm/loader/src/lib.rs @@ -0,0 +1,147 @@ +use std::path::PathBuf; + +use anyhow::Result; +use indexmap::{IndexMap, IndexSet}; +use kclvm_ast::ast::Program; +use kclvm_error::{diagnostic::Range, Diagnostic}; +use kclvm_parser::{load_program, KCLModuleCache, LoadProgramOptions, ParseSessionRef}; +use kclvm_sema::{ + advanced_resolver::AdvancedResolver, + core::{global_state::GlobalState, symbol::SymbolRef}, + namer::Namer, + resolver::{resolve_program_with_opts, scope::NodeKey}, + ty::{Type, TypeRef}, +}; + +type Errors = IndexSet; + +#[derive(Debug, Clone)] +pub struct LoadPackageOptions { + pub paths: Vec, + pub 
load_opts: Option, + pub resolve_ast: bool, + pub load_builtin: bool, +} + +impl Default for LoadPackageOptions { + fn default() -> Self { + Self { + paths: Default::default(), + load_opts: Default::default(), + resolve_ast: true, + load_builtin: true, + } + } +} + +#[derive(Debug, Clone, Default)] +pub struct Packages { + /// AST Program + pub program: Program, + /// All compiled files in the package + pub paths: Vec, + /// All Parse errors + pub parse_errors: Errors, + // Type errors + pub type_errors: Errors, + // Symbol-Type mapping + pub symbols: IndexMap, + // AST Node-Symbol mapping + pub node_symbol_map: IndexMap, +} + +#[derive(Debug, Clone)] +pub struct SymbolInfo { + pub ty: TypeRef, + pub name: String, + pub range: Range, + pub owner: Option, + pub def: Option, + pub attrs: Vec, + pub is_global: bool, +} + +/// load_package provides users with the ability to parse kcl program and sematic model +/// information including symbols, types, definitions, etc. +pub fn load_packages(opts: &LoadPackageOptions) -> Result { + let module_cache = KCLModuleCache::default(); + let sess = ParseSessionRef::default(); + let paths: Vec<&str> = opts.paths.iter().map(|s| s.as_str()).collect(); + let parse_result = load_program( + sess.clone(), + &paths, + opts.load_opts.clone(), + Some(module_cache), + )?; + let parse_errors = parse_result.errors; + let (program, type_errors, gs) = if opts.resolve_ast { + let mut program = parse_result.program; + let prog_scope = resolve_program_with_opts( + &mut program, + kclvm_sema::resolver::Options { + merge_program: false, + type_erasure: false, + ..Default::default() + }, + None, + ); + let node_ty_map = prog_scope.node_ty_map; + let gs = Namer::find_symbols(&program, GlobalState::default()); + let gs = AdvancedResolver::resolve_program(&program, gs, node_ty_map.clone()); + (program, prog_scope.handler.diagnostics.clone(), gs) + } else { + ( + parse_result.program, + IndexSet::default(), + GlobalState::default(), + ) + }; + let mut 
packages = Packages { + program, + paths: parse_result.paths, + parse_errors, + type_errors, + symbols: IndexMap::new(), + node_symbol_map: IndexMap::new(), + }; + if !opts.resolve_ast { + return Ok(packages); + } + let symbols = gs.get_symbols(); + for path in &packages.paths { + let path_str = path + .to_str() + .ok_or(anyhow::anyhow!("path {} to str failed", path.display()))?; + if let Some(files) = gs.get_sema_db().get_file_sema(path_str) { + for symbol_ref in files.get_symbols() { + if let Some(symbol) = symbols.get_symbol(*symbol_ref) { + let def_ty = match symbol.get_definition() { + Some(def) => symbols + .get_symbol(def) + .unwrap() + .get_sema_info() + .ty + .clone() + .unwrap_or(Type::any_ref()), + None => symbol.get_sema_info().ty.clone().unwrap_or(Type::any_ref()), + }; + let info = SymbolInfo { + ty: def_ty, + range: symbol.get_range(), + name: symbol.get_name(), + owner: symbol.get_owner(), + def: symbol.get_definition(), + attrs: symbol.get_all_attributes(symbols, None), + is_global: symbol.is_global(), + }; + packages.symbols.insert(*symbol_ref, info); + let node_symbol_map = symbols.get_node_symbol_map(); + for (k, s) in &node_symbol_map { + packages.node_symbol_map.insert(k.clone(), *s); + } + } + } + } + } + Ok(packages) +} diff --git a/kclvm/parser/src/lib.rs b/kclvm/parser/src/lib.rs index fc67081e0..bcc6f6c80 100644 --- a/kclvm/parser/src/lib.rs +++ b/kclvm/parser/src/lib.rs @@ -12,7 +12,7 @@ mod tests; extern crate kclvm_error; use crate::entry::get_compile_entries_from_paths; -pub use crate::session::ParseSession; +pub use crate::session::{ParseSession, ParseSessionRef}; use compiler_base_macros::bug; use compiler_base_session::Session; use compiler_base_span::span::new_byte_pos; @@ -155,7 +155,7 @@ pub fn parse_file_force_errors(filename: &str, code: Option) -> Result, + sess: ParseSessionRef, filename: &str, code: Option, ) -> Result { @@ -203,7 +203,7 @@ pub fn parse_file_with_session( /// Parse a KCL file to the AST module with the 
parse session and the global session #[inline] pub fn parse_file_with_global_session( - sess: Arc, + sess: ParseSessionRef, filename: &str, code: Option, ) -> Result { @@ -299,7 +299,7 @@ impl Default for LoadProgramOptions { /// /// ``` pub fn load_program( - sess: Arc, + sess: ParseSessionRef, paths: &[&str], opts: Option, module_cache: Option, @@ -309,7 +309,7 @@ pub fn load_program( pub type KCLModuleCache = Arc>>; struct Loader { - sess: Arc, + sess: ParseSessionRef, paths: Vec, opts: LoadProgramOptions, missing_pkgs: Vec, @@ -319,7 +319,7 @@ struct Loader { impl Loader { fn new( - sess: Arc, + sess: ParseSessionRef, paths: &[&str], opts: Option, module_cache: Option>>>, diff --git a/kclvm/parser/src/session/mod.rs b/kclvm/parser/src/session/mod.rs index 859604601..e15ea170d 100644 --- a/kclvm/parser/src/session/mod.rs +++ b/kclvm/parser/src/session/mod.rs @@ -6,6 +6,9 @@ use kclvm_ast::token::Token; use kclvm_error::{Diagnostic, Handler, ParseError}; use kclvm_span::{BytePos, Loc, Span}; use std::{cell::RefCell, sync::Arc}; + +pub type ParseSessionRef = Arc; + /// ParseSession represents the data associated with a parse session such as the /// source map and the error handler. #[derive(Default, Clone)] diff --git a/kclvm/runner/src/lib.rs b/kclvm/runner/src/lib.rs index 550928aac..3a74ec330 100644 --- a/kclvm/runner/src/lib.rs +++ b/kclvm/runner/src/lib.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, path::Path, sync::Arc}; +use std::{collections::HashMap, path::Path}; use anyhow::{anyhow, bail, Result}; use assembler::KclvmLibAssembler; @@ -7,7 +7,7 @@ use kclvm_ast::{ MAIN_PKG, }; use kclvm_driver::{canonicalize_input_files, expand_input_files}; -use kclvm_parser::{load_program, ParseSession}; +use kclvm_parser::{load_program, KCLModuleCache, ParseSessionRef}; use kclvm_query::apply_overrides; use kclvm_runtime::{Context, PlanOptions, ValueRef}; use kclvm_sema::resolver::{ @@ -70,13 +70,19 @@ pub mod tests; /// // Result is the kcl in json format. 
/// let result = exec_program(sess, &args).unwrap(); /// ``` -pub fn exec_program(sess: Arc, args: &ExecProgramArgs) -> Result { +pub fn exec_program(sess: ParseSessionRef, args: &ExecProgramArgs) -> Result { // parse args from json string let opts = args.get_load_program_options(); let kcl_paths = expand_files(args)?; let kcl_paths_str = kcl_paths.iter().map(|s| s.as_str()).collect::>(); - let mut program = - load_program(sess.clone(), kcl_paths_str.as_slice(), Some(opts), None)?.program; + let module_cache = KCLModuleCache::default(); + let mut program = load_program( + sess.clone(), + kcl_paths_str.as_slice(), + Some(opts), + Some(module_cache), + )? + .program; apply_overrides( &mut program, &args.overrides, @@ -160,7 +166,7 @@ pub fn exec_program(sess: Arc, args: &ExecProgramArgs) -> Result, + sess: ParseSessionRef, mut program: Program, args: &ExecProgramArgs, ) -> Result { @@ -229,7 +235,7 @@ pub fn execute_module(mut m: Module) -> Result { }; execute( - Arc::new(ParseSession::default()), + ParseSessionRef::default(), prog, &ExecProgramArgs::default(), ) @@ -237,7 +243,7 @@ pub fn execute_module(mut m: Module) -> Result { /// Build a KCL program and generate a library artifact. pub fn build_program>( - sess: Arc, + sess: ParseSessionRef, args: &ExecProgramArgs, output: Option

, ) -> Result { @@ -326,7 +332,7 @@ fn temp_file(dir: &str) -> Result { // [`emit_compile_diag_to_string`] will emit compile diagnostics to string, including parsing and resolving diagnostics. fn emit_compile_diag_to_string( - sess: Arc, + sess: ParseSessionRef, scope: &ProgramScope, include_warnings: bool, ) -> Result<()> { diff --git a/kclvm/sema/Cargo.toml b/kclvm/sema/Cargo.toml index 44a6260d8..c86390c83 100644 --- a/kclvm/sema/Cargo.toml +++ b/kclvm/sema/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" [dependencies] serde_json = "1.0" +serde = { version = "1", features = ["derive"] } generational-arena = "0.2.9" phf = { version = "0.9", features = ["macros"] } ahash = "0.7.2" diff --git a/kclvm/sema/src/core/global_state.rs b/kclvm/sema/src/core/global_state.rs index 31c68b2d6..b34069fff 100644 --- a/kclvm/sema/src/core/global_state.rs +++ b/kclvm/sema/src/core/global_state.rs @@ -5,14 +5,14 @@ use super::{ package::{ModuleInfo, PackageDB}, scope::{ScopeData, ScopeKind, ScopeRef}, semantic_information::{CachedLocation, CachedRange, FileSemanticInfo, SemanticDB}, - symbol::{KCLSymbolData, SymbolKind, SymbolRef}, + symbol::{SymbolData, SymbolKind, SymbolRef}, }; /// GlobalState is used to store semantic information of KCL source code #[derive(Default, Debug, Clone)] pub struct GlobalState { // store all allocated symbols - symbols: KCLSymbolData, + symbols: SymbolData, // store all allocated scopes scopes: ScopeData, // store package information for name mapping @@ -22,11 +22,11 @@ pub struct GlobalState { } impl GlobalState { - pub fn get_symbols(&self) -> &KCLSymbolData { + pub fn get_symbols(&self) -> &SymbolData { &self.symbols } - pub fn get_symbols_mut(&mut self) -> &mut KCLSymbolData { + pub fn get_symbols_mut(&mut self) -> &mut SymbolData { &mut self.symbols } @@ -64,7 +64,7 @@ impl GlobalState { /// the reference of scope which was allocated by [ScopeData] /// /// `module_info`: [Option<&ModuleInfo>] - /// the module import infomation + /// the module 
import information /// /// # Returns /// diff --git a/kclvm/sema/src/core/scope.rs b/kclvm/sema/src/core/scope.rs index 569395dd1..360a79237 100644 --- a/kclvm/sema/src/core/scope.rs +++ b/kclvm/sema/src/core/scope.rs @@ -5,7 +5,7 @@ use kclvm_error::Position; use crate::core::symbol::SymbolRef; -use super::{package::ModuleInfo, symbol::KCLSymbolData}; +use super::{package::ModuleInfo, symbol::SymbolData}; pub trait Scope { type SymbolData; @@ -67,13 +67,13 @@ pub struct ScopeData { } impl ScopeData { - pub fn get_scope(&self, scope: ScopeRef) -> Option<&dyn Scope> { + pub fn get_scope(&self, scope: ScopeRef) -> Option<&dyn Scope> { match scope.get_kind() { ScopeKind::Local => { - Some(self.locals.get(scope.get_id())? as &dyn Scope) + Some(self.locals.get(scope.get_id())? as &dyn Scope) } ScopeKind::Root => { - Some(self.roots.get(scope.get_id())? as &dyn Scope) + Some(self.roots.get(scope.get_id())? as &dyn Scope) } } } @@ -163,7 +163,7 @@ pub struct RootSymbolScope { } impl Scope for RootSymbolScope { - type SymbolData = KCLSymbolData; + type SymbolData = SymbolData; fn get_filename(&self) -> &str { &self.filename } @@ -316,7 +316,7 @@ pub enum LocalSymbolScopeKind { } impl Scope for LocalSymbolScope { - type SymbolData = KCLSymbolData; + type SymbolData = SymbolData; fn get_filename(&self) -> &str { &self.start.filename diff --git a/kclvm/sema/src/core/semantic_information.rs b/kclvm/sema/src/core/semantic_information.rs index 517e53c81..1dc3b03e7 100644 --- a/kclvm/sema/src/core/semantic_information.rs +++ b/kclvm/sema/src/core/semantic_information.rs @@ -12,7 +12,7 @@ pub struct SemanticDB { } impl SemanticDB { - pub fn get_file_sema(&self, file: &String) -> Option<&FileSemanticInfo> { + pub fn get_file_sema(&self, file: &str) -> Option<&FileSemanticInfo> { self.file_sema_map.get(file) } } diff --git a/kclvm/sema/src/core/symbol.rs b/kclvm/sema/src/core/symbol.rs index 1bb33a163..3709ac2c5 100644 --- a/kclvm/sema/src/core/symbol.rs +++ 
b/kclvm/sema/src/core/symbol.rs @@ -4,6 +4,7 @@ use generational_arena::Arena; use indexmap::{IndexMap, IndexSet}; use kclvm_error::{diagnostic::Range, Position}; +use serde::Serialize; use super::package::ModuleInfo; use crate::{ @@ -45,7 +46,7 @@ pub trait Symbol { fn full_dump(&self, data: &Self::SymbolData) -> Option; } -pub type KCLSymbol = dyn Symbol; +pub type KCLSymbol = dyn Symbol; #[derive(Debug, Clone, Default)] pub struct KCLSymbolSemanticInfo { pub ty: Option>, @@ -55,7 +56,7 @@ pub struct KCLSymbolSemanticInfo { pub(crate) const BUILTIN_STR_PACKAGE: &'static str = "@str"; #[derive(Default, Debug, Clone)] -pub struct KCLSymbolData { +pub struct SymbolData { pub(crate) values: Arena, pub(crate) packages: Arena, pub(crate) attributes: Arena, @@ -78,7 +79,7 @@ pub struct SymbolDB { pub(crate) symbol_ref_map: IndexMap, } -impl KCLSymbolData { +impl SymbolData { pub fn get_package_symbol(&self, id: SymbolRef) -> Option<&PackageSymbol> { if matches!(id.get_kind(), SymbolKind::Package) { self.packages.get(id.get_id()) @@ -95,7 +96,7 @@ impl KCLSymbolData { } } - pub fn get_attribue_symbol(&self, id: SymbolRef) -> Option<&AttributeSymbol> { + pub fn get_attribute_symbol(&self, id: SymbolRef) -> Option<&AttributeSymbol> { if matches!(id.get_kind(), SymbolKind::Attribute) { self.attributes.get(id.get_id()) } else { @@ -618,9 +619,14 @@ impl KCLSymbolData { self.exprs.get_mut(symbol_id).unwrap().id = Some(symbol_ref); Some(symbol_ref) } + + #[inline] + pub fn get_node_symbol_map(&self) -> IndexMap { + self.symbols_info.node_symbol_map.clone() + } } -#[allow(unused)] -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)] pub enum SymbolKind { Schema, Attribute, @@ -631,18 +637,42 @@ pub enum SymbolKind { Rule, Expression, } -#[allow(unused)] + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct SymbolRef { pub(crate) id: 
generational_arena::Index, pub(crate) kind: SymbolKind, } +impl Serialize for SymbolRef { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let (index, generation) = self.id.into_raw_parts(); + let data = SerializableSymbolRef { + i: index as u64, + g: generation, + kind: self.kind.clone(), + }; + data.serialize(serializer) + } +} + +#[derive(Debug, Clone, Serialize)] + +struct SerializableSymbolRef { + i: u64, + g: u64, + kind: SymbolKind, +} + impl SymbolRef { + #[inline] pub fn get_kind(&self) -> SymbolKind { self.kind } - + #[inline] pub fn get_id(&self) -> generational_arena::Index { self.id } @@ -664,7 +694,7 @@ pub struct SchemaSymbol { } impl Symbol for SchemaSymbol { - type SymbolData = KCLSymbolData; + type SymbolData = SymbolData; type SemanticInfo = KCLSymbolSemanticInfo; fn is_global(&self) -> bool { @@ -861,7 +891,7 @@ pub struct ValueSymbol { } impl Symbol for ValueSymbol { - type SymbolData = KCLSymbolData; + type SymbolData = SymbolData; type SemanticInfo = KCLSymbolSemanticInfo; fn is_global(&self) -> bool { @@ -989,7 +1019,7 @@ pub struct AttributeSymbol { } impl Symbol for AttributeSymbol { - type SymbolData = KCLSymbolData; + type SymbolData = SymbolData; type SemanticInfo = KCLSymbolSemanticInfo; fn is_global(&self) -> bool { @@ -1112,7 +1142,7 @@ pub struct PackageSymbol { } impl Symbol for PackageSymbol { - type SymbolData = KCLSymbolData; + type SymbolData = SymbolData; type SemanticInfo = KCLSymbolSemanticInfo; fn is_global(&self) -> bool { @@ -1232,7 +1262,7 @@ pub struct TypeAliasSymbol { } impl Symbol for TypeAliasSymbol { - type SymbolData = KCLSymbolData; + type SymbolData = SymbolData; type SemanticInfo = KCLSymbolSemanticInfo; fn is_global(&self) -> bool { @@ -1356,7 +1386,7 @@ pub struct RuleSymbol { } impl Symbol for RuleSymbol { - type SymbolData = KCLSymbolData; + type SymbolData = SymbolData; type SemanticInfo = KCLSymbolSemanticInfo; fn is_global(&self) -> bool { @@ -1483,7 +1513,7 @@ pub 
struct UnresolvedSymbol { } impl Symbol for UnresolvedSymbol { - type SymbolData = KCLSymbolData; + type SymbolData = SymbolData; type SemanticInfo = KCLSymbolSemanticInfo; fn is_global(&self) -> bool { @@ -1618,7 +1648,7 @@ pub struct ExpressionSymbol { } impl Symbol for ExpressionSymbol { - type SymbolData = KCLSymbolData; + type SymbolData = SymbolData; type SemanticInfo = KCLSymbolSemanticInfo; fn is_global(&self) -> bool { diff --git a/kclvm/sema/src/namer/mod.rs b/kclvm/sema/src/namer/mod.rs index e16f6c21e..965d02670 100644 --- a/kclvm/sema/src/namer/mod.rs +++ b/kclvm/sema/src/namer/mod.rs @@ -54,6 +54,8 @@ use kclvm_ast::walker::MutSelfTypedResultWalker; use kclvm_error::Position; mod node; +pub const BUILTIN_SYMBOL_PKG_PATH: &str = "@builtin"; + pub struct Namer<'ctx> { gs: GlobalState, ctx: NamerContext<'ctx>, @@ -97,8 +99,10 @@ impl<'ctx> Namer<'ctx> { // serial namer pass pub fn find_symbols(program: &'ctx Program, gs: GlobalState) -> GlobalState { let mut namer = Self::new(program, gs); - namer.ctx.current_package_info = - Some(PackageInfo::new("@builtin".to_string(), "".to_string())); + namer.ctx.current_package_info = Some(PackageInfo::new( + BUILTIN_SYMBOL_PKG_PATH.to_string(), + "".to_string(), + )); namer.init_builtin_symbols(); for (name, modules) in namer.ctx.program.pkgs.iter() { diff --git a/kclvm/sema/src/resolver/scope.rs b/kclvm/sema/src/resolver/scope.rs index 0a6af6cf1..dcb7a3623 100644 --- a/kclvm/sema/src/resolver/scope.rs +++ b/kclvm/sema/src/resolver/scope.rs @@ -23,6 +23,7 @@ use kclvm_ast::ast::AstIndex; use kclvm_ast::pos::ContainsPos; use kclvm_ast::pos::GetPos; use kclvm_error::Position; +use serde::Serialize; /// The object stored in the scope. 
#[derive(PartialEq, Clone, Debug)] @@ -487,7 +488,7 @@ impl<'ctx> Resolver<'ctx> { } } -#[derive(Clone, Debug, Hash, PartialEq, Eq)] +#[derive(Clone, Debug, Hash, PartialEq, Eq, Serialize)] pub struct NodeKey { pub pkgpath: String, pub id: AstIndex, diff --git a/kclvm/spec/gpyrpc/gpyrpc.proto b/kclvm/spec/gpyrpc/gpyrpc.proto index 902110824..bf50f2b98 100644 --- a/kclvm/spec/gpyrpc/gpyrpc.proto +++ b/kclvm/spec/gpyrpc/gpyrpc.proto @@ -31,8 +31,8 @@ message CmdOverrideSpec { // ---------------------------------------------------------------------------- message Error { - string level = 1; - string code = 2; + string level = 1; + string code = 2; repeated Message messages = 3; } @@ -59,6 +59,7 @@ service KclvmService { rpc ParseFile(ParseFile_Args) returns(ParseFile_Result); rpc ParseProgram(ParseProgram_Args) returns(ParseProgram_Result); + rpc LoadPackage(LoadPackage_Args) returns(LoadPackage_Result); rpc FormatCode(FormatCode_Args) returns(FormatCode_Result); rpc FormatPath(FormatPath_Args) returns(FormatPath_Result); @@ -112,11 +113,41 @@ message ParseProgram_Args { } message ParseProgram_Result { - string ast_json = 1; // JSON string value - repeated string paths = 2; // Return the files in the order they should be compiled + string ast_json = 1; // JSON string value + repeated string paths = 2; // Returns the files in the order they should be compiled repeated Error errors = 3; // Parse errors } +message LoadPackage_Args { + ParseProgram_Args parse_args = 1; + bool resolve_ast = 2; + bool load_builtin = 3; +} + +message LoadPackage_Result { + string program = 1; // JSON string value + repeated string paths = 2; // Returns the files in the order they should be compiled + map node_symbol_map = 3; // Map key is the AST index UUID string. + map symbols = 4; // Map key is the SymbolIndex json string. 
+ repeated Error parse_errors = 5; // Parse errors + repeated Error type_errors = 6; // Type errors +} + +message Symbol { + string ty = 1; + string name = 2; + SymbolIndex owner = 3; + SymbolIndex def = 4; + repeated SymbolIndex attrs = 5; + bool is_global = 6; +} + +message SymbolIndex { + uint64 i = 1; + uint64 g = 2; + string kind = 3; +} + message ExecProgram_Args { string work_dir = 1; @@ -277,7 +308,7 @@ message LoadSettingsFiles_Result { } message CliConfig { - repeated string files = 1; + repeated string files = 1; string output = 2; repeated string overrides = 3; repeated string path_selector = 4; @@ -300,14 +331,14 @@ message KeyValuePair { // --------------------------------------------------------------------------------- message Rename_Args { - string package_root = 1; // the file path to the package root - string symbol_path = 2; // the path to the target symbol to be renamed. The symbol path should conform to format: `:` When the pkgpath is '__main__', `:` can be omitted. - repeated string file_paths = 3; // the paths to the source code files - string new_name = 4; // the new name of the symbol + string package_root = 1; // the file path to the package root + string symbol_path = 2; // the path to the target symbol to be renamed. The symbol path should conform to format: `:` When the pkgpath is '__main__', `:` can be omitted.
+ repeated string file_paths = 3; // the paths to the source code files + string new_name = 4; // the new name of the symbol } message Rename_Result { - repeated string changed_files = 1; // the file paths got changed + repeated string changed_files = 1; // the file paths got changed } // --------------------------------------------------------------------------------- @@ -316,14 +347,14 @@ message Rename_Result { // --------------------------------------------------------------------------------- message RenameCode_Args { - string package_root = 1; // the file path to the package root - string symbol_path = 2; // the path to the target symbol to be renamed. The symbol path should conform to format: `:` When the pkgpath is '__main__', `:` can be omitted. - map source_codes = 3; // the source code. a : map - string new_name = 4; // the new name of the symbol + string package_root = 1; // the file path to the package root + string symbol_path = 2; // the path to the target symbol to be renamed. The symbol path should conform to format: `:` When the pkgpath is '__main__', `:` can be omitted. + map source_codes = 3; // the source code. a : map + string new_name = 4; // the new name of the symbol } message RenameCode_Result { - map changed_codes = 1; // the changed code. a : map + map changed_codes = 1; // the changed code. 
a : map } // --------------------------------------------------------------------------------- diff --git a/kclvm/tools/src/LSP/src/rename.rs b/kclvm/tools/src/LSP/src/rename.rs index ec8ff847f..78ea05218 100644 --- a/kclvm/tools/src/LSP/src/rename.rs +++ b/kclvm/tools/src/LSP/src/rename.rs @@ -7,7 +7,7 @@ use anyhow::{anyhow, Result}; use chumsky::chain::Chain; use kclvm_ast::ast::{self, Program}; use kclvm_error::diagnostic; -use kclvm_parser::{load_program, LoadProgramOptions, ParseSession}; +use kclvm_parser::{load_program, LoadProgramOptions, ParseSessionRef}; use kclvm_query::selector::parse_symbol_selector_spec; use kclvm_sema::{ advanced_resolver::AdvancedResolver, core::global_state::GlobalState, namer::Namer, @@ -185,7 +185,7 @@ where }; let files: Vec<&str> = file_paths.iter().map(|s| s.as_str()).collect(); - let sess: Arc = Arc::new(ParseSession::default()); + let sess: ParseSessionRef = ParseSessionRef::default(); let mut program = load_program(sess.clone(), &files, Some(opt), None)?.program; let prog_scope = resolve_program_with_opts( diff --git a/kclvm/tools/src/LSP/src/util.rs b/kclvm/tools/src/LSP/src/util.rs index 990d95f7e..f39879c02 100644 --- a/kclvm/tools/src/LSP/src/util.rs +++ b/kclvm/tools/src/LSP/src/util.rs @@ -938,7 +938,7 @@ fn line_to_words(text: String, prune: bool) -> HashMap> { } let is_id_start = rustc_lexer::is_id_start(*ch); let is_id_continue = rustc_lexer::is_id_continue(*ch); - // If the character is valid identfier start and the previous character is not valid identifier continue, mark the start position. + // If the character is valid identifier start and the previous character is not valid identifier continue, mark the start position. if is_id_start && !prev_word { start_pos = i; }