From e9b1d6c12eda79f7744c32542aaebe8f97a82e3e Mon Sep 17 00:00:00 2001 From: Kornel Horvath Date: Wed, 9 Oct 2024 17:56:52 +0200 Subject: [PATCH] Add option for cache management Add a configuration setting to change the storage mode for large cache tables (mainly used for code navigation). Available modes: - `memory` (default, original): use ETS tables. - `compressed memory`: use ETS tables with the `compressed` flag turned on. - `file`: use DETS tables saved into temporary files. Why is this configuration option needed? In a shared environment, where many instances of `vscode_erlang` run at the same time on large code bases, this extension can consume a noticeable amount of memory and eventually exhaust all of it. Notes for `compressed memory`: Some simple tests, done in the scenario explained above, showed that memory consumption dropped by 50% when `compressed memory` is configured. Notes for `file`: As multiple extension instances can run at the same time on the same host, even in the same workspace, we can use neither static filenames nor workspace-specific filenames to create DETS tables. The only option is to use unique filenames, like temporary files. Of course, this has a drawback: temporary files must be deleted on exit, otherwise they will consume a lot of disk space after a while. If the extension is shut down normally, this is not a problem: the extension deletes its own cache. However, if an extension instance is terminated brutally, its cache files are left on the disk. Hence, an automatic mechanism is hooked to the points where configuration is received from Visual Studio Code, and to normal shutdown, to look for cache directories created by extension instances that are no longer running, and delete them. Reusing old cache files is theoretically possible: if only one extension instance runs in a workspace and VSCode is closed and reopened, then the new extension instance could continue where the previous one stopped. 
But, as multiple extension instances can run in the same workspace, it can easily go wrong. Therefore it is simpler to use instance specific DETS files, just like instance specific ETS tables. --- README.md | 1 + .../src/gen_lsp_config_server.erl | 10 +- apps/erlangbridge/src/gen_lsp_doc_server.erl | 217 ++++++++++++++++-- apps/erlangbridge/src/lsp_handlers.erl | 8 + apps/erlangbridge/src/vscode_lsp_app.erl | 1 + lib/ErlangConfigurationProvider.ts | 1 + lib/GenericShell.ts | 2 + lib/erlangSettings.ts | 1 + lib/lsp/ErlangShellLSP.ts | 16 ++ lib/lsp/lspclientextension.ts | 3 +- package.json | 17 +- 11 files changed, 249 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index eb0f610..372d778 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,7 @@ Support for Erlang tools, including rebar3, EUnit and Dialyzer - `erlang.includePaths` - Include paths are read from rebar.config, and also standard set of paths is used. This setting is for special cases when the default behaviour is not enough - `erlang.linting` - Enable/disable dynamic validation of opened Erlang source files - `erlang.codeLensEnabled` - Enable/Disable CodeLens +- `erlang.cacheManagement` - Specify where and how to store large cache tables - `erlang.inlayHintsEnabled` - Enable/Disable InlayHints - `erlang.verbose` - Activate technical traces for use in the extension development diff --git a/apps/erlangbridge/src/gen_lsp_config_server.erl b/apps/erlangbridge/src/gen_lsp_config_server.erl index c9a53af..f0e09cd 100644 --- a/apps/erlangbridge/src/gen_lsp_config_server.erl +++ b/apps/erlangbridge/src/gen_lsp_config_server.erl @@ -4,7 +4,7 @@ %% API -export([start_link/0]). -export([standard_modules/0, bifs/0]). 
--export([update_config/2, root/0, tmpdir/0, codeLensEnabled/0, includePaths/0, linting/0, +-export([update_config/2, root/0, tmpdir/0, username/0, codeLensEnabled/0, includePaths/0, linting/0, verbose/0, autosave/0, proxy/0, search_files_exclude/0, search_exclude/0, formatting_line_length/0, inlayHintsEnabled/0, verbose_is_include/1]). @@ -78,6 +78,9 @@ proxy() -> tmpdir() -> get_config_entry(computed, tmpdir, ""). +username() -> + get_config_entry(computed, username, ""). + %%-------------------------------------------------------------------- %% @doc Exclude filters for search in workspace. %% @@ -125,6 +128,7 @@ init(_Args) -> BIFs = sets:to_list(lists:foldl(fun ({Name, _Arity}, Acc) -> sets:add_element(atom_to_list(Name), Acc) end, sets:new(), erlang:module_info(exports))), + process_flag(trap_exit, true), % to terminate/2 be called at exit {ok, #state{config = #{}, standard_modules = StandardModules, bifs = BIFs}}. handle_call({standard_modules}, _From, State) -> @@ -144,6 +148,10 @@ handle_cast(stop, State) -> handle_info(_Info, State) -> {noreply, State}. +terminate(_Reason, #state{config = #{computed := #{tmpdir := TmpDir, username := UserName}}}) -> + %% Delete old caches left there by brutally killed extension instances + gen_lsp_doc_server:delete_unused_caches(TmpDir, UserName), + ok; terminate(_Reason, _State) -> ok. diff --git a/apps/erlangbridge/src/gen_lsp_doc_server.erl b/apps/erlangbridge/src/gen_lsp_doc_server.erl index c26a289..2aa7c28 100644 --- a/apps/erlangbridge/src/gen_lsp_doc_server.erl +++ b/apps/erlangbridge/src/gen_lsp_doc_server.erl @@ -9,12 +9,17 @@ -export([get_syntax_tree/1, get_dodged_syntax_tree/1, get_references/1, get_inlayhints/1]). -export([root_available/0, config_change/0, project_modules/0, get_module_file/1, get_module_files/1, get_build_dir/0, find_source_file/1]). +%% Cache management +-export([delete_unused_caches/2, + persist_cache_mgmt_opts/0]). 
+ %% gen_server callbacks -export([init/1,handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). -include("./lsp_log.hrl"). -define(SERVER, ?MODULE). +-define(XETS, (persistent_term:get(large_cache_module, ets))). -define(IIF(Cond, Then, Else), if Cond -> Then; true -> Else end). -record(state, @@ -24,19 +29,27 @@ }). document_opened(File, Contents) -> - ets:insert(document_contents, {File, Contents}). + ?XETS:insert(document_contents, {File, Contents}). document_changed(File, Contents) -> - ets:insert(document_contents, {File, Contents}). + ?XETS:insert(document_contents, {File, Contents}). document_closed(File) -> - ets:delete(document_contents, File). + ?XETS:delete(document_contents, File). opened_documents() -> - [File || {File, _Contents} <- ets:tab2list(document_contents)]. + do_opened_documents(?XETS). + +do_opened_documents(ets) -> + [File || {File, _Contents} <- ets:tab2list(document_contents)]; +do_opened_documents(dets) -> + dets:traverse(document_contents, + fun({File, _Contents}) -> {continue, File}; + (_) -> continue + end). get_document_contents(File) -> - case ets:lookup(document_contents, File) of + case ?XETS:lookup(document_contents, File) of [{File, Contents}] -> Contents; _ -> undefined end. @@ -87,10 +100,10 @@ get_references(Reference) -> ets:match(references, {'$1', Reference, '$2', '$3', '$4'}). get_inlayhints(File) -> - case ets:lookup(document_inlayhints, File) of + case ?XETS:lookup(document_inlayhints, File) of [{File, Inlays}] -> Inlays; _ -> [] - end. + end. root_available() -> gen_server:cast(?SERVER, root_available). @@ -138,14 +151,16 @@ as_string(Text) -> Text. 
start_link() -> - safe_new_table(document_contents, set), - safe_new_table(syntax_tree, set), - safe_new_table(dodged_syntax_tree, set), - safe_new_table(references, bag), - safe_new_table(document_inlayhints, set), + ExtraCreateOpts = persistent_term:get(large_cache_create_opts, []), + safe_new_table(document_contents, ?XETS, set, ExtraCreateOpts), + safe_new_table(syntax_tree, ?XETS, set, ExtraCreateOpts), + safe_new_table(dodged_syntax_tree, ?XETS, set, ExtraCreateOpts), + safe_new_table(references, ets, bag, []), + safe_new_table(document_inlayhints, ?XETS, set, ExtraCreateOpts), gen_server:start_link({local, ?SERVER}, ?MODULE, [],[]). init(_Args) -> + process_flag(trap_exit, true), % to terminate/2 be called at exit {ok, #state{root_available = false, project_modules = #{}, files_to_parse = []}}. handle_call(project_modules, _From, State) -> @@ -181,7 +196,7 @@ handle_cast({project_file_added, File}, State) -> {noreply, parse_next_file_in_background(UpdatedState)}; handle_cast({project_file_changed, File}, State) -> - case ets:lookup(document_contents, File) of + case ?XETS:lookup(document_contents, File) of [_FileContents] -> {noreply, State}; _ -> @@ -202,6 +217,10 @@ handle_info(_Info, State) -> {noreply, State}. terminate(_Reason, _State) -> + delete_cache_file(document_contents), + delete_cache_file(syntax_tree), + delete_cache_file(dodged_syntax_tree), + delete_cache_file(document_inlayhints), ok. 
code_change(_OldVersion, State, _Extra) -> @@ -399,11 +418,11 @@ scan_project_files(State = #state{project_modules = OldProjectModules}) -> delete_project_files([], State) -> State; delete_project_files([File | Files], State) -> - ets:delete(document_contents, File), - ets:delete(syntax_tree, File), - ets:delete(dodged_syntax_tree, File), + ?XETS:delete(document_contents, File), + ?XETS:delete(syntax_tree, File), + ?XETS:delete(dodged_syntax_tree, File), ets:delete(references, File), - ets:delete(document_inlayhints, File), + ?XETS:delete(document_inlayhints, File), Module = filename:rootname(filename:basename(File)), UpdatedFiles = lists:delete(File, maps:get(Module, State#state.project_modules, [])), UpdatedProjectModules = case UpdatedFiles of @@ -413,10 +432,48 @@ delete_project_files([File | Files], State) -> NewState = State#state{project_modules = UpdatedProjectModules}, delete_project_files(Files, NewState). -safe_new_table(Name, Type) -> +%%-------------------------------------------------------------------- +%% @private +%% @doc Create a new ETS or DETS table owned by the supervisor. +%% +%% This function is called by {@link start_link/0} that is called from the +%% supervisor process, therefore created ETS and DETS tables are owned by the +%% supervisor instead of the worker `gen_server' process. And so, if the worker +%% process is crashed and restarted then data is still available in the original +%% table. 
+%% @end +%%-------------------------------------------------------------------- +safe_new_table(Name, ets, Type, ExtraCreateOpts) -> case ets:whereis(Name) of - undefined -> ets:new(Name, [Type, named_table, public]); - _ -> Name + undefined -> + ets:new(Name, [Type, named_table, public | ExtraCreateOpts]), + Name; + _ -> + %% Supervisor still holds the ETS table + Name + end; +safe_new_table(Name, dets, Type, ExtraCreateOpts) -> + case dets:info(Name, filename) of + undefined -> + CacheDir = persistent_term:get(large_cache_dets_dir), + FileName = filename:join(CacheDir, atom_to_list(Name)++".dets"), + OpenOpts = [{type, Type}, {file, FileName} | ExtraCreateOpts], + filelib:ensure_dir(FileName), + dets:open_file(Name, OpenOpts), + % dets:delete_all_objects(Name), + Name; + _ -> + %% Supervisor still holds the DETS table open + Name + end. + +delete_cache_file(Name) -> + case dets:info(Name, filename) of + FileName -> + dets:close(Name), + file:delete(FileName); + _ -> + ok end. parse_and_store(File, ContentsFile) -> @@ -425,21 +482,21 @@ parse_and_store(File, ContentsFile) -> undefined -> ok; _ -> - ets:insert(syntax_tree, {File, SyntaxTree}), + ?XETS:insert(syntax_tree, {File, SyntaxTree}), ets:delete(references, File), - ets:delete(document_inlayhints, File), + ?XETS:delete(document_inlayhints, File), lsp_navigation:fold_references(fun (Reference, Line, Column, End, _) -> ets:insert(references, {File, Reference, Line, Column, End}) end, undefined, File, SyntaxTree), - ets:insert(document_inlayhints, {File, lsp_navigation:full_inlayhints_info(File,SyntaxTree, DodgedSyntaxTree)}) + ?XETS:insert(document_inlayhints, {File, lsp_navigation:full_inlayhints_info(File,SyntaxTree, DodgedSyntaxTree)}) end, case DodgedSyntaxTree of undefined -> ok; - _ -> ets:insert(dodged_syntax_tree, {File, DodgedSyntaxTree}) + _ -> ?XETS:insert(dodged_syntax_tree, {File, DodgedSyntaxTree}) end. 
get_tree(TreeType, File) -> - case ets:lookup(TreeType, File) of + case ?XETS:lookup(TreeType, File) of [{File, SyntaxTree}] -> SyntaxTree; _ -> @@ -473,3 +530,113 @@ find_module_files(Module, State) -> -spec find_module_files_under_dir(module(), file:filename()) -> [file:filename()]. find_module_files_under_dir(Module, Dir) -> filelib:wildcard(Dir ++ "/**/" ++ atom_to_list(Module) ++ ".erl"). + +%%%------------------------------------------------------------------- +%%% Cache management +%%%------------------------------------------------------------------- + +persist_cache_mgmt_opts() -> + do_persist_cache_mgmt_opts(init:get_argument(vscode_cache_mgmt)). + +do_persist_cache_mgmt_opts({ok, [["memory"]]}) -> + persistent_term:put(large_cache_module, ets), + persistent_term:put(large_cache_create_opts, []); +do_persist_cache_mgmt_opts({ok, [["memory", "compressed"]]}) -> + persistent_term:put(large_cache_module, ets), + persistent_term:put(large_cache_create_opts, [compressed]); +do_persist_cache_mgmt_opts({ok, [["file", UserName, TmpDir]]}) -> + persistent_term:put(large_cache_module, dets), + persistent_term:put(large_cache_create_opts, []), + persistent_term:put(large_cache_dets_dir, cache_dir(TmpDir, UserName, os:getpid())); +do_persist_cache_mgmt_opts(_) -> + do_persist_cache_mgmt_opts({ok, [["memory"]]}). + +cache_dir(TmpDir, UserName, OsPid) -> + filename:join(cache_basedir(TmpDir, UserName), OsPid). + +cache_basedir(TmpDir, UserName) -> + filename:join([TmpDir, "vscode_erlang_"++UserName, "cache"]). + +%%-------------------------------------------------------------------- +%% @doc Delete all cache directories that are not in use any more. +%% It practically means, delete caches of those extension instances that were +%% terminated without executing proper cleanup (e.g. killed by OS). +%% @end +%%-------------------------------------------------------------------- +-spec delete_unused_caches(TmpDir :: string(), UserName :: string()) -> ok. 
+delete_unused_caches(TmpDir, UserName) when is_binary(TmpDir) -> + delete_unused_caches(binary_to_list(TmpDir), UserName); +delete_unused_caches(TmpDir, UserName) when is_binary(UserName) -> + delete_unused_caches(TmpDir, binary_to_list(UserName)); +delete_unused_caches(TmpDir = [_|_], UserName = [_|_]) -> + try + CacheOsPids = get_cache_os_pids(TmpDir, UserName), + %% NOTE: a non-Erlang process may exists with the same PID as an old + %% extension instance. + DeadCacheOsPids = filter_non_existent_os_pids(CacheOsPids), + do_delete_unused_caches(TmpDir, UserName, DeadCacheOsPids) + catch Class:Reason:StackTrace -> + error_logger:error_report([{Class, Reason}, {stacktrace, StackTrace}]) + end; +delete_unused_caches(_TmpDir, _UserName) -> + ok. + +do_delete_unused_caches(TmpDir, UserName, OsPidsToRemove) -> + lists:foreach( + fun(OsPid) -> + file:del_dir_r(cache_dir(TmpDir, UserName, OsPid)) + end, + OsPidsToRemove). + +%% Return OS PIDs of Erlang VMs, executing extension instances by the current +%% user, that did create cache directories, regardless if the processes are +%% alive or not. +-spec get_cache_os_pids(TmpDir :: string(), UserName :: string()) + -> OsPids :: [string()]. +get_cache_os_pids(TmpDir, UserName) -> + CacheBaseDir = cache_basedir(TmpDir, UserName), + case file:list_dir(CacheBaseDir) of + {ok, Filenames} -> + lists:filter( + fun(FN) -> filelib:is_dir(filename:join(CacheBaseDir, FN)) end, + Filenames); + _ -> + [] + end. + +%% Return OS PIDs that do not belong to any live OS process. +-spec filter_non_existent_os_pids(OsPids) -> OsPids + when OsPids :: [string()]. +filter_non_existent_os_pids([]) -> + []; +filter_non_existent_os_pids(OsPids) -> + case os:type() of + {win32,_} -> filter_non_existent_win32_pids(OsPids); + {unix, _} -> filter_non_existent_unix_pids(OsPids) + end. + +filter_non_existent_unix_pids(OsPids) -> + lists:filter( + fun(OsPid) -> not filelib:is_dir("/proc/" ++ OsPid) end, + OsPids). 
+ +filter_non_existent_win32_pids(OsPids) -> + %% In Windows there is no similar thing like /proc/ filesystem entries + %% in Unix systems, but it's still possible to list all running processes. + WindowsPids = get_win32_pids(), + lists:filter( + fun(OsPid) -> not maps:is_key(OsPid, WindowsPids) end, + OsPids). + +%% Return the PIDs of alive Windows process. +-spec get_win32_pids() -> #{OsPid :: string() => 1}. +get_win32_pids() -> + lists:foldl( + fun(Line, Acc) -> + case string:split(Line, "\",\"", all) of + [_, OsPid | _] -> Acc#{OsPid => 1}; + _ -> Acc + end + end, + #{}, + string:lexemes(os:cmd("tasklist /FO CSV /NH"), ["\r\n", $\r, $\n])). diff --git a/apps/erlangbridge/src/lsp_handlers.erl b/apps/erlangbridge/src/lsp_handlers.erl index d2f103f..bc3325e 100644 --- a/apps/erlangbridge/src/lsp_handlers.erl +++ b/apps/erlangbridge/src/lsp_handlers.erl @@ -87,6 +87,14 @@ configuration(Socket, [ErlangSection, FilesSection, ComputedSection, HttpSection [ErlangSection, FilesSection, ComputedSection, HttpSection, SearchSection]), + %% Delete old caches left there by brutally killed extension instances + case ComputedSection of + #{tmpdir := TmpDir, username := UserName} -> + gen_lsp_doc_server:delete_unused_caches(TmpDir, UserName); + _ -> + ok + end, + %% Scan workspace for source files gen_lsp_doc_server:config_change(), diff --git a/apps/erlangbridge/src/vscode_lsp_app.erl b/apps/erlangbridge/src/vscode_lsp_app.erl index 5422369..626e7b5 100644 --- a/apps/erlangbridge/src/vscode_lsp_app.erl +++ b/apps/erlangbridge/src/vscode_lsp_app.erl @@ -52,6 +52,7 @@ start(_Type, _Args) -> application:start(inets), %uncomment to monitor erlang processes %spawn(fun() -> observer:start() end), + gen_lsp_doc_server:persist_cache_mgmt_opts(), Port = get_port(), case vscode_lsp_app_sup:start_link(Port) of {ok, Pid} -> {ok, Pid}; diff --git a/lib/ErlangConfigurationProvider.ts b/lib/ErlangConfigurationProvider.ts index 6792ecd..b4e6b74 100644 --- a/lib/ErlangConfigurationProvider.ts 
+++ b/lib/ErlangConfigurationProvider.ts @@ -31,6 +31,7 @@ export function configurationChanged(): void { erlangDistributedNode: erlangConf.get("erlangDistributedNode", false), rebarPath: resolveVariables(erlangConf.get("rebarPath", null)), codeLensEnabled: erlangConf.get('codeLensEnabled', false), + cacheManagement: erlangConf.get("cacheManagement", "memory"), inlayHintsEnabled: erlangConf.get('inlayHintsEnabled', false), debuggerRunMode: erlangConf.get("debuggerRunMode", "Server"), includePaths: erlangConf.get("includePaths", []), diff --git a/lib/GenericShell.ts b/lib/GenericShell.ts index 628bd0a..35f0474 100644 --- a/lib/GenericShell.ts +++ b/lib/GenericShell.ts @@ -39,6 +39,7 @@ export class GenericShell extends EventEmitter { public erlangPath: string = null; public erlangArgs : string[] = []; public erlangDistributedNode: boolean = false; + public cacheManagement: string = "memory"; //provide IGenericShellConfiguration, in order to avoid dependencies on vscode module (it doesn't works with debugger-adpater) constructor(logOutput?: ILogOutput, shellOutput?: IShellOutput, erlangConfiguration?: ErlangSettings) { @@ -67,6 +68,7 @@ export class GenericShell extends EventEmitter { } this.erlangArgs = erlangConfiguration.erlangArgs; this.erlangDistributedNode = erlangConfiguration.erlangDistributedNode; + this.cacheManagement = erlangConfiguration.cacheManagement; } } diff --git a/lib/erlangSettings.ts b/lib/erlangSettings.ts index bd029c3..04c3a3b 100644 --- a/lib/erlangSettings.ts +++ b/lib/erlangSettings.ts @@ -7,6 +7,7 @@ export interface ErlangSettings { includePaths : string[]; linting: boolean; codeLensEnabled : boolean; + cacheManagement: string; inlayHintsEnabled: boolean; verbose: boolean; debuggerRunMode : string; diff --git a/lib/lsp/ErlangShellLSP.ts b/lib/lsp/ErlangShellLSP.ts index 4428f8c..f706f3e 100644 --- a/lib/lsp/ErlangShellLSP.ts +++ b/lib/lsp/ErlangShellLSP.ts @@ -1,3 +1,4 @@ +import * as os from 'os'; import { GenericShell, ILogOutput } 
from '../GenericShell'; import { getElangConfigConfiguration } from '../ErlangConfigurationProvider'; @@ -13,6 +14,21 @@ export class ErlangShellLSP extends GenericShell { "-sname", "vscode_" + listen_port.toString(), "-setcookie", "vscode_" + listen_port.toString()); } + // Set management mode for large caches + switch (this.cacheManagement) { + case 'file': + debugStartArgs.push("-vscode_cache_mgmt", "file", os.userInfo().username, os.tmpdir()); + break; + + case 'compressed memory': + debugStartArgs.push("-vscode_cache_mgmt", "memory", "compressed"); + break; + + case 'memory': + default: + debugStartArgs.push("-vscode_cache_mgmt", "memory"); + break; + } // Use special command line arguments if (this.erlangArgs) { debugStartArgs = debugStartArgs.concat(this.erlangArgs) diff --git a/lib/lsp/lspclientextension.ts b/lib/lsp/lspclientextension.ts index 5cb9c5a..22e5642 100644 --- a/lib/lsp/lspclientextension.ts +++ b/lib/lsp/lspclientextension.ts @@ -75,7 +75,8 @@ namespace Configuration { if (item.section === "") { result.push({ autosave: Workspace.getConfiguration("files").get("autoSave", "afterDelay") === "afterDelay", - tmpdir: os.tmpdir() + tmpdir: os.tmpdir(), + username: os.userInfo().username }); } else if (item.section === "erlang") { result.push(resolveErlangSettings(Workspace.getConfiguration(item.section))) diff --git a/package.json b/package.json index 0553b43..85f0064 100644 --- a/package.json +++ b/package.json @@ -181,7 +181,7 @@ }, "erlang.erlangDistributedNode": { "type": "boolean", - "description": "Start the Erlang backend in a distributed Erlang node. Could be usefull for extension development. Note, it starts EPMD if not running yet.", + "description": "Start the Erlang backend in a distributed Erlang node. Could be useful for extension development. 
Note, it starts EPMD if not running yet.", "default": false }, "erlang.rebarPath": { @@ -216,6 +216,21 @@ "default": true, "description": "Enable/disable dynamic validation of opened Erlang source files." }, + "erlang.cacheManagement": { + "type": "string", + "default": "memory", + "description": "Specify where and how to store large cache tables.", + "enum": [ + "memory", + "compressed memory", + "file" + ], + "enumDescriptions": [ + "Store in memory", + "Store in memory and apply lightweight compression to consume less memory (approx. 50%)", + "Store in temporary files" + ] + }, "erlang.codeLensEnabled": { "type": "boolean", "default": false,