Skip to content

Commit

Permalink
Add option for cache management (#319)
Browse files Browse the repository at this point in the history
* Fix syntax highlight

Extract an explicit function expression to a variable to fix broken
syntax highlighting. Without this change, anything placed after the
function `index_of` (for example an `-spec` or `-define` line) is not
syntax highlighted.

* Fix a log message

* Use default shutdown time for gen_lsp_server

Erlang/OTP `supervisor` documentation says: [1]

> It is also allowed to set it to `infinity`, if the child process is a
> worker.
>
> Warning
>
> Be careful when setting the shutdown time to `infinity` when the child
> process is a worker. Because, in this situation, the termination of
> the supervision tree depends on the child process, it must be
> implemented in a safe way and its cleanup procedure must always
> return.

Therefore, the shutdown time of `gen_lsp_server` is changed from
`infinity` to default value, that is `5000` milliseconds for worker
processes.

Additionally, a sketch of the application supervision tree is added to
module header of `vscode_lsp_app` for easier understanding.

The `-behaviour` attributes are added to the corresponding modules, also
for easier understanding.

[1] https://www.erlang.org/doc/man/supervisor.html

* Add option for cache management

Add a configuration setting to change the storage mode for large cache
tables (mainly used for code navigation). Available modes:

- `memory` (default, original): use ETS tables
- `compressed memory`: use ETS tables with `compressed` flag turned on.
- `file`: use DETS tables saved into temporary files.

Why is this configuration option needed?

In a shared environment, where many instances of `vscode_erlang` run at
the same time on large code bases, this extension can consume a
noticeable amount of memory and may eventually exhaust all of it.

Notes for `compressed memory`:

Some simple tests, done in the above explained scenario, showed memory
consumption dropped by 50% when `compressed memory` is configured.

Notes for `file`:

As multiple extension instances can run at the same time on the same
host, even in the same workspace, we cannot use static filenames, nor
workspace-specific filenames, to create DETS tables. The only option is
to use unique filenames, like temporary files. Of course, this has a
drawback: temporary files must be deleted on exit, otherwise they will
consume a lot of disk space after a while. If the extension is shut down
normally, this is not a problem: the extension deletes its own cache.

However, if an extension instance is terminated brutally, its cache files
are left on the disk. Hence, an automatic mechanism is hooked to the points
where configuration is received from Visual Studio Code, and to normal
shutdown, to look for cache directories created by extension instances
that are no longer running, and delete them.

Reusing old cache files is theoretically possible if only one extension
instance runs in a workspace: when VSCode is closed and reopened, the
new extension instance could continue where the previous one stopped.
But, as multiple extension instances can run in the same workspace, it
can easily go wrong. Therefore it is simpler to use instance-specific
DETS files, just like instance-specific ETS tables.

---------

Co-authored-by: Kornel Horvath <[email protected]>
  • Loading branch information
KornelH and Kornel Horvath authored Dec 8, 2024
1 parent 3dd9bf1 commit b2d4e83
Show file tree
Hide file tree
Showing 19 changed files with 357 additions and 69 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ Support for Erlang tools, including rebar3, EUnit and Dialyzer
- `erlang.includePaths` - Include paths are read from rebar.config, and also standard set of paths is used. This setting is for special cases when the default behaviour is not enough
- `erlang.linting` - Enable/disable dynamic validation of opened Erlang source files
- `erlang.codeLensEnabled` - Enable/Disable CodeLens
- `erlang.cacheManagement` - Specify where and how to store large cache tables
- `erlang.inlayHintsEnabled` - Enable/Disable InlayHints
- `erlang.verbose` - Activate technical traces for use in the extension development

Expand Down
20 changes: 15 additions & 5 deletions apps/erlangbridge/src/gen_lsp_config_server.erl
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
-module(gen_lsp_config_server).

-behavior(gen_server).

%% API
-export([start_link/0]).
-export([standard_modules/0, bifs/0]).
-export([update_config/2, root/0, tmpdir/0, username/0, codeLensEnabled/0, includePaths/0, linting/0,
verbose/0, autosave/0, proxy/0, search_files_exclude/0, search_exclude/0,
formatting_line_length/0, inlayHintsEnabled/0, verbose_is_include/1]).

%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).
-export([standard_modules/0, bifs/0]).
-export([update_config/2, root/0, tmpdir/0, codeLensEnabled/0, includePaths/0, linting/0,
verbose/0, autosave/0, proxy/0, search_files_exclude/0, search_exclude/0,
formatting_line_length/0, inlayHintsEnabled/0, verbose_is_include/1]).

-include("lsp_log.hrl").
-define(SERVER, ?MODULE).
Expand Down Expand Up @@ -76,6 +78,9 @@ proxy() ->
%% Read the `tmpdir' entry of the `computed' configuration section.
%% The empty string is passed as the third argument of get_config_entry/3
%% (presumably the fallback when the entry is not set -- confirm there).
tmpdir() ->
get_config_entry(computed, tmpdir, "").

%% Read the `username' entry of the `computed' configuration section.
%% The empty string is passed as the third argument of get_config_entry/3
%% (presumably the fallback when the entry is not set -- confirm there).
username() ->
get_config_entry(computed, username, "").

%%--------------------------------------------------------------------
%% @doc Exclude filters for search in workspace.
%%
Expand Down Expand Up @@ -123,6 +128,7 @@ init(_Args) ->
BIFs = sets:to_list(lists:foldl(fun ({Name, _Arity}, Acc) ->
sets:add_element(atom_to_list(Name), Acc)
end, sets:new(), erlang:module_info(exports))),
process_flag(trap_exit, true), % to terminate/2 be called at exit
{ok, #state{config = #{}, standard_modules = StandardModules, bifs = BIFs}}.

handle_call({standard_modules}, _From, State) ->
Expand All @@ -142,6 +148,10 @@ handle_cast(stop, State) ->
%% gen_server handle_info callback: every out-of-band message is ignored and
%% the state is kept unchanged.
handle_info(_Info, State) ->
{noreply, State}.

%% gen_server terminate callback.
%%
%% When the state holds both `tmpdir' and `username' in the `computed'
%% section of the configuration, gen_lsp_doc_server is asked to delete the
%% caches left behind by extension instances that were killed brutally.
%% For any other state nothing is done. The result is always `ok'.
terminate(_Reason, State) ->
    case State of
        #state{config = #{computed := #{tmpdir := TmpDir, username := UserName}}} ->
            gen_lsp_doc_server:delete_unused_caches(TmpDir, UserName),
            ok;
        _ ->
            ok
    end.

Expand Down
7 changes: 6 additions & 1 deletion apps/erlangbridge/src/gen_lsp_config_sup.erl
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
-module(gen_lsp_config_sup).
-behaviour(supervisor).

-export([init/1, start_link/0]).
%% API
-export([start_link/0]).

%% Supervisor callbacks
-export([init/1]).

%% Start the supervisor, registered locally under this module's name, with
%% this module as the callback module and an empty argument list.
start_link() ->
supervisor:start_link({local, ?MODULE}, ?MODULE, []).
Expand Down
223 changes: 197 additions & 26 deletions apps/erlangbridge/src/gen_lsp_doc_server.erl
Original file line number Diff line number Diff line change
@@ -1,16 +1,25 @@
-module(gen_lsp_doc_server).

-behavior(gen_server).

%% API
-export([start_link/0]).

-export([document_opened/2, document_changed/2, document_closed/1, opened_documents/0, get_document_contents/1, parse_document/1]).
-export([project_file_added/1, project_file_changed/1, project_file_deleted/1]).
-export([get_syntax_tree/1, get_dodged_syntax_tree/1, get_references/1, get_inlayhints/1]).
-export([root_available/0, config_change/0, project_modules/0, get_module_file/1, get_module_files/1, get_build_dir/0, find_source_file/1]).

%% Cache management
-export([delete_unused_caches/2,
persist_cache_mgmt_opts/0]).

%% gen_server callbacks
-export([init/1,handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]).

-include("./lsp_log.hrl").

-define(SERVER, ?MODULE).
-define(XETS, (persistent_term:get(large_cache_module, ets))).
-define(IIF(Cond, Then, Else), if Cond -> Then; true -> Else end).

-record(state,
Expand All @@ -20,19 +29,27 @@
}).

document_opened(File, Contents) ->
ets:insert(document_contents, {File, Contents}).
?XETS:insert(document_contents, {File, Contents}).

document_changed(File, Contents) ->
ets:insert(document_contents, {File, Contents}).
?XETS:insert(document_contents, {File, Contents}).

document_closed(File) ->
ets:delete(document_contents, File).
?XETS:delete(document_contents, File).

opened_documents() ->
[File || {File, _Contents} <- ets:tab2list(document_contents)].
do_opened_documents(?XETS).

%% Collect the file names (first tuple element) of all `{File, Contents}'
%% entries in the `document_contents' table, using the API of the given
%% storage backend (`ets' or `dets'). Entries of any other shape are skipped.
do_opened_documents(ets) ->
    lists:filtermap(
        fun({File, _Contents}) -> {true, File};
           (_Other) -> false
        end,
        ets:tab2list(document_contents));
do_opened_documents(dets) ->
    CollectFile =
        fun({File, _Contents}) -> {continue, File};
           (_Other) -> continue
        end,
    dets:traverse(document_contents, CollectFile).

get_document_contents(File) ->
case ets:lookup(document_contents, File) of
case ?XETS:lookup(document_contents, File) of
[{File, Contents}] -> Contents;
_ -> undefined
end.
Expand Down Expand Up @@ -83,10 +100,10 @@ get_references(Reference) ->
ets:match(references, {'$1', Reference, '$2', '$3', '$4'}).

get_inlayhints(File) ->
case ets:lookup(document_inlayhints, File) of
case ?XETS:lookup(document_inlayhints, File) of
[{File, Inlays}] -> Inlays;
_ -> []
end.
end.

%% Asynchronously notify the doc server (registered as ?SERVER) by casting
%% the `root_available' message; returns without waiting for it to be handled.
root_available() ->
gen_server:cast(?SERVER, root_available).
Expand Down Expand Up @@ -134,14 +151,16 @@ as_string(Text) ->
Text.

start_link() ->
safe_new_table(document_contents, set),
safe_new_table(syntax_tree, set),
safe_new_table(dodged_syntax_tree, set),
safe_new_table(references, bag),
safe_new_table(document_inlayhints, set),
ExtraCreateOpts = persistent_term:get(large_cache_create_opts, []),
safe_new_table(document_contents, ?XETS, set, ExtraCreateOpts),
safe_new_table(syntax_tree, ?XETS, set, ExtraCreateOpts),
safe_new_table(dodged_syntax_tree, ?XETS, set, ExtraCreateOpts),
safe_new_table(references, ets, bag, []),
safe_new_table(document_inlayhints, ?XETS, set, ExtraCreateOpts),
gen_server:start_link({local, ?SERVER}, ?MODULE, [],[]).

%% gen_server init callback: enable exit trapping and start with no root
%% available, no known project modules and no files queued for parsing.
init(_Args) ->
process_flag(trap_exit, true), % trap exits so that terminate/2 is called on shutdown
{ok, #state{root_available = false, project_modules = #{}, files_to_parse = []}}.

handle_call(project_modules, _From, State) ->
Expand Down Expand Up @@ -177,7 +196,7 @@ handle_cast({project_file_added, File}, State) ->
{noreply, parse_next_file_in_background(UpdatedState)};

handle_cast({project_file_changed, File}, State) ->
case ets:lookup(document_contents, File) of
case ?XETS:lookup(document_contents, File) of
[_FileContents] ->
{noreply, State};
_ ->
Expand All @@ -198,6 +217,10 @@ handle_info(_Info, State) ->
{noreply, State}.

%% gen_server terminate callback: run delete_cache_file/1 for every large
%% cache table (the `references' table is not among them), then return `ok'.
terminate(_Reason, _State) ->
    CacheTables = [document_contents,
                   syntax_tree,
                   dodged_syntax_tree,
                   document_inlayhints],
    lists:foreach(fun delete_cache_file/1, CacheTables),
    ok.

code_change(_OldVersion, State, _Extra) ->
Expand Down Expand Up @@ -395,11 +418,11 @@ scan_project_files(State = #state{project_modules = OldProjectModules}) ->
delete_project_files([], State) ->
State;
delete_project_files([File | Files], State) ->
ets:delete(document_contents, File),
ets:delete(syntax_tree, File),
ets:delete(dodged_syntax_tree, File),
?XETS:delete(document_contents, File),
?XETS:delete(syntax_tree, File),
?XETS:delete(dodged_syntax_tree, File),
ets:delete(references, File),
ets:delete(document_inlayhints, File),
?XETS:delete(document_inlayhints, File),
Module = filename:rootname(filename:basename(File)),
UpdatedFiles = lists:delete(File, maps:get(Module, State#state.project_modules, [])),
UpdatedProjectModules = case UpdatedFiles of
Expand All @@ -409,10 +432,48 @@ delete_project_files([File | Files], State) ->
NewState = State#state{project_modules = UpdatedProjectModules},
delete_project_files(Files, NewState).

safe_new_table(Name, Type) ->
%%--------------------------------------------------------------------
%% @private
%% @doc Create a new ETS or DETS table owned by the supervisor.
%%
%% This function is called by {@link start_link/0} that is called from the
%% supervisor process, therefore created ETS and DETS tables are owned by the
%% supervisor instead of the worker `gen_server' process. And so, if the worker
%% process is crashed and restarted then data is still available in the original
%% table.
%% @end
%%--------------------------------------------------------------------
safe_new_table(Name, ets, Type, ExtraCreateOpts) ->
case ets:whereis(Name) of
undefined -> ets:new(Name, [Type, named_table, public]);
_ -> Name
undefined ->
ets:new(Name, [Type, named_table, public | ExtraCreateOpts]),
Name;
_ ->
%% Supervisor still holds the ETS table
Name
end;
safe_new_table(Name, dets, Type, ExtraCreateOpts) ->
case dets:info(Name, filename) of
undefined ->
CacheDir = persistent_term:get(large_cache_dets_dir),
FileName = filename:join(CacheDir, atom_to_list(Name)++".dets"),
OpenOpts = [{type, Type}, {file, FileName} | ExtraCreateOpts],
filelib:ensure_dir(FileName),
dets:open_file(Name, OpenOpts),
% dets:delete_all_objects(Name),
Name;
_ ->
%% Supervisor still holds the DETS table open
Name
end.

%% Close the DETS table `Name' and delete the file that backs it.
%%
%% `dets:info(Name, filename)' returns `undefined' when `Name' is not an open
%% DETS table, which is the normal case when the cache is kept in ETS. That
%% case must be matched first: an unbound variable pattern would also match
%% `undefined' and would make file:delete/1 try to remove a file literally
%% named "undefined" from the current working directory.
%%
%% Returns `ok' when there is nothing to delete, otherwise the result of
%% file:delete/1.
delete_cache_file(Name) ->
    case dets:info(Name, filename) of
        undefined ->
            ok;
        FileName ->
            dets:close(Name),
            file:delete(FileName)
    end.

parse_and_store(File, ContentsFile) ->
Expand All @@ -421,21 +482,21 @@ parse_and_store(File, ContentsFile) ->
undefined ->
ok;
_ ->
ets:insert(syntax_tree, {File, SyntaxTree}),
?XETS:insert(syntax_tree, {File, SyntaxTree}),
ets:delete(references, File),
ets:delete(document_inlayhints, File),
?XETS:delete(document_inlayhints, File),
lsp_navigation:fold_references(fun (Reference, Line, Column, End, _) ->
ets:insert(references, {File, Reference, Line, Column, End})
end, undefined, File, SyntaxTree),
ets:insert(document_inlayhints, {File, lsp_navigation:full_inlayhints_info(File,SyntaxTree, DodgedSyntaxTree)})
?XETS:insert(document_inlayhints, {File, lsp_navigation:full_inlayhints_info(File,SyntaxTree, DodgedSyntaxTree)})
end,
case DodgedSyntaxTree of
undefined -> ok;
_ -> ets:insert(dodged_syntax_tree, {File, DodgedSyntaxTree})
_ -> ?XETS:insert(dodged_syntax_tree, {File, DodgedSyntaxTree})
end.

get_tree(TreeType, File) ->
case ets:lookup(TreeType, File) of
case ?XETS:lookup(TreeType, File) of
[{File, SyntaxTree}] ->
SyntaxTree;
_ ->
Expand Down Expand Up @@ -469,3 +530,113 @@ find_module_files(Module, State) ->
%% Find every `<Module>.erl' file located in a subdirectory of `Dir', at any
%% depth, using a filelib wildcard pattern.
-spec find_module_files_under_dir(module(), file:filename()) -> [file:filename()].
find_module_files_under_dir(Module, Dir) ->
    SourceName = atom_to_list(Module) ++ ".erl",
    Pattern = Dir ++ "/**/" ++ SourceName,
    filelib:wildcard(Pattern).

%%%-------------------------------------------------------------------
%%% Cache management
%%%-------------------------------------------------------------------

%% Read the `-vscode_cache_mgmt' emulator flag and store the resulting cache
%% settings in persistent terms:
%%   `large_cache_module'      - `ets' or `dets'
%%   `large_cache_create_opts' - extra options used when creating the tables
%%   `large_cache_dets_dir'    - cache directory, written in `file' mode only
persist_cache_mgmt_opts() ->
    CacheMgmtArg = init:get_argument(vscode_cache_mgmt),
    do_persist_cache_mgmt_opts(CacheMgmtArg).

%% Recognised flag values: "memory", "memory compressed" and
%% "file <UserName> <TmpDir>". Anything else, including a missing flag, is
%% handled like "memory".
do_persist_cache_mgmt_opts({ok, [["memory"]]}) ->
    put_large_cache_backend(ets, []);
do_persist_cache_mgmt_opts({ok, [["memory", "compressed"]]}) ->
    put_large_cache_backend(ets, [compressed]);
do_persist_cache_mgmt_opts({ok, [["file", UserName, TmpDir]]}) ->
    put_large_cache_backend(dets, []),
    DetsDir = cache_dir(TmpDir, UserName, os:getpid()),
    persistent_term:put(large_cache_dets_dir, DetsDir);
do_persist_cache_mgmt_opts(_Unrecognised) ->
    do_persist_cache_mgmt_opts({ok, [["memory"]]}).

%% Store the backend module and its extra table creation options.
put_large_cache_backend(Module, CreateOpts) ->
    persistent_term:put(large_cache_module, Module),
    persistent_term:put(large_cache_create_opts, CreateOpts).

%% Cache directory of the extension instance running as OS process `OsPid':
%% a subdirectory, named after the PID, of the user's cache base directory.
cache_dir(TmpDir, UserName, OsPid) ->
    BaseDir = cache_basedir(TmpDir, UserName),
    filename:join(BaseDir, OsPid).

%% Directory holding the per-instance cache directories of one user:
%% `<TmpDir>/vscode_erlang_<UserName>/cache'.
cache_basedir(TmpDir, UserName) ->
    UserDir = filename:join(TmpDir, "vscode_erlang_" ++ UserName),
    filename:join(UserDir, "cache").

%%--------------------------------------------------------------------
%% @doc Delete all cache directories that are not in use any more.
%% In practice this means deleting the caches of those extension instances
%% that were terminated without executing proper cleanup (e.g. killed by OS).
%%
%% `TmpDir' and `UserName' may each be given as a string or as a binary;
%% binaries are converted to strings first. If either of them is empty (or
%% is not a string or binary) nothing is done. Any exception raised during
%% the cleanup is logged and not propagated, so the result is always `ok'.
%% @end
%%--------------------------------------------------------------------
-spec delete_unused_caches(TmpDir, UserName) -> ok
    when TmpDir :: string() | binary(),
         UserName :: string() | binary().
delete_unused_caches(TmpDir, UserName) when is_binary(TmpDir) ->
    delete_unused_caches(binary_to_list(TmpDir), UserName);
delete_unused_caches(TmpDir, UserName) when is_binary(UserName) ->
    delete_unused_caches(TmpDir, binary_to_list(UserName));
delete_unused_caches(TmpDir = [_|_], UserName = [_|_]) ->
    try
        CacheOsPids = get_cache_os_pids(TmpDir, UserName),
        %% NOTE: a non-Erlang process may exist with the same PID as an old
        %% extension instance; such a cache is then kept as if it was in use.
        DeadCacheOsPids = filter_non_existent_os_pids(CacheOsPids),
        do_delete_unused_caches(TmpDir, UserName, DeadCacheOsPids)
    catch Class:Reason:StackTrace ->
        %% Best-effort cleanup: report the failure but never crash the caller
        error_logger:error_report([{Class, Reason}, {stacktrace, StackTrace}])
    end;
delete_unused_caches(_TmpDir, _UserName) ->
    ok.

%% Recursively delete the cache directory that belongs to each OS PID in
%% `OsPidsToRemove'. The result of each deletion is ignored; returns `ok'.
do_delete_unused_caches(TmpDir, UserName, OsPidsToRemove) ->
    RemoveCacheDir =
        fun(OsPid) ->
            CacheDir = cache_dir(TmpDir, UserName, OsPid),
            file:del_dir_r(CacheDir)
        end,
    lists:foreach(RemoveCacheDir, OsPidsToRemove).

%% List the OS PIDs for which a cache directory exists under the cache base
%% directory of the given user, whether or not the process that created it is
%% still alive. Every subdirectory name is taken as a PID; plain files are
%% skipped. An unreadable or missing base directory yields the empty list.
-spec get_cache_os_pids(TmpDir :: string(), UserName :: string())
        -> OsPids :: [string()].
get_cache_os_pids(TmpDir, UserName) ->
    CacheBaseDir = cache_basedir(TmpDir, UserName),
    case file:list_dir(CacheBaseDir) of
        {ok, Entries} ->
            [Entry || Entry <- Entries,
                      filelib:is_dir(filename:join(CacheBaseDir, Entry))];
        _ ->
            []
    end.

%% Keep only those OS PIDs that do not belong to any live OS process.
%% The check is delegated to the helper for the current OS family; the empty
%% list is answered directly, without querying the operating system.
-spec filter_non_existent_os_pids(OsPids) -> OsPids
        when OsPids :: [string()].
filter_non_existent_os_pids([]) ->
    [];
filter_non_existent_os_pids(OsPids) ->
    {OsFamily, _OsName} = os:type(),
    case OsFamily of
        win32 -> filter_non_existent_win32_pids(OsPids);
        unix -> filter_non_existent_unix_pids(OsPids)
    end.

%% Return the entries of `OsPids' that do not belong to a live process on a
%% Unix-like system.
%%
%% Where procfs is mounted (e.g. Linux) a PID is alive when `/proc/<PID>' is
%% a directory. Some Unix-like systems (e.g. macOS) have no procfs; relying
%% on `/proc' there would report every PID, including the PID of this very
%% VM, as dead and would lead to the deletion of caches that are in use.
%% On such systems the `ps' utility is consulted instead.
filter_non_existent_unix_pids(OsPids) ->
    %% Our own process is certainly alive, so its entry tells whether procfs
    %% can be trusted on this host.
    HasProcFs = filelib:is_dir("/proc/" ++ os:getpid()),
    lists:filter(
        fun(OsPid) -> not is_unix_pid_alive(HasProcFs, OsPid) end,
        OsPids).

%% Tell whether `OsPid' belongs to a live process.
is_unix_pid_alive(true, OsPid) ->
    filelib:is_dir("/proc/" ++ OsPid);
is_unix_pid_alive(false, OsPid) ->
    case is_decimal_string(OsPid) of
        true ->
            %% `ps -p PID -o pid=' prints nothing for an unknown PID. Any
            %% output, including an error message, is taken as "alive" to
            %% stay on the safe side and keep the cache.
            string:trim(os:cmd("ps -p " ++ OsPid ++ " -o pid=")) =/= "";
        false ->
            %% Not a PID at all: never hand it over to the shell. It is
            %% reported as not alive, just like in the procfs branch.
            false
    end.

%% Tell whether the argument is a non-empty string of decimal digits.
is_decimal_string([_ | _] = String) ->
    lists:all(fun(Char) -> Char >= $0 andalso Char =< $9 end, String);
is_decimal_string(_Other) ->
    false.

%% Return the entries of `OsPids' that do not belong to a running Windows
%% process. Windows has nothing like the /proc/<PID> entries of Unix systems,
%% so the set of all running processes is fetched once and each PID is looked
%% up in it.
filter_non_existent_win32_pids(OsPids) ->
    AlivePids = get_win32_pids(),
    [OsPid || OsPid <- OsPids, not maps:is_key(OsPid, AlivePids)].

%% Return the PIDs of the running Windows processes as the keys of a map
%% (every value is 1). The PIDs are taken from the second CSV column of the
%% `tasklist' output; lines having fewer than two columns are ignored.
-spec get_win32_pids() -> #{OsPid :: string() => 1}.
get_win32_pids() ->
    TaskList = os:cmd("tasklist /FO CSV /NH"),
    Lines = string:lexemes(TaskList, ["\r\n", $\r, $\n]),
    maps:from_list(
        [{OsPid, 1}
         || Line <- Lines,
            [_, OsPid | _] <- [string:split(Line, "\",\"", all)]]).
Loading

0 comments on commit b2d4e83

Please sign in to comment.