From 09cf8f58ccbd3202811175720f289b410ea4b348 Mon Sep 17 00:00:00 2001
From: Romain Beguet <beguet@adacore.com>
Date: Mon, 12 Feb 2024 11:57:54 +0100
Subject: [PATCH] Implement a custom heuristic for collecting lookup caches.

---
 .../libadalang-implementation-extensions.adb  | 111 ++++++++++++++++++
 .../libadalang-implementation-extensions.ads  |  13 ++
 manage.py                                     |  12 +-
 3 files changed, 135 insertions(+), 1 deletion(-)

diff --git a/extensions/src/libadalang-implementation-extensions.adb b/extensions/src/libadalang-implementation-extensions.adb
index fa486874f..d8b238590 100644
--- a/extensions/src/libadalang-implementation-extensions.adb
+++ b/extensions/src/libadalang-implementation-extensions.adb
@@ -1054,4 +1054,115 @@ package body Libadalang.Implementation.Extensions is
       return Create_Internal_Solver_Diagnostic_Array (0);
    end Ada_Node_P_Own_Nameres_Diagnostics;
 
+   -------------------------------
+   -- Should_Collect_Env_Caches --
+   -------------------------------
+
+   function Should_Collect_Env_Caches
+     (Ctx                        : Internal_Context;
+      Unit                       : Internal_Unit;
+      All_Env_Caches_Entry_Count : Long_Long_Natural) return Boolean
+   is
+      Ctx_Stats  : Context_Env_Caches_Stats renames Ctx.Env_Caches_Stats;
+      Unit_Stats : Unit_Env_Caches_Stats renames Unit.Env_Caches_Stats;
+   begin
+      --  We only consider units which hold a minimal amount of cache
+      --  entries, to avoid wasting cycles collecting the same seldom-
+      --  used units which don't take much memory.
+      if Unit_Stats.Entry_Count < 100 then
+         return False;
+      end if;
+
+      declare
+         Hit_Ratio : constant Float :=
+           (if Unit_Stats.Lookup_Count = 0 then 1.0
+            else Float (Unit_Stats.Hit_Count)
+                 / Float (Unit_Stats.Lookup_Count));
+         --  Ratio of cache hits over total cache lookups since this unit was
+         --  last collected. Defaults to 1.0 when no lookup was done, in which
+         --  case Lookup_Ratio below is 0 and thus drives the score anyway.
+
+         Lookup_Ratio : constant Float :=
+           (if Ctx_Stats.Lookup_Count = Unit_Stats.Last_Overall_Lookup_Count
+            then 0.0
+            else Float (Unit_Stats.Lookup_Count)
+                 / Float (Ctx_Stats.Lookup_Count
+                          - Unit_Stats.Last_Overall_Lookup_Count));
+         --  Ratio of lookups done on this unit over total lookups done on any
+         --  unit since this unit was last collected (0 if no lookup was done
+         --  at all in that period, to avoid dividing by zero).
+
+         Recent_Lookup_Ratio : constant Float :=
+           (if Ctx_Stats.Lookup_Count = Ctx_Stats.Previous_Lookup_Count
+            then 0.0
+            else Float (Unit_Stats.Lookup_Count
+                        - Unit_Stats.Previous_Lookup_Count)
+                 / Float (Ctx_Stats.Lookup_Count
+                          - Ctx_Stats.Previous_Lookup_Count));
+         --  Ratio of lookups done on this unit over total lookups done on any
+         --  unit since last time a collection was *attempted* (0 if no lookup
+         --  was done at all in that period, to avoid dividing by zero).
+
+         Entry_Ratio : constant Float :=
+           Float (Unit_Stats.Entry_Count)
+           / Float (All_Env_Caches_Entry_Count);
+         --  Ratio of cache entries stored in this unit over total number of
+         --  cache entries spread across all units.
+
+         Usefulness_Score : constant Float :=
+           Lookup_Ratio * Hit_Ratio * (2.0 + 5.0 * Recent_Lookup_Ratio);
+         --  Score to estimate how useful the cache entries in this unit are:
+         --  0 means that the cache entries are useless (we want to get rid of
+         --  them), and a score greater than Entry_Ratio implies that we want
+         --  to keep cache entries.
+         --
+         --  How to compute this score was deduced from trial and error, here
+         --  is how it is supposed to work:
+         --
+         --  * 0 means that this cache is useless because we haven't looked up
+         --    any of its entries. This is why Lookup_Ratio should be a
+         --    multiplicative factor for the overall expression.
+         --
+         --  * If Hit_Ratio is 0, this cache is useless because even if we
+         --    looked up its entries, we never found a relevant one. So
+         --    Hit_Ratio must be a multiplicative factor as well.
+         --
+         --  * We should favor units whose caches were used recently, even if
+         --    their lookup ratios are lower than that of another unit.
+
+         Result : constant Boolean := Usefulness_Score < Entry_Ratio;
+         --  We want to collect this unit if the usefulness of its cache
+         --  entries is lower than the proportion of total memory space needed
+         --  to store them.
+      begin
+         if Cache_Invalidation_Trace.Is_Active then
+            Cache_Invalidation_Trace.Trace
+              ((if Result then "Collecting " else "Leaving alone ")
+               & Trace_Image (Unit));
+
+            Cache_Invalidation_Trace.Increase_Indent;
+            Cache_Invalidation_Trace.Trace
+              ("Cache entries:" & Unit_Stats.Entry_Count'Image);
+            Cache_Invalidation_Trace.Trace
+              ("Cache lookup count:" & Unit_Stats.Lookup_Count'Image);
+            Cache_Invalidation_Trace.Trace
+              ("Cache hit count:" & Unit_Stats.Hit_Count'Image);
+            Cache_Invalidation_Trace.Trace
+              ("Ratio of cache hits:" & Float'Image (100.0 * Hit_Ratio));
+            Cache_Invalidation_Trace.Trace
+              ("Ratio of total cache lookups:"
+               & Float'Image (100.0 * Lookup_Ratio));
+            Cache_Invalidation_Trace.Trace
+              ("Ratio of recent cache lookups:"
+               & Float'Image (100.0 * Recent_Lookup_Ratio));
+            Cache_Invalidation_Trace.Trace
+              ("Cache usefulness:" & Float'Image (100.0 * Usefulness_Score));
+            Cache_Invalidation_Trace.Trace
+              ("Ratio of entries:" & Float'Image (100.0 * Entry_Ratio));
+            Cache_Invalidation_Trace.Decrease_Indent;
+         end if;
+         return Result;
+      end;
+   end Should_Collect_Env_Caches;
+
 end Libadalang.Implementation.Extensions;
diff --git a/extensions/src/libadalang-implementation-extensions.ads b/extensions/src/libadalang-implementation-extensions.ads
index ecb8c902d..1726df4a7 100644
--- a/extensions/src/libadalang-implementation-extensions.ads
+++ b/extensions/src/libadalang-implementation-extensions.ads
@@ -142,4 +142,17 @@ package Libadalang.Implementation.Extensions is
    function Single_Tok_Node_P_Subp_Spec_Var
      (Node : Bare_Single_Tok_Node) return Logic_Var;
 
+   ------------------------
+   -- Cache Invalidation --
+   ------------------------
+
+   function Should_Collect_Env_Caches
+     (Ctx                        : Internal_Context;
+      Unit                       : Internal_Unit;
+      All_Env_Caches_Entry_Count : Long_Long_Natural) return Boolean;
+   --  Return whether the lexical env caches held by Unit should be collected.
+   --  The decision combines usage statistics tracked in Ctx and in Unit with
+   --  All_Env_Caches_Entry_Count, the number of cache entries across all
+   --  units. See comments in the implementation for details on the heuristic.
+
 end Libadalang.Implementation.Extensions;
diff --git a/manage.py b/manage.py
index a213f0304..225887f15 100755
--- a/manage.py
+++ b/manage.py
@@ -70,7 +70,7 @@ def create_context(self, args):
         # Keep these import statements here so that they are executed only
         # after the coverage computation actually started.
         from langkit.compile_context import (
-            AdaSourceKind, CompileCtx, LibraryEntity
+            AdaSourceKind, CacheCollectionConf, CompileCtx, LibraryEntity
         )
         from ada.lexer import ada_lexer
         from ada.grammar import ada_grammar
@@ -91,6 +91,16 @@ def create_context(self, args):
             documentations=libadalang_docs,
             property_exceptions={"Precondition_Failure"},
             generate_unparser=True,
+
+            # Setup a configuration of the cache collection mechanism that
+            # works well for Ada.
+            cache_collection_conf=CacheCollectionConf(
+                threshold_increment=100000,
+                decision_heuristic=LibraryEntity(
+                    "Libadalang.Implementation.Extensions",
+                    "Should_Collect_Env_Caches"
+                )
+            )
         )
 
         # Internals need to access environment hooks, the symbolizer and