use globals instead of const

svilupp · Aug 18, 2024 · 09d071a · 09d071a
1 parent 45a424c
commit 09d071a
Show file tree

Hide file tree

Showing 11 changed files with 61 additions and 59 deletions.
diff --git a/src/AIHelpMe.jl b/src/AIHelpMe.jl
@@ -41,7 +41,7 @@ function __init__()
     ## Set the active configuration
     update_pipeline!(:bronze)
     ## Load index - auto-loads into MAIN_INDEX
-    load_index!(LOADED_PACKS[])
+    load_index!(LOADED_PACKS)
 end
 
 # Enable precompilation to reduce start time, disabled logging

diff --git a/src/generation.jl b/src/generation.jl
@@ -77,7 +77,7 @@ function aihelp(cfg_orig::RT.AbstractRAGConfig, index::RT.AbstractChunkIndex,
     global LAST_RESULT, CONV_HISTORY_LOCK, RAG_KWARGS
 
     ## Grab the active kwargs
-    kwargs = RAG_KWARGS[]
+    kwargs = RAG_KWARGS
     # Update chat model
     setpropertynested(kwargs,
         [:rephraser_kwargs, :tagger_kwargs, :answerer_kwargs, :refiner_kwargs],
@@ -99,7 +99,7 @@ function aihelp(cfg_orig::RT.AbstractRAGConfig, index::RT.AbstractChunkIndex,
     ## Run the RAG pipeline
     result = airag(cfg, index; question, verbose, return_all = true, kwargs...)
     lock(CONV_HISTORY_LOCK) do
-        LAST_RESULT[] = result
+        LAST_RESULT = result
     end
     return return_all ? result : PT.last_message(result)
 end
@@ -108,13 +108,13 @@ function aihelp(index::RT.AbstractChunkIndex, question::AbstractString;
         kwargs...)
     global RAG_CONFIG
     ## default kwargs and models are injected inside of main aihelp function
-    aihelp(RAG_CONFIG[], index, question; kwargs...)
+    aihelp(RAG_CONFIG, index, question; kwargs...)
 end
 
 function aihelp(question::AbstractString;
         kwargs...)
     global MAIN_INDEX, RAG_CONFIG
-    @assert !isnothing(MAIN_INDEX[]) "MAIN_INDEX is not loaded. Use `AIHelpMe.load_index!()` to load an index."
+    @assert !isnothing(MAIN_INDEX) "MAIN_INDEX is not loaded. Use `AIHelpMe.load_index!()` to load an index."
     ## default kwargs and models are injected inside of main aihelp function
-    aihelp(RAG_CONFIG[], MAIN_INDEX[], question; kwargs...)
-end
+    aihelp(RAG_CONFIG, MAIN_INDEX, question; kwargs...)
+end
diff --git a/src/loading.jl b/src/loading.jl
@@ -17,7 +17,7 @@ AIH.load_index!(index)
 function load_index!(index::RT.AbstractChunkIndex;
         verbose::Bool = true, kwargs...)
     global MAIN_INDEX
-    MAIN_INDEX[] = index
+    MAIN_INDEX = index
     verbose && @info "Loaded index into MAIN_INDEX"
     return index
 end
@@ -40,13 +40,13 @@ function load_index!(file_path::AbstractString;
     end
     @assert index isa RT.AbstractChunkIndex "Provided file path must point to a serialized RAG index (Deserialized type: $(typeof(index)))."
     verbose && @info "Loaded index a file $(file_path) into MAIN_INDEX"
-    MAIN_INDEX[] = index
+    MAIN_INDEX = index
 
     return index
 end
 
 """
-    load_index!(packs::Vector{Symbol}=LOADED_PACKS[]; verbose::Bool = true, kwargs...)
+    load_index!(packs::Vector{Symbol}=LOADED_PACKS; verbose::Bool = true, kwargs...)
     load_index!(pack::Symbol; verbose::Bool = true, kwargs...)
 
 Loads one or more `packs` into the main index from our pre-built artifacts.
@@ -66,11 +66,11 @@ load_index!([:julia, :juliadata, :makie, :tidier, :plots, :sciml])
 But we recommend loading ONLY the packs you expect to need - unnecessary packs introduce noise.
 """
 function load_index!(
-        packs::Vector{Symbol} = LOADED_PACKS[]; verbose::Bool = true, kwargs...)
+        packs::Vector{Symbol} = LOADED_PACKS; verbose::Bool = true, kwargs...)
-    global ALLOWED_PACKS, RAG_CONFIG, RAG_CONFIG
+    global ALLOWED_PACKS, RAG_CONFIG, RAG_KWARGS, MAIN_INDEX # MAIN_INDEX is reassigned below; without `global` that assignment would only bind a local
     @assert all(x -> x in ALLOWED_PACKS, packs) "Invalid pack(s): $(setdiff(packs, ALLOWED_PACKS)). Allowed packs: $(ALLOWED_PACKS)"
 
-    config_key = get_config_key(RAG_CONFIG[], RAG_KWARGS[])
+    config_key = get_config_key(RAG_CONFIG, RAG_KWARGS)
     indices = RT.ChunkIndex[]
     for pack in packs
         artifact_path = @artifact_str("$(pack)__$(config_key)")
@@ -84,7 +84,7 @@ function load_index!(
     end
     # TODO: dedupe the index
     joined_index = reduce(vcat, indices)
-    MAIN_INDEX[] = joined_index
+    MAIN_INDEX = joined_index
     verbose && @info "Loaded index from packs: $(join(packs,", ")) into MAIN_INDEX"
     return joined_index
 end
@@ -93,7 +93,7 @@ end
 load_index!(pack::Symbol) = load_index!([pack])
 
 """
-    update_index(index::RT.AbstractChunkIndex = MAIN_INDEX[],
+    update_index(index::RT.AbstractChunkIndex = MAIN_INDEX,
         modules::Vector{Module} = Base.Docs.modules;
         verbose::Integer = 1,
         kwargs...)
@@ -116,7 +116,7 @@ AIHelpMe.update_index() |> AHM.load_index!
 index = AIHelpMe.update_index(index)
 ```
 """
-function update_index(index::RT.AbstractChunkIndex = MAIN_INDEX[],
+function update_index(index::RT.AbstractChunkIndex = MAIN_INDEX,
         modules::Vector{Module} = Base.Docs.modules;
         verbose::Integer = 1,
         kwargs...)
@@ -129,7 +129,7 @@ function update_index(index::RT.AbstractChunkIndex = MAIN_INDEX[],
 
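-    ## Build the new index -- E2E process disabled as it would duplicate a lot the docs we already have
+    ## Build the new index -- E2E process disabled as it would duplicate a lot of the docs we already have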
     ##
-    ##     new_index = RT.build_index(RAG_CONFIG[].indexer, all_docs, ;
+    ##     new_index = RT.build_index(RAG_CONFIG.indexer, all_docs, ;
     ##     embedder_kwargs, chunker = TextChunker(), chunker_kwargs,
     ##     verbose, kwargs...
     ## )
@@ -147,9 +147,9 @@ function update_index(index::RT.AbstractChunkIndex = MAIN_INDEX[],
     mask = find_new_chunks(index.chunks, output_chunks)
 
     ## Embed new items
-    embedder = RAG_CONFIG[].retriever.embedder
+    embedder = RAG_CONFIG.retriever.embedder
     embedder_kwargs_ = RT.getpropertynested(
-        RAG_KWARGS[], [:retriever_kwargs], :embedder_kwargs, nothing)
+        RAG_KWARGS, [:retriever_kwargs], :embedder_kwargs, nothing)
     embedder_kwargs = haskey(kwargs, :embedder_kwargs) ?
                       merge(kwargs.embedder_kwargs, embedder_kwargs_) : embedder_kwargs_
     embeddings = RT.get_embeddings(embedder, output_chunks[mask];

diff --git a/src/pipeline_defaults.jl b/src/pipeline_defaults.jl
@@ -2,8 +2,8 @@
 const CONV_HISTORY = Vector{Vector{PT.AbstractMessage}}()
 const CONV_HISTORY_LOCK = ReentrantLock()
 const MAX_HISTORY_LENGTH = 1
-const LAST_RESULT = Ref{Union{Nothing, RT.AbstractRAGResult}}(nothing)
-const MAIN_INDEX = Ref{Union{Nothing, RT.AbstractChunkIndex}}(nothing)
+global LAST_RESULT::Union{Nothing, RT.AbstractRAGResult} = nothing
+global MAIN_INDEX::Union{Nothing, RT.AbstractChunkIndex} = nothing
 """
     ALLOWED PACKS
 
@@ -24,23 +24,23 @@ const ALLOWED_PACKS = [:julia, :juliadata, :tidier, :sciml, :plots, :makie]
 
 The knowledge packs that are currently loaded in the index.
 """
-const LOADED_PACKS = Ref{Vector{Symbol}}(@load_preference("LOADED_PACKS",
-    default=["julia"]) .|> Symbol)
+global LOADED_PACKS::Vector{Symbol} = @load_preference("LOADED_PACKS",
+    default=["julia"]) .|> Symbol
 
 ### Globals for configuration
 # These serve as reference models to be injected in the absence of inputs, 
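-# but the actual used for the query is primarily provided aihelpme directly or via the active RAG_KWARGS
+# but the model actually used for a query is primarily provided to `aihelp` directly or via the active RAG_KWARGS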
-const MODEL_CHAT = @load_preference("MODEL_CHAT",
+global MODEL_CHAT::String = @load_preference("MODEL_CHAT",
     default="gpt4t")
-const MODEL_EMBEDDING = @load_preference("MODEL_EMBEDDING",
+global MODEL_EMBEDDING::String = @load_preference("MODEL_EMBEDDING",
     default="text-embedding-3-large")
-const EMBEDDING_DIMENSION = @load_preference("EMBEDDING_DIMENSION",
+global EMBEDDING_DIMENSION::Int = @load_preference("EMBEDDING_DIMENSION",
     default=1024)
 
 # Loaded up with `update_pipeline!` later once RAG CONFIGURATIONS is populated
-const RAG_KWARGS = Ref{NamedTuple}()
-const RAG_CONFIG = Ref{RT.AbstractRAGConfig}()
-const LOADED_CONFIG_KEY = Ref{String}("")  # get the current config key
+global RAG_KWARGS::NamedTuple = NamedTuple()
+global RAG_CONFIG::RT.AbstractRAGConfig = RAGConfig() # just initialize, it will be changed
+global LOADED_CONFIG_KEY::String = "" # get the current config key
 
 """
     RAG_CONFIGURATIONS
@@ -54,8 +54,12 @@ Available Options:
 - `:silver`: A simple configuration for a bronze pipeline, using truncated binary embeddings (dimensionality: 1024) but also enables re-ranking step.
 - `:gold`: A more complex configuration, similar to `:simpler`, but using a standard embeddings (dimensionality: 3072, type: Float32). It also leverages re-ranking and refinement with a web-search.
 """
-const RAG_CONFIGURATIONS = let MODEL_CHAT = MODEL_CHAT, MODEL_EMBEDDING = MODEL_EMBEDDING
-    RAG_CONFIGURATIONS = Dict{Symbol, Dict{Symbol, Any}}()
+global RAG_CONFIGURATIONS::Dict{Symbol, Dict{Symbol, Any}} = Dict{
+    Symbol, Dict{Symbol, Any}}()
+
+## Load configurations
+let MODEL_CHAT = MODEL_CHAT, MODEL_EMBEDDING = MODEL_EMBEDDING,
+    RAG_CONFIGURATIONS = RAG_CONFIGURATIONS
     ## Bronze
     RAG_CONFIGURATIONS[:bronze] = Dict{Symbol, Any}(
         :config => RT.RAGConfig(;
@@ -129,13 +133,11 @@ const RAG_CONFIGURATIONS = let MODEL_CHAT = MODEL_CHAT, MODEL_EMBEDDING = MODEL_
                     model = MODEL_EMBEDDING),
                 refiner_kwargs = (;
                     model = MODEL_CHAT))))
-
-    RAG_CONFIGURATIONS
 end
 
 "Returns the configuration key for the given `cfg` and `kwargs` to use the relevant artifacts."
 function get_config_key(
-        cfg::AbstractRAGConfig = RAG_CONFIG[], kwargs::NamedTuple = RAG_KWARGS[])
+        cfg::AbstractRAGConfig = RAG_CONFIG, kwargs::NamedTuple = RAG_KWARGS)
     emb_model = getpropertynested(kwargs, [:embedder_kwargs], :model)
     emb_dim = getpropertynested(kwargs, [:embedder_kwargs], :truncate_dimension, 0)
     emb_eltype = RT.EmbedderEltype(cfg.retriever.embedder)
@@ -221,11 +223,11 @@ function update_pipeline!(option::Symbol = :bronze; model_chat = MODEL_CHAT,
     ## Set the options
     config_key = get_config_key(config, kwargs)
     ## detect significant changes
-    !isempty(LOADED_CONFIG_KEY[]) && LOADED_CONFIG_KEY[] != config_key &&
+    !isempty(LOADED_CONFIG_KEY) && LOADED_CONFIG_KEY != config_key &&
         @warn "Core RAG pipeline configuration has changed! You must re-build your index with `AIHelpMe.load_index!()`!"
-    LOADED_CONFIG_KEY[] = config_key
-    RAG_KWARGS[] = kwargs
-    RAG_CONFIG[] = config
+    LOADED_CONFIG_KEY = config_key
+    RAG_KWARGS = kwargs
+    RAG_CONFIG = config
 
     verbose &&
         @info "Updated RAG pipeline to `:$option` (Configuration key: \"$config_key\")."

diff --git a/src/precompilation.jl b/src/precompilation.jl
@@ -1,10 +1,10 @@
 ## Mock run for aihelp
 # remember prior settings
-RAG_CONFIG[] = RT.RAGConfig(;
+RAG_CONFIG = RT.RAGConfig(;
     indexer = RT.SimpleIndexer(; embedder = RT.BinaryBatchEmbedder()),
     retriever = RT.SimpleRetriever(;
         embedder = RT.BinaryBatchEmbedder(), reranker = RT.CohereReranker()))
-RAG_KWARGS[] = (
+RAG_KWARGS = (
     retriever_kwargs = (;
         top_k = 100,
         top_n = 5,
@@ -49,15 +49,15 @@ index = ChunkIndex(chunks = ["chunk1", "chunk2"],
     tags = nothing,
     tags_vocab = nothing,
     sources = ["source1", "source2"])
-MAIN_INDEX[] = index
+MAIN_INDEX = index
 
 ## Change for our test
 update_pipeline!(:bronze; model_chat = "mockgen",
     model_embedding = "mockemb", embedding_dimension = 0)
 
 question = "ABC?"
-cfg = RAG_CONFIG[]
-kwargs = RAG_KWARGS[]
+cfg = RAG_CONFIG
+kwargs = RAG_KWARGS
 ## Simple RAG pre-run
 msg = airag(cfg, index; question, kwargs...)
 

diff --git a/src/preparation.jl b/src/preparation.jl
@@ -101,14 +101,14 @@ function RT.build_index(mod::Module; verbose::Int = 1, kwargs...)
                      merge(kwargs[:chunker_kwargs], chunker_kwargs_) : chunker_kwargs_
 
     embedder_kwargs_ = RT.getpropertynested(
-        RAG_KWARGS[], [:retriever_kwargs], :embedder_kwargs, nothing)
+        RAG_KWARGS, [:retriever_kwargs], :embedder_kwargs, nothing)
     # Note: force Matrix{Bool} structure for now, switch to Int8-based binary embeddings with the latest PT
     embedder_kwargs = haskey(kwargs, :embedder_kwargs) ?
                       merge(
         (; return_type = Matrix{Bool}), embedder_kwargs_, kwargs[:embedder_kwargs]) :
                       merge((; return_type = Matrix{Bool}), embedder_kwargs_)
 
-    new_index = RT.build_index(RAG_CONFIG[].indexer, all_docs;
+    new_index = RT.build_index(RAG_CONFIG.indexer, all_docs;
         kwargs...,
         embedder_kwargs, chunker = RT.TextChunker(), chunker_kwargs,
         verbose, index_id = nameof(mod))
@@ -134,14 +134,14 @@ function RT.build_index(modules::Vector{Module} = Base.Docs.modules; verbose::In
 
     # Note: force Matrix{Bool} structure for now, switch to Int8-based binary embeddings with the latest PT
     embedder_kwargs_ = RT.getpropertynested(
-        RAG_KWARGS[], [:retriever_kwargs], :embedder_kwargs, nothing)
+        RAG_KWARGS, [:retriever_kwargs], :embedder_kwargs, nothing)
     embedder_kwargs = haskey(kwargs, :embedder_kwargs) ?
                       merge(
         (; return_type = Matrix{Bool}), embedder_kwargs_, kwargs[:embedder_kwargs]) :
                       merge((; return_type = Matrix{Bool}), embedder_kwargs_)
 
-    new_index = RT.build_index(RAG_CONFIG[].indexer, all_docs;
+    new_index = RT.build_index(RAG_CONFIG.indexer, all_docs;
         kwargs...,
         embedder_kwargs, chunker = RT.TextChunker(), chunker_kwargs,
         verbose, index_id = :all_modules)
-end
+end
diff --git a/src/user_preferences.jl b/src/user_preferences.jl
@@ -49,7 +49,7 @@ function set_preferences!(pairs::Pair{String, <:Any}...)
             @set_preferences!(key=>value_int)
         elseif key == "LOADED_PACKS"
             value_vecstr = value isa Symbol ? [string(value)] : string.(value)
-            LOADED_PACKS[] = Symbol.(value_vecstr)
+            global LOADED_PACKS = Symbol.(value_vecstr) # explicit `global`: a bare assignment inside the function would bind a new local instead of updating the module-level value
             @set_preferences!(key=>value_vecstr)
         else
             setproperty!(@__MODULE__, Symbol(key), value)

diff --git a/src/utils.jl b/src/utils.jl
@@ -37,7 +37,7 @@ It can be useful to see the sources/references used by the AI model to generate
 
 If you're using `aihelp()` make sure to set `return_all = true` to return the RAGResult.
 """
-last_result() = LAST_RESULT[]
+last_result() = LAST_RESULT
 
 "Hacky function to load a HDF5 file into a ChunkIndex object. Only bare-bone ChunkIndex is supported right now."
 function load_index_hdf5(path::AbstractString; verbose::Bool = true)

diff --git a/test/generation.jl b/test/generation.jl
@@ -26,34 +26,34 @@ using PromptingTools: TestEchoOpenAISchema
         sources = ["source1", "source2"])
 
     # remember prior settings
-    current_index = MAIN_INDEX[]
+    current_index = MAIN_INDEX
     current_chat_model = MODEL_CHAT
     current_emb_model = MODEL_EMBEDDING
     current_dimensionality = getpropertynested(
-        RAG_KWARGS[], [:embedder_kwargs], :truncate_dimension, nothing)
+        RAG_KWARGS, [:embedder_kwargs], :truncate_dimension, nothing)
 
     ## Change for our test
-    MAIN_INDEX[] = index
+    MAIN_INDEX = index
     update_pipeline!(:bronze; model_chat = "mockgen",
         model_embedding = "mockemb", embedding_dimension = 0)
 
     question = "ABC?"
-    cfg = RAG_CONFIG[]
-    kwargs = RAG_KWARGS[]
+    cfg = RAG_CONFIG
+    kwargs = RAG_KWARGS
     ## Simple RAG pre-run
     msg = airag(cfg, index; question, kwargs...)
     @test msg.content == "new answer"
 
     ## run for a message
     msg = aihelp(cfg, index, question)
     @test msg.content == "new answer"
-    @test LAST_RESULT[].final_answer == "new answer"
+    @test LAST_RESULT.final_answer == "new answer"
 
     ## run for result
     result = aihelp(cfg, index, question; return_all = true)
     @test result isa RT.RAGResult
     @test result.final_answer == "new answer"
-    @test LAST_RESULT[] == result
+    @test LAST_RESULT == result
 
     # short hand
     msg = aihelp(index, question)
@@ -72,5 +72,5 @@ using PromptingTools: TestEchoOpenAISchema
     ## Return previous settings
     update_pipeline!(:bronze; model_chat = current_chat_model,
         model_embedding = current_emb_model, embedding_dimension = current_dimensionality)
-    MAIN_INDEX[] = current_index
+    MAIN_INDEX = current_index
 end
diff --git a/test/pipeline_defaults.jl b/test/pipeline_defaults.jl
@@ -17,7 +17,7 @@ using AIHelpMe: get_config_key, MODEL_CHAT, MODEL_EMBEDDING, update_pipeline!,
         :truncate_dimension, 100
     )
     @test get_config_key(cfg, kwargs2) == "mockemb-100-Bool"
-    @test get_config_key() == get_config_key(RAG_CONFIG[], RAG_KWARGS[])
+    @test get_config_key() == get_config_key(RAG_CONFIG, RAG_KWARGS)
 end
 
 @testset "update_pipeline!" begin

diff --git a/test/utils.jl b/test/utils.jl
@@ -8,7 +8,7 @@ using AIHelpMe: last_result, LAST_RESULT, remove_pkgdir, load_index_hdf5, find_n
 end
 
 @testset "last_result" begin
-    @test last_result() == LAST_RESULT[]
+    @test last_result() == LAST_RESULT
 end
 
 @testset "find_new_chunks" begin