diff --git a/src/Experimental/RAGTools/generation.jl b/src/Experimental/RAGTools/generation.jl
index 50448b211..437ebe5c9 100644
--- a/src/Experimental/RAGTools/generation.jl
+++ b/src/Experimental/RAGTools/generation.jl
@@ -64,6 +64,17 @@ function build_context(contexter::ContextEnumerator,
     return context
 end
 
+"""
+    build_context(contexter::ContextEnumerator,
+        index::AbstractManagedIndex, candidates::AbstractCandidateWithChunks;
+        verbose::Bool = true,
+        chunks_window_margin::Tuple{Int, Int} = (1, 1), kwargs...)
+
+    build_context!(contexter::ContextEnumerator,
+        index::AbstractManagedIndex, result::AbstractRAGResult; kwargs...)
+
+Dispatch for `AbstractManagedIndex` with `AbstractCandidateWithChunks`.
+"""
 function build_context(contexter::ContextEnumerator,
         index::AbstractManagedIndex,
         candidates::AbstractCandidateWithChunks;
@@ -124,7 +135,6 @@ function answer!(
     throw(ArgumentError("Answerer $(typeof(answerer)) not implemented"))
 end
 
-# TODO: update docs signature
 """
     answer!(
         answerer::SimpleAnswerer, index::AbstractDocumentIndex, result::AbstractRAGResult;
@@ -173,6 +183,17 @@ function answer!(
 
     return result
 end
+
+"""
+    answer!(
+        answerer::SimpleAnswerer, index::AbstractManagedIndex, result::AbstractRAGResult;
+        model::AbstractString = PT.MODEL_CHAT, verbose::Bool = true,
+        template::Symbol = :RAGAnswerFromContext,
+        cost_tracker = Threads.Atomic{Float64}(0.0),
+        kwargs...)
+
+Dispatch for `AbstractManagedIndex`.
+"""
 function answer!(
         answerer::SimpleAnswerer, index::AbstractManagedIndex, result::AbstractRAGResult;
         model::AbstractString = PT.MODEL_CHAT, verbose::Bool = true,
@@ -228,7 +249,6 @@ function refine!(
 end
 
 
-# TODO: update docs signature
 """
     refine!(
         refiner::NoRefiner, index::AbstractChunkIndex, result::AbstractRAGResult;
@@ -247,10 +267,9 @@ function refine!(
 end
 
 
-# TODO: update docs signature
 """
     refine!(
-        refiner::SimpleRefiner, index::AbstractDocumentIndex, result::AbstractRAGResult;
+        refiner::SimpleRefiner, index::Union{AbstractDocumentIndex, AbstractManagedIndex}, result::AbstractRAGResult;
         verbose::Bool = true,
         model::AbstractString = PT.MODEL_CHAT,
         template::Symbol = :RAGAnswerRefiner,
@@ -303,10 +322,9 @@ function refine!(
 end
 
 
-# TODO: update docs signature
 """
     refine!(
-        refiner::TavilySearchRefiner, index::AbstractDocumentIndex, result::AbstractRAGResult;
+        refiner::TavilySearchRefiner, index::Union{AbstractDocumentIndex, AbstractManagedIndex}, result::AbstractRAGResult;
         verbose::Bool = true,
         model::AbstractString = PT.MODEL_CHAT,
         include_answer::Bool = true,
@@ -458,10 +476,9 @@ It uses `ContextEnumerator`, `SimpleAnswerer`, `SimpleRefiner`, and `NoPostproce
     postprocessor::AbstractPostprocessor = NoPostprocessor()
 end
 
-# TODO: update docs signature
 """
     generate!(
-        generator::AbstractGenerator, index::AbstractDocumentIndex, result::AbstractRAGResult;
+        generator::AbstractGenerator, index::Union{AbstractDocumentIndex, AbstractManagedIndex}, result::AbstractRAGResult;
         verbose::Integer = 1,
         api_kwargs::NamedTuple = NamedTuple(),
         contexter::AbstractContextBuilder = generator.contexter,
@@ -591,8 +608,9 @@ function Base.show(io::IO, cfg::AbstractRAGConfig)
     dump(io, cfg; maxdepth = 2)
 end
 
+# TODO: add example for Pinecone
 """
-    airag(cfg::AbstractRAGConfig, index::AbstractDocumentIndex;
+    airag(cfg::AbstractRAGConfig, index::Union{AbstractDocumentIndex, AbstractManagedIndex};
         question::AbstractString,
         verbose::Integer = 1, return_all::Bool = false,
         api_kwargs::NamedTuple = NamedTuple(),
diff --git a/src/Experimental/RAGTools/preparation.jl b/src/Experimental/RAGTools/preparation.jl
index 187f9495f..2f18e7b47 100644
--- a/src/Experimental/RAGTools/preparation.jl
+++ b/src/Experimental/RAGTools/preparation.jl
@@ -145,9 +145,12 @@ end
     PineconeIndexer <: AbstractIndexBuilder
 
 Pinecone index to be returned by `build_index`.
+
+It uses `FileChunker`, `SimpleEmbedder` and `NoTagger` as default chunker, embedder and tagger.
 """
 @kwdef mutable struct PineconeIndexer <: AbstractIndexBuilder
     chunker::AbstractChunker = FileChunker()
+    # TODO: BatchEmbedder?
     embedder::AbstractEmbedder = SimpleEmbedder()
     tagger::AbstractTagger = NoTagger()
 end
@@ -726,18 +729,94 @@ function build_index(
     return index
 end
 
+# TODO: where to put these?
 using Pinecone: Pinecone, PineconeContextv3, PineconeIndexv3, init_v3, Index, PineconeVector, upsert
 using UUIDs: UUIDs, uuid4
-# TODO: change docs
 """
     build_index(
-        indexer::PineconeIndexer;
-        namespace::AbstractString,
+        indexer::PineconeIndexer, files_or_docs::Vector{<:AbstractString};
+        metadata::Vector{Dict{String, Any}} = Vector{Dict{String, Any}}(),
+        pinecone_context::Pinecone.PineconeContextv3 = Pinecone.init_v3(""),
+        pinecone_index::Pinecone.PineconeIndexv3 = nothing,
+        pinecone_namespace::AbstractString = "",
+        upsert::Bool = true,
         verbose::Integer = 1,
-        index_id = gensym("PTPineconeIndex"),
+        index_id = gensym(pinecone_namespace),
+        chunker::AbstractChunker = indexer.chunker,
+        chunker_kwargs::NamedTuple = NamedTuple(),
+        embedder::AbstractEmbedder = indexer.embedder,
+        embedder_kwargs::NamedTuple = NamedTuple(),
+        tagger::AbstractTagger = indexer.tagger,
+        tagger_kwargs::NamedTuple = NamedTuple(),
+        api_kwargs::NamedTuple = NamedTuple(),
         cost_tracker = Threads.Atomic{Float64}(0.0))
 
 Builds a `PineconeIndex` containing a Pinecone context (API key, index and namespace).
+The index stores the document chunks and their embeddings (and potentially other information).
+
+The function processes each file or document (depending on `chunker`), splits its content into chunks, embeds these chunks
+and then combines this information into a retrievable index. The chunks and embeddings are upserted to Pinecone using
+the provided Pinecone context (unless the `upsert` flag is set to `false`).
+
+# Arguments
+- `indexer::PineconeIndexer`: The indexing logic for Pinecone operations.
+- `files_or_docs`: A vector of valid file paths to be indexed (chunked and embedded).
+- `metadata::Vector{Dict{String, Any}}`: A vector of metadata attributed to each docs file, given as dictionaries with `String` keys. Default is empty vector.
+- `pinecone_context::Pinecone.PineconeContextv3`: The Pinecone API key generated using Pinecone.jl. Must be specified.
+- `pinecone_index::Pinecone.PineconeIndexv3`: The Pinecone index generated using Pinecone.jl. Must be specified.
+- `pinecone_namespace::AbstractString`: The Pinecone namespace associated to `pinecone_index`.
+- `upsert::Bool = true`: A flag specifying whether to upsert the chunks and embeddings to Pinecone. Defaults to `true`.
+- `verbose`: An Integer specifying the verbosity of the logs. Default is `1` (high-level logging). `0` is disabled.
+- `index_id`: A unique identifier for the index. Default is a generated symbol.
+- `chunker`: The chunker logic to use for splitting the documents. Default is `indexer.chunker` (`FileChunker()` for a default `PineconeIndexer`).
+- `chunker_kwargs`: Parameters to be provided to the `get_chunks` function. Useful to change the `separators` or `max_length`.
+  - `sources`: A vector of strings indicating the source of each chunk. Default is equal to `files_or_docs`.
+- `embedder`: The embedder logic to use for embedding the chunks. Default is `indexer.embedder` (`SimpleEmbedder()` for a default `PineconeIndexer`).
+- `embedder_kwargs`: Parameters to be provided to the `get_embeddings` function. Useful to change the `target_batch_size_length` or reduce asyncmap tasks `ntasks`.
+  - `model`: The model to use for embedding. Default is `PT.MODEL_EMBEDDING`.
+- `tagger`: The tagger logic to use for extracting tags from the chunks. Default is `NoTagger()`, ie, skip tag extraction. There are also `PassthroughTagger` and `OpenTagger`.
+- `tagger_kwargs`: Parameters to be provided to the `get_tags` function.
+  - `model`: The model to use for tags extraction. Default is `PT.MODEL_CHAT`.
+  - `template`: A template to be used for tags extraction. Default is `:RAGExtractMetadataShort`.
+  - `tags`: A vector of vectors of strings directly providing the tags for each chunk. Applicable for `tagger::PassthroughTagger`.
+- `api_kwargs`: Parameters to be provided to the API endpoint. Shared across all API calls if provided.
+- `cost_tracker`: A `Threads.Atomic{Float64}` object to track the total cost of the API calls. Useful to pass the total cost to the parent call.
+
+# Returns
+- `PineconeIndex`: An object containing the compiled index of chunks, embeddings, tags, vocabulary, sources and metadata, together with the Pinecone connection data.
+
+See also: `PineconeIndex`, `get_chunks`, `get_embeddings`, `get_tags`, `CandidateWithChunks`, `find_closest`, `find_tags`, `rerank`, `retrieve`, `generate!`, `airag`
+
+# Examples
+```julia
+using Pinecone
+
+# Prepare the Pinecone connection data
+pinecone_context = Pinecone.init_v3(ENV["PINECONE_API_KEY"])
+pindex = ENV["PINECONE_INDEX"]
+pinecone_index = !isempty(pindex) ? Pinecone.Index(pinecone_context, pindex) : nothing
+namespace = "my-namespace"
+
+# Add metadata about the sources in Pinecone
+metadata = [Dict{String, Any}("source" => doc_file) for doc_file in docs_files]
+
+# Build the index. By default, the chunks and embeddings get upserted to Pinecone.
+const RT = PromptingTools.Experimental.RAGTools
+index_pinecone = RT.build_index(
+    RT.PineconeIndexer(),
+    docs_files;
+    pinecone_context = pinecone_context,
+    pinecone_index = pinecone_index,
+    pinecone_namespace = namespace,
+    metadata = metadata
+)
+```
+# Notes
+- If you get errors about exceeding embedding input sizes, first check the `max_length` in your chunks. 
+  If that does NOT resolve the issue, try changing the `embedder_kwargs`. 
+  In particular, reducing the `target_batch_size_length` parameter (eg, 10_000) and number of tasks `ntasks=1`. 
+  Some providers cannot handle large batch sizes (eg, Databricks).
+
 """
 function build_index(
         indexer::PineconeIndexer, files_or_docs::Vector{<:AbstractString};
@@ -745,7 +824,7 @@ function build_index(
         pinecone_context::Pinecone.PineconeContextv3 = Pinecone.init_v3(""),
         pinecone_index::Pinecone.PineconeIndexv3 = nothing,
         pinecone_namespace::AbstractString = "",
-        upsert::Bool = false,
+        upsert::Bool = true,
         verbose::Integer = 1,
         index_id = gensym(pinecone_namespace),
         chunker::AbstractChunker = indexer.chunker,
@@ -756,7 +835,7 @@ function build_index(
         tagger_kwargs::NamedTuple = NamedTuple(),
         api_kwargs::NamedTuple = NamedTuple(),
         cost_tracker = Threads.Atomic{Float64}(0.0))
-    @assert !isempty(pinecone_context.apikey) && !isnothing(pinecone_index) "Pinecone context and index not set"
+    @assert !isempty(pinecone_context.apikey) && !isnothing(pinecone_index) "Pinecone context and index not set"
 
     ## Split into chunks
     chunks, sources = get_chunks(chunker, files_or_docs;
diff --git a/src/Experimental/RAGTools/retrieval.jl b/src/Experimental/RAGTools/retrieval.jl
index 673fdafba..5af1ad3a7 100644
--- a/src/Experimental/RAGTools/retrieval.jl
+++ b/src/Experimental/RAGTools/retrieval.jl
@@ -241,6 +241,37 @@ function find_closest(
     return CandidateChunks(indexid(index), positions, Float32.(scores))
 end
 
+# Dispatch to find scores for multiple embeddings
+function find_closest(
+        finder::AbstractSimilarityFinder, index::AbstractChunkIndex,
+        query_emb::AbstractMatrix{<:Real}, query_tokens::AbstractVector{<:AbstractVector{<:AbstractString}} = Vector{Vector{String}}();
+        top_k::Int = 100, kwargs...)
+    if isnothing(chunkdata(parent(index)))
+        return CandidateChunks(; index_id = indexid(index))
+    end
+    ## split the `top_k` budget evenly across the query columns (integer division, so it is 0 when there are more columns than `top_k`)
+    top_k_ = top_k ÷ size(query_emb, 2)
+    ## run the single-query method once per column and `vcat` the candidates (presumably `vcat` re-sorts from highest to lowest similarity -- confirm)
+    if isempty(query_tokens)
+        mapreduce(
+            c -> find_closest(finder, index, c; top_k = top_k_, kwargs...), vcat, eachcol(query_emb))
+    else
+        @assert length(query_tokens)==size(query_emb, 2) "Length of `query_tokens` must be equal to the number of columns in `query_emb`."
+        mapreduce(
+            (emb, tok) -> find_closest(finder, index, emb, tok; top_k = top_k_, kwargs...), vcat, eachcol(query_emb), query_tokens)
+    end
+end
+
+"""
+    find_closest(
+        finder::AbstractSimilarityFinder, index::PineconeIndex,
+        query_emb::AbstractVector{<:Real}, query_tokens::AbstractVector{<:AbstractString} = String[];
+        top_k::Int = 10, kwargs...)
+
+Finds the indices of chunks that are closest to query embedding (`query_emb`) by querying Pinecone.
+
+Returns only `top_k` closest indices.
+"""
 function find_closest(
         finder::AbstractSimilarityFinder, index::PineconeIndex,
         query_emb::AbstractVector{<:Real}, query_tokens::AbstractVector{<:AbstractString} = String[];
@@ -261,6 +292,7 @@ function find_closest(
     scores = [m.score for m in matches]
     chunks = [m.metadata.content for m in matches]
     metadata = [JSON3.read(JSON3.write(m.metadata), Dict{String, Any}) for m in matches]
+    # TODO: metadata might not have `source`, change this
     sources = [m.metadata.source for m in matches]
 
     return CandidateWithChunks(
@@ -272,6 +304,7 @@ function find_closest(
         sources = Vector{String}(sources))
 end
 
+# Dispatch to find scores for multiple embeddings
 function find_closest(
         finder::AbstractSimilarityFinder, index::PineconeIndex,
         query_emb::AbstractMatrix{<:Real}, query_tokens::AbstractVector{<:AbstractVector{<:AbstractString}} = Vector{Vector{String}}();
@@ -290,27 +323,6 @@ function find_closest(
     end
 end
 
-# Dispatch to find scores for multiple embeddings
-function find_closest(
-        finder::AbstractSimilarityFinder, index::AbstractChunkIndex,
-        query_emb::AbstractMatrix{<:Real}, query_tokens::AbstractVector{<:AbstractVector{<:AbstractString}} = Vector{Vector{String}}();
-        top_k::Int = 100, kwargs...)
-    if isnothing(chunkdata(parent(index)))
-        return CandidateChunks(; index_id = indexid(index))
-    end
-    ## reduce top_k since we have more than one query
-    top_k_ = top_k ÷ size(query_emb, 2)
-    ## simply vcat together (gets sorted from the highest similarity to the lowest)
-    if isempty(query_tokens)
-        mapreduce(
-            c -> find_closest(finder, index, c; top_k = top_k_, kwargs...), vcat, eachcol(query_emb))
-    else
-        @assert length(query_tokens)==size(query_emb, 2) "Length of `query_tokens` must be equal to the number of columns in `query_emb`."
-        mapreduce(
-            (emb, tok) -> find_closest(finder, index, emb, tok; top_k = top_k_, kwargs...), vcat, eachcol(query_emb), query_tokens)
-    end
-end
-
 ### For MultiIndex
 function find_closest(
         finder::MultiFinder, index::AbstractMultiIndex,
@@ -612,7 +624,7 @@ function find_tags(method::AllTagFilter, index::AbstractChunkIndex,
 end
 
 """
-    find_tags(method::NoTagFilter, index::AbstractChunkIndex,
+    find_tags(method::NoTagFilter, index::Union{AbstractChunkIndex, AbstractManagedIndex},
         tags::Union{T, AbstractVector{<:T}}; kwargs...) where {T <:
                                                                Union{
         AbstractString, Regex, Nothing}}
@@ -620,12 +632,6 @@ end
 
 Returns all chunks in the index, ie, no filtering, so we simply return `nothing` (easier for dispatch).
 """
-# function find_tags(method::NoTagFilter, index::AbstractChunkIndex,
-#         tags::Union{T, AbstractVector{<:T}}; kwargs...) where {T <:
-#                                                                Union{
-#         AbstractString, Regex, Nothing}}
-#     return nothing
-# end
 function find_tags(
         method::NoTagFilter, index::Union{AbstractChunkIndex,
             AbstractManagedIndex},
@@ -748,8 +754,6 @@ function rerank(reranker::NoReranker,
         candidates::AbstractCandidateWithChunks;
         top_n::Integer = length(candidates),
         kwargs...)
-    # Since this is almost a passthrough strategy, it returns the candidate_chunks unchanged
-    # but it truncates to `top_n` if necessary
     return first(candidates, top_n)
 end
 
@@ -1017,11 +1021,22 @@ end
     PineconeRetriever <: AbstractRetriever
 
 Dispatch for `retrieve` for Pinecone.
+
+# Fields
+- `rephraser::AbstractRephraser`: the rephrasing method, dispatching `rephrase` - uses `NoRephraser`
+- `embedder::AbstractEmbedder`: the embedding method, dispatching `get_embeddings` (see Preparation Stage for more details) - uses `SimpleEmbedder`
+- `processor::AbstractProcessor`: the processor method, dispatching `get_keywords` (see Preparation Stage for more details) - uses `NoProcessor`
+- `finder::AbstractSimilarityFinder`: the similarity search method, dispatching `find_closest` - uses `CosineSimilarity`
+- `tagger::AbstractTagger`: the tag generating method, dispatching `get_tags` (see Preparation Stage for more details) - uses `NoTagger`
+- `filter::AbstractTagFilter`: the tag matching method, dispatching `find_tags` - uses `NoTagFilter`
+- `reranker::AbstractReranker`: the reranking method, dispatching `rerank` - uses `NoReranker`
 """
 @kwdef mutable struct PineconeRetriever <: AbstractRetriever
     rephraser::AbstractRephraser = NoRephraser()
+    # TODO: BatchEmbedder?
     embedder::AbstractEmbedder = SimpleEmbedder()
     processor::AbstractProcessor = NoProcessor()
+    # TODO: actually do something with this; Pinecone allows choosing finder
     finder::AbstractSimilarityFinder = CosineSimilarity()
     tagger::AbstractTagger = NoTagger()
     filter::AbstractTagFilter = NoTagFilter()
@@ -1242,6 +1257,33 @@ function retrieve(retriever::AbstractRetriever,
     return result
 end
 
+"""
+    retrieve(retriever::PineconeRetriever,
+        index::PineconeIndex,
+        question::AbstractString;
+        verbose::Integer = 1,
+        top_k::Integer = 100,
+        top_n::Integer = 10,
+        api_kwargs::NamedTuple = NamedTuple(),
+        rephraser::AbstractRephraser = retriever.rephraser,
+        rephraser_kwargs::NamedTuple = NamedTuple(),
+        embedder::AbstractEmbedder = retriever.embedder,
+        embedder_kwargs::NamedTuple = NamedTuple(),
+        processor::AbstractProcessor = retriever.processor,
+        processor_kwargs::NamedTuple = NamedTuple(),
+        finder::AbstractSimilarityFinder = retriever.finder,
+        finder_kwargs::NamedTuple = NamedTuple(),
+        tagger::AbstractTagger = retriever.tagger,
+        tagger_kwargs::NamedTuple = NamedTuple(),
+        filter::AbstractTagFilter = retriever.filter,
+        filter_kwargs::NamedTuple = NamedTuple(),
+        reranker::AbstractReranker = retriever.reranker,
+        reranker_kwargs::NamedTuple = NamedTuple(),
+        cost_tracker = Threads.Atomic{Float64}(0.0),
+        kwargs...)
+
+Dispatch method for `PineconeIndex`.
+"""
 function retrieve(retriever::PineconeRetriever,
         index::PineconeIndex,
         question::AbstractString;
diff --git a/src/Experimental/RAGTools/types.jl b/src/Experimental/RAGTools/types.jl
index 3301c1b67..1dcfcdc0b 100644
--- a/src/Experimental/RAGTools/types.jl
+++ b/src/Experimental/RAGTools/types.jl
@@ -136,17 +136,39 @@ chunkdata(index::ChunkEmbeddingsIndex) = embeddings(index)
 # For backward compatibility
 const ChunkIndex = ChunkEmbeddingsIndex
 
+# TODO: where to put these?
 indexid(index::AbstractManagedIndex) = index.id
 chunks(index::AbstractManagedIndex) = index.chunks
 sources(index::AbstractManagedIndex) = index.sources
 
+# TODO: what about this?
 using Pinecone: Pinecone, PineconeContextv3, PineconeIndexv3
+
+"""
+    PineconeIndex
+
+Main struct for storing document chunks and their embeddings along with the necessary Pinecone context for connecting to Pinecone.
+
+# Fields
+- `id::Symbol`: unique identifier of each index (a symbol of the Pinecone index namespace)
+- `pinecone_context::Pinecone.PineconeContextv3`: Pinecone API key
+- `pinecone_index::Pinecone.PineconeIndexv3`: Pinecone index
+- `pinecone_namespace::String`: name of the namespace inside the Pinecone index
+- `chunks::Vector{<:AbstractString}`: underlying document chunks / snippets
+- `embeddings::Union{Nothing, Matrix{<:Real}}`: for semantic search
+- `tags::Union{Nothing, AbstractMatrix{<:Bool}}`: for exact search, filtering, etc. This is often a sparse matrix indicating which chunks have the given `tag` (see `tags_vocab` for the position lookup)
+- `tags_vocab::Union{Nothing, Vector{<:AbstractString}}`: vocabulary for the `tags` matrix (each column in `tags` is one item in `tags_vocab` and rows are the chunks)
+- `sources::Vector{<:AbstractString}`: sources of the chunks
+- `metadata::Vector{Dict{String, Any}}`: metadata for each chunk/embedding stored in Pinecone
+"""
 @kwdef struct PineconeIndex{
     T1 <: Union{Nothing, AbstractString},
     T2 <: Union{Nothing, Matrix{<:Real}},
     T3 <: Union{Nothing, AbstractMatrix{<:Bool}}
 } <: AbstractManagedIndex
+    # TODO: id should be a combination of index + namespace?
     id::Symbol  # namespace
+    # TODO: these should not be v3, maybe?
     pinecone_context::Pinecone.PineconeContextv3
     pinecone_index::Pinecone.PineconeIndexv3
     pinecone_namespace::String
@@ -159,9 +181,10 @@ using Pinecone: Pinecone, PineconeContextv3, PineconeIndexv3
     # column oriented, ie, each column is one item in `tags_vocab` and rows are the chunks
     tags::T3 = nothing
     tags_vocab::Union{Nothing, Vector{<:AbstractString}} = nothing
+    sources::Union{Nothing, Vector{<:AbstractString}} = nothing
     # metadata for each chunk
+    # TODO: should be changed to `extras`? but different type -- this needs to be vector of dicts
     metadata::Vector{Dict{String, Any}} = Vector{Dict{String, Any}}()
-    sources::Union{Nothing, Vector{<:AbstractString}} = nothing
 end
 HasKeywords(::PineconeIndex) = false
 HasEmbeddings(::PineconeIndex) = true
@@ -549,6 +572,11 @@ Base.@propagate_inbounds function translate_positions_to_parent(
 end
 
 
+"""
+    SubManagedIndex
+
+Provides the same functionality for `AbstractManagedIndex` as `SubChunkIndex` does for `AbstractChunkIndex`.
+"""
 @kwdef struct SubManagedIndex{T <: AbstractManagedIndex} <: AbstractManagedIndex
     parent::T
     positions::Vector{Int}
@@ -560,6 +588,7 @@ Base.parent(index::SubManagedIndex) = index.parent
 HasEmbeddings(index::SubManagedIndex) = HasEmbeddings(parent(index))
 HasKeywords(index::SubManagedIndex) = HasKeywords(parent(index))
 
+# TODO: see which of these are needed
 Base.@propagate_inbounds function chunks(index::SubManagedIndex)
     view(chunks(parent(index)), positions(index))
 end
@@ -569,7 +598,6 @@ end
 Base.@propagate_inbounds function chunkdata(index::SubManagedIndex)
     chunkdata(parent(index), positions(index))
 end
-"Access chunkdata for a subset of chunks, `chunk_idx` is a vector of chunk indices in the index"
 Base.@propagate_inbounds function chunkdata(
         index::SubManagedIndex, chunk_idx::AbstractVector{<:Integer})
     ## We need this accessor because different chunk indices can have chunks in different dimensions!!
@@ -671,16 +699,31 @@ function CandidateChunks(index::AbstractChunkIndex, positions::AbstractVector{<:
         indexid(index), convert(Vector{Int}, positions), convert(Vector{Float32}, scores))
 end
 
+
+"""
+    CandidateWithChunks
+
+Similar to `CandidateChunks`, but for `AbstractManagedIndex`. It's the result of the retrieval stage of RAG.
+
+# Fields
+- `index_id::Symbol`: the id of the index from which the candidates are drawn
+- `positions::Vector{Int}`: the positions of the candidates in the index (ie, `5` refers to the 5th chunk in the index - `chunks(index)[5]`)
+- `scores::Vector{Float32}`: the similarity scores of the candidates from the query (higher is better)
+- `chunks::Vector{String}`: the chunks retrieved for a given question
+- `metadata::AbstractVector`: metadata corresponding to `chunks`
+- `sources::Vector{String}`: sources corresponding to `chunks`
+"""
 @kwdef struct CandidateWithChunks{TP <: Integer, TD <: Real} <:
               AbstractCandidateWithChunks
     index_id::Symbol
     positions::Vector{TP} = Int[]
     scores::Vector{TD} = Float32[]
-    ## fields that we don't have in Index anymore -- so we get them "per question"
+    ## fields obtained "per question"
     chunks::Vector{String} = String[]
     metadata::AbstractVector = Dict{String, Any}[]
     sources::Vector{String} = String[]
 end
+# TODO: see which can be removed
 indexid(cc::CandidateWithChunks) = cc.index_id
 positions(cc::CandidateWithChunks) = cc.positions
 scores(cc::CandidateWithChunks) = cc.scores
@@ -942,7 +985,6 @@ end
 Base.@propagate_inbounds function Base.view(index::SubChunkIndex, cc::MultiCandidateChunks)
     SubChunkIndex(index, cc)
 end
-# TODO: proper `view` -- `SubManagedIndex`?
 Base.@propagate_inbounds function Base.view(index::AbstractManagedIndex, cc::CandidateWithChunks)
     @boundscheck let chk_vector = chunks(parent(index))
         if !checkbounds(Bool, axes(chk_vector, 1), positions(cc))