diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 7e620817e..bcfac0a27 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -23,6 +23,7 @@ jobs: version: - '1.9' - '1.10' + # - '1.11' # - 'nightly' os: - ubuntu-latest @@ -49,24 +50,21 @@ jobs: permissions: contents: write statuses: write + pages: write + id-token: write + actions: write steps: - - uses: actions/checkout@v3 - - uses: julia-actions/setup-julia@v1 - with: - version: '1' - - name: Configure doc environment - run: | - julia --project=docs/ -e ' - using Pkg - Pkg.develop(PackageSpec(path=pwd())) - Pkg.instantiate()' - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-docdeploy@v1 + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Julia + uses: julia-actions/setup-julia@v1 + - name: Pull Julia cache + uses: julia-actions/cache@v1 + - name: Install documentation dependencies + run: julia --project=docs -e 'using Pkg; pkg"dev ."; Pkg.instantiate(); Pkg.precompile(); Pkg.status()' + - name: Build and deploy docs + uses: julia-actions/julia-docdeploy@v1 env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - run: | - julia --project=docs -e ' - using Documenter: DocMeta, doctest - using PromptingTools - DocMeta.setdocmeta!(PromptingTools, :DocTestSetup, :(using PromptingTools); recursive=true) - doctest(PromptingTools)' + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token + GKSwstype: "100" # for Plots.jl plots (if you have them) + JULIA_DEBUG: "Documenter" \ No newline at end of file diff --git a/.gitignore b/.gitignore index 40f731fe8..a71aa4928 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,11 @@ /docs/build/ **/.DS_Store -**/.vscode \ No newline at end of file +**/.vscode + +# exclude scratch files +**/_* +docs/package-lock.json + +# Ignore Cursor rules +.cursorrules \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 48d9e4c66..1118085b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,10 +10,489 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +## [0.62.1] + +### Fixed +- Fixed a bug in `tool_call_signature` where hidden fields were not hidden early enough and would fail if a Dict argument was provided. It used to do the processing after, but Dicts cannot be processed, so we're now masking the fields upfront. + +## [0.62.0] + +### Added +- Added a new Claude 3.5 Haiku model (`claude-3-5-haiku-latest`) and updated the alias `claudeh` with it. +- Added support for XAI's Grok 2 beta model (`grok-beta`) and updated the alias `grok` with it. Set your ENV api key `XAI_API_KEY` to use it. + +## [0.61.0] + +### Added +- Added a new `extras` field to `ToolRef` to enable additional parameters in the tool signature (eg, `display_width_px`, `display_height_px` for the `:computer` tool). +- Added a new kwarg `unused_as_kwargs` to `execute_tool` to enable passing unused args as kwargs (see `?execute_tool` for more information). Helps with using kwarg-based functions. + +### Updated +- Updated the compat bounds for `StreamCallbacks` to enable both v0.4 and v0.5 (Fixes Julia 1.9 compatibility). +- Updated the return type of `tool_call_signature` to `Dict{String, AbstractTool}` to enable better interoperability with different tool types. + +## [0.60.0] + +### Added +- Added new Claude 3.5 Sonnet model (`claude-3-5-sonnet-latest`) and updated the alias `claude` and `claudes` with it. 
+- Added support for Ollama streaming with schema `OllamaSchema` (see `?StreamCallback` for more information). Schema `OllamaManaged` is NOT supported (it's legacy and will be removed in the future). +- Moved the implementation of streaming callbacks to a new `StreamCallbacks` package. +- Added new error types for tool execution to enable better error handling and reporting (see `?AbstractToolError`). +- Added support for Anthropic's new pre-trained tools via `ToolRef` (see `?ToolRef`); to enable the feature, use the `:computer_use` beta header (eg, `aitools(..., betas = [:computer_use])`). + +### Fixed +- Fixed a bug in `call_cost` where the cost was not calculated if any non-AIMessages were provided in the conversation. + +## [0.59.1] + +### Fixed +- Fixed a bug in multi-turn tool calls for OpenAI models where an empty tools array could have been sent, which causes an API error. + +## [0.59.0] + +### Breaking Changes +- New field `name` introduced in `AbstractChatMessage` and `AIToolRequest` messages to enable role-based workflows. It initializes to `nothing`, so it is backward compatible. + +### Added +- Extends support for structured extraction with multiple "tools" definitions (see `?aiextract`). +- Added new primitives `Tool` (to re-use tool definitions) and a function `aitools` to support mixed structured and non-structured workflows, eg, agentic workflows (see `?aitools`). +- Added a field `name` to `AbstractChatMessage` and `AIToolRequest` messages to enable role-based workflows. +- Added support for partial argument execution with the `execute_tool` function (provide your own context to override the arg values). +- Added support for [SambaNova](https://sambanova.ai/) hosted models (set your ENV `SAMBANOVA_API_KEY`). +- Added many new models from Mistral, Groq, SambaNova, OpenAI. + +### Updated +- Renamed `function_call_signature` to `tool_call_signature` to better reflect that it's used for tools, but kept a link to the old name for back-compatibility. +- Improves structured extraction for Anthropic models (now you can use the `tool_choice` keyword argument to specify which tool to use or re-use your parsed tools). +- When log probs are requested, we will now also log the raw information in the `AIMessage.extras[:log_prob]` field (previously we logged only the full sum). This enables more nuanced log-probability calculations for individual tokens. + +## [0.58.0] + +### Added +- Added support for [Cerebras](https://cloud.cerebras.ai) hosted models (set your ENV `CEREBRAS_API_KEY`). Available model aliases: `cl3` (Llama3.1 8bn), `cl70` (Llama3.1 70bn). +- Added a kwarg to `aiclassify` to provide a custom token ID mapping (`token_ids_map`) to work with custom tokenizers. + +### Updated +- Improved the implementation of `airetry!` to concatenate feedback from all ancestor nodes ONLY IF `feedback_inplace=true` (because otherwise the LLM can see it in the message history). + +### Fixed +- Fixed a potential bug in `airetry!` where the `aicall` object was not properly validated to ensure it has been `run!` first. + +## [0.57.0] + +### Added + +- Support for the [Azure OpenAI API](https://learn.microsoft.com/en-us/azure/ai-services/openai/reference). Requires two environment variables to be set: `AZURE_OPENAI_API_KEY` and `AZURE_OPENAI_HOST` (i.e., `https://<resource-name>.openai.azure.com`). + +## [0.56.1] + +### Fixed +- Removed an accidental INFO log in Anthropic's `aigenerate`. +- Changed internal logging in `streamcallback` to use `@debug` when printing raw data chunks.
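Note (added for illustration): the `streamcallback` machinery referenced in the entries above and below can be exercised with a one-liner. This is only a minimal sketch, assuming an OpenAI-compatible model and the corresponding API key are configured; the prompt is a placeholder (see `?StreamCallback` for custom callbacks):

```julia
using PromptingTools

# Print each received text chunk to the console as it arrives (simplest setup)
msg = aigenerate("Write a haiku about Julia."; streamcallback = stdout)
```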
+ +## [0.56.0] + +### Updated +- Enabled Streaming for OpenAI-compatible APIs (eg, DeepSeek Coder). +- If streaming to stdout, also print a newline at the end of streaming (to separate multiple outputs). + +### Fixed +- Relaxed the type-assertions in `StreamCallback` to allow for more flexibility. + +## [0.55.0] + +### Added +- Added support for OpenAI's JSON mode for `aiextract` (just provide kwarg `json_mode=true`). Reference [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs). +- Added support for OpenRouter's API (you must set ENV `OPENROUTER_API_KEY`) to provide access to more models like Cohere Command R+ and OpenAI's o1 series. Reference [OpenRouter](https://openrouter.ai/). +- Added new OpenRouter-hosted models to the model registry (prefixed with `or`): `oro1` (OpenAI's o1-preview), `oro1m` (OpenAI's o1-mini), `orcop` (Cohere's command-r-plus), `orco` (Cohere's command-r). The `or` prefix is to avoid conflicts with existing models and OpenAI's aliases; the goal is then to provide 2 letters for each model and 1 letter for an additional qualifier (eg, "p" for plus, "m" for mini) -> `orcop` (OpenRouter cohere's COmmand-r-Plus). + +### Updated +- Updated FAQ with instructions on how to access new OpenAI o1 models via OpenRouter. +- Updated FAQ with instructions on how to add custom APIs (with an example `examples/adding_custom_API.jl`). + +### Fixed +- Fixed a bug in `aiclassify` for the OpenAI GPT4o models that have a different tokenizer. Unknown model IDs will throw an error. + +## [0.54.0] + +### Updated +- Improved the performance of BM25/Keywords-based indices for >10M documents. Introduced new kwargs `min_term_freq` and `max_terms` in `RT.get_keywords` to reduce the size of the vocabulary. See `?RT.get_keywords` for more information. + +## [0.53.0] + +### Added +- Added beta headers to enable long outputs (up to 8K tokens) with Anthropic's Sonnet 3.5 (see `?anthropic_extra_headers`). +- Added a kwarg to prefill (`aiprefill`) AI responses with Anthropic's models to improve steerability (see `?aigenerate`). + +### Updated +- Documentation of `aigenerate` to make it clear that if `streamcallback` is provided WITH `flavor` set, there is no automatic configuration and the user must provide the correct `api_kwargs`. +- Grouped Anthropic's beta headers as a comma-separated string as per the latest API specification. + + +## [0.52.0] + +### Added +- Added a new EXPERIMENTAL `streamcallback` kwarg for `aigenerate` with the OpenAI and Anthropic prompt schemas to enable custom streaming implementations. The simplest usage is with `streamcallback=stdout`, which will print each text chunk into the console. The system is modular, enabling custom callbacks and allowing you to inspect received chunks. See `?StreamCallback` for more information. It does not support tools yet. + +## [0.51.0] + +### Added +- Added more flexible structured extraction with `aiextract` -> now you can simply provide the field names and, optionally, their types without specifying the struct itself (in `aiextract`, provide the fields like `return_type = [:field_name => field_type]`). +- Added a way to attach field-level descriptions to the generated JSON schemas to improve structured extraction (see `?update_schema_descriptions!` to see the syntax), which was not possible with struct-only extraction. + +## [0.50.0] + +### Breaking Changes +- `AIMessage` and `DataMessage` now have a new field `extras` to hold any API-specific metadata in a simple dictionary.
Change is backward-compatible (defaults to `nothing`). + +### Added +- Added EXPERIMENTAL support for Anthropic's new prompt cache (see ?`aigenerate` and look for `cache` kwarg). Note that COST estimate will be wrong (ignores the caching discount for now). +- Added a new `extras` field to `AIMessage` and `DataMessage` to hold any API-specific metadata in a simple dictionary (eg, used for reporting on the cache hit/miss). + +## [0.49.0] + +### Added +- Added new OpenAI's model "chatgpt-4o-latest" to the model registry with alias "chatgpt". This model represents the latest version of ChatGPT-4o tuned specifically for ChatGPT. + +## [0.48.0] + +### Added +- Implements the new OpenAI structured output mode for `aiextract` (just provide kwarg `strict=true`). Reference [blog post](https://openai.com/index/introducing-structured-outputs-in-the-api/). + +## [0.47.0] + +### Added +- Added a new specialized method for `hcat(::DocumentTermMatrix, ::DocumentTermMatrix)` to allow for combining large DocumentTermMatrices (eg, 1M x 100K). + +### Updated +- Increased the compat bound for HTTP.jl to 1.10.8 to fix a bug with Julia 1.11. + +### Fixed +- Fixed a bug in `vcat_labeled_matrices` where extremely large DocumentTermMatrix could run out of memory. +- Fixed a bug in `score_to_unit_scale` where empty score vectors would error (now returns the empty array back). + +## [0.46.0] + +### Added +- Added a new model `gpt-4o-2024-08-06` to the model registry (alias `gpt4ol` with `l` for latest). It's the latest version of GPT4o, which is faster and cheaper than the previous version. + +## [0.45.0] + +### Breaking Change +- `getindex(::MultiIndex, ::MultiCandidateChunks)` now returns sorted chunks by default (`sorted=true`) to guarantee that potential `context` (=`chunks`) is sorted by descending similarity score across different sub-indices. + +### Updated +- Updated a `hcat` implementation in `RAGTools.get_embeddings` to reduce memory allocations for large embedding batches (c. 3x fewer allocations, see `hcat_truncate`). +- Updated `length_longest_common_subsequence` signature to work only for pairs of `AbstractString` to not fail silently when wrong arguments are provided. + +### Fixed +- Changed the default behavior of `getindex(::MultiIndex, ::MultiCandidateChunks)` to always return sorted chunks for consistency with other similar functions and correct `retrieve` behavior. This was accidentally changed in v0.40 and is now reverted to the original behavior. + +## [0.44.0] + +### Added +- Added Mistral Large 2 and Mistral-Nemo to the model registry (alias `mistral-nemo`). + +### Fixed +- Fixed a bug where `wrap_string` would not correctly split very long Unicode words. + +## [0.43.0] + +### Added +- Added Llama 3.1 registry records for Fireworks.ai (alias `fllama3`, `fllama370`, `fllama3405` and `fls`, `flm`, `fll` for small/medium/large similar to the other providers). + +## [0.42.0] + +### Added +- Registered new Meta Llama 3.1 models hosted on GroqCloud and Together.ai (eg, Groq-hosted `gllama370` has been updated to point to the latest available model and 405b model now has alias `gllama3405`). Because that's quite clunky, I've added abbreviations based on sizes small/medium/large (that is 8b, 70b, 405b) under `gls/glm/gll` for Llama 3.1 hosted on GroqCloud (similarly, we now have `tls/tlm/tll` for Llama3.1 on Together.ai). +- Generic model aliases for Groq and Together.ai for Llama3 models have been updated to point to the latest available models (Llama 3.1). 
+- Added Gemma2 9b model hosted on GroqCloud to the model registry (alias `ggemma9`). + +### Updated +- Minor optimizations to `SubDocumentTermMatrix` to reduce memory allocations and improve performance. + +## [0.41.0] + +### Added +- Introduced a "view" of `DocumentTermMatrix` (=`SubDocumentTermMatrix`) to allow views of Keyword-based indices (`ChunkKeywordsIndex`). It's not a pure view (TF matrix is materialized to prevent performance degradation). + +### Fixed +- Fixed a bug in `find_closest(finder::BM25Similarity, ...)` where the view of `DocumentTermMatrix` (ie, `view(DocumentTermMatrix(...), ...)`) was undefined. +- Fixed a bug where a view of a view of a `ChunkIndex` wouldn't intersect the positions (it was returning only the latest requested positions). + +## [0.40.0] + +### Added +- Introduces `RAGTools.SubChunkIndex` to allow projecting `views` of various indices. Useful for pre-filtering your data (faster and more precise retrieval). See `?RT.SubChunkIndex` for more information and how to use it. + +### Updated +- `CandidateChunks` and `MultiCandidateChunks` intersection methods updated to be an order of magnitude faster (useful for large sets like tag filters). + +### Fixed +- Fixed a bug in `find_closest(finder::BM25Similarity, ...)` where the `minimum_similarity` kwarg was not implemented. + +## [0.39.0] + +### Breaking Changes +- Changed the default model for `ai*` chat functions (`PT.MODEL_CHAT`) from `gpt3t` to `gpt4om` (GPT-4o-mini). See the LLM-Leaderboard results and the release [blog post](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/). + +### Added +- Added the new GPT-4o-mini to the model registry (alias `gpt4om`). It's the smallest and fastest model based on GPT4 that is cheaper than GPT3.5Turbo. + +## [0.38.0] + +### Added +- Added a new tagging filter `RT.AllTagFilter` to `RT.find_tags`, which requires all tags to be present in a chunk. +- Added an option in `RT.get_keywords` to set the minimum length of the keywords. +- Added a new method for `reciprocal_rank_fusion` and a utility for standardizing candidate chunk scores (`score_to_unit_scale`). + +## [0.37.1] + +### Fixed +- Fixed a bug in CohereReranker where it wouldn't correctly handle `CandidateChunks`. + +## [0.37.0] + +### Updated +- Increased the compat bound for FlashRank to 0.4. + +## [0.36.0] + +### Added +- Added a prompt template for RAG query expansion for BM25 (`RAGQueryKeywordExpander`). + +### Fixed +- Fixed a small bug in the truncation step of RankGPT's `permutation_step!` (bad indexing of string characters). +- Fixed a bug where a certain combination of `rank_start` and `rank_end` would not produce the last sliding window. +- Fixed a bug where a partially filled `RAGResult` would fail pretty-printing with `pprint`. + +## [0.35.0] + +### Added +- Added a utility function to RAGTools, `reciprocal_rank_fusion`, as a principled way to merge multiple rankings. See `?RAGTools.Experimental.reciprocal_rank_fusion` for more information. + +## [0.34.0] + +### Added +- `RankGPT` implementation for the RAGTools chunk re-ranking pipeline. See `?RAGTools.Experimental.rank_gpt` for more information and the corresponding reranker type `?RankGPTReranker`. + +## [0.33.2] + +### Fixed +- Added back accidentally dropped DBKS keys. + +## [0.33.1] + +### Fixed +- Fixed loading RAGResult when one of the candidate fields was `nothing`.
+- Utility type checks like `isusermessage`, `issystemmessage`, `isdatamessage`, `isaimessage`, `istracermessage` do not throw errors when given any arbitrary input types (previously they only worked for `AbstractMessage` types). It's a `isa` check, so it should work for all input types. +- Changed preference loading to use typed `global` instead of `const`, to fix issues with API keys not being loaded properly on start. You can now also call `PromptingTools.load_api_keys!()` to re-load the API keys (and ENV variables) manually. + +## [0.33.0] + +### Added +- Added registry record for Anthropic Claude 3.5 Sonnet with ID `claude-3-5-sonnet-20240620` (read the [blog post](https://www.anthropic.com/news/claude-3-5-sonnet)). Aliases "claude" and "claudes" have been linked to this latest Sonnet model. + +## [0.32.0] + +### Updated +- Changed behavior of `RAGTools.rerank(::FlashRanker,...)` to always dedupe input chunks (to reduce compute requirements). + +### Fixed +- Fixed a bug in verbose INFO log in `RAGTools.rerank(::FlashRanker,...)`. + +## [0.31.1] + +### Updated +- Improved the implementation of `RAGTools.unpack_bits` to be faster with fewer allocations. + +## [0.31.0] + +### Breaking Changes +- The return type of `RAGTools.find_tags(::NoTagger,...)` is now `::Nothing` instead of `CandidateChunks`/`MultiCandidateChunks` with all documents. +- `Base.getindex(::MultiIndex, ::MultiCandidateChunks)` now always returns sorted chunks for consistency with the behavior of other `getindex` methods on `*Chunks`. + +### Updated +- Cosine similarity search now uses `partialsortperm` for better performance on large datasets. +- Skip unnecessary work when the tagging functionality in the RAG pipeline is disabled (`find_tags` with `NoTagger` always returns `nothing` which improves the compiled code). +- Changed the default behavior of `getindex(::MultiIndex, ::MultiCandidateChunks)` to always return sorted chunks for consistency with other similar functions. Note that you should always use re-rankering anyway (see `FlashRank.jl`). + +## [0.30.0] + +### Fixed +- Fixed a bug on Julia 1.11 beta by adding REPL stdlib as a direct dependency. +- Fixed too restrictive argument types for `RAGTools.build_tags` method. + +## [0.29.0] + +### Added +- Added package extension for FlashRank.jl to support local ranking models. See `?RT.FlashRanker` for more information or `examples/RAG_with_FlashRank.jl` for a quick example. + + +## [0.28.0] + +### Added +- Added Mistral coding-oriented [Codestral](https://mistral.ai/news/codestral/) to the model registry, aliased as `codestral` or `mistralc`. It's very fast, performant and much cheaper than similar models. + +## [0.27.0] + +### Added +- Added a keyword-based search similarity to RAGTools to serve both for baseline evaluation and for advanced performance (by having a hybrid index with both embeddings and BM25). See `?RT.KeywordsIndexer` and `?RT.BM25Similarity` for more information, to build use `build_index(KeywordsIndexer(), texts)` or convert an existing embeddings-based index `ChunkKeywordsIndex(index)`. + +### Updated +- For naming consistency, `ChunkIndex` in RAGTools has been renamed to `ChunkEmbeddingsIndex` (with an alias `ChunkIndex` for backwards compatibility). There are now two main index types: `ChunkEmbeddingsIndex` and `ChunkKeywordsIndex` (=BM25), which can be combined into a `MultiIndex` to serve as a hybrid index. 
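Note (added for illustration): a minimal sketch of the keyword-based (BM25) index described in the 0.27.0 entry above. The `texts` vector and `emb_index` are placeholders, and the calls mirror the signatures quoted in that entry:

```julia
using PromptingTools
using PromptingTools.Experimental.RAGTools
const RT = PromptingTools.Experimental.RAGTools

# Placeholder documents; a keyword (BM25) index needs no embedding API calls
texts = ["PromptingTools.jl simplifies calls to LLM APIs.",
    "RAGTools provides retrieval utilities for RAG pipelines."]

# Build a keyword-based (BM25) index, as described in the entry above
keyword_index = RT.build_index(RT.KeywordsIndexer(), texts)

# Or convert an existing embeddings-based index (here a hypothetical `emb_index`):
# keyword_index = RT.ChunkKeywordsIndex(emb_index)
```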
+ +## [0.26.2] + +### Fixed +- Fixed a rare bug where prompt templates created on macOS would come with metadata that breaks the prompt loader. From now on, it ignores any dotfiles (hidden files starting with "."). + +## [0.26.1] + +### Fixed +- Fixed a bug where the utility `length_longest_common_subsequence` was not working with complex Unicode characters. + +## [0.26.0] + +### BREAKING CHANGES +- Added new field `meta` to `TracerMessage` and `TracerMessageLike` to hold metadata in a simple dictionary. Change is backward-compatible. +- Changed behavior of `aitemplates(name::Symbol)` to look for an exact match on the template name, not just a partial match. This is a breaking change for the `aitemplates` function only. Motivation is that having multiple matches could have introduced subtle bugs when looking up valid placeholders for a template. + +### Added +- Improved support for `aiclassify` with OpenAI models (you can now encode up to 40 choices). +- Added a template for routing questions `:QuestionRouter` (to be used with `aiclassify`). +- Improved tracing by `TracerSchema` to automatically capture crucial metadata such as any LLM API kwargs (`api_kwargs`), the use of prompt templates and their version. Information is captured in the `meta(tracer)` dictionary. See `?TracerSchema` for more information. +- New tracing schema `SaverSchema` allows you to automatically serialize all conversations. It can be composed with other tracing schemas, eg, `TracerSchema`, to automatically capture the necessary metadata and serialize it. See `?SaverSchema` for more information. +- Updated options for Binary embeddings (refer to release v0.18 for motivation). Adds utility functions `pack_bits` and `unpack_bits` to move between binary and UInt64 representations of embeddings. RAGTools adds the corresponding `BitPackedBatchEmbedder` and `BitPackedCosineSimilarity` for fast retrieval on these Bool<->UInt64 embeddings (credit to [**domluna's tinyRAG**](https://github.com/domluna/tinyRAG)). + +### Fixed +- Fixed a bug where `aiclassify` would not work when returning the full conversation for choices with extra descriptions. + +## [0.25.0] + +### Added +- Added a model registry record for the latest OpenAI GPT4 Omni model (`gpt4o`) - it's as good as GPT4, faster and cheaper. + +## [0.24.0] + +### Added +- Added support for [DeepSeek models](https://platform.deepseek.com/docs) via the `dschat` and `dscode` aliases. You can set the `DEEPSEEK_API_KEY` environment variable to your DeepSeek API key. + + +## [0.23.0] + +### Added +- Added new prompt templates for "Expert" tasks like `LinuxBashExpertAsk`, `JavascriptExpertTask`, etc. +- Added new prompt templates for self-critiquing agents like `ChiefEditorTranscriptCritic`, `JuliaExpertTranscriptCritic`, etc. + +### Updated +- Extended `aicodefixer_feedback` methods to work with `AICode` and `AIGenerate`. + +## [0.22.0] + +### Added +- Added support for [Groq](https://console.groq.com/), the fastest LLM provider out there. It's free for now, so you can try it out - you just need to set your `GROQ_API_KEY`. We've added Llama3 8b (alias "gllama3"), 70b (alias "gllama370") and Mixtral 8x7b (alias "gmixtral"). For the shortcut junkies, we also added the shorthands Llama3 8b = "gl3" (first two letters and the last digit) and Llama3 70b = "gl70" (first two letters and the last two digits).
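Note (added for illustration): a minimal sketch of calling the Groq-hosted aliases from the 0.22.0 entry above; it assumes `GROQ_API_KEY` is set in your environment and the question is a placeholder:

```julia
using PromptingTools

# Llama3 8b hosted on GroqCloud via the "gl3" shorthand alias
msg = ai"What is the capital of France?"gl3

# Equivalent explicit call using the full alias:
# msg = aigenerate("What is the capital of France?"; model = "gllama3")
```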
+ +## [0.21.0] + +### Added +- New models added to the model registry: Llama3 8b on Ollama (alias "llama3" for convenience) and on Together.ai (alias "tllama3", "t" stands for Together.ai), also adding the Llama3 70b on Together.ai (alias "tllama370") and the powerful Mixtral-8x22b on Together.ai (alias "tmixtral22"). + +### Fixed +- Fixed a bug where pretty-printing `RAGResult` would forget a newline between the sources and context sections. + +## [0.20.1] + +### Fixed +- Fixed `truncate_dimension` to ignore when 0 is provided (previously it would throw an error). + +## [0.20.0] + +### Added +- Added a few new open-weights models hosted by Fireworks.ai to the registry (DBRX Instruct, Mixtral 8x22b Instruct, Qwen 72b). If you're curious about how well they work, try them! +- Added basic support for observability downstream. Created custom callback infrastructure with `initialize_tracer` and `finalize_tracer`; the dedicated types are `TracerMessage` and `TracerMessageLike`. See `?TracerMessage` for more information and the corresponding `aigenerate` docstring. +- Added `MultiCandidateChunks` which can hold candidates for retrieval across many indices (it's a flat structure to be similar to `CandidateChunks` and easy to reason about). +- JSON serialization support extended for `RAGResult`, `CandidateChunks`, and `MultiCandidateChunks` to increase observability of RAG systems. +- Added a new search refiner `TavilySearchRefiner` - it will search the web via the Tavily API to try to improve on the RAG answer (see `?refine!`). +- Introduced a few small utilities for manipulation of nested kwargs (necessary for RAG pipelines); check out `getpropertynested`, `setpropertynested`, `merge_kwargs_nested`. + +### Updated +- [BREAKING] change to `CandidateChunks` where it's no longer allowed to be nested (ie, `cc.positions` being a list of several `CandidateChunks`). This is a breaking change for the `RAGTools` module only. We have introduced a new `MultiCandidateChunks` type that can refer to `CandidateChunks` across many indices. +- Changed the default model for `RAGTools.CohereReranker` to "cohere-rerank-english-v3.0". + +### Fixed +- `wrap_string` utility now correctly splits only on spaces. Previously it would split on newlines, which would remove the natural formatting of prompts/messages when displayed via `pprint`. + +## [0.19.0] + +### Added +- [BREAKING CHANGE] The default GPT-4 Turbo model alias ("gpt4t") now points to the official GPT-4 Turbo endpoint ("gpt-4-turbo"). +- Adds references to `mistral-tiny` (7bn parameter model from MistralAI) to the model registry for completeness. +- Adds the new GPT-4 Turbo model (`"gpt-4-turbo-2024-04-09"`), but you can simply use alias `"gpt4t"` to access it. + +## [0.18.0] + +### Added +- Adds support for binary embeddings in RAGTools (dispatch type for `find_closest` is `finder=BinaryCosineSimilarity()`), but you can also just convert the embeddings to binary yourself (always choose `Matrix{Bool}` for speed, not `BitMatrix`) and use them without any changes (very little performance difference at the moment). +- Added Ollama embedding models to the model registry ("nomic-embed-text", "mxbai-embed-large") and versioned MistralAI models. +- Added a template for data extraction with Chain-of-thought reasoning: `:ExtractDataCoTXML`. +- Added data extraction support for Anthropic models (Claude 3) with `aiextract`. Try it with Claude-3 Haiku (`model="claudeh"`) and the Chain-of-thought template (`:ExtractDataCoTXML`).
See `?aiextract` for more information and check Anthropic's [recommended practices](https://docs.anthropic.com/claude/docs/tool-use). + +## [0.17.1] + +### Fixed +- Fixed a bug in `print_html` where the custom kwargs were not being passed to the `HTML` constructor. + +## [0.17.0] + +### Added +- Added support for `aigenerate` with Anthropic API. Preset model aliases are `claudeo`, `claudes`, and `claudeh`, for Claude 3 Opus, Sonnet, and Haiku, respectively. +- Enabled the GoogleGenAI extension since `GoogleGenAI.jl` is now officially registered. You can use `aigenerate` by setting the model to `gemini` and providing the `GOOGLE_API_KEY` environment variable. +- Added utilities to make preparation of finetuning datasets easier. You can now export your conversations in JSONL format with ShareGPT formatting (eg, for Axolotl). See `?PT.save_conversations` for more information. +- Added `print_html` utility for RAGTools module to print HTML-styled RAG answer annotations for web applications (eg, Genie.jl). See `?PromptingTools.Experimental.RAGTools.print_html` for more information and examples. + +## [0.16.1] + +### Fixed +- Fixed a bug where `set_node_style!` was not accepting any Stylers except for the vanilla `Styler`. + +## [0.16.0] + +### Added +- Added pretty-printing via `PT.pprint` that does NOT depend on Markdown and splits text to adjust to the width of the output terminal. + It is useful in notebooks to add new lines. +- Added support annotations for RAGTools (see `?RAGTools.Experimental.annotate_support` for more information) to highlight which parts of the generated answer come from the provided context versus the model's knowledge base. It's useful for transparency and debugging, especially in the context of AI-generated content. You can experience it if you run the output of `airag` through pretty printing (`PT.pprint`). +- Added utility `distance_longest_common_subsequence` to find the normalized distance between two strings (or a vector of strings). Always returns a number between 0-1, where 0 means the strings are identical and 1 means they are completely different. It's useful for comparing the similarity between the context provided to the model and the generated answer. +- Added a new documentation section "Extra Tools" to highlight key functionality in various modules, eg, the available text utilities, which were previously hard to discover. +- Extended documentation FAQ with tips on tackling rate limits and other common issues with OpenAI API. +- Extended documentation with all available prompt templates. See section "Prompt Templates" in the documentation. +- Added new RAG interface underneath `airag` in `PromptingTools.RAGTools.Experimental`. Each step now has a dedicated function and a type that can be customized to achieve arbitrary logic (via defining methods for your own types). `airag` is split into two main steps: `retrieve` and `generate!`. You can use them separately or together. See `?airag` for more information. + +### Updated +- Renamed `split_by_length` text splitter to `recursive_splitter` to make it easier to discover and understand its purpose. `split_by_length` is still available as a deprecated alias. + +### Fixed +- Fixed a bug where `LOCAL_SERVER` default value was not getting picked up. Now, it defaults to `http://localhost:10897/v1` if not set in the preferences, which is the address of the OpenAI-compatible server started by Llama.jl. +- Fixed a bug in multi-line code annotation, which was assigning too optimistic scores to the generated code. 
Now the score of the chunk is the length-weighted score of the "top" source chunk divided by the full length of score tokens (much more robust and demanding). + +## [0.15.0] + +### Added +- Added experimental support for image generation with OpenAI DALL-E models, eg, `msg = aiimage("A white cat on a car")`. See `?aiimage` for more details. + +## [0.14.0] + +### Added +- Added a new documentation section "How it works" to explain the inner workings of the package. It's a work in progress, but it should give you a good idea of what's happening under the hood. +- Improved template loading, so if you load your custom templates once with `load_templates!("my/template/folder)`, it will remember your folder for all future re-loads. +- Added convenience function `create_template` to create templates on the fly without having to deal with `PT.UserMessage` etc. If you specify the keyword argument `load_as = "MyName"`, the template will be immediately loaded to the template registry. See `?create_template` for more information and examples. + +### Fixed + ## [0.13.0] ### Added -- Added initial support for Google Gemini models for `aigenerate` (requires environment variable `GOOGLE_API_KEY` and package [GoogleGenAI.jl](https://github.com/tylerjthomas9/GoogleGenAI.jl) to be loaded). It must be imported explicitly because it's not registered yet. +- Added initial support for Google Gemini models for `aigenerate` (requires environment variable `GOOGLE_API_KEY` and package [GoogleGenAI.jl](https://github.com/tylerjthomas9/GoogleGenAI.jl) to be loaded). It must be added explicitly as it is not yet registered. - Added a utility to compare any two string sequences (and other iterators)`length_longest_common_subsequence`. It can be used to fuzzy match strings (eg, detecting context/sources in an AI-generated response or fuzzy matching AI response to some preset categories). See the docstring for more information `?length_longest_common_subsequence`. - Rewrite of `aiclassify` to classify into an arbitrary list of categories (including with descriptions). It's a quick and easy option for "routing" and similar use cases, as it exploits the logit bias trick and outputs only 1 token. Currently, only `OpenAISchema` is supported. See `?aiclassify` for more information. - Initial support for multiple completions in one request for OpenAI-compatible API servers. Set via API kwarg `n=5` and it will request 5 completions in one request, saving the network communication time and paying the prompt tokens only once. It's useful for majority voting, diversity, or challenging agentic workflows. @@ -156,4 +635,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `aiextract` function to extract structured information from text quickly and easily. See `?aiextract` for more information. - Add `aiscan` for image scanning (ie, image comprehension tasks). You can transcribe screenshots or reason over images as if they were text. Images can be provided either as a local file (`image_path`) or as an url (`image_url`). See `?aiscan` for more information. - Add support for [Ollama.ai](https://ollama.ai/)'s local models. Only `aigenerate` and `aiembed` functions are supported at the moment. -- Add a few non-coding templates, eg, verbatim analysis (see `aitemplates("survey")`) and meeting summarization (see `aitemplates("meeting")`), and supporting utilities (non-exported): `split_by_length` and `replace_words` to make it easy to work with smaller open source models. 
\ No newline at end of file +- Add a few non-coding templates, eg, verbatim analysis (see `aitemplates("survey")`) and meeting summarization (see `aitemplates("meeting")`), and supporting utilities (non-exported): `split_by_length` and `replace_words` to make it easy to work with smaller open source models. diff --git a/Project.toml b/Project.toml index 95dc8ce2d..c2d232ab7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,11 +1,12 @@ name = "PromptingTools" uuid = "670122d1-24a8-4d70-bfce-740807c42192" authors = ["J S @svilupp and contributors"] -version = "0.13.0" +version = "0.62.1" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" @@ -13,23 +14,35 @@ OpenAI = "e9f21f70-7185-4079-aca2-91159181367c" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Preferences = "21216c6a-2e73-6563-6e65-726566657250" +REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StreamCallbacks = "c1b9e933-98a0-46fc-8ea7-3b58b195fb0a" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [weakdeps] +FlashRank = "22cc3f58-1757-4700-bb45-2032706e5a8d" +GoogleGenAI = "903d41d1-eaca-47dd-943b-fee3930375ab" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" +Snowball = "fb8f903a-0164-4e73-9ffe-431110250c3b" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [extensions] +FlashRankPromptingToolsExt = ["FlashRank"] +GoogleGenAIPromptingToolsExt = ["GoogleGenAI"] MarkdownPromptingToolsExt = ["Markdown"] -RAGToolsExperimentalExt = ["SparseArrays", "LinearAlgebra"] +RAGToolsExperimentalExt = ["SparseArrays", "LinearAlgebra", "Unicode"] +SnowballPromptingToolsExt = ["Snowball"] [compat] AbstractTrees = "0.4" Aqua = "0.7" Base64 = "<0.0.1, 1" -HTTP = "1" +Dates = "<0.0.1, 1" +FlashRank = "0.4" +GoogleGenAI = "0.3" +HTTP = "1.10.8" JSON3 = "1" LinearAlgebra = "<0.0.1, 1" Logging = "<0.0.1, 1" @@ -38,17 +51,20 @@ OpenAI = "0.9" Pkg = "<0.0.1, 1" PrecompileTools = "1" Preferences = "1" +REPL = "<0.0.1, 1" Random = "<0.0.1, 1" SparseArrays = "<0.0.1, 1" Statistics = "<0.0.1, 1" +StreamCallbacks = "0.4, 0.5" Test = "<0.0.1, 1" -julia = "1.9,1.10" +julia = "1.9, 1.10" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [targets] -test = ["Aqua", "SparseArrays", "Statistics", "LinearAlgebra", "Markdown"] +test = ["Aqua", "FlashRank", "SparseArrays", "Statistics", "LinearAlgebra", "Markdown", "Snowball", "Unicode"] diff --git a/README.md b/README.md index 0b0577cd0..c0bf3208b 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://svilupp.github.io/PromptingTools.jl/stable/) [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://svilupp.github.io/PromptingTools.jl/dev/) +[![Slack](https://img.shields.io/badge/slack-%23generative--ai-brightgreen.svg?logo=slack)](https://julialang.slack.com/archives/C06G90C697X) [![Build 
Status](https://github.com/svilupp/PromptingTools.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/svilupp/PromptingTools.jl/actions/workflows/CI.yml?query=branch%3Amain) [![Coverage](https://codecov.io/gh/svilupp/PromptingTools.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/svilupp/PromptingTools.jl) [![Aqua](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl) @@ -11,17 +12,22 @@ Streamline your life using PromptingTools.jl, the Julia package that simplifies PromptingTools.jl is not meant for building large-scale systems. It's meant to be the go-to tool in your global environment that will save you 20 minutes every day! +> [!TIP] +> Jump to the **[docs](https://svilupp.github.io/PromptingTools.jl/dev/)** + ## Quick Start with `@ai_str` and Easy Templating Getting started with PromptingTools.jl is as easy as importing the package and using the `@ai_str` macro for your questions. -Note: You will need to set your OpenAI API key as an environment variable before using PromptingTools.jl (see the [Creating OpenAI API Key](#creating-openai-api-key) section below). +Note: You will need to set your OpenAI API key as an environment variable before using PromptingTools.jl (see the [Creating OpenAI API Key](#creating-openai-api-key) section below). + +Following the introduction of [Prepaid Billing](https://help.openai.com/en/articles/8264644-what-is-prepaid-billing), you'll need to buy some credits to get started ($5 minimum). For a quick start, simply set it via `ENV["OPENAI_API_KEY"] = "your-api-key"` Install PromptingTools: ```julia using Pkg -Pkg.add("PromptingTools.jl") +Pkg.add("PromptingTools") ``` And we're ready to go! @@ -76,6 +82,7 @@ For more practical examples, see the `examples/` folder and the [Advanced Exampl - [Table of Contents](#table-of-contents) - [Why PromptingTools.jl](#why-promptingtoolsjl) - [Advanced Examples](#advanced-examples) + - [`ai*` Functions Overview](#ai-functions-overview) - [Seamless Integration Into Your Workflow](#seamless-integration-into-your-workflow) - [Advanced Prompts / Conversations](#advanced-prompts--conversations) - [Templated Prompts](#templated-prompts) @@ -89,10 +96,12 @@ For more practical examples, see the `examples/` folder and the [Advanced Exampl - [Experimental Agent Workflows / Output Validation with `airetry!`](#experimental-agent-workflows--output-validation-with-airetry) - [Using Ollama models](#using-ollama-models) - [Using MistralAI API and other OpenAI-compatible APIs](#using-mistralai-api-and-other-openai-compatible-apis) + - [Using Anthropic Models](#using-anthropic-models) - [More Examples](#more-examples) - [Package Interface](#package-interface) - [Frequently Asked Questions](#frequently-asked-questions) - [Why OpenAI](#why-openai) + - [What if I cannot access OpenAI?](#what-if-i-cannot-access-openai) - [Data Privacy and OpenAI](#data-privacy-and-openai) - [Creating OpenAI API Key](#creating-openai-api-key) - [Setting OpenAI Spending Limits](#setting-openai-spending-limits) @@ -102,6 +111,7 @@ For more practical examples, see the `examples/` folder and the [Advanced Exampl - [Instant Access from Anywhere](#instant-access-from-anywhere) - [Open Source Alternatives](#open-source-alternatives) - [Setup Guide for Ollama](#setup-guide-for-ollama) + - [How would I fine-tune a model?](#how-would-i-fine-tune-a-model) - [Roadmap](#roadmap) ## Why PromptingTools.jl @@ -118,12 +128,59 @@ Some features: ## Advanced Examples -TODOs: +### `ai*` 
Functions Overview + +Noteworthy functions: `aigenerate`, `aiembed`, `aiclassify`, `aiextract`, `aiscan`, `aiimage`, `aitemplates` + +All `ai*` functions have the same basic structure: + +`ai*(<optional schema>, <prompt or conversation>; <optional keyword arguments>)`, + +but they differ in purpose: + +- `aigenerate` is the general-purpose function to generate any text response with LLMs, ie, it returns `AIMessage` with field `:content` containing the generated text (eg, `ans.content isa AbstractString`) +- `aiembed` is designed to extract embeddings from the AI model's response, ie, it returns `DataMessage` with field `:content` containing the embeddings (eg, `ans.content isa AbstractArray`) +- `aiextract` is designed to extract structured data from the AI model's response and return it as a Julia struct (eg, if we provide `return_type=Food`, we get `ans.content isa Food`). You need to define the return type first and then provide it as a keyword argument. +- `aitools` is designed for agentic workflows with a mix of tool calls and user inputs. It can work with simple functions and execute them. +- `aiclassify` is designed to classify the input text into (or simply respond within) a set of discrete `choices` provided by the user. It can be very useful as an LLM Judge or a router for RAG systems, as it uses the "logit bias trick" and generates exactly 1 token. It returns `AIMessage` with field `:content`, but the `:content` can be only one of the provided `choices` (eg, `ans.content in choices`) +- `aiscan` is for working with images and vision-enabled models (as an input), but it returns `AIMessage` with field `:content` containing the generated text (eg, `ans.content isa AbstractString`) similar to `aigenerate`. +- `aiimage` is for generating images (eg, with OpenAI DALL-E 3). It returns a `DataMessage`, where the field `:content` might contain either the URL to download the image from or the Base64-encoded image, depending on the user-provided kwarg `api_kwargs.response_format`. +- `aitemplates` is a helper function to discover available templates and see their details (eg, `aitemplates("some keyword")` or `aitemplates(:AssistantAsk)`) + +If you're using a known `model`, you do NOT need to provide a `schema` (the first argument). + +Optional keyword arguments in `ai*` tend to be: + +- `model::String` - Which model you want to use +- `verbose::Bool` - Whether you want to see INFO logs around AI costs +- `return_all::Bool` - Whether you want the WHOLE conversation or just the AI answer (ie, whether you want to include your inputs/prompt in the output) +- `api_kwargs::NamedTuple` - Specific parameters for the model, eg, `temperature=0.0` to be NOT creative (and have more similar output in each run) +- `http_kwargs::NamedTuple` - Parameters for the HTTP.jl package, eg, `readtimeout = 120` to time out in 120 seconds if no response was received. + +**Experimental: AgentTools** + +In addition to the above list of `ai*` functions, you can also use the **"lazy" counterparts** of these functions from the experimental AgentTools module. +```julia +using PromptingTools.Experimental.AgentTools +``` + +For example, `AIGenerate()` will create a lazy instance of `aigenerate`. It is an instance of `AICall` with `aigenerate` as its AI function. +It uses exactly the same arguments and keyword arguments as `aigenerate` (see `?aigenerate` for details). + +"lazy" refers to the fact that it does NOT generate any output when instantiated (only when `run!` is called). + +Or said differently, the `AICall` struct and all its flavors (`AIGenerate`, ...)
are designed to facilitate a deferred execution model (lazy evaluation) for AI functions that interact with a Large Language Model (LLM). It stores the necessary information for an AI call and executes the underlying AI function only when supplied with a `UserMessage` or when the `run!` method is applied. This allows us to remember user inputs and trigger the LLM call repeatedly if needed, which enables automatic fixing (see `?airetry!`). + +If you would like a powerful auto-fixing workflow, you can use `airetry!`, which leverages Monte-Carlo tree search to pick the optimal trajectory of conversation based on your requirements. + +**Experimental: RAGTools** -Lastly, we provide a set of tools to build RAG applications (Retrieve, Answer, Generate). -- [ ] Add more practical examples (with DataFrames!) -- [ ] Add an example of how to build a RAG app in 50 lines -Noteworthy functions: `aigenerate`, `aiembed`, `aiclassify`, `aiextract`, `aitemplates` +It can be as simple as two calls: `build_index` and `airag` (Retrieve, Answer, Generate). + +If you then use pretty-printing with `PromptingTools.pprint`, we highlight the generated text vs the text likely sourced from the context, and we score how strongly the generated answer is supported by the context. +In addition, we annotate each generated chunk with a reference to which source document it likely came from (including the confidence score between 0 and 1). ### Seamless Integration Into Your Workflow Google search is great, but it's a context switch. You often have to open a few pages and read through the discussion to find the answer you need. Same with the ChatGPT website. @@ -414,7 +471,7 @@ run!(out) How is it useful? We can use the same "inputs" for repeated calls, eg, when we want to validate or regenerate some outputs. We have a function `airetry` to help us with that. -The signature of `airetry` is `airetry(condition_function, aicall::AICall, feedback_function)`. +The signature of `airetry!` is `airetry!(condition_function, aicall::AICall, feedback_function)`. It evaluates the condition `condition_function` on the `aicall` object (eg, we evaluate `f_cond(aicall) -> Bool`). If it fails, we call `feedback_function` on the `aicall` object to provide feedback for the AI model (eg, `f_feedback(aicall) -> String`) and repeat the process until it passes or until `max_retries` value is exceeded. We can catch API failures (no feedback needed, so none is provided) @@ -528,6 +585,30 @@ As you can see, it also works for any local models that you might have running o Note: At the moment, we only support `aigenerate` and `aiembed` functions for MistralAI and other OpenAI-compatible APIs. We plan to extend the support in the future. +### Using Anthropic Models + +Make sure the `ANTHROPIC_API_KEY` environment variable is set to your API key. + +```julia +# claudeh is the alias for Claude 3 Haiku +ai"Say hi!"claudeh +``` + +Preset model aliases are `claudeo`, `claudes`, and `claudeh`, for Claude 3 Opus, Sonnet, and Haiku, respectively. + +The corresponding schema is `AnthropicSchema`. + +There are several prompt templates with `XML` in the name, suggesting that they use Anthropic-friendly XML formatting for separating sections. +Find them with `aitemplates("XML")`. + +```julia +# claudeo is the alias for Claude 3 Opus +msg = aigenerate( + :JuliaExpertAskXML, ask = "How to write a function to convert Date to Millisecond?", + model = "claudeo") +``` + + ### More Examples TBU...
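In the meantime, here is a minimal sketch of the two-call RAG flow mentioned in the RAGTools note above. It is only a sketch: the documents and question are placeholders, it assumes an OpenAI API key is configured, and it assumes `build_index` accepts raw text snippets with default settings (see `?build_index` and `?airag` for the authoritative signatures):

```julia
using PromptingTools
using PromptingTools.Experimental.RAGTools

# Placeholder documents to index
docs = ["PromptingTools.jl provides the ai* functions for working with LLMs.",
    "RAGTools is an experimental module for building RAG applications."]

index = build_index(docs)   # chunk + embed the documents

# Retrieve relevant chunks and generate an answer; ask for the full result object
result = airag(index; question = "What is RAGTools?", return_all = true)

# Pretty-print the answer with the support annotations described above
PromptingTools.pprint(result)
```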
@@ -599,6 +680,13 @@ There will be situations not or cannot use it (eg, privacy, cost, etc.). In that Note: To get started with [Ollama.ai](https://ollama.ai/), see the [Setup Guide for Ollama](#setup-guide-for-ollama) section below. +### What if I cannot access OpenAI? + +There are many alternatives: + +- **Other APIs**: MistralAI, Anthropic, Google, Together, Fireworks, Voyager (the latter ones tend to give free credits upon joining!) +- **Locally-hosted models**: Llama.cpp/Llama.jl, Ollama, vLLM (see the examples and the corresponding docs) + ### Data Privacy and OpenAI At the time of writing, OpenAI does NOT use the API calls for training their models. @@ -681,7 +769,7 @@ A better way: - On a Mac, add the configuration line to your terminal's configuration file (eg, `~/.zshrc`). It will get automatically loaded every time you launch the terminal - On Windows, set it as a system variable in "Environment Variables" settings (see the Resources) -We also support Preferences.jl, so you can simply run: `PromptingTools.set_preferences!("OPENAI_API_KEY"="your-api-key")` and it will be persisted across sessions. +We also support Preferences.jl, so you can simply run: `PromptingTools.set_preferences!("OPENAI_API_KEY"=>"your-api-key")` and it will be persisted across sessions. To see the current preferences, run `PromptingTools.get_preferences("OPENAI_API_KEY")`. Be careful NOT TO COMMIT `LocalPreferences.toml` to GitHub, as it would show your API Key to the world! @@ -729,6 +817,16 @@ Show currently available models with `ollama list`. See [Ollama.ai](https://ollama.ai/) for more information. +### How would I fine-tune a model? + +Fine-tuning is a powerful technique to adapt a model to your specific use case (mostly the format/syntax/task). It requires a dataset of examples, which you can now easily generate with PromptingTools.jl! + +1. You can save any conversation (vector of messages) to a file with `PT.save_conversation("filename.json", conversation)`. + +2. Once the finetuning time comes, create a bundle of ShareGPT-formatted conversations (common finetuning format) in a single `.jsonl` file. Use `PT.save_conversations("dataset.jsonl", [conversation1, conversation2, ...])` (notice that plural "conversationS" in the function name). + +For an example of an end-to-end finetuning process, check out our sister project [JuliaLLMLeaderboard Finetuning experiment](https://github.com/svilupp/Julia-LLM-Leaderboard/blob/main/experiments/cheater-7b-finetune/README.md). It shows the process of finetuning for half a dollar with [Jarvislabs.ai](https://jarvislabs.ai/templates/axolotl) and [Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl). 
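A short sketch of the two-step flow above (the prompt and file names are placeholders; `return_all = true` keeps the full message history that gets saved):

```julia
using PromptingTools
const PT = PromptingTools

# 1) Capture a full conversation (vector of messages), not just the final answer
conversation = aigenerate("Explain what a Julia struct is."; return_all = true)
PT.save_conversation("conversation_1.json", conversation)

# 2) Bundle one or more conversations into a ShareGPT-formatted JSONL file for finetuning
PT.save_conversations("dataset.jsonl", [conversation])
```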
+ ## Roadmap This is a list of features that I'd like to see in the future (in no particular order): diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 000000000..0587d7400 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1,4 @@ +build/ +node_modules/ +package-lock.json +Manifest.toml \ No newline at end of file diff --git a/docs/Project.toml b/docs/Project.toml index 8dba66196..0995d35f8 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,6 +1,9 @@ [deps] DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +DocumenterVitepress = "4710194d-e776-4893-9690-8d956a29c365" +FlashRank = "22cc3f58-1757-4700-bb45-2032706e5a8d" +GoogleGenAI = "903d41d1-eaca-47dd-943b-fee3930375ab" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" @@ -8,4 +11,8 @@ Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" PromptingTools = "670122d1-24a8-4d70-bfce-740807c42192" +Snowball = "fb8f903a-0164-4e73-9ffe-431110250c3b" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[compat] +DocumenterVitepress = "0.0.7" diff --git a/docs/generate_prompt_library.jl b/docs/generate_prompt_library.jl new file mode 100644 index 000000000..52c62cafe --- /dev/null +++ b/docs/generate_prompt_library.jl @@ -0,0 +1,90 @@ +# Generates the "Prompt Library" sections of the docs +# +# 1 page for each folder in `templates/`, 1 section for each file in the folder + +## ! Config +input_files = joinpath(@__DIR__, "..", "templates", "general") |> + x -> readdir(x; join = true) +output_dir = joinpath(@__DIR__, "src", "prompts") +mkpath(output_dir); + +## Utilities +"Returns the file name and the section name." +function extract_md_hierarchy(fn) + ## find the depth of nested folders + p = splitpath(fn) + idx = findfirst(==("templates"), p) + if idx == nothing || idx >= length(p) - 1 + nothing, nothing + elseif idx == length(p) - 2 + ## no dual subfolder, duplicate name + p[idx + 1] * ".md", titlecase(p[idx + 1]) + else + ## has dual subfolder + p[idx + 1] * ".md", titlecase(p[idx + 2]) + end +end +function escape_prompt(s) + ## escape HTML tags + ## s = replace( + ## s, "\n" => "\n> ", "<" => "\\<", ">" => "\\>", "{{" => "\\{\\{", "}}" => "\\}\\}") + ## return "> " * s + """`````plaintext\n$(s)\n`````\n""" +end + +## Load the templates +# key: top-level folder, sub-folder, file +loaded_templates = Dict{String, Dict}() +for (dir, _, files) in walkdir(joinpath(@__DIR__, "..", "templates")) + for file in files + fn = joinpath(dir, file) + if endswith(fn, ".json") + dest_file, section = extract_md_hierarchy(fn) + if isnothing(dest_file) + continue + end + dest_file_path = joinpath(output_dir, dest_file) + template, metadata = PT.load_template(fn) + template_name = splitext(basename(file))[1] |> Symbol + # Assumes that there is only ever one UserMessage and SystemMessage (concats them together) + meta = PT.build_template_metadata( + template, template_name, metadata; max_length = 10^6) + ## save to loaded_templates + file_dict = get!(loaded_templates, dest_file_path, Dict()) + section_vect = get!(file_dict, section, []) + push!(section_vect, meta) + end + end +end + +## Write into files +for file_path in keys(loaded_templates) + io = IOBuffer() + println(io, + "The following file is auto-generated from the `templates` folder. 
For any changes, please modify the source files in the `templates` folder.\n") + println(io, + "To use these templates in `aigenerate`, simply provide the template name as a symbol, eg, `aigenerate(:MyTemplate; placeholder1 = value1)`") + println(io) + for (section, templates) in loaded_templates[file_path] + println(io, "## $(section) Templates\n") + for meta in templates + println(io, "### Template: $(meta.name)") + println(io) + println(io, "- Description: $(meta.description)") + println( + io, "- Placeholders: $(join("`" .* string.(meta.variables) .* "`",", "))") + println(io, "- Word count: $(meta.wordcount)") + println(io, "- Source: $(meta.source)") + println(io, "- Version: $(meta.version)") + println(io) + println(io, "**System Prompt:**") + println(io, escape_prompt(meta.system_preview)) + println(io) + println(io, "**User Prompt:**") + println(io, escape_prompt(meta.user_preview)) + println(io) + end + end + ## write to file + write(file_path, String(take!(io))) +end diff --git a/docs/make.jl b/docs/make.jl index e5ca9f168..2aa1caaf4 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,11 +1,18 @@ +using Documenter, DocumenterVitepress using PromptingTools -using Documenter -using SparseArrays, LinearAlgebra, Markdown +const PT = PromptingTools +using SparseArrays, LinearAlgebra, Markdown, Unicode, FlashRank using PromptingTools.Experimental.RAGTools using PromptingTools.Experimental.AgentTools using JSON3, Serialization, DataFramesMeta using Statistics: mean +## Generate the prompt documentation +include("generate_prompt_library.jl") + +# Enable debugging for vitepress +ENV["DEBUG"] = "vitepress:*" + DocMeta.setdocmeta!(PromptingTools, :DocTestSetup, :(using PromptingTools); @@ -15,39 +22,25 @@ makedocs(; modules = [ PromptingTools, PromptingTools.Experimental.RAGTools, - PromptingTools.Experimental.AgentTools, + PromptingTools.Experimental.AgentTools ], authors = "J S <49557684+svilupp@users.noreply.github.com> and contributors", repo = "https://github.com/svilupp/PromptingTools.jl/blob/{commit}{path}#{line}", sitename = "PromptingTools.jl", - format = Documenter.HTML(; - prettyurls = get(ENV, "CI", "false") == "true", - repolink = "https://github.com/svilupp/PromptingTools.jl", - canonical = "https://svilupp.github.io/PromptingTools.jl", - edit_link = "main", - size_threshold = nothing, - assets = String[]), - pages = [ - "Home" => "index.md", - "Getting Started" => "getting_started.md", - "Examples" => [ - "Various examples" => "examples/readme_examples.md", - "Using AITemplates" => "examples/working_with_aitemplates.md", - "Local models with Ollama.ai" => "examples/working_with_ollama.md", - "Google AIStudio" => "examples/working_with_google_ai_studio.md", - "Custom APIs (Mistral, Llama.cpp)" => "examples/working_with_custom_apis.md", - "Building RAG Application" => "examples/building_RAG.md", - ], - "F.A.Q." 
=> "frequently_asked_questions.md", - "Reference" => [ - "PromptingTools.jl" => "reference.md", - "Experimental Modules" => "reference_experimental.md", - "RAGTools" => "reference_ragtools.md", - "AgentTools" => "reference_agenttools.md", - "APITools" => "reference_apitools.md", - ], - ]) + format = DocumenterVitepress.MarkdownVitepress( + repo = "https://github.com/svilupp/PromptingTools.jl", + devbranch = "main", + devurl = "dev", + deploy_url = "svilupp.github.io/PromptingTools.jl" + ), + draft = false, + source = "src", + build = "build", + ) deploydocs(; repo = "github.com/svilupp/PromptingTools.jl", + target = "build", + push_preview = true, + branch = "gh-pages", devbranch = "main") diff --git a/docs/package.json b/docs/package.json new file mode 100644 index 000000000..275146bd3 --- /dev/null +++ b/docs/package.json @@ -0,0 +1,18 @@ +{ + "devDependencies": { + "markdown-it": "^14.0.0", + "markdown-it-mathjax3": "^4.3.2", + "vitepress": "^1.3.3", + "vitepress-plugin-tabs": "^0.5.0", + "vitest": "^1.3.0" + }, + "scripts": { + "docs:dev": "vitepress dev build/.documenter", + "docs:build": "vitepress build build/.documenter", + "docs:preview": "vitepress preview build/.documenter" + }, + "dependencies": { + "@shikijs/transformers": "^1.1.7", + "markdown-it-footnote": "^4.0.0" + } +} diff --git a/docs/src/.vitepress/config.mts b/docs/src/.vitepress/config.mts new file mode 100644 index 000000000..a0a72c7b7 --- /dev/null +++ b/docs/src/.vitepress/config.mts @@ -0,0 +1,128 @@ +import { defineConfig } from 'vitepress' +import { tabsMarkdownPlugin } from 'vitepress-plugin-tabs' +import mathjax3 from "markdown-it-mathjax3"; +import footnote from "markdown-it-footnote"; + +// https://vitepress.dev/reference/site-config +export default defineConfig({ + base: 'REPLACE_ME_DOCUMENTER_VITEPRESS',// TODO: replace this in makedocs! + title: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + description: "Streamline Your Interactions with GenAI Models. Discover the power of GenerativeAI and build mini workflows to save you 20 minutes every day.", + lastUpdated: true, + cleanUrls: true, + outDir: 'REPLACE_ME_DOCUMENTER_VITEPRESS', // This is required for MarkdownVitepress to work correctly... 
+ head: [['link', { rel: 'icon', href: 'REPLACE_ME_DOCUMENTER_VITEPRESS_FAVICON' }]], + ignoreDeadLinks: true, + + markdown: { + math: true, + config(md) { + md.use(tabsMarkdownPlugin), + md.use(mathjax3), + md.use(footnote) + }, + theme: { + light: "github-light", + dark: "github-dark"} + }, + themeConfig: { + outline: 'deep', + logo: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + search: { + provider: 'local', + options: { + detailedView: true + } + }, + nav: [ + { text: 'Home', link: '/index' }, + { text: 'Manual', + items:[ + { text: 'Getting Started', link: '/getting_started' }, + { text: 'How It Works', link: '/how_it_works' }, + { text: 'Coverage of Model Providers', link: '/coverage_of_model_providers' }, + { text: 'Examples', items: [ + { text: 'Various examples', link: '/examples/readme_examples' }, + { text: 'Using AITemplates', link: '/examples/working_with_aitemplates' }, + { text: 'Local models with Ollama.ai', link: '/examples/working_with_ollama' }, + { text: 'Google AIStudio', link: '/examples/working_with_google_ai_studio' }, + { text: 'Custom APIs (Mistral, Llama.cpp)', link: '/examples/working_with_custom_apis' }, + { text: 'Building RAG Application', link: '/examples/building_RAG' }] + }, + { text: 'Extra Tools', items: [ + { text: 'Text Utilities', link: '/extra_tools/text_utilities_intro' }, + { text: 'AgentTools', link: '/extra_tools/agent_tools_intro' }, + { text: 'RAGTools', link: '/extra_tools/rag_tools_intro' }, + { text: 'APITools', link: '/extra_tools/api_tools_intro' }] + }, + ], + }, + { text: 'F.A.Q.', link: '/frequently_asked_questions' }, + { text: 'Prompt Templates', items: [ + { text: 'General', link: '/prompts/general' }, + { text: 'Persona-Task', link: '/prompts/persona-task' }, + { text: 'Visual', link: '/prompts/visual' }, + { text: 'Classification', link: '/prompts/classification' }, + { text: 'Extraction', link: '/prompts/extraction' }, + { text: 'Agents', link: '/prompts/agents' }, + { text: 'RAG', link: '/prompts/RAG' }] + }, + { text: 'Reference', items: [ + { text: 'PromptingTools.jl', link: '/reference' }, + { text: 'Experimental Modules', link: '/reference_experimental' }, + { text: 'RAGTools', link: '/reference_ragtools' }, + { text: 'AgentTools', link: '/reference_agenttools' }, + { text: 'APITools', link: '/reference_apitools' }] + } + ], + sidebar: [ + { text: 'Home', link: '/index' }, + { text: 'Manual', + items:[ + { text: 'Getting Started', link: '/getting_started' }, + { text: 'How It Works', link: '/how_it_works' }, + { text: 'Coverage of Model Providers', link: '/coverage_of_model_providers' }, + { text: 'Examples', collapsed: true, items: [ + { text: 'Various examples', link: '/examples/readme_examples' }, + { text: 'Using AITemplates', link: '/examples/working_with_aitemplates' }, + { text: 'Local models with Ollama.ai', link: '/examples/working_with_ollama' }, + { text: 'Google AIStudio', link: '/examples/working_with_google_ai_studio' }, + { text: 'Custom APIs (Mistral, Llama.cpp)', link: '/examples/working_with_custom_apis' }, + { text: 'Building RAG Application', link: '/examples/building_RAG' }] + }, + { text: 'Extra Tools', collapsed: true, items: [ + { text: 'Text Utilities', link: '/extra_tools/text_utilities_intro' }, + { text: 'AgentTools', link: '/extra_tools/agent_tools_intro' }, + { text: 'RAGTools', link: '/extra_tools/rag_tools_intro' }, + { text: 'APITools', link: '/extra_tools/api_tools_intro' }] + }, + ], + }, + { text: 'F.A.Q.', link: '/frequently_asked_questions' }, + { text: 'Prompt Templates', collapsed: true, items: 
[ + { text: 'General', link: '/prompts/general' }, + { text: 'Persona-Task', link: '/prompts/persona-task' }, + { text: 'Visual', link: '/prompts/visual' }, + { text: 'Classification', link: '/prompts/classification' }, + { text: 'Extraction', link: '/prompts/extraction' }, + { text: 'Agents', link: '/prompts/agents' }, + { text: 'RAG', link: '/prompts/RAG' }] + }, + { text: 'Reference', collapsed: true, items: [ + { text: 'PromptingTools.jl', link: '/reference' }, + { text: 'Experimental Modules', link: '/reference_experimental' }, + { text: 'RAGTools', link: '/reference_ragtools' }, + { text: 'AgentTools', link: '/reference_agenttools' }, + { text: 'APITools', link: '/reference_apitools' }] + } + ], + editLink: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + socialLinks: [ + { icon: 'github', link: 'REPLACE_ME_DOCUMENTER_VITEPRESS' } + ], + footer: { + message: 'Made with Documenter.jl & VitePress & Icons by Icons8
', + copyright: `© Copyright ${new Date().getUTCFullYear()}.` + } + } +}) \ No newline at end of file diff --git a/docs/src/.vitepress/theme/index.ts b/docs/src/.vitepress/theme/index.ts new file mode 100644 index 000000000..463b5d858 --- /dev/null +++ b/docs/src/.vitepress/theme/index.ts @@ -0,0 +1,19 @@ +// .vitepress/theme/index.ts +import { h } from 'vue' +import type { Theme } from 'vitepress' +import DefaultTheme from 'vitepress/theme' + +import { enhanceAppWithTabs } from 'vitepress-plugin-tabs/client' +import './style.css' + +export default { + extends: DefaultTheme, + Layout() { + return h(DefaultTheme.Layout, null, { + // https://vitepress.dev/guide/extending-default-theme#layout-slots + }) + }, + enhanceApp({ app, router, siteData }) { + enhanceAppWithTabs(app) + } +} satisfies Theme \ No newline at end of file diff --git a/docs/src/.vitepress/theme/style.css b/docs/src/.vitepress/theme/style.css new file mode 100644 index 000000000..1772543c1 --- /dev/null +++ b/docs/src/.vitepress/theme/style.css @@ -0,0 +1,170 @@ +@import url(https://fonts.googleapis.com/css?family=Space+Mono:regular,italic,700,700italic); +@import url(https://fonts.googleapis.com/css?family=Space+Grotesk:regular,italic,700,700italic); + +/* Customize default theme styling by overriding CSS variables: +https://github.com/vuejs/vitepress/blob/main/src/client/theme-default/styles/vars.css + */ + + /* Layouts */ + +/* + :root { + --vp-layout-max-width: 1440px; +} */ + +.VPHero .clip { + white-space: pre; + max-width: 500px; +} + +/* Fonts */ + +:root { + /* Typography */ + --vp-font-family-base: "Barlow", "Inter var experimental", "Inter var", + -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, Ubuntu, + Cantarell, "Fira Sans", "Droid Sans", "Helvetica Neue", sans-serif; + + /* Code Snippet font */ + --vp-font-family-mono: "Fira Code", Menlo, Monaco, Consolas, "Courier New", + monospace; +} + +/* Colors */ + +:root { + --julia-blue: #4063D8; + --julia-purple: #9558B2; + --julia-red: #CB3C33; + --julia-green: #389826; + + --vp-c-brand: rgb(209, 110, 142); + --vp-c-brand-light: rgb(136,109,156); + --vp-c-brand-lighter: #9499ff; + --vp-c-brand-lightest: #bcc0ff; + --vp-c-brand-dark: #535bf2; + --vp-c-brand-darker: #454ce1; + --vp-c-brand-dimm: #212425; +} + + /* Component: Button */ + +:root { + --vp-button-brand-border: var(--vp-c-brand-light); + --vp-button-brand-text: var(--vp-c-white); + --vp-button-brand-bg: var(--vp-c-brand); + --vp-button-brand-hover-border: var(--vp-c-brand-light); + --vp-button-brand-hover-text: var(--vp-c-white); + --vp-button-brand-hover-bg: var(--vp-c-brand-light); + --vp-button-brand-active-border: var(--vp-c-brand-light); + --vp-button-brand-active-text: var(--vp-c-white); + --vp-button-brand-active-bg: var(--vp-button-brand-bg); +} + +/* Component: Home */ + +:root { + --vp-home-hero-name-color: transparent; + --vp-home-hero-name-background: -webkit-linear-gradient( + 120deg, + rgb(136,109,156) 30%, + rgb(209, 110, 142) + ); + + --vp-home-hero-image-background-image: linear-gradient( + -120deg, + rgba(136,109,156, 0.75) 10%, + rgba(47, 47, 47,0.1) 50%, + rgba(209, 110, 142,0.75) + ); + --vp-home-hero-image-filter: blur(40px); +} + +@media (min-width: 640px) { + :root { + --vp-home-hero-image-filter: blur(56px); + } +} + +@media (min-width: 960px) { + :root { + --vp-home-hero-image-filter: blur(72px); + } +} + +/* Component: Custom Block */ + +:root.dark { + --vp-custom-block-tip-border: var(--vp-c-brand); + --vp-custom-block-tip-text: var(--vp-c-brand-lightest); + 
--vp-custom-block-tip-bg: var(--vp-c-brand-dimm); + + /* // Tweak the color palette for blacks and dark grays */ + --vp-c-black: hsl(220 20% 9%); + --vp-c-black-pure: hsl(220, 24%, 4%); + --vp-c-black-soft: hsl(220 16% 13%); + --vp-c-black-mute: hsl(220 14% 17%); + --vp-c-gray: hsl(220 8% 56%); + --vp-c-gray-dark-1: hsl(220 10% 39%); + --vp-c-gray-dark-2: hsl(220 12% 28%); + --vp-c-gray-dark-3: hsl(220 12% 23%); + --vp-c-gray-dark-4: hsl(220 14% 17%); + --vp-c-gray-dark-5: hsl(220 16% 13%); + + /* // Backgrounds */ + /* --vp-c-bg: hsl(240, 2%, 11%); */ + --vp-custom-block-info-bg: hsl(220 14% 17%); + /* --vp-c-gutter: hsl(220 20% 9%); + + --vp-c-bg-alt: hsl(220 20% 9%); + --vp-c-bg-soft: hsl(220 14% 17%); + --vp-c-bg-mute: hsl(220 12% 23%); + */ +} + + /* Component: Algolia */ + +.DocSearch { + --docsearch-primary-color: var(--vp-c-brand) !important; +} + +/* Component: MathJax */ + +mjx-container > svg { + display: block; + margin: auto; +} + +mjx-container { + padding: 0.5rem 0; +} + +mjx-container { + display: inline-block; + margin: auto 2px -2px; +} + +mjx-container > svg { + margin: auto; + display: inline-block; +} + +/** + * Colors links + * -------------------------------------------------------------------------- */ + + :root { + --vp-c-brand-1: rgb(136,109,156); + --vp-c-brand-2: rgb(136,109,156); + --vp-c-brand-3: rgb(136,109,156); + --vp-c-sponsor: #ca2971; + --vitest-c-sponsor-hover: #c13071; +} + +.dark { + --vp-c-brand-1: rgb(209, 110, 142); + --vp-c-brand-2: rgb(209, 110, 142); + --vp-c-brand-3: rgb(209, 110, 142); + --vp-c-sponsor: rgb(209, 110, 142); + --vitest-c-sponsor-hover: #e51370; +} \ No newline at end of file diff --git a/docs/src/assets/favicon.png b/docs/src/assets/favicon.png new file mode 100644 index 000000000..b8da18308 Binary files /dev/null and b/docs/src/assets/favicon.png differ diff --git a/docs/src/assets/logo.png b/docs/src/assets/logo.png new file mode 100644 index 000000000..806ccef09 Binary files /dev/null and b/docs/src/assets/logo.png differ diff --git a/docs/src/coverage_of_model_providers.md b/docs/src/coverage_of_model_providers.md new file mode 100644 index 000000000..e2d229f56 --- /dev/null +++ b/docs/src/coverage_of_model_providers.md @@ -0,0 +1,35 @@ +```@meta +CurrentModule = PromptingTools +``` + +# Coverage of Model Providers + +PromptingTools.jl routes AI calls through the use of subtypes of AbstractPromptSchema, which determine how data is formatted and where it is sent. (For example, OpenAI models have the corresponding subtype AbstractOpenAISchema, having the corresponding schemas - OpenAISchema, CustomOpenAISchema, etc.) This ensures that the data is correctly formatted for the specific AI model provider. + +Below is an overview of the model providers supported by PromptingTools.jl, along with the corresponding schema information. 
+ +| Abstract Schema | Schema | Model Provider | aigenerate | aiembed | aiextract | aiscan | aiimage | aiclassify | +|-------------------------|---------------------------|----------------------------------------|------------|---------|-----------|--------|---------|------------| +| AbstractOpenAISchema | OpenAISchema | OpenAI | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| AbstractOpenAISchema | CustomOpenAISchema* | Any OpenAI-compatible API (eg, vLLM)* | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| AbstractOpenAISchema | LocalServerOpenAISchema** | Any OpenAI-compatible Local server** | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| AbstractOpenAISchema | MistralOpenAISchema | Mistral AI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| AbstractOpenAISchema | DatabricksOpenAISchema | Databricks | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| AbstractOpenAISchema | FireworksOpenAISchema | Fireworks AI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| AbstractOpenAISchema | TogetherOpenAISchema | Together AI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| AbstractOpenAISchema | GroqOpenAISchema | Groq | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | +| AbstractOllamaSchema | OllamaSchema | Ollama (endpoint `api/chat`) | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | +| AbstractManagedSchema | AbstractOllamaManagedSchema | Ollama (endpoint `api/generate`) | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| AbstractAnthropicSchema | AnthropicSchema | Anthropic | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | +| AbstractGoogleSchema | GoogleSchema | Google Gemini | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | + + +\* Catch-all implementation - Requires providing a `url` with `api_kwargs` and corresponding API key. + +\*\* This schema is a flavor of CustomOpenAISchema with a `url` key preset by global preference key `LOCAL_SERVER`. It is specifically designed for seamless integration with Llama.jl and utilizes an ENV variable for the URL, making integration easier in certain workflows, such as when nested calls are involved and passing `api_kwargs` is more challenging. + +**Note 1:** `aitools` has identical support as `aiextract` for all providers, as it has the API requirements. + +**Note 2:** The `aiscan` and `aiimage` functions rely on specific endpoints being implemented by the provider. Ensure that the provider you choose supports these functionalities. + +For more detailed explanations of the functions and schema information, refer to [How It Works](https://siml.earth/PromptingTools.jl/dev/how_it_works#ai*-Functions-Overview). \ No newline at end of file diff --git a/docs/src/diagrams/rag_diagram_detailed.png b/docs/src/diagrams/rag_diagram_detailed.png new file mode 100644 index 000000000..f50309c3c Binary files /dev/null and b/docs/src/diagrams/rag_diagram_detailed.png differ diff --git a/docs/src/diagrams/rag_diagram_highlevel.png b/docs/src/diagrams/rag_diagram_highlevel.png new file mode 100644 index 000000000..bb002bf75 Binary files /dev/null and b/docs/src/diagrams/rag_diagram_highlevel.png differ diff --git a/docs/src/examples/building_RAG.md b/docs/src/examples/building_RAG.md index 9108269de..9c56aa2b2 100644 --- a/docs/src/examples/building_RAG.md +++ b/docs/src/examples/building_RAG.md @@ -9,9 +9,11 @@ Let's build a Retrieval-Augmented Generation (RAG) chatbot, tailored to navigate If you're not familiar with "RAG", start with this [article](https://towardsdatascience.com/add-your-own-data-to-an-llm-using-retrieval-augmented-generation-rag-b1958bf56a5a). +Note: You must first import `LinearAlgebra`, `SparseArrays`, and `Unicode` to use this example! 
+ ````julia -using LinearAlgebra, SparseArrays +using LinearAlgebra, SparseArrays, Unicode using PromptingTools using PromptingTools.Experimental.RAGTools ## Note: RAGTools module is still experimental and will change in the future. Ideally, they will be cleaned up and moved to a dedicated package @@ -57,7 +59,7 @@ What does it do? - [OPTIONAL] extracts any potential tags/filters from the question and applies them to filter down the potential candidates (use `extract_metadata=true` in `build_index`, you can also provide some filters explicitly via `tag_filter`) - [OPTIONAL] re-ranks the candidate chunks (define and provide your own `rerank_strategy`, eg Cohere ReRank API) - build a context from the closest chunks (use `chunks_window_margin` to tweak if we include preceding and succeeding chunks as well, see `?build_context` for more details) -- generate an answer from the closest chunks (use `return_context=true` to see under the hood and debug your application) +- generate an answer from the closest chunks (use `return_all=true` to see under the hood and debug your application) You should save the index for later to avoid re-embedding / re-extracting the document chunks! @@ -124,7 +126,7 @@ Let's evaluate this QA item with a "judge model" (often GPT-4 is used as a judge ````julia # Note: that we used the same question, but generated a different context and answer via `airag` -msg, ctx = airag(index; evals[1].question, return_context = true); +ctx = airag(index; evals[1].question, return_all = true); # ctx is a RAGContext object that keeps all intermediate states of the RAG pipeline for easy evaluation judged = aiextract(:RAGJudgeAnswerFromContext; ctx.context, @@ -173,17 +175,16 @@ Let's run each question & answer through our eval loop in async (we do it only f ````julia results = asyncmap(evals[1:10]) do qa_item # Generate an answer -- often you want the model_judge to be the highest quality possible, eg, "GPT-4 Turbo" (alias "gpt4t) - msg, ctx = airag(index; qa_item.question, return_context = true, - top_k = 3, verbose = false, model_judge = "gpt4t") + ctx = airag(index; qa_item.question, return_all = true, verbose = false) # Evaluate the response # Note: you can log key parameters for easier analysis later - run_qa_evals(qa_item, ctx; parameters_dict = Dict(:top_k => 3), verbose = false) + run_qa_evals(qa_item, ctx; parameters_dict = Dict(:top_k => 3), verbose = false, model_judge = "gpt4t") end ## Note that the "failed" evals can show as "nothing" (failed as in there was some API error or parsing error), so make sure to handle them. results = filter(x->!isnothing(x.answer_score), results); ```` -Note: You could also use the vectorized version `results = run_qa_evals(evals)` to evaluate all items at once. +Note: You could also use the vectorized version `results = run_qa_evals(index, evals)` to evaluate all items at once. 
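For orientation, here is a minimal sketch of that vectorized form (defaults only; see `?run_qa_evals` for the available keyword arguments — the summary step assumes the `answer_score` field shown above):

```julia
using Statistics: mean

# Sketch only: evaluate all QA items in one call, then summarize the judge scores
results = run_qa_evals(index, evals)
valid = filter(x -> !isnothing(x.answer_score), results)
mean(x -> x.answer_score, valid)
```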
````julia diff --git a/docs/src/examples/readme_examples.md b/docs/src/examples/readme_examples.md index 08925e101..c82224537 100644 --- a/docs/src/examples/readme_examples.md +++ b/docs/src/examples/readme_examples.md @@ -1,6 +1,56 @@ # Various Examples -Noteworthy functions: `aigenerate`, `aiembed`, `aiclassify`, `aiextract`, `aitemplates` +## `ai*` Functions Overview + +Noteworthy functions: `aigenerate`, `aiembed`, `aiclassify`, `aiextract`, `aiscan`, `aiimage`, `aitemplates` + +All `ai*` functions have the same basic structure: + +`ai*(,; )`, + +but they differ in purpose: + +- `aigenerate` is the general-purpose function to generate any text response with LLMs, ie, it returns `AIMessage` with field `:content` containing the generated text (eg, `ans.content isa AbstractString`) +- `aiembed` is designed to extract embeddings from the AI model's response, ie, it returns `DataMessage` with field `:content` containing the embeddings (eg, `ans.content isa AbstractArray`) +- `aiextract` is designed to extract structured data from the AI model's response and return them as a Julia struct (eg, if we provide `return_type=Food`, we get `ans.content isa Food`). You need to define the return type first and then provide it as a keyword argument. +- `aiclassify` is designed to classify the input text into (or simply respond within) a set of discrete `choices` provided by the user. It can be very useful as an LLM Judge or a router for RAG systems, as it uses the "logit bias trick" and generates exactly 1 token. It returns `AIMessage` with field `:content`, but the `:content` can be only one of the provided `choices` (eg, `ans.content in choices`) +- `aiscan` is for working with images and vision-enabled models (as an input), but it returns `AIMessage` with field `:content` containing the generated text (eg, `ans.content isa AbstractString`) similar to `aigenerate`. +- `aiimage` is for generating images (eg, with OpenAI DALL-E 3). It returns a `DataMessage`, where the field `:content` might contain either the URL to download the image from or the Base64-encoded image depending on the user-provided kwarg `api_kwargs.response_format`. +- `aitemplates` is a helper function to discover available templates and see their details (eg, `aitemplates("some keyword")` or `aitemplates(:AssistantAsk)`) + +If you're using a known `model`, you do NOT need to provide a `schema` (the first argument). + +Optional keyword arguments in `ai*` tend to be: + +- `model::String` - Which model you want to use +- `verbose::Bool` - Whether you went to see INFO logs around AI costs +- `return_all::Bool` - Whether you want the WHOLE conversation or just the AI answer (ie, whether you want to include your inputs/prompt in the output) +- `api_kwargs::NamedTuple` - Specific parameters for the model, eg, `temperature=0.0` to be NOT creative (and have more similar output in each run) +- `http_kwargs::NamedTuple` - Parameters for the HTTP.jl package, eg, `readtimeout = 120` to time out in 120 seconds if no response was received. + +**Experimental: AgentTools** + +In addition to the above list of `ai*` functions, you can also use the **"lazy" counterparts** of these functions from the experimental AgentTools module. +```julia +using PromptingTools.Experimental.AgentTools +``` + +For example, `AIGenerate()` will create a lazy instance of `aigenerate`. It is an instance of `AICall` with `aigenerate` as its ai function. +It uses exactly the same arguments and keyword arguments as `aigenerate` (see `?aigenerate` for details). 
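For example, a minimal sketch of the lazy flow (the model alias is illustrative):

```julia
using PromptingTools.Experimental.AgentTools

out = AIGenerate("Say hi!"; model = "gpt4t")  # nothing is sent to the LLM yet
run!(out)                                     # the request is executed only now
last_output(out)                              # text of the last AI response
```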
+ +"lazy" refers to the fact that it does NOT generate any output when instantiated (only when `run!` is called). + +Or said differently, the `AICall` struct and all its flavors (`AIGenerate`, ...) are designed to facilitate a deferred execution model (lazy evaluation) for AI functions that interact with a Language Learning Model (LLM). It stores the necessary information for an AI call and executes the underlying AI function only when supplied with a `UserMessage` or when the `run!` method is applied. This allows us to remember user inputs and trigger the LLM call repeatedly if needed, which enables automatic fixing (see `?airetry!`). + +**Experimental: RAGTools** + +Lastly, we provide a set of tools to build RAG applications (Retrieve, Answer, Generate). + +It can be as simple as two calls: `build_index` and `airag` (Retrieve, Answer, Generate). + +If you then use pretty-printing with `PromptingTools.pprint`, we highlight the generated text vs text likely sourced from the context and we score how strongly is the generated answer supported by the context. +In addition, we annotate each generated chunk with a reference to which source document it likely came from (including the confidence score between 0 and 1). + ## Seamless Integration Into Your Workflow Google search is great, but it's a context switch. You often have to open a few pages and read through the discussion to find the answer you need. Same with the ChatGPT website. diff --git a/docs/src/examples/working_with_google_ai_studio.md b/docs/src/examples/working_with_google_ai_studio.md index d6b07e597..9ac2804d9 100644 --- a/docs/src/examples/working_with_google_ai_studio.md +++ b/docs/src/examples/working_with_google_ai_studio.md @@ -6,10 +6,10 @@ Get an API key from [here](https://ai.google.dev/). If you see a documentation p Save the API key in your environment as `GOOGLE_API_KEY`. -We'll need `GoogleGenAI.jl` package: +We'll need `GoogleGenAI` package: ````julia -using Pkg; Pkg.add(url="https://github.com/tylerjthomas9/GoogleGenAI.jl/") +using Pkg; Pkg.add("GoogleGenAI") ```` You can now use the Gemini-1.0-Pro model like any other model in PromptingTools. We **only support `aigenerate`** at the moment. @@ -38,7 +38,7 @@ AIMessage("Hi there! As a helpful AI assistant, I'm here to help you with any qu You could achieve the same with a string macro (notice the "gemini" at the end to specify which model to use): ````julia -@ai"Say hi!"gemini +ai"Say hi!"gemini ```` ### Advanced Prompts diff --git a/docs/src/extra_tools/agent_tools_intro.md b/docs/src/extra_tools/agent_tools_intro.md new file mode 100644 index 000000000..c6f77e61f --- /dev/null +++ b/docs/src/extra_tools/agent_tools_intro.md @@ -0,0 +1,100 @@ +```@meta +CurrentModule = PromptingTools.Experimental.AgentTools +``` + +# Agent Tools Introduction + +`AgentTools` is an experimental module that provides a set of utilities for building advanced agentic workflows, code-generating and self-fixing agents. + +Import the module as follows: + +```julia +using PromptingTools.Experimental.AgentTools +# to access unexported functionality +const AT = PromptingTools.Experimental.AgentTools +``` + +## Highlights + +The main functions to be aware of are: +- `AIGenerate` - Lazy counterpart of `aigenerate()`. All `ai*` functions have a corresponding `AI*::AICall` struct that allows for deferred execution (triggered by `run!` method). +- `last_output`, `last_message` - Simple utilities to access the last output and message of the AI calls like `AIGenerate`. 
+- `airetry!` - A utility to automatically retry the AI call with the same inputs if the AI model fails to generate a valid output. It allows retrying many times and providing feedback to the AI model about the failure to increase its robustness. `AIGenerate` and other AI calls have a field `config::RetryConfig` where you can globally adjust the retrying behavior. +- `print_samples` - `airetry!` implements a Monte Carlo Tree Search under the hood when trying to find the best way to fix the AI model's failure. `print_samples` is a utility to print the "samples" generated by the MCTS to better understand the attempts made by the AI model to fix the failure. +- `AICode` extensions like `aicodefixer_feedback` and `error_feedback` - `AICode` is a wrapper that extracts any Julia code provided in the `AIMessage` (response from the AI model) and executes it (including catch any errors). `aicodefixer_feedback` and `error_feedback` are utilities that automatically review an outcome of `AICode` evaluation and generate the corresponding feedback for the AI model. + + +The main contribution of this module is providing the "lazy" counterparts to the `ai...` functions, which allow us to build a workflow, which can be re-executed many times with the same inputs. + +For example, `AIGenerate()` will create a lazy instance of `aigenerate`, which is an instance of `AICall` with `aigenerate` as its ai-calling function. It uses exactly the same arguments and keyword arguments as `aigenerate` (see `?aigenerate` for details). The notion of "lazy" refers to the fact that it does NOT generate any output when instantiated (only when `run!` is called). + +Or said differently, the `AICall` struct and all its flavors (`AIGenerate`, ...) are designed to facilitate a deferred execution model (lazy evaluation) for AI functions that interact with a Language Learning Model (LLM). It stores the necessary information for an AI call and executes the underlying AI function only when supplied with a `UserMessage` or when the `run!` method is applied. This allows us to remember user inputs and trigger the LLM call repeatedly if needed, which enables automatic fixing (see `?airetry!`). + +## Examples + +### Automatic Fixing of AI Calls + +We need to switch from `aigenerate` to `AIGenerate` to get the lazy version of the function. +```julia +output = AIGenerate("Say hi!"; model="gpt4t") |> run! +``` + +How is it useful? We can use the same "inputs" for repeated calls, eg, when we want to validate +or regenerate some outputs. We have a function `airetry!` to help us with that. + +The signature of `airetry` is `airetry(condition_function, aicall::AICall, feedback_function)`. + +It evaluates the condition `condition_function` on the `aicall` object (eg, we evaluate `f_cond(aicall) -> Bool`). If it fails, we call `feedback_function` on the `aicall` object to provide feedback for the AI model (eg, `f_feedback(aicall) -> String`) and repeat the process until it passes or until `max_retries` value is exceeded. 
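To make the call shape concrete, here is a small self-contained sketch (the condition and feedback string are illustrative):

```julia
# Ask for a number and enforce that the reply parses as an integer
out = AIGenerate("Pick a number between 1 and 10. Answer with digits only.") |> run!
airetry!(out, "Answer with digits only, nothing else.") do aicall
    !isnothing(tryparse(Int, strip(last_output(aicall))))
end
out.success  # true if the condition eventually passed
```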
+ +We can **catch API failures** (no feedback needed, so none is provided) +```julia +# API failure because of a non-existent model +# RetryConfig allows us to change the "retry" behaviour of any lazy call +output = AIGenerate("say hi!"; config = RetryConfig(; catch_errors = true), + model = "NOTEXIST") +run!(output) # fails + +# we ask to wait 2s between retries and retry 2 times (can be set in `config` in aicall as well) +airetry!(isvalid, output; retry_delay = 2, max_retries = 2) +``` + +Or we can use it for **output validation** (eg, its format, its content, etc.) and feedback generation. + +Let's play a color guessing game (I'm thinking "yellow"). We'll implement two formatting checks with `airetry!`: + +```julia +# Notice that we ask for two samples (`n_samples=2`) at each attempt (to improve our chances). +# Both guesses are scored at each time step, and the best one is chosen for the next step. +# And with OpenAI, we can set `api_kwargs = (;n=2)` to get both samples simultaneously (cheaper and faster)! +out = AIGenerate( + "Guess what color I'm thinking. It could be: blue, red, black, white, yellow. Answer with 1 word only"; + verbose = false, + config = RetryConfig(; n_samples = 2), api_kwargs = (; n = 2)) +run!(out) + +## Check that the output is 1 word only, third argument is the feedback that will be provided if the condition fails +## Notice: functions operate on `aicall` as the only argument. We can use utilities like `last_output` and `last_message` to access the last message and output in the conversation. +airetry!(x -> length(split(last_output(x), r" |\\.")) == 1, out, + "You must answer with 1 word only.") + +# Note: you could also use the do-syntax, eg, +airetry!(out, "You must answer with 1 word only.") do aicall + length(split(last_output(aicall), r" |\\.")) == 1 +end +``` + +You can even add the guessing itself as an `airetry!` condition of `last_output(out) == "yellow"` and provide feedback if the guess is wrong. + +## References + +```@docs; canonical=false +AIGenerate +AICall +last_output +last_message +airetry! +print_samples +AICode +aicodefixer_feedback +error_feedback +``` diff --git a/docs/src/extra_tools/api_tools_intro.md b/docs/src/extra_tools/api_tools_intro.md new file mode 100644 index 000000000..66e987dea --- /dev/null +++ b/docs/src/extra_tools/api_tools_intro.md @@ -0,0 +1,25 @@ +```@meta +CurrentModule = PromptingTools.Experimental.APITools +``` + +# APITools Introduction + +`APITools` is an experimental module wrapping helpful APIs for working with and enhancing GenerativeAI models. + +Import the module as follows: + +```julia +using PromptingTools.Experimental.APITools +``` + +## Highlights + +Currently, there is only one function in this module `create_websearch` that leverages [Tavily.com](https://tavily.com/) search and answer engine to provide additional context. + +You need to sign up for an API key at [Tavily.com](https://tavily.com/) and set it as an environment variable `TAVILY_API_KEY` to use this function. 
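A minimal sketch of a call (assuming the search query is passed as the first positional argument and the `TAVILY_API_KEY` environment variable is set):

```julia
using PromptingTools.Experimental.APITools

# Fetch extra context from Tavily to enrich a downstream prompt
result = create_websearch("Latest Julia LTS release and its key features")
```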
+ +## References + +```@docs; canonical=false +create_websearch +``` diff --git a/docs/src/extra_tools/rag_tools_intro.md b/docs/src/extra_tools/rag_tools_intro.md new file mode 100644 index 000000000..123479f54 --- /dev/null +++ b/docs/src/extra_tools/rag_tools_intro.md @@ -0,0 +1,311 @@ +```@meta +CurrentModule = PromptingTools.Experimental.RAGTools +``` + +# RAG Tools Introduction + +`RAGTools` is an experimental module that provides a set of utilities for building Retrieval-Augmented Generation (RAG) applications, ie, applications that generate answers by combining knowledge of the underlying AI model with the information from the user's knowledge base. + +It is designed to be powerful and flexible, allowing you to build RAG applications with minimal effort. Extend any step of the pipeline with your own custom code (see the [RAG Interface](@ref) section), or use the provided defaults to get started quickly. + +Once the API stabilizes (near term), we hope to carve it out into a separate package. + +Import the module as follows: + +```julia +# required dependencies to load the necessary extensions!!! +using LinearAlgebra, SparseArrays, Unicode, Snowball +using PromptingTools.Experimental.RAGTools +# to access unexported functionality +const RT = PromptingTools.Experimental.RAGTools +``` + + +## Highlights + +The main functions to be aware of are: +- `build_index` to build a RAG index from a list of documents (type `ChunkIndex`) +- `airag` to generate answers using the RAG model on top of the `index` built above + - `retrieve` to retrieve relevant chunks from the index for a given question + - `generate!` to generate an answer from the retrieved chunks +- `annotate_support` to highlight which parts of the RAG answer are supported by the documents in the index vs which are generated by the model, it is applied automatically if you use pretty printing with `pprint` (eg, `pprint(result)`) +- `build_qa_evals` to build a set of question-answer pairs for evaluation of the RAG model from your corpus + +The hope is to provide a modular and easily extensible set of tools for building RAG applications in Julia. Feel free to open an issue or ask in the `#generative-ai` channel in the JuliaLang Slack if you have a specific need. 
+ +## Examples + +Let's build an index, we need to provide a starter list of documents: +```julia +sentences = [ + "Find the most comprehensive guide on Julia programming language for beginners published in 2023.", + "Search for the latest advancements in quantum computing using Julia language.", + "How to implement machine learning algorithms in Julia with examples.", + "Looking for performance comparison between Julia, Python, and R for data analysis.", + "Find Julia language tutorials focusing on high-performance scientific computing.", + "Search for the top Julia language packages for data visualization and their documentation.", + "How to set up a Julia development environment on Windows 10.", + "Discover the best practices for parallel computing in Julia.", + "Search for case studies of large-scale data processing using Julia.", + "Find comprehensive resources for mastering metaprogramming in Julia.", + "Looking for articles on the advantages of using Julia for statistical modeling.", + "How to contribute to the Julia open-source community: A step-by-step guide.", + "Find the comparison of numerical accuracy between Julia and MATLAB.", + "Looking for the latest Julia language updates and their impact on AI research.", + "How to efficiently handle big data with Julia: Techniques and libraries.", + "Discover how Julia integrates with other programming languages and tools.", + "Search for Julia-based frameworks for developing web applications.", + "Find tutorials on creating interactive dashboards with Julia.", + "How to use Julia for natural language processing and text analysis.", + "Discover the role of Julia in the future of computational finance and econometrics." +] +``` + +Let's index these "documents": + +```julia +index = build_index(sentences; chunker_kwargs=(; sources=map(i -> "Doc$i", 1:length(sentences)))) +``` + +This would be equivalent to the following `index = build_index(SimpleIndexer(), sentences)` which dispatches to the default implementation of each step via the `SimpleIndexer` struct. We provide these default implementations for the main functions as an optional argument - no need to provide them if you're running the default pipeline. + +Notice that we have provided a `chunker_kwargs` argument to the `build_index` function. These will be kwargs passed to `chunker` step. + +Now let's generate an answer to a question. + +1. Run end-to-end RAG (retrieve + generate!), return `AIMessage` +```julia +question = "What are the best practices for parallel computing in Julia?" + +msg = airag(index; question) # short for airag(RAGConfig(), index; question) +## Output: +## [ Info: Done with RAG. Total cost: \$0.0 +## AIMessage("Some best practices for parallel computing in Julia include us... +``` + +2. Explore what's happening under the hood by changing the return type - `RAGResult` contains all intermediate steps. +```julia +result = airag(index; question, return_all=true) +## RAGResult +## question: String "What are the best practices for parallel computing in Julia?" 
+## rephrased_questions: Array{String}((1,)) +## answer: SubString{String} +## final_answer: SubString{String} +## context: Array{String}((5,)) +## sources: Array{String}((5,)) +## emb_candidates: CandidateChunks{Int64, Float32} +## tag_candidates: CandidateChunks{Int64, Float32} +## filtered_candidates: CandidateChunks{Int64, Float32} +## reranked_candidates: CandidateChunks{Int64, Float32} +## conversations: Dict{Symbol, Vector{<:PromptingTools.AbstractMessage}} +``` + +You can still get the message from the result, see `result.conversations[:final_answer]` (the dictionary keys correspond to the function names of those steps). + + +3. If you need to customize it, break the pipeline into its sub-steps: retrieve and generate - RAGResult serves as the intermediate result. +```julia +# Retrieve which chunks are relevant to the question +result = retrieve(index, question) +# Generate an answer +result = generate!(index, result) +``` + +You can leverage a pretty-printing system with `pprint` where we automatically annotate the support of the answer by the chunks we provided to the model. +It is configurable and you can select only some of its functions (eg, scores, sources). + +```julia +pprint(result) +``` + +You'll see the following in REPL but with COLOR highlighting in the terminal. + +```plaintext +-------------------- +QUESTION(s) +-------------------- +- What are the best practices for parallel computing in Julia? + +-------------------- +ANSWER +-------------------- +Some of the best practices for parallel computing in Julia include:[1,0.7] +- Using [3,0.4]`@threads` for simple parallelism[1,0.34] +- Utilizing `Distributed` module for more complex parallel tasks[1,0.19] +- Avoiding excessive memory allocation +- Considering task granularity for efficient workload distribution + +-------------------- +SOURCES +-------------------- +1. Doc8 +2. Doc15 +3. Doc5 +4. Doc2 +5. Doc9 +``` + +See `?print_html` for the HTML version of the pretty-printing and styling system, eg, when you want to display the results in a web application based on Genie.jl/Stipple.jl. + +**How to read the output** +- Color legend: + - No color: High match with the context, can be trusted more + - Blue: Partial match against some words in the context, investigate + - Magenta (Red): No match with the context, fully generated by the model +- Square brackets: The best matching context ID + Match score of the chunk (eg, `[3,0.4]` means the highest support for the sentence is from the context chunk number 3 with a 40% match). + +Want more? + +See `examples/building_RAG.jl` for one more example. + +## RAG Interface + +### System Overview + +This system is designed for information retrieval and response generation, structured in three main phases: +- Preparation, when you create an instance of `AbstractIndex` +- Retrieval, when you surface the top most relevant chunks/items in the `index` and return `AbstractRAGResult`, which contains the references to the chunks (`AbstractCandidateChunks`) +- Generation, when you generate an answer based on the context built from the retrieved chunks, return either `AIMessage` or `AbstractRAGResult` + +The corresponding functions are `build_index`, `retrieve`, and `generate!`, respectively. +Here is the high-level diagram that shows the signature of the main functions: + +![RAG Diagram High-level](../diagrams/rag_diagram_highlevel.png) + +Notice that the first argument is a custom type for multiple dispatch. 
+In addition, observe the "kwargs" names, that's how the keyword arguments for each function are passed down from the higher-level functions (eg, `build_index(...; chunker_kwargs=(; separators=...)))`). It's the simplest way to customize some step of the pipeline (eg, set a custom model with a `model` kwarg or prompt template with `template` kwarg). + +The system is designed to be hackable and extensible at almost every entry point. +If you want to customize the behavior of any step, you can do so by defining a new type and defining a new method for the step you're changing, eg, +```julia +PromptingTools.Experimental.RAGTools: rerank + +struct MyReranker <: AbstractReranker end +rerank(::MyReranker, index, candidates) = ... +``` +And then you would set the `retrive` step to use your custom `MyReranker` via `reranker` kwarg, eg, `retrieve(....; reranker = MyReranker())` (or customize the main dispatching `AbstractRetriever` struct). + +The overarching principles are: +- Always dispatch / customize the behavior by defining a new `Struct` and the corresponding method for the existing functions (eg, `rerank` function for the re-ranking step). +- Custom types are provided as the first argument (the high-level functions will work without them as we provide some defaults). +- Custom types do NOT have any internal fields or DATA (with the exception of managing sub-steps of the pipeline like `AbstractRetriever` or `RAGConfig`). +- Additional data should be passed around as keyword arguments (eg, `chunker_kwargs` in `build_index` to pass data to the chunking step). The intention was to have some clearly documented default values in the docstrings of each step + to have the various options all in one place. + +### RAG Diagram + +![RAG Diagram Detailed](../diagrams/rag_diagram_detailed.png) + +**The main functions are**: + +Prepare your document index with `build_index`: +- signature: `(indexer::AbstractIndexBuilder, files_or_docs::Vector{<:AbstractString}) -> AbstractChunkIndex` +- flow: `get_chunks` -> `get_embeddings` -> `get_tags` -> `build_tags` +- dispatch types: `AbstractIndexBuilder`, `AbstractChunker`, `AbstractEmbedder`, `AbstractTagger` + +Run E2E RAG with `airag`: +- signature: `(cfg::AbstractRAGConfig, index::AbstractChunkIndex; question::AbstractString)` -> `AIMessage` or `AbstractRAGResult` +- flow: `retrieve` -> `generate!` +- dispatch types: `AbstractRAGConfig`, `AbstractRetriever`, `AbstractGenerator` + +Retrieve relevant chunks with `retrieve`: +- signature: `(retriever::AbstractRetriever, index::AbstractChunkIndex, question::AbstractString) -> AbstractRAGResult` +- flow: `rephrase` -> `get_embeddings` -> `find_closest` -> `get_tags` -> `find_tags` -> `rerank` +- dispatch types: `AbstractRAGConfig`, `AbstractRephraser`, `AbstractEmbedder`, `AbstractSimilarityFinder`, `AbstractTagger`, `AbstractTagFilter`, `AbstractReranker` + +Generate an answer from relevant chunks with `generate!`: +- signature: `(generator::AbstractGenerator, index::AbstractChunkIndex, result::AbstractRAGResult)` -> `AIMessage` or `AbstractRAGResult` +- flow: `build_context!` -> `answer!` -> `refine!` -> `postprocess!` +- dispatch types: `AbstractGenerator`, `AbstractContextBuilder`, `AbstractAnswerer`, `AbstractRefiner`, `AbstractPostprocessor` + +To discover the currently available implementations, use `subtypes` function, eg, `subtypes(AbstractReranker)`. 
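For example, a quick way to explore what can be swapped in at each step (a sketch; the exact subtype list depends on your version):

```julia
using InteractiveUtils: subtypes  # `subtypes` is auto-loaded in the REPL
using PromptingTools.Experimental.RAGTools
const RT = PromptingTools.Experimental.RAGTools

subtypes(RT.AbstractReranker)   # available re-ranking strategies
subtypes(RT.AbstractRephraser)  # available query-rephrasing strategies
```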
+ +#### Passing Keyword Arguments +If you need to pass keyword arguments, use the nested kwargs corresponding to the dispatch type names (`rephrase` step, has `rephraser` dispatch type and `rephraser_kwargs` for its keyword arguments). + +For example: + +```julia +cfg = RAGConfig(; retriever = AdvancedRetriever()) + +# kwargs will be big and nested, let's prepare them upfront +# we specify "custom" model for each component that calls LLM +kwargs = ( + retriever = AdvancedRetriever(), + retriever_kwargs = (; + top_k = 100, + top_n = 5, + # notice that this is effectively: retriever_kwargs/rephraser_kwargs/template + rephraser_kwargs = (; + template = :RAGQueryHyDE, + model = "custom")), + generator_kwargs = (; + # pass kwargs to `answer!` step defined by the `answerer` -> we're setting `answerer_kwargs` + answerer_kwargs = (; + model = "custom"), + # api_kwargs can be shared across all components + api_kwargs = (; + url = "http://localhost:8080"))) + +result = airag(cfg, index, question; kwargs...) +``` + +If you were one level deeper in the pipeline, working with retriever directly, you would pass: + +```julia +retriever_kwargs = (; + top_k = 100, + top_n = 5, + # notice that this is effectively: rephraser_kwargs/template + rephraser_kwargs = (; + template = :RAGQueryHyDE, + model = "custom"), + # api_kwargs can be shared across all components + api_kwargs = (; + url = "http://localhost:8080")) + +result = retrieve(AdvancedRetriever(), index, question; retriever_kwargs...) +``` + +And going even deeper, you would provide the `rephraser_kwargs` directly to the `rephrase` step, eg, +```julia +rephrase(SimpleRephraser(), question; model="custom", template = :RAGQueryHyDE, api_kwargs = (; url = "http://localhost:8080")) +``` + +### Deepdive + +**Preparation Phase:** +- Begins with `build_index`, which creates a user-defined index type from an abstract chunk index using specified dels and function strategies. +- `get_chunks` then divides the indexed data into manageable pieces based on a chunking strategy. +- `get_embeddings` generates embeddings for each chunk using an embedding strategy to facilitate similarity arches. +- Finally, `get_tags` extracts relevant metadata from each chunk, enabling tag-based filtering (hybrid search index). If there are `tags` available, `build_tags` is called to build the corresponding sparse matrix for filtering with tags. + +**Retrieval Phase:** +- The `retrieve` step is intended to find the most relevant chunks in the `index`. +- `rephrase` is called first, if we want to rephrase the query (methods like `HyDE` can improve retrieval quite a bit)! +- `get_embeddings` generates embeddings for the original + rephrased query +- `find_closest` looks up the most relevant candidates (`CandidateChunks`) using a similarity search strategy. +- `get_tags` extracts the potential tags (can be provided as part of the `airag` call, eg, when we want to use only some small part of the indexed chunks) +- `find_tags` filters the candidates to strictly match _at least one_ of the tags (if provided) +- `rerank` is called to rerank the candidates based on the reranking strategy (ie, to improve the ordering of the chunks in context). + +**Generation Phase:** +- The `generate!` step is intended to generate a response based on the retrieved chunks, provided via `AbstractRAGResult` (eg, `RAGResult`). 
+- `build_context!` constructs the context for response generation based on a context strategy and applies the necessary formatting +- `answer!` generates the response based on the context and the query +- `refine!` is called to refine the response (optional, defaults to passthrough) +- `postprocessing!` is available for any final touches to the response or to potentially save or format the results (eg, automatically save to the disk) + +Note that all generation steps are mutating the `RAGResult` object. + +See more details and corresponding functions and types in `src/Experimental/RAGTools/rag_interface.jl`. + +## References + +```@docs; canonical=false +build_index +airag +retrieve +generate! +annotate_support +build_qa_evals +``` diff --git a/docs/src/extra_tools/text_utilities_intro.md b/docs/src/extra_tools/text_utilities_intro.md new file mode 100644 index 000000000..9cd747884 --- /dev/null +++ b/docs/src/extra_tools/text_utilities_intro.md @@ -0,0 +1,43 @@ +```@meta +CurrentModule = PromptingTools +``` + +# Text Utilities + +Working with Generative AI (and in particular with the text modality), requires a lot of text manipulation. PromptingTools.jl provides a set of utilities to make this process easier and more efficient. + + +## Highlights + +The main functions to be aware of are +- `recursive_splitter` to split the text into sentences and words (of a desired length `max_length`) +- `replace_words` to mask some sensitive words in your text before sending it to AI +- `wrap_string` for wrapping the text into a desired length by adding newlines (eg, to fit some large text into your terminal width) +- `length_longest_common_subsequence` to find the length of the longest common subsequence between two strings (eg, to compare the similarity between the context provided and generated text) +- `distance_longest_common_subsequence` a companion utility for `length_longest_common_subsequence` to find the normalized distance between two strings. Always returns a number between 0-1, where 0 means the strings are identical and 1 means they are completely different. + +You can import them simply via: +```julia +using PromptingTools: recursive_splitter, replace_words, wrap_string, length_longest_common_subsequence, distance_longest_common_subsequence +``` + +There are many more (especially in the AgentTools and RAGTools experimental modules)! + +RAGTools module contains the following text utilities: +- `split_into_code_and_sentences` to split a string into code and sentences +- `tokenize` to tokenize a string (eg, a sentence) into words +- `trigrams` to generate trigrams from a string (eg, a word) +- `text_to_trigrams` to generate trigrams from a larger string (ie, effectively wraps the three functions above) +- `STOPWORDS` a set of common stopwords (very brief) + +Feel free to open an issue or ask in the `#generative-ai` channel in the JuliaLang Slack if you have a specific need. + +## References + +```@docs; canonical=false +recursive_splitter +replace_words +wrap_string +length_longest_common_subsequence +distance_longest_common_subsequence +``` diff --git a/docs/src/frequently_asked_questions.md b/docs/src/frequently_asked_questions.md index 5f34172c3..5e6c3ea54 100644 --- a/docs/src/frequently_asked_questions.md +++ b/docs/src/frequently_asked_questions.md @@ -8,6 +8,13 @@ There will be situations not or cannot use it (eg, privacy, cost, etc.). In that Note: To get started with [Ollama.ai](https://ollama.ai/), see the [Setup Guide for Ollama](#setup-guide-for-ollama) section below. 
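As a rough sketch of what a local call can look like once you have pulled a model with Ollama (the model name is illustrative; see the setup guide below for details):

```julia
using PromptingTools
const PT = PromptingTools

# Explicit Ollama schema + the name of a locally pulled model; no API key needed
msg = aigenerate(PT.OllamaSchema(), "Say hi!"; model = "llama2")
```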
+### What if I cannot access OpenAI? + +There are many alternatives: + +- **Other APIs**: MistralAI, Anthropic, Google, Together, Fireworks, Voyager (the latter ones tend to give free credits upon joining!) +- **Locally-hosted models**: Llama.cpp/Llama.jl, Ollama, vLLM (see the examples and the corresponding docs) + ## Data Privacy and OpenAI At the time of writing, OpenAI does NOT use the API calls for training their models. @@ -54,6 +61,35 @@ The solution is to force a new precompilation, so you can do any of the below: 2) Update the PromptingTools package (runs precompilation automatically) 3) Delete your compiled cache in `.julia` DEPOT (usually `.julia/compiled/v1.10/PromptingTools`). You can do it manually in the file explorer or via Julia REPL: `rm("~/.julia/compiled/v1.10/PromptingTools", recursive=true, force=true)` +## Getting an error "Rate limit exceeded" from OpenAI? + +Have you opened a new account recently? It is quite likely that you've exceeded the free tier limits. + +OpenAI has a rate limit on the number of requests and the number of tokens you can make in a given period. If you exceed either of these, you will receive a "Rate limit exceeded" error. +"Free tier" (ie, before you pay the first 5 USD) has very low limits, eg, maximum of 3 requests per minute. See the [OpenAI Rate Limits](https://platform.openai.com/docs/guides/rate-limits/usage-tiers?context=tier-free) for more information. + +If you look at the HTTP response headers in the error, you can see the limits remaining and how long until it resets, eg, `x-ratelimit-remaining-*` and `x-ratelimit-reset-*`. + +If you want to avoid this error, you have two options: + +1) Put a simple `sleep(x)` after every request, where `x` is calculated so that the number of your requests stays below the limit. +2) Use `ntasks` keyword argument in `asyncmap` to limit the number of concurrent requests. Eg, let's assume you want to process 100x c. 10,000 tokens, but your tier limit is only 60,000 tokens per minute. + If we know that one request takes c. 10 seconds, it means that with `ntasks=1` we would send 6 requests per minute, which already maxes out our limit. + If we set `ntasks=2`, we could process 12 requests per minute, so we would need our limit to be 120,000 tokens per minute. + ```julia + # simple asyncmap loop with 2 concurrent requests; otherwise, same syntax as `map` + asyncmap(my_prompts; ntasks=2) do prompt + aigenerate(prompt) + end + ``` + +## Getting the error "429 Too Many Requests"? +Assuming you have not just sent hundreds of requests, this error might be related to insufficient "credits" in your account balance. + +See the error message. If it says "You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors", you'll need to re-charge your account balance. Visit [Billing overview](https://platform.openai.com/settings/organization/billing/overview). + +Please note that, unlike ChatGPT, OpenAI API is NOT free. However, individual requests are extremely cheap (eg, tenth of a cent), so if you charge $5, it might last you up to hundreds of requests (depending on the models and prompts). + ## Setting OpenAI Spending Limits OpenAI allows you to set spending limits directly on your account dashboard to prevent unexpected costs. 
@@ -66,7 +102,7 @@ A good start might be a soft limit of c.$5 and a hard limit of c.$10 - you can a Resources: - [OpenAI Forum](https://community.openai.com/t/how-to-set-a-price-limit/13086) -### How much does it cost? Is it worth paying for? +## How much does it cost? Is it worth paying for? If you use a local model (eg, with Ollama), it's free. If you use any commercial APIs (eg, OpenAI), you will likely pay per "token" (a sub-word unit). @@ -83,6 +119,33 @@ Assuming the price per call was $0.0001, you'd pay 2 cents for the job and save Resources: - [OpenAI Pricing per 1000 tokens](https://openai.com/pricing) +## How to try new OpenAI models if I'm not Tier 5 customer? + +As of September 2024, you cannot access the new o1 models via API unless you're a Tier 5 customer. + +Fortunately, you can use OpenRouter to access these new models. + +1) Get your API key from [OpenRouter](https://openrouter.ai/keys) +2) Add some minimum [Credits](https://openrouter.ai/credits) to the account (eg, $5). +3) Set it as an environment variable (or use local preferences): `ENV["OPENROUTER_API_KEY"] = ""` +4) Use the model aliases with `or` prefix, eg, `oro1` for o1-preview or `oro1m` for o1-mini. + +Example: +```julia +# Let's use o1-preview model hosted on OpenRouter ("or" prefix) +msg = aigenerate("What is the meaning of life?"; model="oro1") +``` + +Note: There are some quirks for the o1 models. +For example, the new o1 series does NOT support `SystemMessage` yet, so OpenRouter does some tricks (likely converting them to normal user messages). +To be in control of this behavior and have comparable behavior to the native OpenAI API, you can use kwarg `no_system_message=true` in `aigenerate` to ensure OpenRouter does not do any tricks. + +Example: +```julia +# Let's use o1-mini and disable adding automatic system message +msg = aigenerate("What is the meaning of life?"; model="oro1m", no_system_message=true) +``` + ## Configuring the Environment Variable for API Key This is a guide for OpenAI's API key, but it works for any other API key you might need (eg, `MISTRALAI_API_KEY` for MistralAI API). @@ -111,7 +174,7 @@ Resources: You can also set the API key in `LocalPreferences.toml`, so it persists across sessions and projects. -Use: `PromptingTools.set_preferences!("OPENAI_API_KEY"="your-api-key")` +Use: `PromptingTools.set_preferences!("OPENAI_API_KEY"=>"your-api-key")` To double-check, run `PromptingTools.get_preferences("OPENAI_API_KEY")` and you should see your key! @@ -166,6 +229,19 @@ There are three ways how you can customize your workflows (especially when you u 2) Register your model and its associated schema (`PT.register_model!(; name="123", schema=PT.OllamaSchema())`). You won't have to specify the schema anymore only the model name. See [Working with Ollama](#working-with-ollama) for more information. 3) Override your default model (`PT.MODEL_CHAT`) and schema (`PT.PROMPT_SCHEMA`). It can be done persistently with Preferences, eg, `PT.set_preferences!("PROMPT_SCHEMA" => "OllamaSchema", "MODEL_CHAT"=>"llama2")`. +## Using Custom API Providers like Azure or Databricks + +Several providers are directly supported (eg, Databricks), check the available prompt schemas (eg, `subtypes(PT.AbstractOpenAISchema)`). + +If you need a custom URL or a few keyword parameters, refer to the implementation of DatabricksOpenAISchema. +You effectively need to create your own prompt schema (`struct MySchema <: PT.AbstractOpenAISchema`) and override the OpenAI.jl behavior. 
+
 ## How to have Multi-turn Conversations?
 
 Let's say you would like to respond back to a model's response. How to do it?
@@ -201,134 +277,289 @@ conversation = aigenerate("What's my name?"; return_all=true, conversation)
 ```
 
 Notice that the last message is the response to the second request, but with `return_all=true` we can see the whole conversation from the beginning.
 
-## Explain What Happens Under the Hood
+## How to have typed responses?
+
+Our responses are always in `AbstractMessage` types to ensure we can also handle downstream processing, error handling, and self-healing code (see `airetry!`).
 
-4 Key Concepts/Objects:
-- Schemas -> object of type `AbstractPromptSchema` that determines which methods are called and, hence, what providers/APIs are used
-- Prompts -> the information you want to convey to the AI model
-- Messages -> the basic unit of communication between the user and the AI model (eg, `UserMessage` vs `AIMessage`)
-- Prompt Templates -> re-usable "prompts" with placeholders that you can replace with your inputs at the time of making the request
+A good use case for a typed response is when you have a complicated control flow and would like to group and handle certain outcomes differently. You can easily do it as an extra step after the response is received.
 
-When you call `aigenerate`, roughly the following happens: `render` -> `UserMessage`(s) -> `render` -> `OpenAI.create_chat` -> ... -> `AIMessage`.
+Trivially, we can use `aiclassify` for Bool statements, eg,
+```julia
+# We can do either
+mybool = tryparse(Bool, aiclassify("Is two plus two four?")) isa Bool # true
+
+# or simply check equality
+msg = aiclassify("Is two plus two four?") # true
+mybool = msg.content == "true"
+```
+
+Now a more complicated example with multiple categories mapping to an enum:
+```julia
+choices = [("A", "any animal or creature"), ("P", "for any plant or tree"), ("O", "for everything else")]
 
-We'll deep dive into an example in the end.
+# Set up the return types we want
+@enum Categories A P O
+string_to_category = Dict("A" => A, "P" => P, "O" => O)
 
-### Schemas
+# Run an example
+input = "spider"
+msg = aiclassify(:InputClassifier; choices, input)
 
-For your "message" to reach an AI model, it needs to be formatted and sent to the right place.
+mytype = string_to_category[msg.content] # A (for animal)
+```
+How does it work? `aiclassify` guarantees to output one of our choices (and it handles some of the common quirks)!
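+
+If you find yourself repeating this pattern, you can wrap it in a tiny helper. The snippet below is only an illustrative sketch (it is not part of the package); it assumes the `choices`, the `Categories` enum, and the `string_to_category` dictionary from the example above are already defined, and it falls back to the catch-all category `O` for anything unexpected:
+
+```julia
+# hypothetical convenience wrapper around the enum example above
+function classify_item(input::AbstractString)
+    msg = aiclassify(:InputClassifier; choices, input)
+    # `get` falls back to the catch-all category `O` if the content is unexpected
+    return get(string_to_category, msg.content, O)
+end
+
+classify_item("spider")   # A
+classify_item("granite")  # O
+```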
 
-We leverage the multiple dispatch around the "schemas" to pick the right logic.
-All schemas are subtypes of `AbstractPromptSchema` and there are many subtypes, eg, `OpenAISchema <: AbstractOpenAISchema <:AbstractPromptSchema`.
+How would we achieve the same with `aigenerate` and an arbitrary struct?
+We need to use the "lazy" `AIGenerate` struct and `airetry!` to ensure we get the response, and then we can process it further.
 
-For example, if you provide `schema = OpenAISchema()`, the system knows that:
-- it will have to format any user inputs to OpenAI's "message specification" (a vector of dictionaries, see their API documentation). Function `render(OpenAISchema(),...)` will take care of the rendering.
-- it will have to send the message to OpenAI's API. We will use the amazing `OpenAI.jl` package to handle the communication.
+`AIGenerate` has two fields you should know about:
+- `conversation` - eg, the vector of "messages" in the current conversation (same as what you get from `aigenerate` with `return_all=true`)
+- `success` - a boolean flag if the request was successful AND if it passed any subsequent `airetry!` calls
 
-### Prompts
+Let's mimic a case where our "program" should return one of three types: `SmallInt`, `LargeInt`, `FailedResponse`.
 
-Prompt is loosely the information you want to convey to the AI model. It can be a question, a statement, or a command. It can have instructions or some context, eg, previous conversation.
+We first need to define our custom types:
+```julia
 
-You need to remember that Large Language Models (LLMs) are **stateless**. They don't remember the previous conversation/request, so you need to provide the whole history/context every time (similar to how REST APIs work).
+# not needed, just to show a fully typed example
+abstract type MyAbstractResponse end
+struct SmallInt <: MyAbstractResponse
+    number::Int
+end
+struct LargeInt <: MyAbstractResponse
+    number::Int
+end
+struct FailedResponse <: MyAbstractResponse
+    content::String
+end
+```
 
-Prompts that we send to the LLMs are effectively a sequence of messages (`<:AbstractMessage`).
+Let's define our "program" as a function to be cleaner. Notice that we use `AIGenerate` and `airetry!` to ensure we get the response, and then we can process it further.
 
-### Messages
+```julia
+using PromptingTools.Experimental.AgentTools
+
+function give_me_number(prompt::String)::MyAbstractResponse
+    # Generate the response
+    response = AIGenerate(prompt; config=RetryConfig(; max_retries=2)) |> run!
+
+    # Check if it's parseable as Int; if not, send it back to be fixed
+    # syntax: airetry!(CONDITION-TO-CHECK, AICALL-OBJECT, FEEDBACK-TO-MODEL)
+    airetry!(x -> tryparse(Int, last_output(x)) |> !isnothing, response, "Wrong output format! Answer with digits and nothing else. The number is:")
+
+    if response.success != true
+        ## we failed to generate a parseable integer
+        return FailedResponse("I failed to get the response. Last output: $(last_output(response))")
+    end
+    number = tryparse(Int, last_output(response))
+    return number < 1000 ? SmallInt(number) : LargeInt(number)
+end
+
+give_me_number("How many car seats are in Porsche 911T?")
+## [ Info: Condition not met. Retrying...
+## [ Info: Condition not met. Retrying...
+## SmallInt(2)
+```
 
-Messages are the basic unit of communication between the user and the AI model.
+We ultimately received our custom type `SmallInt` with the number of car seats in the Porsche 911T (I hope it's correct!).
 
-There are 5 main types of messages (`<:AbstractMessage`):
+If you want to access the full conversation history (all the attempts and feedback), simply output the `response` object and explore `response.conversation`.
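+
+For example, the following illustrative sketch (assuming the definitions and the `using PromptingTools.Experimental.AgentTools` import from the example above) shows a few handy fields and helpers to inspect:
+
+```julia
+response = AIGenerate("Give me a number between 1 and 10. Answer with digits only.";
+    config = RetryConfig(; max_retries = 2)) |> run!
+airetry!(x -> tryparse(Int, last_output(x)) |> !isnothing, response,
+    "Answer with digits and nothing else!")
+
+last_output(response)          # the final string output of the model
+response.success               # did the request (and the airetry! check) succeed?
+length(response.conversation)  # number of messages exchanged, feedback included
+response.conversation          # the full message history, including any feedback
+```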
 
-- `SystemMessage` - this contains information about the "system", eg, how it should behave, format its output, etc. (eg, `You're a world-class Julia programmer. You write brief and concise code.)
-- `UserMessage` - the information "from the user", ie, your question/statement/task
-- `UserMessageWithImages` - the same as `UserMessage`, but with images (URLs or Base64-encoded images)
-- `AIMessage` - the response from the AI model, when the "output" is text
-- `DataMessage` - the response from the AI model, when the "output" is data, eg, embeddings with `aiembed` or user-defined structs with `aiextract`
+## How to quickly create a prompt template?
 
-### Prompt Templates
+Many times, you will want to create a prompt template that you can reuse with different inputs (eg, to create templates for AIHelpMe or LLMTextAnalysis).
 
-We want to have re-usable "prompts", so we provide you with a system to retrieve pre-defined prompts with placeholders (eg, `{{name}}`) that you can replace with your inputs at the time of making the request.
+Previously, you would have to create a vector of `SystemMessage` and `UserMessage` objects and then save it to disk and reload it.
+Now, you can use the `create_template` function to do it for you. It's designed for quick prototyping, so it skips the serialization step and loads it directly into the template store (ie, you can use it like any other template - try the `aitemplates()` search).
 
-"AI Templates" as we call them (`AITemplate`) are usually a vector of `SystemMessage` and a `UserMessage` with specific purpose/task.
+The syntax is simple: `create_template(;user=, system=, load_as=