diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 7e620817e..bcfac0a27 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -23,6 +23,7 @@ jobs: version: - '1.9' - '1.10' + # - '1.11' # - 'nightly' os: - ubuntu-latest @@ -49,24 +50,21 @@ jobs: permissions: contents: write statuses: write + pages: write + id-token: write + actions: write steps: - - uses: actions/checkout@v3 - - uses: julia-actions/setup-julia@v1 - with: - version: '1' - - name: Configure doc environment - run: | - julia --project=docs/ -e ' - using Pkg - Pkg.develop(PackageSpec(path=pwd())) - Pkg.instantiate()' - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-docdeploy@v1 + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Julia + uses: julia-actions/setup-julia@v1 + - name: Pull Julia cache + uses: julia-actions/cache@v1 + - name: Install documentation dependencies + run: julia --project=docs -e 'using Pkg; pkg"dev ."; Pkg.instantiate(); Pkg.precompile(); Pkg.status()' + - name: Build and deploy docs + uses: julia-actions/julia-docdeploy@v1 env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - run: | - julia --project=docs -e ' - using Documenter: DocMeta, doctest - using PromptingTools - DocMeta.setdocmeta!(PromptingTools, :DocTestSetup, :(using PromptingTools); recursive=true) - doctest(PromptingTools)' + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token + GKSwstype: "100" # for Plots.jl plots (if you have them) + JULIA_DEBUG: "Documenter" \ No newline at end of file diff --git a/.gitignore b/.gitignore index 40f731fe8..a71aa4928 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,11 @@ /docs/build/ **/.DS_Store -**/.vscode \ No newline at end of file +**/.vscode + +# exclude scratch files +**/_* +docs/package-lock.json + +# Ignore Cursor rules +.cursorrules \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 48d9e4c66..1118085b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,10 +10,489 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +## [0.62.1] + +### Fixed +- Fixed a bug in `tool_call_signature` where hidden fields were not hidden early enough and would fail if a Dict argument was provided. It used to do the processing after, but Dicts cannot be processed, so we're now masking the fields upfront. + +## [0.62.0] + +### Added +- Added a new Claude 3.5 Haiku model (`claude-3-5-haiku-latest`) and updated the alias `claudeh` with it. +- Added support for XAI's Grok 2 beta model (`grok-beta`) and updated the alias `grok` with it. Set your ENV api key `XAI_API_KEY` to use it. + +## [0.61.0] + +### Added +- Added a new `extras` field to `ToolRef` to enable additional parameters in the tool signature (eg, `display_width_px`, `display_height_px` for the `:computer` tool). +- Added a new kwarg `unused_as_kwargs` to `execute_tool` to enable passing unused args as kwargs (see `?execute_tool` for more information). Helps with using kwarg-based functions. + +### Updated +- Updated the compat bounds for `StreamCallbacks` to enable both v0.4 and v0.5 (Fixes Julia 1.9 compatibility). +- Updated the return type of `tool_call_signature` to `Dict{String, AbstractTool}` to enable better interoperability with different tool types. + +## [0.60.0] + +### Added +- Added new Claude 3.5 Sonnet model (`claude-3-5-sonnet-latest`) and updated the alias `claude` and `claudes` with it. 
+- Added support for Ollama streaming with schema `OllamaSchema` (see `?StreamCallback` for more information). Schema `OllamaManaged` is NOT supported (it's legacy and will be removed in the future). +- Moved the implementation of streaming callbacks to a new `StreamCallbacks` package. +- Added new error types for tool execution to enable better error handling and reporting (see `?AbstractToolError`). +- Added support for Anthropic's new pre-trained tools via `ToolRef` (see `?ToolRef`); to enable the feature, use the `:computer_use` beta header (eg, `aitools(..., betas = [:computer_use])`). + +### Fixed +- Fixed a bug in `call_cost` where the cost was not calculated if any non-AIMessages were provided in the conversation. + +## [0.59.1] + +### Fixed +- Fixed a bug in multi-turn tool calls for OpenAI models where an empty tools array could have been sent, which causes an API error. + +## [0.59.0] + +### Breaking Changes +- New field `name` introduced in `AbstractChatMessage` and `AIToolRequest` messages to enable role-based workflows. It initializes to `nothing`, so it is backward compatible. + +### Added +- Extends support for structured extraction with multiple "tools" definitions (see `?aiextract`). +- Added new primitives `Tool` (to re-use tool definitions) and a function `aitools` to support mixed structured and non-structured workflows, eg, agentic workflows (see `?aitools`). +- Added a field `name` to `AbstractChatMessage` and `AIToolRequest` messages to enable role-based workflows. +- Added support for partial argument execution with the `execute_tool` function (provide your own context to override the arg values). +- Added support for [SambaNova](https://sambanova.ai/) hosted models (set your ENV `SAMBANOVA_API_KEY`). +- Added many new models from Mistral, Groq, SambaNova, OpenAI. + +### Updated +- Renamed `function_call_signature` to `tool_call_signature` to better reflect that it's used for tools, but kept a link to the old name for back-compatibility. +- Improves structured extraction for Anthropic models (now you can use the `tool_choice` keyword argument to specify which tool to use or re-use your parsed tools). +- When log probs are requested, we will now also log the raw information in the `AIMessage.extras[:log_prob]` field (previously we logged only the full sum). This enables more nuanced log-probability calculations for individual tokens. + +## [0.58.0] + +### Added +- Added support for [Cerebras](https://cloud.cerebras.ai) hosted models (set your ENV `CEREBRAS_API_KEY`). Available model aliases: `cl3` (Llama3.1 8bn), `cl70` (Llama3.1 70bn). +- Added a kwarg to `aiclassify` to provide a custom token ID mapping (`token_ids_map`) to work with custom tokenizers. + +### Updated +- Improved the implementation of `airetry!` to concatenate feedback from all ancestor nodes ONLY IF `feedback_inplace=true` (because otherwise the LLM can see it in the message history). + +### Fixed +- Fixed a potential bug in `airetry!` where the `aicall` object was not properly validated to ensure it has been `run!` first. + +## [0.57.0] + +### Added + +- Support for the [Azure OpenAI API](https://learn.microsoft.com/en-us/azure/ai-services/openai/reference). Requires two environment variables to be set: `AZURE_OPENAI_API_KEY` and `AZURE_OPENAI_HOST` (i.e., `https://<resource-name>.openai.azure.com`). + +## [0.56.1] + +### Fixed +- Removed an accidental INFO log in Anthropic's `aigenerate`. +- Changed internal logging in `streamcallback` to use `@debug` when printing raw data chunks.
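Note (added for illustration): the `streamcallback` machinery referenced in the entries above and below can be exercised with a one-liner. This is only a minimal sketch, assuming an OpenAI-compatible model and the corresponding API key are configured; the prompt is a placeholder (see `?StreamCallback` for custom callbacks):

```julia
using PromptingTools

# Print each received text chunk to the console as it arrives (simplest setup)
msg = aigenerate("Write a haiku about Julia."; streamcallback = stdout)
```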
+ +## [0.56.0] + +### Updated +- Enabled Streaming for OpenAI-compatible APIs (eg, DeepSeek Coder). +- If streaming to stdout, also print a newline at the end of streaming (to separate multiple outputs). + +### Fixed +- Relaxed the type-assertions in `StreamCallback` to allow for more flexibility. + +## [0.55.0] + +### Added +- Added support for OpenAI's JSON mode for `aiextract` (just provide kwarg `json_mode=true`). Reference [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs). +- Added support for OpenRouter's API (you must set ENV `OPENROUTER_API_KEY`) to provide access to more models like Cohere Command R+ and OpenAI's o1 series. Reference [OpenRouter](https://openrouter.ai/). +- Added new OpenRouter-hosted models to the model registry (prefixed with `or`): `oro1` (OpenAI's o1-preview), `oro1m` (OpenAI's o1-mini), `orcop` (Cohere's command-r-plus), `orco` (Cohere's command-r). The `or` prefix is to avoid conflicts with existing models and OpenAI's aliases; the goal is then to provide 2 letters for each model and 1 letter for an additional qualifier (eg, "p" for plus, "m" for mini) -> `orcop` (OpenRouter cohere's COmmand-r-Plus). + +### Updated +- Updated FAQ with instructions on how to access new OpenAI o1 models via OpenRouter. +- Updated FAQ with instructions on how to add custom APIs (with an example `examples/adding_custom_API.jl`). + +### Fixed +- Fixed a bug in `aiclassify` for the OpenAI GPT4o models that have a different tokenizer. Unknown model IDs will throw an error. + +## [0.54.0] + +### Updated +- Improved the performance of BM25/Keywords-based indices for >10M documents. Introduced new kwargs `min_term_freq` and `max_terms` in `RT.get_keywords` to reduce the size of the vocabulary. See `?RT.get_keywords` for more information. + +## [0.53.0] + +### Added +- Added beta headers to enable long outputs (up to 8K tokens) with Anthropic's Sonnet 3.5 (see `?anthropic_extra_headers`). +- Added a kwarg to prefill (`aiprefill`) AI responses with Anthropic's models to improve steerability (see `?aigenerate`). + +### Updated +- Documentation of `aigenerate` to make it clear that if `streamcallback` is provided WITH `flavor` set, there is no automatic configuration and the user must provide the correct `api_kwargs`. +- Grouped Anthropic's beta headers as a comma-separated string as per the latest API specification. + + +## [0.52.0] + +### Added +- Added a new EXPERIMENTAL `streamcallback` kwarg for `aigenerate` with the OpenAI and Anthropic prompt schemas to enable custom streaming implementations. The simplest usage is with `streamcallback=stdout`, which will print each text chunk into the console. The system is modular, enabling custom callbacks and allowing you to inspect received chunks. See `?StreamCallback` for more information. It does not support tools yet. + +## [0.51.0] + +### Added +- Added more flexible structured extraction with `aiextract` -> now you can simply provide the field names and, optionally, their types without specifying the struct itself (in `aiextract`, provide the fields like `return_type = [:field_name => field_type]`). +- Added a way to attach field-level descriptions to the generated JSON schemas to improve structured extraction (see `?update_schema_descriptions!` to see the syntax), which was not possible with struct-only extraction. + +## [0.50.0] + +### Breaking Changes +- `AIMessage` and `DataMessage` now have a new field `extras` to hold any API-specific metadata in a simple dictionary.
Change is backward-compatible (defaults to `nothing`). + +### Added +- Added EXPERIMENTAL support for Anthropic's new prompt cache (see ?`aigenerate` and look for `cache` kwarg). Note that COST estimate will be wrong (ignores the caching discount for now). +- Added a new `extras` field to `AIMessage` and `DataMessage` to hold any API-specific metadata in a simple dictionary (eg, used for reporting on the cache hit/miss). + +## [0.49.0] + +### Added +- Added new OpenAI's model "chatgpt-4o-latest" to the model registry with alias "chatgpt". This model represents the latest version of ChatGPT-4o tuned specifically for ChatGPT. + +## [0.48.0] + +### Added +- Implements the new OpenAI structured output mode for `aiextract` (just provide kwarg `strict=true`). Reference [blog post](https://openai.com/index/introducing-structured-outputs-in-the-api/). + +## [0.47.0] + +### Added +- Added a new specialized method for `hcat(::DocumentTermMatrix, ::DocumentTermMatrix)` to allow for combining large DocumentTermMatrices (eg, 1M x 100K). + +### Updated +- Increased the compat bound for HTTP.jl to 1.10.8 to fix a bug with Julia 1.11. + +### Fixed +- Fixed a bug in `vcat_labeled_matrices` where extremely large DocumentTermMatrix could run out of memory. +- Fixed a bug in `score_to_unit_scale` where empty score vectors would error (now returns the empty array back). + +## [0.46.0] + +### Added +- Added a new model `gpt-4o-2024-08-06` to the model registry (alias `gpt4ol` with `l` for latest). It's the latest version of GPT4o, which is faster and cheaper than the previous version. + +## [0.45.0] + +### Breaking Change +- `getindex(::MultiIndex, ::MultiCandidateChunks)` now returns sorted chunks by default (`sorted=true`) to guarantee that potential `context` (=`chunks`) is sorted by descending similarity score across different sub-indices. + +### Updated +- Updated a `hcat` implementation in `RAGTools.get_embeddings` to reduce memory allocations for large embedding batches (c. 3x fewer allocations, see `hcat_truncate`). +- Updated `length_longest_common_subsequence` signature to work only for pairs of `AbstractString` to not fail silently when wrong arguments are provided. + +### Fixed +- Changed the default behavior of `getindex(::MultiIndex, ::MultiCandidateChunks)` to always return sorted chunks for consistency with other similar functions and correct `retrieve` behavior. This was accidentally changed in v0.40 and is now reverted to the original behavior. + +## [0.44.0] + +### Added +- Added Mistral Large 2 and Mistral-Nemo to the model registry (alias `mistral-nemo`). + +### Fixed +- Fixed a bug where `wrap_string` would not correctly split very long Unicode words. + +## [0.43.0] + +### Added +- Added Llama 3.1 registry records for Fireworks.ai (alias `fllama3`, `fllama370`, `fllama3405` and `fls`, `flm`, `fll` for small/medium/large similar to the other providers). + +## [0.42.0] + +### Added +- Registered new Meta Llama 3.1 models hosted on GroqCloud and Together.ai (eg, Groq-hosted `gllama370` has been updated to point to the latest available model and 405b model now has alias `gllama3405`). Because that's quite clunky, I've added abbreviations based on sizes small/medium/large (that is 8b, 70b, 405b) under `gls/glm/gll` for Llama 3.1 hosted on GroqCloud (similarly, we now have `tls/tlm/tll` for Llama3.1 on Together.ai). +- Generic model aliases for Groq and Together.ai for Llama3 models have been updated to point to the latest available models (Llama 3.1). 
+- Added Gemma2 9b model hosted on GroqCloud to the model registry (alias `ggemma9`). + +### Updated +- Minor optimizations to `SubDocumentTermMatrix` to reduce memory allocations and improve performance. + +## [0.41.0] + +### Added +- Introduced a "view" of `DocumentTermMatrix` (=`SubDocumentTermMatrix`) to allow views of Keyword-based indices (`ChunkKeywordsIndex`). It's not a pure view (TF matrix is materialized to prevent performance degradation). + +### Fixed +- Fixed a bug in `find_closest(finder::BM25Similarity, ...)` where the view of `DocumentTermMatrix` (ie, `view(DocumentTermMatrix(...), ...)`) was undefined. +- Fixed a bug where a view of a view of a `ChunkIndex` wouldn't intersect the positions (it was returning only the latest requested positions). + +## [0.40.0] + +### Added +- Introduces `RAGTools.SubChunkIndex` to allow projecting `views` of various indices. Useful for pre-filtering your data (faster and more precise retrieval). See `?RT.SubChunkIndex` for more information and how to use it. + +### Updated +- `CandidateChunks` and `MultiCandidateChunks` intersection methods updated to be an order of magnitude faster (useful for large sets like tag filters). + +### Fixed +- Fixed a bug in `find_closest(finder::BM25Similarity, ...)` where the `minimum_similarity` kwarg was not implemented. + +## [0.39.0] + +### Breaking Changes +- Changed the default model for `ai*` chat functions (`PT.MODEL_CHAT`) from `gpt3t` to `gpt4om` (GPT-4o-mini). See the LLM-Leaderboard results and the release [blog post](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/). + +### Added +- Added the new GPT-4o-mini to the model registry (alias `gpt4om`). It's the smallest and fastest model based on GPT4 that is cheaper than GPT3.5Turbo. + +## [0.38.0] + +### Added +- Added a new tagging filter `RT.AllTagFilter` to `RT.find_tags`, which requires all tags to be present in a chunk. +- Added an option in `RT.get_keywords` to set the minimum length of the keywords. +- Added a new method for `reciprocal_rank_fusion` and a utility for standardizing candidate chunk scores (`score_to_unit_scale`). + +## [0.37.1] + +### Fixed +- Fixed a bug in CohereReranker where it wouldn't correctly handle `CandidateChunks`. + +## [0.37.0] + +### Updated +- Increased the compat bound for FlashRank to 0.4. + +## [0.36.0] + +### Added +- Added a prompt template for RAG query expansion for BM25 (`RAGQueryKeywordExpander`). + +### Fixed +- Fixed a small bug in the truncation step of RankGPT's `permutation_step!` (bad indexing of string characters). +- Fixed a bug where a certain combination of `rank_start` and `rank_end` would not produce the last sliding window. +- Fixed a bug where a partially filled `RAGResult` would fail pretty-printing with `pprint`. + +## [0.35.0] + +### Added +- Added a utility function to RAGTools, `reciprocal_rank_fusion`, as a principled way to merge multiple rankings. See `?RAGTools.Experimental.reciprocal_rank_fusion` for more information. + +## [0.34.0] + +### Added +- `RankGPT` implementation for the RAGTools chunk re-ranking pipeline. See `?RAGTools.Experimental.rank_gpt` for more information and the corresponding reranker type `?RankGPTReranker`. + +## [0.33.2] + +### Fixed +- Added back accidentally dropped DBKS keys. + +## [0.33.1] + +### Fixed +- Fixed loading RAGResult when one of the candidate fields was `nothing`.
+- Utility type checks like `isusermessage`, `issystemmessage`, `isdatamessage`, `isaimessage`, `istracermessage` do not throw errors when given any arbitrary input types (previously they only worked for `AbstractMessage` types). It's a `isa` check, so it should work for all input types. +- Changed preference loading to use typed `global` instead of `const`, to fix issues with API keys not being loaded properly on start. You can now also call `PromptingTools.load_api_keys!()` to re-load the API keys (and ENV variables) manually. + +## [0.33.0] + +### Added +- Added registry record for Anthropic Claude 3.5 Sonnet with ID `claude-3-5-sonnet-20240620` (read the [blog post](https://www.anthropic.com/news/claude-3-5-sonnet)). Aliases "claude" and "claudes" have been linked to this latest Sonnet model. + +## [0.32.0] + +### Updated +- Changed behavior of `RAGTools.rerank(::FlashRanker,...)` to always dedupe input chunks (to reduce compute requirements). + +### Fixed +- Fixed a bug in verbose INFO log in `RAGTools.rerank(::FlashRanker,...)`. + +## [0.31.1] + +### Updated +- Improved the implementation of `RAGTools.unpack_bits` to be faster with fewer allocations. + +## [0.31.0] + +### Breaking Changes +- The return type of `RAGTools.find_tags(::NoTagger,...)` is now `::Nothing` instead of `CandidateChunks`/`MultiCandidateChunks` with all documents. +- `Base.getindex(::MultiIndex, ::MultiCandidateChunks)` now always returns sorted chunks for consistency with the behavior of other `getindex` methods on `*Chunks`. + +### Updated +- Cosine similarity search now uses `partialsortperm` for better performance on large datasets. +- Skip unnecessary work when the tagging functionality in the RAG pipeline is disabled (`find_tags` with `NoTagger` always returns `nothing` which improves the compiled code). +- Changed the default behavior of `getindex(::MultiIndex, ::MultiCandidateChunks)` to always return sorted chunks for consistency with other similar functions. Note that you should always use re-rankering anyway (see `FlashRank.jl`). + +## [0.30.0] + +### Fixed +- Fixed a bug on Julia 1.11 beta by adding REPL stdlib as a direct dependency. +- Fixed too restrictive argument types for `RAGTools.build_tags` method. + +## [0.29.0] + +### Added +- Added package extension for FlashRank.jl to support local ranking models. See `?RT.FlashRanker` for more information or `examples/RAG_with_FlashRank.jl` for a quick example. + + +## [0.28.0] + +### Added +- Added Mistral coding-oriented [Codestral](https://mistral.ai/news/codestral/) to the model registry, aliased as `codestral` or `mistralc`. It's very fast, performant and much cheaper than similar models. + +## [0.27.0] + +### Added +- Added a keyword-based search similarity to RAGTools to serve both for baseline evaluation and for advanced performance (by having a hybrid index with both embeddings and BM25). See `?RT.KeywordsIndexer` and `?RT.BM25Similarity` for more information, to build use `build_index(KeywordsIndexer(), texts)` or convert an existing embeddings-based index `ChunkKeywordsIndex(index)`. + +### Updated +- For naming consistency, `ChunkIndex` in RAGTools has been renamed to `ChunkEmbeddingsIndex` (with an alias `ChunkIndex` for backwards compatibility). There are now two main index types: `ChunkEmbeddingsIndex` and `ChunkKeywordsIndex` (=BM25), which can be combined into a `MultiIndex` to serve as a hybrid index. 
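Note (added for illustration): a minimal sketch of the keyword-based (BM25) index described in the 0.27.0 entry above. The `texts` vector and `emb_index` are placeholders, and the calls mirror the signatures quoted in that entry:

```julia
using PromptingTools
using PromptingTools.Experimental.RAGTools
const RT = PromptingTools.Experimental.RAGTools

# Placeholder documents; a keyword (BM25) index needs no embedding API calls
texts = ["PromptingTools.jl simplifies calls to LLM APIs.",
    "RAGTools provides retrieval utilities for RAG pipelines."]

# Build a keyword-based (BM25) index, as described in the entry above
keyword_index = RT.build_index(RT.KeywordsIndexer(), texts)

# Or convert an existing embeddings-based index (here a hypothetical `emb_index`):
# keyword_index = RT.ChunkKeywordsIndex(emb_index)
```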
+ +## [0.26.2] + +### Fixed +- Fixed a rare bug where prompt templates created on macOS would come with metadata that breaks the prompt loader. From now on, it ignores any dotfiles (hidden files starting with "."). + +## [0.26.1] + +### Fixed +- Fixed a bug where the utility `length_longest_common_subsequence` was not working with complex Unicode characters. + +## [0.26.0] + +### BREAKING CHANGES +- Added new field `meta` to `TracerMessage` and `TracerMessageLike` to hold metadata in a simple dictionary. Change is backward-compatible. +- Changed behavior of `aitemplates(name::Symbol)` to look for an exact match on the template name, not just a partial match. This is a breaking change for the `aitemplates` function only. Motivation is that having multiple matches could have introduced subtle bugs when looking up valid placeholders for a template. + +### Added +- Improved support for `aiclassify` with OpenAI models (you can now encode up to 40 choices). +- Added a template for routing questions `:QuestionRouter` (to be used with `aiclassify`). +- Improved tracing by `TracerSchema` to automatically capture crucial metadata such as any LLM API kwargs (`api_kwargs`), the use of prompt templates and their version. Information is captured in the `meta(tracer)` dictionary. See `?TracerSchema` for more information. +- New tracing schema `SaverSchema` allows you to automatically serialize all conversations. It can be composed with other tracing schemas, eg, `TracerSchema`, to automatically capture the necessary metadata and serialize it. See `?SaverSchema` for more information. +- Updated options for Binary embeddings (refer to release v0.18 for motivation). Adds utility functions `pack_bits` and `unpack_bits` to move between binary and UInt64 representations of embeddings. RAGTools adds the corresponding `BitPackedBatchEmbedder` and `BitPackedCosineSimilarity` for fast retrieval on these Bool<->UInt64 embeddings (credit to [**domluna's tinyRAG**](https://github.com/domluna/tinyRAG)). + +### Fixed +- Fixed a bug where `aiclassify` would not work when returning the full conversation for choices with extra descriptions. + +## [0.25.0] + +### Added +- Added a model registry record for the latest OpenAI GPT4 Omni model (`gpt4o`) - it's as good as GPT4, faster and cheaper. + +## [0.24.0] + +### Added +- Added support for [DeepSeek models](https://platform.deepseek.com/docs) via the `dschat` and `dscode` aliases. You can set the `DEEPSEEK_API_KEY` environment variable to your DeepSeek API key. + + +## [0.23.0] + +### Added +- Added new prompt templates for "Expert" tasks like `LinuxBashExpertAsk`, `JavascriptExpertTask`, etc. +- Added new prompt templates for self-critiquing agents like `ChiefEditorTranscriptCritic`, `JuliaExpertTranscriptCritic`, etc. + +### Updated +- Extended `aicodefixer_feedback` methods to work with `AICode` and `AIGenerate`. + +## [0.22.0] + +### Added +- Added support for [Groq](https://console.groq.com/), the fastest LLM provider out there. It's free for now, so you can try it out - you just need to set your `GROQ_API_KEY`. We've added Llama3 8b (alias "gllama3"), 70b (alias "gllama370") and Mixtral 8x7b (alias "gmixtral"). For the shortcut junkies, we also added the shorthands Llama3 8b = "gl3" (first two letters and the last digit) and Llama3 70b = "gl70" (first two letters and the last two digits).
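Note (added for illustration): a minimal sketch of calling the Groq-hosted aliases from the 0.22.0 entry above; it assumes `GROQ_API_KEY` is set in your environment and the question is a placeholder:

```julia
using PromptingTools

# Llama3 8b hosted on GroqCloud via the "gl3" shorthand alias
msg = ai"What is the capital of France?"gl3

# Equivalent explicit call using the full alias:
# msg = aigenerate("What is the capital of France?"; model = "gllama3")
```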
+ +## [0.21.0] + +### Added +- New models added to the model registry: Llama3 8b on Ollama (alias "llama3" for convenience) and on Together.ai (alias "tllama3", "t" stands for Together.ai), also adding the Llama3 70b on Together.ai (alias "tllama370") and the powerful Mixtral-8x22b on Together.ai (alias "tmixtral22"). + +### Fixed +- Fixed a bug where pretty-printing `RAGResult` would forget a newline between the sources and context sections. + +## [0.20.1] + +### Fixed +- Fixed `truncate_dimension` to ignore when 0 is provided (previously it would throw an error). + +## [0.20.0] + +### Added +- Added a few new open-weights models hosted by Fireworks.ai to the registry (DBRX Instruct, Mixtral 8x22b Instruct, Qwen 72b). If you're curious about how well they work, try them! +- Added basic support for observability downstream. Created custom callback infrastructure with `initialize_tracer` and `finalize_tracer`; the dedicated types are `TracerMessage` and `TracerMessageLike`. See `?TracerMessage` for more information and the corresponding `aigenerate` docstring. +- Added `MultiCandidateChunks` which can hold candidates for retrieval across many indices (it's a flat structure to be similar to `CandidateChunks` and easy to reason about). +- JSON serialization support extended for `RAGResult`, `CandidateChunks`, and `MultiCandidateChunks` to increase observability of RAG systems. +- Added a new search refiner `TavilySearchRefiner` - it will search the web via the Tavily API to try to improve on the RAG answer (see `?refine!`). +- Introduced a few small utilities for manipulation of nested kwargs (necessary for RAG pipelines); check out `getpropertynested`, `setpropertynested`, `merge_kwargs_nested`. + +### Updated +- [BREAKING] change to `CandidateChunks` where it's no longer allowed to be nested (ie, `cc.positions` being a list of several `CandidateChunks`). This is a breaking change for the `RAGTools` module only. We have introduced a new `MultiCandidateChunks` type that can refer to `CandidateChunks` across many indices. +- Changed the default model for `RAGTools.CohereReranker` to "cohere-rerank-english-v3.0". + +### Fixed +- `wrap_string` utility now correctly splits only on spaces. Previously it would split on newlines, which would remove the natural formatting of prompts/messages when displayed via `pprint`. + +## [0.19.0] + +### Added +- [BREAKING CHANGE] The default GPT-4 Turbo model alias ("gpt4t") now points to the official GPT-4 Turbo endpoint ("gpt-4-turbo"). +- Adds references to `mistral-tiny` (7bn parameter model from MistralAI) to the model registry for completeness. +- Adds the new GPT-4 Turbo model (`"gpt-4-turbo-2024-04-09"`), but you can simply use alias `"gpt4t"` to access it. + +## [0.18.0] + +### Added +- Adds support for binary embeddings in RAGTools (dispatch type for `find_closest` is `finder=BinaryCosineSimilarity()`), but you can also just convert the embeddings to binary yourself (always choose `Matrix{Bool}` for speed, not `BitMatrix`) and use them without any changes (very little performance difference at the moment). +- Added Ollama embedding models to the model registry ("nomic-embed-text", "mxbai-embed-large") and versioned MistralAI models. +- Added a template for data extraction with Chain-of-thought reasoning: `:ExtractDataCoTXML`. +- Added data extraction support for Anthropic models (Claude 3) with `aiextract`. Try it with Claude-3 Haiku (`model="claudeh"`) and the Chain-of-thought template (`:ExtractDataCoTXML`).
See `?aiextract` for more information and check Anthropic's [recommended practices](https://docs.anthropic.com/claude/docs/tool-use). + +## [0.17.1] + +### Fixed +- Fixed a bug in `print_html` where the custom kwargs were not being passed to the `HTML` constructor. + +## [0.17.0] + +### Added +- Added support for `aigenerate` with Anthropic API. Preset model aliases are `claudeo`, `claudes`, and `claudeh`, for Claude 3 Opus, Sonnet, and Haiku, respectively. +- Enabled the GoogleGenAI extension since `GoogleGenAI.jl` is now officially registered. You can use `aigenerate` by setting the model to `gemini` and providing the `GOOGLE_API_KEY` environment variable. +- Added utilities to make preparation of finetuning datasets easier. You can now export your conversations in JSONL format with ShareGPT formatting (eg, for Axolotl). See `?PT.save_conversations` for more information. +- Added `print_html` utility for RAGTools module to print HTML-styled RAG answer annotations for web applications (eg, Genie.jl). See `?PromptingTools.Experimental.RAGTools.print_html` for more information and examples. + +## [0.16.1] + +### Fixed +- Fixed a bug where `set_node_style!` was not accepting any Stylers except for the vanilla `Styler`. + +## [0.16.0] + +### Added +- Added pretty-printing via `PT.pprint` that does NOT depend on Markdown and splits text to adjust to the width of the output terminal. + It is useful in notebooks to add new lines. +- Added support annotations for RAGTools (see `?RAGTools.Experimental.annotate_support` for more information) to highlight which parts of the generated answer come from the provided context versus the model's knowledge base. It's useful for transparency and debugging, especially in the context of AI-generated content. You can experience it if you run the output of `airag` through pretty printing (`PT.pprint`). +- Added utility `distance_longest_common_subsequence` to find the normalized distance between two strings (or a vector of strings). Always returns a number between 0-1, where 0 means the strings are identical and 1 means they are completely different. It's useful for comparing the similarity between the context provided to the model and the generated answer. +- Added a new documentation section "Extra Tools" to highlight key functionality in various modules, eg, the available text utilities, which were previously hard to discover. +- Extended documentation FAQ with tips on tackling rate limits and other common issues with OpenAI API. +- Extended documentation with all available prompt templates. See section "Prompt Templates" in the documentation. +- Added new RAG interface underneath `airag` in `PromptingTools.RAGTools.Experimental`. Each step now has a dedicated function and a type that can be customized to achieve arbitrary logic (via defining methods for your own types). `airag` is split into two main steps: `retrieve` and `generate!`. You can use them separately or together. See `?airag` for more information. + +### Updated +- Renamed `split_by_length` text splitter to `recursive_splitter` to make it easier to discover and understand its purpose. `split_by_length` is still available as a deprecated alias. + +### Fixed +- Fixed a bug where `LOCAL_SERVER` default value was not getting picked up. Now, it defaults to `http://localhost:10897/v1` if not set in the preferences, which is the address of the OpenAI-compatible server started by Llama.jl. +- Fixed a bug in multi-line code annotation, which was assigning too optimistic scores to the generated code. 
Now the score of the chunk is the length-weighted score of the "top" source chunk divided by the full length of score tokens (much more robust and demanding). + +## [0.15.0] + +### Added +- Added experimental support for image generation with OpenAI DALL-E models, eg, `msg = aiimage("A white cat on a car")`. See `?aiimage` for more details. + +## [0.14.0] + +### Added +- Added a new documentation section "How it works" to explain the inner workings of the package. It's a work in progress, but it should give you a good idea of what's happening under the hood. +- Improved template loading, so if you load your custom templates once with `load_templates!("my/template/folder)`, it will remember your folder for all future re-loads. +- Added convenience function `create_template` to create templates on the fly without having to deal with `PT.UserMessage` etc. If you specify the keyword argument `load_as = "MyName"`, the template will be immediately loaded to the template registry. See `?create_template` for more information and examples. + +### Fixed + ## [0.13.0] ### Added -- Added initial support for Google Gemini models for `aigenerate` (requires environment variable `GOOGLE_API_KEY` and package [GoogleGenAI.jl](https://github.com/tylerjthomas9/GoogleGenAI.jl) to be loaded). It must be imported explicitly because it's not registered yet. +- Added initial support for Google Gemini models for `aigenerate` (requires environment variable `GOOGLE_API_KEY` and package [GoogleGenAI.jl](https://github.com/tylerjthomas9/GoogleGenAI.jl) to be loaded). It must be added explicitly as it is not yet registered. - Added a utility to compare any two string sequences (and other iterators)`length_longest_common_subsequence`. It can be used to fuzzy match strings (eg, detecting context/sources in an AI-generated response or fuzzy matching AI response to some preset categories). See the docstring for more information `?length_longest_common_subsequence`. - Rewrite of `aiclassify` to classify into an arbitrary list of categories (including with descriptions). It's a quick and easy option for "routing" and similar use cases, as it exploits the logit bias trick and outputs only 1 token. Currently, only `OpenAISchema` is supported. See `?aiclassify` for more information. - Initial support for multiple completions in one request for OpenAI-compatible API servers. Set via API kwarg `n=5` and it will request 5 completions in one request, saving the network communication time and paying the prompt tokens only once. It's useful for majority voting, diversity, or challenging agentic workflows. @@ -156,4 +635,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `aiextract` function to extract structured information from text quickly and easily. See `?aiextract` for more information. - Add `aiscan` for image scanning (ie, image comprehension tasks). You can transcribe screenshots or reason over images as if they were text. Images can be provided either as a local file (`image_path`) or as an url (`image_url`). See `?aiscan` for more information. - Add support for [Ollama.ai](https://ollama.ai/)'s local models. Only `aigenerate` and `aiembed` functions are supported at the moment. -- Add a few non-coding templates, eg, verbatim analysis (see `aitemplates("survey")`) and meeting summarization (see `aitemplates("meeting")`), and supporting utilities (non-exported): `split_by_length` and `replace_words` to make it easy to work with smaller open source models. 
\ No newline at end of file +- Add a few non-coding templates, eg, verbatim analysis (see `aitemplates("survey")`) and meeting summarization (see `aitemplates("meeting")`), and supporting utilities (non-exported): `split_by_length` and `replace_words` to make it easy to work with smaller open source models. diff --git a/Project.toml b/Project.toml index 95dc8ce2d..c2d232ab7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,11 +1,12 @@ name = "PromptingTools" uuid = "670122d1-24a8-4d70-bfce-740807c42192" authors = ["J S @svilupp and contributors"] -version = "0.13.0" +version = "0.62.1" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" @@ -13,23 +14,35 @@ OpenAI = "e9f21f70-7185-4079-aca2-91159181367c" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Preferences = "21216c6a-2e73-6563-6e65-726566657250" +REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StreamCallbacks = "c1b9e933-98a0-46fc-8ea7-3b58b195fb0a" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [weakdeps] +FlashRank = "22cc3f58-1757-4700-bb45-2032706e5a8d" +GoogleGenAI = "903d41d1-eaca-47dd-943b-fee3930375ab" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" +Snowball = "fb8f903a-0164-4e73-9ffe-431110250c3b" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [extensions] +FlashRankPromptingToolsExt = ["FlashRank"] +GoogleGenAIPromptingToolsExt = ["GoogleGenAI"] MarkdownPromptingToolsExt = ["Markdown"] -RAGToolsExperimentalExt = ["SparseArrays", "LinearAlgebra"] +RAGToolsExperimentalExt = ["SparseArrays", "LinearAlgebra", "Unicode"] +SnowballPromptingToolsExt = ["Snowball"] [compat] AbstractTrees = "0.4" Aqua = "0.7" Base64 = "<0.0.1, 1" -HTTP = "1" +Dates = "<0.0.1, 1" +FlashRank = "0.4" +GoogleGenAI = "0.3" +HTTP = "1.10.8" JSON3 = "1" LinearAlgebra = "<0.0.1, 1" Logging = "<0.0.1, 1" @@ -38,17 +51,20 @@ OpenAI = "0.9" Pkg = "<0.0.1, 1" PrecompileTools = "1" Preferences = "1" +REPL = "<0.0.1, 1" Random = "<0.0.1, 1" SparseArrays = "<0.0.1, 1" Statistics = "<0.0.1, 1" +StreamCallbacks = "0.4, 0.5" Test = "<0.0.1, 1" -julia = "1.9,1.10" +julia = "1.9, 1.10" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [targets] -test = ["Aqua", "SparseArrays", "Statistics", "LinearAlgebra", "Markdown"] +test = ["Aqua", "FlashRank", "SparseArrays", "Statistics", "LinearAlgebra", "Markdown", "Snowball", "Unicode"] diff --git a/README.md b/README.md index 0b0577cd0..c0bf3208b 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://svilupp.github.io/PromptingTools.jl/stable/) [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://svilupp.github.io/PromptingTools.jl/dev/) +[![Slack](https://img.shields.io/badge/slack-%23generative--ai-brightgreen.svg?logo=slack)](https://julialang.slack.com/archives/C06G90C697X) [![Build 
Status](https://github.com/svilupp/PromptingTools.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/svilupp/PromptingTools.jl/actions/workflows/CI.yml?query=branch%3Amain) [![Coverage](https://codecov.io/gh/svilupp/PromptingTools.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/svilupp/PromptingTools.jl) [![Aqua](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl) @@ -11,17 +12,22 @@ Streamline your life using PromptingTools.jl, the Julia package that simplifies PromptingTools.jl is not meant for building large-scale systems. It's meant to be the go-to tool in your global environment that will save you 20 minutes every day! +> [!TIP] +> Jump to the **[docs](https://svilupp.github.io/PromptingTools.jl/dev/)** + ## Quick Start with `@ai_str` and Easy Templating Getting started with PromptingTools.jl is as easy as importing the package and using the `@ai_str` macro for your questions. -Note: You will need to set your OpenAI API key as an environment variable before using PromptingTools.jl (see the [Creating OpenAI API Key](#creating-openai-api-key) section below). +Note: You will need to set your OpenAI API key as an environment variable before using PromptingTools.jl (see the [Creating OpenAI API Key](#creating-openai-api-key) section below). + +Following the introduction of [Prepaid Billing](https://help.openai.com/en/articles/8264644-what-is-prepaid-billing), you'll need to buy some credits to get started ($5 minimum). For a quick start, simply set it via `ENV["OPENAI_API_KEY"] = "your-api-key"` Install PromptingTools: ```julia using Pkg -Pkg.add("PromptingTools.jl") +Pkg.add("PromptingTools") ``` And we're ready to go! @@ -76,6 +82,7 @@ For more practical examples, see the `examples/` folder and the [Advanced Exampl - [Table of Contents](#table-of-contents) - [Why PromptingTools.jl](#why-promptingtoolsjl) - [Advanced Examples](#advanced-examples) + - [`ai*` Functions Overview](#ai-functions-overview) - [Seamless Integration Into Your Workflow](#seamless-integration-into-your-workflow) - [Advanced Prompts / Conversations](#advanced-prompts--conversations) - [Templated Prompts](#templated-prompts) @@ -89,10 +96,12 @@ For more practical examples, see the `examples/` folder and the [Advanced Exampl - [Experimental Agent Workflows / Output Validation with `airetry!`](#experimental-agent-workflows--output-validation-with-airetry) - [Using Ollama models](#using-ollama-models) - [Using MistralAI API and other OpenAI-compatible APIs](#using-mistralai-api-and-other-openai-compatible-apis) + - [Using Anthropic Models](#using-anthropic-models) - [More Examples](#more-examples) - [Package Interface](#package-interface) - [Frequently Asked Questions](#frequently-asked-questions) - [Why OpenAI](#why-openai) + - [What if I cannot access OpenAI?](#what-if-i-cannot-access-openai) - [Data Privacy and OpenAI](#data-privacy-and-openai) - [Creating OpenAI API Key](#creating-openai-api-key) - [Setting OpenAI Spending Limits](#setting-openai-spending-limits) @@ -102,6 +111,7 @@ For more practical examples, see the `examples/` folder and the [Advanced Exampl - [Instant Access from Anywhere](#instant-access-from-anywhere) - [Open Source Alternatives](#open-source-alternatives) - [Setup Guide for Ollama](#setup-guide-for-ollama) + - [How would I fine-tune a model?](#how-would-i-fine-tune-a-model) - [Roadmap](#roadmap) ## Why PromptingTools.jl @@ -118,12 +128,59 @@ Some features: ## Advanced Examples -TODOs: +### `ai*` 
Functions Overview + +Noteworthy functions: `aigenerate`, `aiembed`, `aiclassify`, `aiextract`, `aiscan`, `aiimage`, `aitemplates` + +All `ai*` functions have the same basic structure: + +`ai*(<optional schema>, <prompt or conversation>; <optional keyword arguments>)`, + +but they differ in purpose: + +- `aigenerate` is the general-purpose function to generate any text response with LLMs, ie, it returns `AIMessage` with field `:content` containing the generated text (eg, `ans.content isa AbstractString`) +- `aiembed` is designed to extract embeddings from the AI model's response, ie, it returns `DataMessage` with field `:content` containing the embeddings (eg, `ans.content isa AbstractArray`) +- `aiextract` is designed to extract structured data from the AI model's response and return it as a Julia struct (eg, if we provide `return_type=Food`, we get `ans.content isa Food`). You need to define the return type first and then provide it as a keyword argument. +- `aitools` is designed for agentic workflows with a mix of tool calls and user inputs. It can work with simple functions and execute them. +- `aiclassify` is designed to classify the input text into (or simply respond within) a set of discrete `choices` provided by the user. It can be very useful as an LLM Judge or a router for RAG systems, as it uses the "logit bias trick" and generates exactly 1 token. It returns `AIMessage` with field `:content`, but the `:content` can be only one of the provided `choices` (eg, `ans.content in choices`) +- `aiscan` is for working with images and vision-enabled models (as an input), but it returns `AIMessage` with field `:content` containing the generated text (eg, `ans.content isa AbstractString`) similar to `aigenerate`. +- `aiimage` is for generating images (eg, with OpenAI DALL-E 3). It returns a `DataMessage`, where the field `:content` might contain either the URL to download the image from or the Base64-encoded image, depending on the user-provided kwarg `api_kwargs.response_format`. +- `aitemplates` is a helper function to discover available templates and see their details (eg, `aitemplates("some keyword")` or `aitemplates(:AssistantAsk)`) + +If you're using a known `model`, you do NOT need to provide a `schema` (the first argument). + +Optional keyword arguments in `ai*` tend to be: + +- `model::String` - Which model you want to use +- `verbose::Bool` - Whether you want to see INFO logs around AI costs +- `return_all::Bool` - Whether you want the WHOLE conversation or just the AI answer (ie, whether you want to include your inputs/prompt in the output) +- `api_kwargs::NamedTuple` - Specific parameters for the model, eg, `temperature=0.0` to be NOT creative (and have more similar output in each run) +- `http_kwargs::NamedTuple` - Parameters for the HTTP.jl package, eg, `readtimeout = 120` to time out in 120 seconds if no response was received. + +**Experimental: AgentTools** + +In addition to the above list of `ai*` functions, you can also use the **"lazy" counterparts** of these functions from the experimental AgentTools module. +```julia +using PromptingTools.Experimental.AgentTools +``` + +For example, `AIGenerate()` will create a lazy instance of `aigenerate`. It is an instance of `AICall` with `aigenerate` as its AI function. +It uses exactly the same arguments and keyword arguments as `aigenerate` (see `?aigenerate` for details). + +"lazy" refers to the fact that it does NOT generate any output when instantiated (only when `run!` is called). + +Or said differently, the `AICall` struct and all its flavors (`AIGenerate`, ...)
are designed to facilitate a deferred execution model (lazy evaluation) for AI functions that interact with a Large Language Model (LLM). It stores the necessary information for an AI call and executes the underlying AI function only when supplied with a `UserMessage` or when the `run!` method is applied. This allows us to remember user inputs and trigger the LLM call repeatedly if needed, which enables automatic fixing (see `?airetry!`). + +If you would like a powerful auto-fixing workflow, you can use `airetry!`, which leverages Monte-Carlo tree search to pick the optimal trajectory of conversation based on your requirements. + +**Experimental: RAGTools** -Lastly, we provide a set of tools to build RAG applications (Retrieve, Answer, Generate). -- [ ] Add more practical examples (with DataFrames!) -- [ ] Add an example of how to build a RAG app in 50 lines -Noteworthy functions: `aigenerate`, `aiembed`, `aiclassify`, `aiextract`, `aitemplates` +It can be as simple as two calls: `build_index` and `airag` (Retrieve, Answer, Generate). + +If you then use pretty-printing with `PromptingTools.pprint`, we highlight the generated text vs the text likely sourced from the context, and we score how strongly the generated answer is supported by the context. +In addition, we annotate each generated chunk with a reference to which source document it likely came from (including the confidence score between 0 and 1). ### Seamless Integration Into Your Workflow Google search is great, but it's a context switch. You often have to open a few pages and read through the discussion to find the answer you need. Same with the ChatGPT website. @@ -414,7 +471,7 @@ run!(out) How is it useful? We can use the same "inputs" for repeated calls, eg, when we want to validate or regenerate some outputs. We have a function `airetry` to help us with that. -The signature of `airetry` is `airetry(condition_function, aicall::AICall, feedback_function)`. +The signature of `airetry!` is `airetry!(condition_function, aicall::AICall, feedback_function)`. It evaluates the condition `condition_function` on the `aicall` object (eg, we evaluate `f_cond(aicall) -> Bool`). If it fails, we call `feedback_function` on the `aicall` object to provide feedback for the AI model (eg, `f_feedback(aicall) -> String`) and repeat the process until it passes or until `max_retries` value is exceeded. We can catch API failures (no feedback needed, so none is provided) @@ -528,6 +585,30 @@ As you can see, it also works for any local models that you might have running o Note: At the moment, we only support `aigenerate` and `aiembed` functions for MistralAI and other OpenAI-compatible APIs. We plan to extend the support in the future. +### Using Anthropic Models + +Make sure the `ANTHROPIC_API_KEY` environment variable is set to your API key. + +```julia +# claudeh is the alias for Claude 3 Haiku +ai"Say hi!"claudeh +``` + +Preset model aliases are `claudeo`, `claudes`, and `claudeh`, for Claude 3 Opus, Sonnet, and Haiku, respectively. + +The corresponding schema is `AnthropicSchema`. + +There are several prompt templates with `XML` in the name, suggesting that they use Anthropic-friendly XML formatting for separating sections. +Find them with `aitemplates("XML")`. + +```julia +# claudeo is the alias for Claude 3 Opus +msg = aigenerate( + :JuliaExpertAskXML, ask = "How to write a function to convert Date to Millisecond?", + model = "claudeo") +``` + + ### More Examples TBU...
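In the meantime, here is a minimal sketch of the two-call RAG flow mentioned in the RAGTools note above. It is only a sketch: the documents and question are placeholders, it assumes an OpenAI API key is configured, and it assumes `build_index` accepts raw text snippets with default settings (see `?build_index` and `?airag` for the authoritative signatures):

```julia
using PromptingTools
using PromptingTools.Experimental.RAGTools

# Placeholder documents to index
docs = ["PromptingTools.jl provides the ai* functions for working with LLMs.",
    "RAGTools is an experimental module for building RAG applications."]

index = build_index(docs)   # chunk + embed the documents

# Retrieve relevant chunks and generate an answer; ask for the full result object
result = airag(index; question = "What is RAGTools?", return_all = true)

# Pretty-print the answer with the support annotations described above
PromptingTools.pprint(result)
```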
@@ -599,6 +680,13 @@ There will be situations not or cannot use it (eg, privacy, cost, etc.). In that Note: To get started with [Ollama.ai](https://ollama.ai/), see the [Setup Guide for Ollama](#setup-guide-for-ollama) section below. +### What if I cannot access OpenAI? + +There are many alternatives: + +- **Other APIs**: MistralAI, Anthropic, Google, Together, Fireworks, Voyager (the latter ones tend to give free credits upon joining!) +- **Locally-hosted models**: Llama.cpp/Llama.jl, Ollama, vLLM (see the examples and the corresponding docs) + ### Data Privacy and OpenAI At the time of writing, OpenAI does NOT use the API calls for training their models. @@ -681,7 +769,7 @@ A better way: - On a Mac, add the configuration line to your terminal's configuration file (eg, `~/.zshrc`). It will get automatically loaded every time you launch the terminal - On Windows, set it as a system variable in "Environment Variables" settings (see the Resources) -We also support Preferences.jl, so you can simply run: `PromptingTools.set_preferences!("OPENAI_API_KEY"="your-api-key")` and it will be persisted across sessions. +We also support Preferences.jl, so you can simply run: `PromptingTools.set_preferences!("OPENAI_API_KEY"=>"your-api-key")` and it will be persisted across sessions. To see the current preferences, run `PromptingTools.get_preferences("OPENAI_API_KEY")`. Be careful NOT TO COMMIT `LocalPreferences.toml` to GitHub, as it would show your API Key to the world! @@ -729,6 +817,16 @@ Show currently available models with `ollama list`. See [Ollama.ai](https://ollama.ai/) for more information. +### How would I fine-tune a model? + +Fine-tuning is a powerful technique to adapt a model to your specific use case (mostly the format/syntax/task). It requires a dataset of examples, which you can now easily generate with PromptingTools.jl! + +1. You can save any conversation (vector of messages) to a file with `PT.save_conversation("filename.json", conversation)`. + +2. Once the finetuning time comes, create a bundle of ShareGPT-formatted conversations (common finetuning format) in a single `.jsonl` file. Use `PT.save_conversations("dataset.jsonl", [conversation1, conversation2, ...])` (notice that plural "conversationS" in the function name). + +For an example of an end-to-end finetuning process, check out our sister project [JuliaLLMLeaderboard Finetuning experiment](https://github.com/svilupp/Julia-LLM-Leaderboard/blob/main/experiments/cheater-7b-finetune/README.md). It shows the process of finetuning for half a dollar with [Jarvislabs.ai](https://jarvislabs.ai/templates/axolotl) and [Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl). 
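A short sketch of the two-step flow above (the prompt and file names are placeholders; `return_all = true` keeps the full message history that gets saved):

```julia
using PromptingTools
const PT = PromptingTools

# 1) Capture a full conversation (vector of messages), not just the final answer
conversation = aigenerate("Explain what a Julia struct is."; return_all = true)
PT.save_conversation("conversation_1.json", conversation)

# 2) Bundle one or more conversations into a ShareGPT-formatted JSONL file for finetuning
PT.save_conversations("dataset.jsonl", [conversation])
```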
+ ## Roadmap This is a list of features that I'd like to see in the future (in no particular order): diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 000000000..0587d7400 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1,4 @@ +build/ +node_modules/ +package-lock.json +Manifest.toml \ No newline at end of file diff --git a/docs/Project.toml b/docs/Project.toml index 8dba66196..0995d35f8 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,6 +1,9 @@ [deps] DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +DocumenterVitepress = "4710194d-e776-4893-9690-8d956a29c365" +FlashRank = "22cc3f58-1757-4700-bb45-2032706e5a8d" +GoogleGenAI = "903d41d1-eaca-47dd-943b-fee3930375ab" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" @@ -8,4 +11,8 @@ Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" PromptingTools = "670122d1-24a8-4d70-bfce-740807c42192" +Snowball = "fb8f903a-0164-4e73-9ffe-431110250c3b" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[compat] +DocumenterVitepress = "0.0.7" diff --git a/docs/generate_prompt_library.jl b/docs/generate_prompt_library.jl new file mode 100644 index 000000000..52c62cafe --- /dev/null +++ b/docs/generate_prompt_library.jl @@ -0,0 +1,90 @@ +# Generates the "Prompt Library" sections of the docs +# +# 1 page for each folder in `templates/`, 1 section for each file in the folder + +## ! Config +input_files = joinpath(@__DIR__, "..", "templates", "general") |> + x -> readdir(x; join = true) +output_dir = joinpath(@__DIR__, "src", "prompts") +mkpath(output_dir); + +## Utilities +"Returns the file name and the section name." +function extract_md_hierarchy(fn) + ## find the depth of nested folders + p = splitpath(fn) + idx = findfirst(==("templates"), p) + if idx == nothing || idx >= length(p) - 1 + nothing, nothing + elseif idx == length(p) - 2 + ## no dual subfolder, duplicate name + p[idx + 1] * ".md", titlecase(p[idx + 1]) + else + ## has dual subfolder + p[idx + 1] * ".md", titlecase(p[idx + 2]) + end +end +function escape_prompt(s) + ## escape HTML tags + ## s = replace( + ## s, "\n" => "\n> ", "<" => "\\<", ">" => "\\>", "{{" => "\\{\\{", "}}" => "\\}\\}") + ## return "> " * s + """`````plaintext\n$(s)\n`````\n""" +end + +## Load the templates +# key: top-level folder, sub-folder, file +loaded_templates = Dict{String, Dict}() +for (dir, _, files) in walkdir(joinpath(@__DIR__, "..", "templates")) + for file in files + fn = joinpath(dir, file) + if endswith(fn, ".json") + dest_file, section = extract_md_hierarchy(fn) + if isnothing(dest_file) + continue + end + dest_file_path = joinpath(output_dir, dest_file) + template, metadata = PT.load_template(fn) + template_name = splitext(basename(file))[1] |> Symbol + # Assumes that there is only ever one UserMessage and SystemMessage (concats them together) + meta = PT.build_template_metadata( + template, template_name, metadata; max_length = 10^6) + ## save to loaded_templates + file_dict = get!(loaded_templates, dest_file_path, Dict()) + section_vect = get!(file_dict, section, []) + push!(section_vect, meta) + end + end +end + +## Write into files +for file_path in keys(loaded_templates) + io = IOBuffer() + println(io, + "The following file is auto-generated from the `templates` folder. 
For any changes, please modify the source files in the `templates` folder.\n") + println(io, + "To use these templates in `aigenerate`, simply provide the template name as a symbol, eg, `aigenerate(:MyTemplate; placeholder1 = value1)`") + println(io) + for (section, templates) in loaded_templates[file_path] + println(io, "## $(section) Templates\n") + for meta in templates + println(io, "### Template: $(meta.name)") + println(io) + println(io, "- Description: $(meta.description)") + println( + io, "- Placeholders: $(join("`" .* string.(meta.variables) .* "`",", "))") + println(io, "- Word count: $(meta.wordcount)") + println(io, "- Source: $(meta.source)") + println(io, "- Version: $(meta.version)") + println(io) + println(io, "**System Prompt:**") + println(io, escape_prompt(meta.system_preview)) + println(io) + println(io, "**User Prompt:**") + println(io, escape_prompt(meta.user_preview)) + println(io) + end + end + ## write to file + write(file_path, String(take!(io))) +end diff --git a/docs/make.jl b/docs/make.jl index e5ca9f168..2aa1caaf4 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,11 +1,18 @@ +using Documenter, DocumenterVitepress using PromptingTools -using Documenter -using SparseArrays, LinearAlgebra, Markdown +const PT = PromptingTools +using SparseArrays, LinearAlgebra, Markdown, Unicode, FlashRank using PromptingTools.Experimental.RAGTools using PromptingTools.Experimental.AgentTools using JSON3, Serialization, DataFramesMeta using Statistics: mean +## Generate the prompt documentation +include("generate_prompt_library.jl") + +# Enable debugging for vitepress +ENV["DEBUG"] = "vitepress:*" + DocMeta.setdocmeta!(PromptingTools, :DocTestSetup, :(using PromptingTools); @@ -15,39 +22,25 @@ makedocs(; modules = [ PromptingTools, PromptingTools.Experimental.RAGTools, - PromptingTools.Experimental.AgentTools, + PromptingTools.Experimental.AgentTools ], authors = "J S <49557684+svilupp@users.noreply.github.com> and contributors", repo = "https://github.com/svilupp/PromptingTools.jl/blob/{commit}{path}#{line}", sitename = "PromptingTools.jl", - format = Documenter.HTML(; - prettyurls = get(ENV, "CI", "false") == "true", - repolink = "https://github.com/svilupp/PromptingTools.jl", - canonical = "https://svilupp.github.io/PromptingTools.jl", - edit_link = "main", - size_threshold = nothing, - assets = String[]), - pages = [ - "Home" => "index.md", - "Getting Started" => "getting_started.md", - "Examples" => [ - "Various examples" => "examples/readme_examples.md", - "Using AITemplates" => "examples/working_with_aitemplates.md", - "Local models with Ollama.ai" => "examples/working_with_ollama.md", - "Google AIStudio" => "examples/working_with_google_ai_studio.md", - "Custom APIs (Mistral, Llama.cpp)" => "examples/working_with_custom_apis.md", - "Building RAG Application" => "examples/building_RAG.md", - ], - "F.A.Q." 
=> "frequently_asked_questions.md", - "Reference" => [ - "PromptingTools.jl" => "reference.md", - "Experimental Modules" => "reference_experimental.md", - "RAGTools" => "reference_ragtools.md", - "AgentTools" => "reference_agenttools.md", - "APITools" => "reference_apitools.md", - ], - ]) + format = DocumenterVitepress.MarkdownVitepress( + repo = "https://github.com/svilupp/PromptingTools.jl", + devbranch = "main", + devurl = "dev", + deploy_url = "svilupp.github.io/PromptingTools.jl" + ), + draft = false, + source = "src", + build = "build", + ) deploydocs(; repo = "github.com/svilupp/PromptingTools.jl", + target = "build", + push_preview = true, + branch = "gh-pages", devbranch = "main") diff --git a/docs/package.json b/docs/package.json new file mode 100644 index 000000000..275146bd3 --- /dev/null +++ b/docs/package.json @@ -0,0 +1,18 @@ +{ + "devDependencies": { + "markdown-it": "^14.0.0", + "markdown-it-mathjax3": "^4.3.2", + "vitepress": "^1.3.3", + "vitepress-plugin-tabs": "^0.5.0", + "vitest": "^1.3.0" + }, + "scripts": { + "docs:dev": "vitepress dev build/.documenter", + "docs:build": "vitepress build build/.documenter", + "docs:preview": "vitepress preview build/.documenter" + }, + "dependencies": { + "@shikijs/transformers": "^1.1.7", + "markdown-it-footnote": "^4.0.0" + } +} diff --git a/docs/src/.vitepress/config.mts b/docs/src/.vitepress/config.mts new file mode 100644 index 000000000..a0a72c7b7 --- /dev/null +++ b/docs/src/.vitepress/config.mts @@ -0,0 +1,128 @@ +import { defineConfig } from 'vitepress' +import { tabsMarkdownPlugin } from 'vitepress-plugin-tabs' +import mathjax3 from "markdown-it-mathjax3"; +import footnote from "markdown-it-footnote"; + +// https://vitepress.dev/reference/site-config +export default defineConfig({ + base: 'REPLACE_ME_DOCUMENTER_VITEPRESS',// TODO: replace this in makedocs! + title: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + description: "Streamline Your Interactions with GenAI Models. Discover the power of GenerativeAI and build mini workflows to save you 20 minutes every day.", + lastUpdated: true, + cleanUrls: true, + outDir: 'REPLACE_ME_DOCUMENTER_VITEPRESS', // This is required for MarkdownVitepress to work correctly... 
+ head: [['link', { rel: 'icon', href: 'REPLACE_ME_DOCUMENTER_VITEPRESS_FAVICON' }]], + ignoreDeadLinks: true, + + markdown: { + math: true, + config(md) { + md.use(tabsMarkdownPlugin), + md.use(mathjax3), + md.use(footnote) + }, + theme: { + light: "github-light", + dark: "github-dark"} + }, + themeConfig: { + outline: 'deep', + logo: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + search: { + provider: 'local', + options: { + detailedView: true + } + }, + nav: [ + { text: 'Home', link: '/index' }, + { text: 'Manual', + items:[ + { text: 'Getting Started', link: '/getting_started' }, + { text: 'How It Works', link: '/how_it_works' }, + { text: 'Coverage of Model Providers', link: '/coverage_of_model_providers' }, + { text: 'Examples', items: [ + { text: 'Various examples', link: '/examples/readme_examples' }, + { text: 'Using AITemplates', link: '/examples/working_with_aitemplates' }, + { text: 'Local models with Ollama.ai', link: '/examples/working_with_ollama' }, + { text: 'Google AIStudio', link: '/examples/working_with_google_ai_studio' }, + { text: 'Custom APIs (Mistral, Llama.cpp)', link: '/examples/working_with_custom_apis' }, + { text: 'Building RAG Application', link: '/examples/building_RAG' }] + }, + { text: 'Extra Tools', items: [ + { text: 'Text Utilities', link: '/extra_tools/text_utilities_intro' }, + { text: 'AgentTools', link: '/extra_tools/agent_tools_intro' }, + { text: 'RAGTools', link: '/extra_tools/rag_tools_intro' }, + { text: 'APITools', link: '/extra_tools/api_tools_intro' }] + }, + ], + }, + { text: 'F.A.Q.', link: '/frequently_asked_questions' }, + { text: 'Prompt Templates', items: [ + { text: 'General', link: '/prompts/general' }, + { text: 'Persona-Task', link: '/prompts/persona-task' }, + { text: 'Visual', link: '/prompts/visual' }, + { text: 'Classification', link: '/prompts/classification' }, + { text: 'Extraction', link: '/prompts/extraction' }, + { text: 'Agents', link: '/prompts/agents' }, + { text: 'RAG', link: '/prompts/RAG' }] + }, + { text: 'Reference', items: [ + { text: 'PromptingTools.jl', link: '/reference' }, + { text: 'Experimental Modules', link: '/reference_experimental' }, + { text: 'RAGTools', link: '/reference_ragtools' }, + { text: 'AgentTools', link: '/reference_agenttools' }, + { text: 'APITools', link: '/reference_apitools' }] + } + ], + sidebar: [ + { text: 'Home', link: '/index' }, + { text: 'Manual', + items:[ + { text: 'Getting Started', link: '/getting_started' }, + { text: 'How It Works', link: '/how_it_works' }, + { text: 'Coverage of Model Providers', link: '/coverage_of_model_providers' }, + { text: 'Examples', collapsed: true, items: [ + { text: 'Various examples', link: '/examples/readme_examples' }, + { text: 'Using AITemplates', link: '/examples/working_with_aitemplates' }, + { text: 'Local models with Ollama.ai', link: '/examples/working_with_ollama' }, + { text: 'Google AIStudio', link: '/examples/working_with_google_ai_studio' }, + { text: 'Custom APIs (Mistral, Llama.cpp)', link: '/examples/working_with_custom_apis' }, + { text: 'Building RAG Application', link: '/examples/building_RAG' }] + }, + { text: 'Extra Tools', collapsed: true, items: [ + { text: 'Text Utilities', link: '/extra_tools/text_utilities_intro' }, + { text: 'AgentTools', link: '/extra_tools/agent_tools_intro' }, + { text: 'RAGTools', link: '/extra_tools/rag_tools_intro' }, + { text: 'APITools', link: '/extra_tools/api_tools_intro' }] + }, + ], + }, + { text: 'F.A.Q.', link: '/frequently_asked_questions' }, + { text: 'Prompt Templates', collapsed: true, items: 
[ + { text: 'General', link: '/prompts/general' }, + { text: 'Persona-Task', link: '/prompts/persona-task' }, + { text: 'Visual', link: '/prompts/visual' }, + { text: 'Classification', link: '/prompts/classification' }, + { text: 'Extraction', link: '/prompts/extraction' }, + { text: 'Agents', link: '/prompts/agents' }, + { text: 'RAG', link: '/prompts/RAG' }] + }, + { text: 'Reference', collapsed: true, items: [ + { text: 'PromptingTools.jl', link: '/reference' }, + { text: 'Experimental Modules', link: '/reference_experimental' }, + { text: 'RAGTools', link: '/reference_ragtools' }, + { text: 'AgentTools', link: '/reference_agenttools' }, + { text: 'APITools', link: '/reference_apitools' }] + } + ], + editLink: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + socialLinks: [ + { icon: 'github', link: 'REPLACE_ME_DOCUMENTER_VITEPRESS' } + ], + footer: { + message: 'Made with Documenter.jl & VitePress & Icons by Icons8
', + copyright: `© Copyright ${new Date().getUTCFullYear()}.` + } + } +}) \ No newline at end of file diff --git a/docs/src/.vitepress/theme/index.ts b/docs/src/.vitepress/theme/index.ts new file mode 100644 index 000000000..463b5d858 --- /dev/null +++ b/docs/src/.vitepress/theme/index.ts @@ -0,0 +1,19 @@ +// .vitepress/theme/index.ts +import { h } from 'vue' +import type { Theme } from 'vitepress' +import DefaultTheme from 'vitepress/theme' + +import { enhanceAppWithTabs } from 'vitepress-plugin-tabs/client' +import './style.css' + +export default { + extends: DefaultTheme, + Layout() { + return h(DefaultTheme.Layout, null, { + // https://vitepress.dev/guide/extending-default-theme#layout-slots + }) + }, + enhanceApp({ app, router, siteData }) { + enhanceAppWithTabs(app) + } +} satisfies Theme \ No newline at end of file diff --git a/docs/src/.vitepress/theme/style.css b/docs/src/.vitepress/theme/style.css new file mode 100644 index 000000000..1772543c1 --- /dev/null +++ b/docs/src/.vitepress/theme/style.css @@ -0,0 +1,170 @@ +@import url(https://fonts.googleapis.com/css?family=Space+Mono:regular,italic,700,700italic); +@import url(https://fonts.googleapis.com/css?family=Space+Grotesk:regular,italic,700,700italic); + +/* Customize default theme styling by overriding CSS variables: +https://github.com/vuejs/vitepress/blob/main/src/client/theme-default/styles/vars.css + */ + + /* Layouts */ + +/* + :root { + --vp-layout-max-width: 1440px; +} */ + +.VPHero .clip { + white-space: pre; + max-width: 500px; +} + +/* Fonts */ + +:root { + /* Typography */ + --vp-font-family-base: "Barlow", "Inter var experimental", "Inter var", + -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, Ubuntu, + Cantarell, "Fira Sans", "Droid Sans", "Helvetica Neue", sans-serif; + + /* Code Snippet font */ + --vp-font-family-mono: "Fira Code", Menlo, Monaco, Consolas, "Courier New", + monospace; +} + +/* Colors */ + +:root { + --julia-blue: #4063D8; + --julia-purple: #9558B2; + --julia-red: #CB3C33; + --julia-green: #389826; + + --vp-c-brand: rgb(209, 110, 142); + --vp-c-brand-light: rgb(136,109,156); + --vp-c-brand-lighter: #9499ff; + --vp-c-brand-lightest: #bcc0ff; + --vp-c-brand-dark: #535bf2; + --vp-c-brand-darker: #454ce1; + --vp-c-brand-dimm: #212425; +} + + /* Component: Button */ + +:root { + --vp-button-brand-border: var(--vp-c-brand-light); + --vp-button-brand-text: var(--vp-c-white); + --vp-button-brand-bg: var(--vp-c-brand); + --vp-button-brand-hover-border: var(--vp-c-brand-light); + --vp-button-brand-hover-text: var(--vp-c-white); + --vp-button-brand-hover-bg: var(--vp-c-brand-light); + --vp-button-brand-active-border: var(--vp-c-brand-light); + --vp-button-brand-active-text: var(--vp-c-white); + --vp-button-brand-active-bg: var(--vp-button-brand-bg); +} + +/* Component: Home */ + +:root { + --vp-home-hero-name-color: transparent; + --vp-home-hero-name-background: -webkit-linear-gradient( + 120deg, + rgb(136,109,156) 30%, + rgb(209, 110, 142) + ); + + --vp-home-hero-image-background-image: linear-gradient( + -120deg, + rgba(136,109,156, 0.75) 10%, + rgba(47, 47, 47,0.1) 50%, + rgba(209, 110, 142,0.75) + ); + --vp-home-hero-image-filter: blur(40px); +} + +@media (min-width: 640px) { + :root { + --vp-home-hero-image-filter: blur(56px); + } +} + +@media (min-width: 960px) { + :root { + --vp-home-hero-image-filter: blur(72px); + } +} + +/* Component: Custom Block */ + +:root.dark { + --vp-custom-block-tip-border: var(--vp-c-brand); + --vp-custom-block-tip-text: var(--vp-c-brand-lightest); + 
--vp-custom-block-tip-bg: var(--vp-c-brand-dimm); + + /* // Tweak the color palette for blacks and dark grays */ + --vp-c-black: hsl(220 20% 9%); + --vp-c-black-pure: hsl(220, 24%, 4%); + --vp-c-black-soft: hsl(220 16% 13%); + --vp-c-black-mute: hsl(220 14% 17%); + --vp-c-gray: hsl(220 8% 56%); + --vp-c-gray-dark-1: hsl(220 10% 39%); + --vp-c-gray-dark-2: hsl(220 12% 28%); + --vp-c-gray-dark-3: hsl(220 12% 23%); + --vp-c-gray-dark-4: hsl(220 14% 17%); + --vp-c-gray-dark-5: hsl(220 16% 13%); + + /* // Backgrounds */ + /* --vp-c-bg: hsl(240, 2%, 11%); */ + --vp-custom-block-info-bg: hsl(220 14% 17%); + /* --vp-c-gutter: hsl(220 20% 9%); + + --vp-c-bg-alt: hsl(220 20% 9%); + --vp-c-bg-soft: hsl(220 14% 17%); + --vp-c-bg-mute: hsl(220 12% 23%); + */ +} + + /* Component: Algolia */ + +.DocSearch { + --docsearch-primary-color: var(--vp-c-brand) !important; +} + +/* Component: MathJax */ + +mjx-container > svg { + display: block; + margin: auto; +} + +mjx-container { + padding: 0.5rem 0; +} + +mjx-container { + display: inline-block; + margin: auto 2px -2px; +} + +mjx-container > svg { + margin: auto; + display: inline-block; +} + +/** + * Colors links + * -------------------------------------------------------------------------- */ + + :root { + --vp-c-brand-1: rgb(136,109,156); + --vp-c-brand-2: rgb(136,109,156); + --vp-c-brand-3: rgb(136,109,156); + --vp-c-sponsor: #ca2971; + --vitest-c-sponsor-hover: #c13071; +} + +.dark { + --vp-c-brand-1: rgb(209, 110, 142); + --vp-c-brand-2: rgb(209, 110, 142); + --vp-c-brand-3: rgb(209, 110, 142); + --vp-c-sponsor: rgb(209, 110, 142); + --vitest-c-sponsor-hover: #e51370; +} \ No newline at end of file diff --git a/docs/src/assets/favicon.png b/docs/src/assets/favicon.png new file mode 100644 index 000000000..b8da18308 Binary files /dev/null and b/docs/src/assets/favicon.png differ diff --git a/docs/src/assets/logo.png b/docs/src/assets/logo.png new file mode 100644 index 000000000..806ccef09 Binary files /dev/null and b/docs/src/assets/logo.png differ diff --git a/docs/src/coverage_of_model_providers.md b/docs/src/coverage_of_model_providers.md new file mode 100644 index 000000000..e2d229f56 --- /dev/null +++ b/docs/src/coverage_of_model_providers.md @@ -0,0 +1,35 @@ +```@meta +CurrentModule = PromptingTools +``` + +# Coverage of Model Providers + +PromptingTools.jl routes AI calls through the use of subtypes of AbstractPromptSchema, which determine how data is formatted and where it is sent. (For example, OpenAI models have the corresponding subtype AbstractOpenAISchema, having the corresponding schemas - OpenAISchema, CustomOpenAISchema, etc.) This ensures that the data is correctly formatted for the specific AI model provider. + +Below is an overview of the model providers supported by PromptingTools.jl, along with the corresponding schema information. 
+ +| Abstract Schema | Schema | Model Provider | aigenerate | aiembed | aiextract | aiscan | aiimage | aiclassify | +|-------------------------|---------------------------|----------------------------------------|------------|---------|-----------|--------|---------|------------| +| AbstractOpenAISchema | OpenAISchema | OpenAI | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| AbstractOpenAISchema | CustomOpenAISchema* | Any OpenAI-compatible API (eg, vLLM)* | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| AbstractOpenAISchema | LocalServerOpenAISchema** | Any OpenAI-compatible Local server** | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| AbstractOpenAISchema | MistralOpenAISchema | Mistral AI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| AbstractOpenAISchema | DatabricksOpenAISchema | Databricks | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| AbstractOpenAISchema | FireworksOpenAISchema | Fireworks AI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| AbstractOpenAISchema | TogetherOpenAISchema | Together AI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| AbstractOpenAISchema | GroqOpenAISchema | Groq | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | +| AbstractOllamaSchema | OllamaSchema | Ollama (endpoint `api/chat`) | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | +| AbstractManagedSchema | AbstractOllamaManagedSchema | Ollama (endpoint `api/generate`) | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +| AbstractAnthropicSchema | AnthropicSchema | Anthropic | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | +| AbstractGoogleSchema | GoogleSchema | Google Gemini | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | + + +\* Catch-all implementation - Requires providing a `url` with `api_kwargs` and corresponding API key. + +\*\* This schema is a flavor of CustomOpenAISchema with a `url` key preset by global preference key `LOCAL_SERVER`. It is specifically designed for seamless integration with Llama.jl and utilizes an ENV variable for the URL, making integration easier in certain workflows, such as when nested calls are involved and passing `api_kwargs` is more challenging. + +**Note 1:** `aitools` has identical support as `aiextract` for all providers, as it has the API requirements. + +**Note 2:** The `aiscan` and `aiimage` functions rely on specific endpoints being implemented by the provider. Ensure that the provider you choose supports these functionalities. + +For more detailed explanations of the functions and schema information, refer to [How It Works](https://siml.earth/PromptingTools.jl/dev/how_it_works#ai*-Functions-Overview). \ No newline at end of file diff --git a/docs/src/diagrams/rag_diagram_detailed.png b/docs/src/diagrams/rag_diagram_detailed.png new file mode 100644 index 000000000..f50309c3c Binary files /dev/null and b/docs/src/diagrams/rag_diagram_detailed.png differ diff --git a/docs/src/diagrams/rag_diagram_highlevel.png b/docs/src/diagrams/rag_diagram_highlevel.png new file mode 100644 index 000000000..bb002bf75 Binary files /dev/null and b/docs/src/diagrams/rag_diagram_highlevel.png differ diff --git a/docs/src/examples/building_RAG.md b/docs/src/examples/building_RAG.md index 9108269de..9c56aa2b2 100644 --- a/docs/src/examples/building_RAG.md +++ b/docs/src/examples/building_RAG.md @@ -9,9 +9,11 @@ Let's build a Retrieval-Augmented Generation (RAG) chatbot, tailored to navigate If you're not familiar with "RAG", start with this [article](https://towardsdatascience.com/add-your-own-data-to-an-llm-using-retrieval-augmented-generation-rag-b1958bf56a5a). +Note: You must first import `LinearAlgebra`, `SparseArrays`, and `Unicode` to use this example! 
+ ````julia -using LinearAlgebra, SparseArrays +using LinearAlgebra, SparseArrays, Unicode using PromptingTools using PromptingTools.Experimental.RAGTools ## Note: RAGTools module is still experimental and will change in the future. Ideally, they will be cleaned up and moved to a dedicated package @@ -57,7 +59,7 @@ What does it do? - [OPTIONAL] extracts any potential tags/filters from the question and applies them to filter down the potential candidates (use `extract_metadata=true` in `build_index`, you can also provide some filters explicitly via `tag_filter`) - [OPTIONAL] re-ranks the candidate chunks (define and provide your own `rerank_strategy`, eg Cohere ReRank API) - build a context from the closest chunks (use `chunks_window_margin` to tweak if we include preceding and succeeding chunks as well, see `?build_context` for more details) -- generate an answer from the closest chunks (use `return_context=true` to see under the hood and debug your application) +- generate an answer from the closest chunks (use `return_all=true` to see under the hood and debug your application) You should save the index for later to avoid re-embedding / re-extracting the document chunks! @@ -124,7 +126,7 @@ Let's evaluate this QA item with a "judge model" (often GPT-4 is used as a judge ````julia # Note: that we used the same question, but generated a different context and answer via `airag` -msg, ctx = airag(index; evals[1].question, return_context = true); +ctx = airag(index; evals[1].question, return_all = true); # ctx is a RAGContext object that keeps all intermediate states of the RAG pipeline for easy evaluation judged = aiextract(:RAGJudgeAnswerFromContext; ctx.context, @@ -173,17 +175,16 @@ Let's run each question & answer through our eval loop in async (we do it only f ````julia results = asyncmap(evals[1:10]) do qa_item # Generate an answer -- often you want the model_judge to be the highest quality possible, eg, "GPT-4 Turbo" (alias "gpt4t) - msg, ctx = airag(index; qa_item.question, return_context = true, - top_k = 3, verbose = false, model_judge = "gpt4t") + ctx = airag(index; qa_item.question, return_all = true, verbose = false) # Evaluate the response # Note: you can log key parameters for easier analysis later - run_qa_evals(qa_item, ctx; parameters_dict = Dict(:top_k => 3), verbose = false) + run_qa_evals(qa_item, ctx; parameters_dict = Dict(:top_k => 3), verbose = false, model_judge = "gpt4t") end ## Note that the "failed" evals can show as "nothing" (failed as in there was some API error or parsing error), so make sure to handle them. results = filter(x->!isnothing(x.answer_score), results); ```` -Note: You could also use the vectorized version `results = run_qa_evals(evals)` to evaluate all items at once. +Note: You could also use the vectorized version `results = run_qa_evals(index, evals)` to evaluate all items at once. 
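For orientation, here is a minimal sketch of that vectorized form (defaults only; see `?run_qa_evals` for the available keyword arguments — the summary step assumes the `answer_score` field shown above):

```julia
using Statistics: mean

# Sketch only: evaluate all QA items in one call, then summarize the judge scores
results = run_qa_evals(index, evals)
valid = filter(x -> !isnothing(x.answer_score), results)
mean(x -> x.answer_score, valid)
```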
````julia diff --git a/docs/src/examples/readme_examples.md b/docs/src/examples/readme_examples.md index 08925e101..c82224537 100644 --- a/docs/src/examples/readme_examples.md +++ b/docs/src/examples/readme_examples.md @@ -1,6 +1,56 @@ # Various Examples -Noteworthy functions: `aigenerate`, `aiembed`, `aiclassify`, `aiextract`, `aitemplates` +## `ai*` Functions Overview + +Noteworthy functions: `aigenerate`, `aiembed`, `aiclassify`, `aiextract`, `aiscan`, `aiimage`, `aitemplates` + +All `ai*` functions have the same basic structure: + +`ai*(,; )`, + +but they differ in purpose: + +- `aigenerate` is the general-purpose function to generate any text response with LLMs, ie, it returns `AIMessage` with field `:content` containing the generated text (eg, `ans.content isa AbstractString`) +- `aiembed` is designed to extract embeddings from the AI model's response, ie, it returns `DataMessage` with field `:content` containing the embeddings (eg, `ans.content isa AbstractArray`) +- `aiextract` is designed to extract structured data from the AI model's response and return them as a Julia struct (eg, if we provide `return_type=Food`, we get `ans.content isa Food`). You need to define the return type first and then provide it as a keyword argument. +- `aiclassify` is designed to classify the input text into (or simply respond within) a set of discrete `choices` provided by the user. It can be very useful as an LLM Judge or a router for RAG systems, as it uses the "logit bias trick" and generates exactly 1 token. It returns `AIMessage` with field `:content`, but the `:content` can be only one of the provided `choices` (eg, `ans.content in choices`) +- `aiscan` is for working with images and vision-enabled models (as an input), but it returns `AIMessage` with field `:content` containing the generated text (eg, `ans.content isa AbstractString`) similar to `aigenerate`. +- `aiimage` is for generating images (eg, with OpenAI DALL-E 3). It returns a `DataMessage`, where the field `:content` might contain either the URL to download the image from or the Base64-encoded image depending on the user-provided kwarg `api_kwargs.response_format`. +- `aitemplates` is a helper function to discover available templates and see their details (eg, `aitemplates("some keyword")` or `aitemplates(:AssistantAsk)`) + +If you're using a known `model`, you do NOT need to provide a `schema` (the first argument). + +Optional keyword arguments in `ai*` tend to be: + +- `model::String` - Which model you want to use +- `verbose::Bool` - Whether you went to see INFO logs around AI costs +- `return_all::Bool` - Whether you want the WHOLE conversation or just the AI answer (ie, whether you want to include your inputs/prompt in the output) +- `api_kwargs::NamedTuple` - Specific parameters for the model, eg, `temperature=0.0` to be NOT creative (and have more similar output in each run) +- `http_kwargs::NamedTuple` - Parameters for the HTTP.jl package, eg, `readtimeout = 120` to time out in 120 seconds if no response was received. + +**Experimental: AgentTools** + +In addition to the above list of `ai*` functions, you can also use the **"lazy" counterparts** of these functions from the experimental AgentTools module. +```julia +using PromptingTools.Experimental.AgentTools +``` + +For example, `AIGenerate()` will create a lazy instance of `aigenerate`. It is an instance of `AICall` with `aigenerate` as its ai function. +It uses exactly the same arguments and keyword arguments as `aigenerate` (see `?aigenerate` for details). 
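For example, a minimal sketch of the lazy flow (the model alias is illustrative):

```julia
using PromptingTools.Experimental.AgentTools

out = AIGenerate("Say hi!"; model = "gpt4t")  # nothing is sent to the LLM yet
run!(out)                                     # the request is executed only now
last_output(out)                              # text of the last AI response
```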
+ +"lazy" refers to the fact that it does NOT generate any output when instantiated (only when `run!` is called). + +Or said differently, the `AICall` struct and all its flavors (`AIGenerate`, ...) are designed to facilitate a deferred execution model (lazy evaluation) for AI functions that interact with a Language Learning Model (LLM). It stores the necessary information for an AI call and executes the underlying AI function only when supplied with a `UserMessage` or when the `run!` method is applied. This allows us to remember user inputs and trigger the LLM call repeatedly if needed, which enables automatic fixing (see `?airetry!`). + +**Experimental: RAGTools** + +Lastly, we provide a set of tools to build RAG applications (Retrieve, Answer, Generate). + +It can be as simple as two calls: `build_index` and `airag` (Retrieve, Answer, Generate). + +If you then use pretty-printing with `PromptingTools.pprint`, we highlight the generated text vs text likely sourced from the context and we score how strongly is the generated answer supported by the context. +In addition, we annotate each generated chunk with a reference to which source document it likely came from (including the confidence score between 0 and 1). + ## Seamless Integration Into Your Workflow Google search is great, but it's a context switch. You often have to open a few pages and read through the discussion to find the answer you need. Same with the ChatGPT website. diff --git a/docs/src/examples/working_with_google_ai_studio.md b/docs/src/examples/working_with_google_ai_studio.md index d6b07e597..9ac2804d9 100644 --- a/docs/src/examples/working_with_google_ai_studio.md +++ b/docs/src/examples/working_with_google_ai_studio.md @@ -6,10 +6,10 @@ Get an API key from [here](https://ai.google.dev/). If you see a documentation p Save the API key in your environment as `GOOGLE_API_KEY`. -We'll need `GoogleGenAI.jl` package: +We'll need `GoogleGenAI` package: ````julia -using Pkg; Pkg.add(url="https://github.com/tylerjthomas9/GoogleGenAI.jl/") +using Pkg; Pkg.add("GoogleGenAI") ```` You can now use the Gemini-1.0-Pro model like any other model in PromptingTools. We **only support `aigenerate`** at the moment. @@ -38,7 +38,7 @@ AIMessage("Hi there! As a helpful AI assistant, I'm here to help you with any qu You could achieve the same with a string macro (notice the "gemini" at the end to specify which model to use): ````julia -@ai"Say hi!"gemini +ai"Say hi!"gemini ```` ### Advanced Prompts diff --git a/docs/src/extra_tools/agent_tools_intro.md b/docs/src/extra_tools/agent_tools_intro.md new file mode 100644 index 000000000..c6f77e61f --- /dev/null +++ b/docs/src/extra_tools/agent_tools_intro.md @@ -0,0 +1,100 @@ +```@meta +CurrentModule = PromptingTools.Experimental.AgentTools +``` + +# Agent Tools Introduction + +`AgentTools` is an experimental module that provides a set of utilities for building advanced agentic workflows, code-generating and self-fixing agents. + +Import the module as follows: + +```julia +using PromptingTools.Experimental.AgentTools +# to access unexported functionality +const AT = PromptingTools.Experimental.AgentTools +``` + +## Highlights + +The main functions to be aware of are: +- `AIGenerate` - Lazy counterpart of `aigenerate()`. All `ai*` functions have a corresponding `AI*::AICall` struct that allows for deferred execution (triggered by `run!` method). +- `last_output`, `last_message` - Simple utilities to access the last output and message of the AI calls like `AIGenerate`. 
+- `airetry!` - A utility to automatically retry the AI call with the same inputs if the AI model fails to generate a valid output. It allows retrying many times and providing feedback to the AI model about the failure to increase its robustness. `AIGenerate` and other AI calls have a field `config::RetryConfig` where you can globally adjust the retrying behavior. +- `print_samples` - `airetry!` implements a Monte Carlo Tree Search under the hood when trying to find the best way to fix the AI model's failure. `print_samples` is a utility to print the "samples" generated by the MCTS to better understand the attempts made by the AI model to fix the failure. +- `AICode` extensions like `aicodefixer_feedback` and `error_feedback` - `AICode` is a wrapper that extracts any Julia code provided in the `AIMessage` (response from the AI model) and executes it (including catch any errors). `aicodefixer_feedback` and `error_feedback` are utilities that automatically review an outcome of `AICode` evaluation and generate the corresponding feedback for the AI model. + + +The main contribution of this module is providing the "lazy" counterparts to the `ai...` functions, which allow us to build a workflow, which can be re-executed many times with the same inputs. + +For example, `AIGenerate()` will create a lazy instance of `aigenerate`, which is an instance of `AICall` with `aigenerate` as its ai-calling function. It uses exactly the same arguments and keyword arguments as `aigenerate` (see `?aigenerate` for details). The notion of "lazy" refers to the fact that it does NOT generate any output when instantiated (only when `run!` is called). + +Or said differently, the `AICall` struct and all its flavors (`AIGenerate`, ...) are designed to facilitate a deferred execution model (lazy evaluation) for AI functions that interact with a Language Learning Model (LLM). It stores the necessary information for an AI call and executes the underlying AI function only when supplied with a `UserMessage` or when the `run!` method is applied. This allows us to remember user inputs and trigger the LLM call repeatedly if needed, which enables automatic fixing (see `?airetry!`). + +## Examples + +### Automatic Fixing of AI Calls + +We need to switch from `aigenerate` to `AIGenerate` to get the lazy version of the function. +```julia +output = AIGenerate("Say hi!"; model="gpt4t") |> run! +``` + +How is it useful? We can use the same "inputs" for repeated calls, eg, when we want to validate +or regenerate some outputs. We have a function `airetry!` to help us with that. + +The signature of `airetry` is `airetry(condition_function, aicall::AICall, feedback_function)`. + +It evaluates the condition `condition_function` on the `aicall` object (eg, we evaluate `f_cond(aicall) -> Bool`). If it fails, we call `feedback_function` on the `aicall` object to provide feedback for the AI model (eg, `f_feedback(aicall) -> String`) and repeat the process until it passes or until `max_retries` value is exceeded. 
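To make the call shape concrete, here is a small self-contained sketch (the condition and feedback string are illustrative):

```julia
# Ask for a number and enforce that the reply parses as an integer
out = AIGenerate("Pick a number between 1 and 10. Answer with digits only.") |> run!
airetry!(out, "Answer with digits only, nothing else.") do aicall
    !isnothing(tryparse(Int, strip(last_output(aicall))))
end
out.success  # true if the condition eventually passed
```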
+ +We can **catch API failures** (no feedback needed, so none is provided) +```julia +# API failure because of a non-existent model +# RetryConfig allows us to change the "retry" behaviour of any lazy call +output = AIGenerate("say hi!"; config = RetryConfig(; catch_errors = true), + model = "NOTEXIST") +run!(output) # fails + +# we ask to wait 2s between retries and retry 2 times (can be set in `config` in aicall as well) +airetry!(isvalid, output; retry_delay = 2, max_retries = 2) +``` + +Or we can use it for **output validation** (eg, its format, its content, etc.) and feedback generation. + +Let's play a color guessing game (I'm thinking "yellow"). We'll implement two formatting checks with `airetry!`: + +```julia +# Notice that we ask for two samples (`n_samples=2`) at each attempt (to improve our chances). +# Both guesses are scored at each time step, and the best one is chosen for the next step. +# And with OpenAI, we can set `api_kwargs = (;n=2)` to get both samples simultaneously (cheaper and faster)! +out = AIGenerate( + "Guess what color I'm thinking. It could be: blue, red, black, white, yellow. Answer with 1 word only"; + verbose = false, + config = RetryConfig(; n_samples = 2), api_kwargs = (; n = 2)) +run!(out) + +## Check that the output is 1 word only, third argument is the feedback that will be provided if the condition fails +## Notice: functions operate on `aicall` as the only argument. We can use utilities like `last_output` and `last_message` to access the last message and output in the conversation. +airetry!(x -> length(split(last_output(x), r" |\\.")) == 1, out, + "You must answer with 1 word only.") + +# Note: you could also use the do-syntax, eg, +airetry!(out, "You must answer with 1 word only.") do aicall + length(split(last_output(aicall), r" |\\.")) == 1 +end +``` + +You can even add the guessing itself as an `airetry!` condition of `last_output(out) == "yellow"` and provide feedback if the guess is wrong. + +## References + +```@docs; canonical=false +AIGenerate +AICall +last_output +last_message +airetry! +print_samples +AICode +aicodefixer_feedback +error_feedback +``` diff --git a/docs/src/extra_tools/api_tools_intro.md b/docs/src/extra_tools/api_tools_intro.md new file mode 100644 index 000000000..66e987dea --- /dev/null +++ b/docs/src/extra_tools/api_tools_intro.md @@ -0,0 +1,25 @@ +```@meta +CurrentModule = PromptingTools.Experimental.APITools +``` + +# APITools Introduction + +`APITools` is an experimental module wrapping helpful APIs for working with and enhancing GenerativeAI models. + +Import the module as follows: + +```julia +using PromptingTools.Experimental.APITools +``` + +## Highlights + +Currently, there is only one function in this module `create_websearch` that leverages [Tavily.com](https://tavily.com/) search and answer engine to provide additional context. + +You need to sign up for an API key at [Tavily.com](https://tavily.com/) and set it as an environment variable `TAVILY_API_KEY` to use this function. 
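A minimal sketch of a call (assuming the search query is passed as the first positional argument and the `TAVILY_API_KEY` environment variable is set):

```julia
using PromptingTools.Experimental.APITools

# Fetch extra context from Tavily to enrich a downstream prompt
result = create_websearch("Latest Julia LTS release and its key features")
```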
+ +## References + +```@docs; canonical=false +create_websearch +``` diff --git a/docs/src/extra_tools/rag_tools_intro.md b/docs/src/extra_tools/rag_tools_intro.md new file mode 100644 index 000000000..123479f54 --- /dev/null +++ b/docs/src/extra_tools/rag_tools_intro.md @@ -0,0 +1,311 @@ +```@meta +CurrentModule = PromptingTools.Experimental.RAGTools +``` + +# RAG Tools Introduction + +`RAGTools` is an experimental module that provides a set of utilities for building Retrieval-Augmented Generation (RAG) applications, ie, applications that generate answers by combining knowledge of the underlying AI model with the information from the user's knowledge base. + +It is designed to be powerful and flexible, allowing you to build RAG applications with minimal effort. Extend any step of the pipeline with your own custom code (see the [RAG Interface](@ref) section), or use the provided defaults to get started quickly. + +Once the API stabilizes (near term), we hope to carve it out into a separate package. + +Import the module as follows: + +```julia +# required dependencies to load the necessary extensions!!! +using LinearAlgebra, SparseArrays, Unicode, Snowball +using PromptingTools.Experimental.RAGTools +# to access unexported functionality +const RT = PromptingTools.Experimental.RAGTools +``` + + +## Highlights + +The main functions to be aware of are: +- `build_index` to build a RAG index from a list of documents (type `ChunkIndex`) +- `airag` to generate answers using the RAG model on top of the `index` built above + - `retrieve` to retrieve relevant chunks from the index for a given question + - `generate!` to generate an answer from the retrieved chunks +- `annotate_support` to highlight which parts of the RAG answer are supported by the documents in the index vs which are generated by the model, it is applied automatically if you use pretty printing with `pprint` (eg, `pprint(result)`) +- `build_qa_evals` to build a set of question-answer pairs for evaluation of the RAG model from your corpus + +The hope is to provide a modular and easily extensible set of tools for building RAG applications in Julia. Feel free to open an issue or ask in the `#generative-ai` channel in the JuliaLang Slack if you have a specific need. 
+ +## Examples + +Let's build an index, we need to provide a starter list of documents: +```julia +sentences = [ + "Find the most comprehensive guide on Julia programming language for beginners published in 2023.", + "Search for the latest advancements in quantum computing using Julia language.", + "How to implement machine learning algorithms in Julia with examples.", + "Looking for performance comparison between Julia, Python, and R for data analysis.", + "Find Julia language tutorials focusing on high-performance scientific computing.", + "Search for the top Julia language packages for data visualization and their documentation.", + "How to set up a Julia development environment on Windows 10.", + "Discover the best practices for parallel computing in Julia.", + "Search for case studies of large-scale data processing using Julia.", + "Find comprehensive resources for mastering metaprogramming in Julia.", + "Looking for articles on the advantages of using Julia for statistical modeling.", + "How to contribute to the Julia open-source community: A step-by-step guide.", + "Find the comparison of numerical accuracy between Julia and MATLAB.", + "Looking for the latest Julia language updates and their impact on AI research.", + "How to efficiently handle big data with Julia: Techniques and libraries.", + "Discover how Julia integrates with other programming languages and tools.", + "Search for Julia-based frameworks for developing web applications.", + "Find tutorials on creating interactive dashboards with Julia.", + "How to use Julia for natural language processing and text analysis.", + "Discover the role of Julia in the future of computational finance and econometrics." +] +``` + +Let's index these "documents": + +```julia +index = build_index(sentences; chunker_kwargs=(; sources=map(i -> "Doc$i", 1:length(sentences)))) +``` + +This would be equivalent to the following `index = build_index(SimpleIndexer(), sentences)` which dispatches to the default implementation of each step via the `SimpleIndexer` struct. We provide these default implementations for the main functions as an optional argument - no need to provide them if you're running the default pipeline. + +Notice that we have provided a `chunker_kwargs` argument to the `build_index` function. These will be kwargs passed to `chunker` step. + +Now let's generate an answer to a question. + +1. Run end-to-end RAG (retrieve + generate!), return `AIMessage` +```julia +question = "What are the best practices for parallel computing in Julia?" + +msg = airag(index; question) # short for airag(RAGConfig(), index; question) +## Output: +## [ Info: Done with RAG. Total cost: \$0.0 +## AIMessage("Some best practices for parallel computing in Julia include us... +``` + +2. Explore what's happening under the hood by changing the return type - `RAGResult` contains all intermediate steps. +```julia +result = airag(index; question, return_all=true) +## RAGResult +## question: String "What are the best practices for parallel computing in Julia?" 
+## rephrased_questions: Array{String}((1,)) +## answer: SubString{String} +## final_answer: SubString{String} +## context: Array{String}((5,)) +## sources: Array{String}((5,)) +## emb_candidates: CandidateChunks{Int64, Float32} +## tag_candidates: CandidateChunks{Int64, Float32} +## filtered_candidates: CandidateChunks{Int64, Float32} +## reranked_candidates: CandidateChunks{Int64, Float32} +## conversations: Dict{Symbol, Vector{<:PromptingTools.AbstractMessage}} +``` + +You can still get the message from the result, see `result.conversations[:final_answer]` (the dictionary keys correspond to the function names of those steps). + + +3. If you need to customize it, break the pipeline into its sub-steps: retrieve and generate - RAGResult serves as the intermediate result. +```julia +# Retrieve which chunks are relevant to the question +result = retrieve(index, question) +# Generate an answer +result = generate!(index, result) +``` + +You can leverage a pretty-printing system with `pprint` where we automatically annotate the support of the answer by the chunks we provided to the model. +It is configurable and you can select only some of its functions (eg, scores, sources). + +```julia +pprint(result) +``` + +You'll see the following in REPL but with COLOR highlighting in the terminal. + +```plaintext +-------------------- +QUESTION(s) +-------------------- +- What are the best practices for parallel computing in Julia? + +-------------------- +ANSWER +-------------------- +Some of the best practices for parallel computing in Julia include:[1,0.7] +- Using [3,0.4]`@threads` for simple parallelism[1,0.34] +- Utilizing `Distributed` module for more complex parallel tasks[1,0.19] +- Avoiding excessive memory allocation +- Considering task granularity for efficient workload distribution + +-------------------- +SOURCES +-------------------- +1. Doc8 +2. Doc15 +3. Doc5 +4. Doc2 +5. Doc9 +``` + +See `?print_html` for the HTML version of the pretty-printing and styling system, eg, when you want to display the results in a web application based on Genie.jl/Stipple.jl. + +**How to read the output** +- Color legend: + - No color: High match with the context, can be trusted more + - Blue: Partial match against some words in the context, investigate + - Magenta (Red): No match with the context, fully generated by the model +- Square brackets: The best matching context ID + Match score of the chunk (eg, `[3,0.4]` means the highest support for the sentence is from the context chunk number 3 with a 40% match). + +Want more? + +See `examples/building_RAG.jl` for one more example. + +## RAG Interface + +### System Overview + +This system is designed for information retrieval and response generation, structured in three main phases: +- Preparation, when you create an instance of `AbstractIndex` +- Retrieval, when you surface the top most relevant chunks/items in the `index` and return `AbstractRAGResult`, which contains the references to the chunks (`AbstractCandidateChunks`) +- Generation, when you generate an answer based on the context built from the retrieved chunks, return either `AIMessage` or `AbstractRAGResult` + +The corresponding functions are `build_index`, `retrieve`, and `generate!`, respectively. +Here is the high-level diagram that shows the signature of the main functions: + +![RAG Diagram High-level](../diagrams/rag_diagram_highlevel.png) + +Notice that the first argument is a custom type for multiple dispatch. 
+In addition, observe the "kwargs" names, that's how the keyword arguments for each function are passed down from the higher-level functions (eg, `build_index(...; chunker_kwargs=(; separators=...)))`). It's the simplest way to customize some step of the pipeline (eg, set a custom model with a `model` kwarg or prompt template with `template` kwarg). + +The system is designed to be hackable and extensible at almost every entry point. +If you want to customize the behavior of any step, you can do so by defining a new type and defining a new method for the step you're changing, eg, +```julia +PromptingTools.Experimental.RAGTools: rerank + +struct MyReranker <: AbstractReranker end +rerank(::MyReranker, index, candidates) = ... +``` +And then you would set the `retrive` step to use your custom `MyReranker` via `reranker` kwarg, eg, `retrieve(....; reranker = MyReranker())` (or customize the main dispatching `AbstractRetriever` struct). + +The overarching principles are: +- Always dispatch / customize the behavior by defining a new `Struct` and the corresponding method for the existing functions (eg, `rerank` function for the re-ranking step). +- Custom types are provided as the first argument (the high-level functions will work without them as we provide some defaults). +- Custom types do NOT have any internal fields or DATA (with the exception of managing sub-steps of the pipeline like `AbstractRetriever` or `RAGConfig`). +- Additional data should be passed around as keyword arguments (eg, `chunker_kwargs` in `build_index` to pass data to the chunking step). The intention was to have some clearly documented default values in the docstrings of each step + to have the various options all in one place. + +### RAG Diagram + +![RAG Diagram Detailed](../diagrams/rag_diagram_detailed.png) + +**The main functions are**: + +Prepare your document index with `build_index`: +- signature: `(indexer::AbstractIndexBuilder, files_or_docs::Vector{<:AbstractString}) -> AbstractChunkIndex` +- flow: `get_chunks` -> `get_embeddings` -> `get_tags` -> `build_tags` +- dispatch types: `AbstractIndexBuilder`, `AbstractChunker`, `AbstractEmbedder`, `AbstractTagger` + +Run E2E RAG with `airag`: +- signature: `(cfg::AbstractRAGConfig, index::AbstractChunkIndex; question::AbstractString)` -> `AIMessage` or `AbstractRAGResult` +- flow: `retrieve` -> `generate!` +- dispatch types: `AbstractRAGConfig`, `AbstractRetriever`, `AbstractGenerator` + +Retrieve relevant chunks with `retrieve`: +- signature: `(retriever::AbstractRetriever, index::AbstractChunkIndex, question::AbstractString) -> AbstractRAGResult` +- flow: `rephrase` -> `get_embeddings` -> `find_closest` -> `get_tags` -> `find_tags` -> `rerank` +- dispatch types: `AbstractRAGConfig`, `AbstractRephraser`, `AbstractEmbedder`, `AbstractSimilarityFinder`, `AbstractTagger`, `AbstractTagFilter`, `AbstractReranker` + +Generate an answer from relevant chunks with `generate!`: +- signature: `(generator::AbstractGenerator, index::AbstractChunkIndex, result::AbstractRAGResult)` -> `AIMessage` or `AbstractRAGResult` +- flow: `build_context!` -> `answer!` -> `refine!` -> `postprocess!` +- dispatch types: `AbstractGenerator`, `AbstractContextBuilder`, `AbstractAnswerer`, `AbstractRefiner`, `AbstractPostprocessor` + +To discover the currently available implementations, use `subtypes` function, eg, `subtypes(AbstractReranker)`. 
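For example, a quick way to explore what can be swapped in at each step (a sketch; the exact subtype list depends on your version):

```julia
using InteractiveUtils: subtypes  # `subtypes` is auto-loaded in the REPL
using PromptingTools.Experimental.RAGTools
const RT = PromptingTools.Experimental.RAGTools

subtypes(RT.AbstractReranker)   # available re-ranking strategies
subtypes(RT.AbstractRephraser)  # available query-rephrasing strategies
```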
+ +#### Passing Keyword Arguments +If you need to pass keyword arguments, use the nested kwargs corresponding to the dispatch type names (`rephrase` step, has `rephraser` dispatch type and `rephraser_kwargs` for its keyword arguments). + +For example: + +```julia +cfg = RAGConfig(; retriever = AdvancedRetriever()) + +# kwargs will be big and nested, let's prepare them upfront +# we specify "custom" model for each component that calls LLM +kwargs = ( + retriever = AdvancedRetriever(), + retriever_kwargs = (; + top_k = 100, + top_n = 5, + # notice that this is effectively: retriever_kwargs/rephraser_kwargs/template + rephraser_kwargs = (; + template = :RAGQueryHyDE, + model = "custom")), + generator_kwargs = (; + # pass kwargs to `answer!` step defined by the `answerer` -> we're setting `answerer_kwargs` + answerer_kwargs = (; + model = "custom"), + # api_kwargs can be shared across all components + api_kwargs = (; + url = "http://localhost:8080"))) + +result = airag(cfg, index, question; kwargs...) +``` + +If you were one level deeper in the pipeline, working with retriever directly, you would pass: + +```julia +retriever_kwargs = (; + top_k = 100, + top_n = 5, + # notice that this is effectively: rephraser_kwargs/template + rephraser_kwargs = (; + template = :RAGQueryHyDE, + model = "custom"), + # api_kwargs can be shared across all components + api_kwargs = (; + url = "http://localhost:8080")) + +result = retrieve(AdvancedRetriever(), index, question; retriever_kwargs...) +``` + +And going even deeper, you would provide the `rephraser_kwargs` directly to the `rephrase` step, eg, +```julia +rephrase(SimpleRephraser(), question; model="custom", template = :RAGQueryHyDE, api_kwargs = (; url = "http://localhost:8080")) +``` + +### Deepdive + +**Preparation Phase:** +- Begins with `build_index`, which creates a user-defined index type from an abstract chunk index using specified dels and function strategies. +- `get_chunks` then divides the indexed data into manageable pieces based on a chunking strategy. +- `get_embeddings` generates embeddings for each chunk using an embedding strategy to facilitate similarity arches. +- Finally, `get_tags` extracts relevant metadata from each chunk, enabling tag-based filtering (hybrid search index). If there are `tags` available, `build_tags` is called to build the corresponding sparse matrix for filtering with tags. + +**Retrieval Phase:** +- The `retrieve` step is intended to find the most relevant chunks in the `index`. +- `rephrase` is called first, if we want to rephrase the query (methods like `HyDE` can improve retrieval quite a bit)! +- `get_embeddings` generates embeddings for the original + rephrased query +- `find_closest` looks up the most relevant candidates (`CandidateChunks`) using a similarity search strategy. +- `get_tags` extracts the potential tags (can be provided as part of the `airag` call, eg, when we want to use only some small part of the indexed chunks) +- `find_tags` filters the candidates to strictly match _at least one_ of the tags (if provided) +- `rerank` is called to rerank the candidates based on the reranking strategy (ie, to improve the ordering of the chunks in context). + +**Generation Phase:** +- The `generate!` step is intended to generate a response based on the retrieved chunks, provided via `AbstractRAGResult` (eg, `RAGResult`). 
+- `build_context!` constructs the context for response generation based on a context strategy and applies the necessary formatting +- `answer!` generates the response based on the context and the query +- `refine!` is called to refine the response (optional, defaults to passthrough) +- `postprocessing!` is available for any final touches to the response or to potentially save or format the results (eg, automatically save to the disk) + +Note that all generation steps are mutating the `RAGResult` object. + +See more details and corresponding functions and types in `src/Experimental/RAGTools/rag_interface.jl`. + +## References + +```@docs; canonical=false +build_index +airag +retrieve +generate! +annotate_support +build_qa_evals +``` diff --git a/docs/src/extra_tools/text_utilities_intro.md b/docs/src/extra_tools/text_utilities_intro.md new file mode 100644 index 000000000..9cd747884 --- /dev/null +++ b/docs/src/extra_tools/text_utilities_intro.md @@ -0,0 +1,43 @@ +```@meta +CurrentModule = PromptingTools +``` + +# Text Utilities + +Working with Generative AI (and in particular with the text modality), requires a lot of text manipulation. PromptingTools.jl provides a set of utilities to make this process easier and more efficient. + + +## Highlights + +The main functions to be aware of are +- `recursive_splitter` to split the text into sentences and words (of a desired length `max_length`) +- `replace_words` to mask some sensitive words in your text before sending it to AI +- `wrap_string` for wrapping the text into a desired length by adding newlines (eg, to fit some large text into your terminal width) +- `length_longest_common_subsequence` to find the length of the longest common subsequence between two strings (eg, to compare the similarity between the context provided and generated text) +- `distance_longest_common_subsequence` a companion utility for `length_longest_common_subsequence` to find the normalized distance between two strings. Always returns a number between 0-1, where 0 means the strings are identical and 1 means they are completely different. + +You can import them simply via: +```julia +using PromptingTools: recursive_splitter, replace_words, wrap_string, length_longest_common_subsequence, distance_longest_common_subsequence +``` + +There are many more (especially in the AgentTools and RAGTools experimental modules)! + +RAGTools module contains the following text utilities: +- `split_into_code_and_sentences` to split a string into code and sentences +- `tokenize` to tokenize a string (eg, a sentence) into words +- `trigrams` to generate trigrams from a string (eg, a word) +- `text_to_trigrams` to generate trigrams from a larger string (ie, effectively wraps the three functions above) +- `STOPWORDS` a set of common stopwords (very brief) + +Feel free to open an issue or ask in the `#generative-ai` channel in the JuliaLang Slack if you have a specific need. + +## References + +```@docs; canonical=false +recursive_splitter +replace_words +wrap_string +length_longest_common_subsequence +distance_longest_common_subsequence +``` diff --git a/docs/src/frequently_asked_questions.md b/docs/src/frequently_asked_questions.md index 5f34172c3..5e6c3ea54 100644 --- a/docs/src/frequently_asked_questions.md +++ b/docs/src/frequently_asked_questions.md @@ -8,6 +8,13 @@ There will be situations not or cannot use it (eg, privacy, cost, etc.). In that Note: To get started with [Ollama.ai](https://ollama.ai/), see the [Setup Guide for Ollama](#setup-guide-for-ollama) section below. 
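As a rough sketch of what a local call can look like once you have pulled a model with Ollama (the model name is illustrative; see the setup guide below for details):

```julia
using PromptingTools
const PT = PromptingTools

# Explicit Ollama schema + the name of a locally pulled model; no API key needed
msg = aigenerate(PT.OllamaSchema(), "Say hi!"; model = "llama2")
```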
+### What if I cannot access OpenAI? + +There are many alternatives: + +- **Other APIs**: MistralAI, Anthropic, Google, Together, Fireworks, Voyager (the latter ones tend to give free credits upon joining!) +- **Locally-hosted models**: Llama.cpp/Llama.jl, Ollama, vLLM (see the examples and the corresponding docs) + ## Data Privacy and OpenAI At the time of writing, OpenAI does NOT use the API calls for training their models. @@ -54,6 +61,35 @@ The solution is to force a new precompilation, so you can do any of the below: 2) Update the PromptingTools package (runs precompilation automatically) 3) Delete your compiled cache in `.julia` DEPOT (usually `.julia/compiled/v1.10/PromptingTools`). You can do it manually in the file explorer or via Julia REPL: `rm("~/.julia/compiled/v1.10/PromptingTools", recursive=true, force=true)` +## Getting an error "Rate limit exceeded" from OpenAI? + +Have you opened a new account recently? It is quite likely that you've exceeded the free tier limits. + +OpenAI has a rate limit on the number of requests and the number of tokens you can make in a given period. If you exceed either of these, you will receive a "Rate limit exceeded" error. +"Free tier" (ie, before you pay the first 5 USD) has very low limits, eg, maximum of 3 requests per minute. See the [OpenAI Rate Limits](https://platform.openai.com/docs/guides/rate-limits/usage-tiers?context=tier-free) for more information. + +If you look at the HTTP response headers in the error, you can see the limits remaining and how long until it resets, eg, `x-ratelimit-remaining-*` and `x-ratelimit-reset-*`. + +If you want to avoid this error, you have two options: + +1) Put a simple `sleep(x)` after every request, where `x` is calculated so that the number of your requests stays below the limit. +2) Use `ntasks` keyword argument in `asyncmap` to limit the number of concurrent requests. Eg, let's assume you want to process 100x c. 10,000 tokens, but your tier limit is only 60,000 tokens per minute. + If we know that one request takes c. 10 seconds, it means that with `ntasks=1` we would send 6 requests per minute, which already maxes out our limit. + If we set `ntasks=2`, we could process 12 requests per minute, so we would need our limit to be 120,000 tokens per minute. + ```julia + # simple asyncmap loop with 2 concurrent requests; otherwise, same syntax as `map` + asyncmap(my_prompts; ntasks=2) do prompt + aigenerate(prompt) + end + ``` + +## Getting the error "429 Too Many Requests"? +Assuming you have not just sent hundreds of requests, this error might be related to insufficient "credits" in your account balance. + +See the error message. If it says "You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors", you'll need to re-charge your account balance. Visit [Billing overview](https://platform.openai.com/settings/organization/billing/overview). + +Please note that, unlike ChatGPT, OpenAI API is NOT free. However, individual requests are extremely cheap (eg, tenth of a cent), so if you charge $5, it might last you up to hundreds of requests (depending on the models and prompts). + ## Setting OpenAI Spending Limits OpenAI allows you to set spending limits directly on your account dashboard to prevent unexpected costs. 
@@ -66,7 +102,7 @@ A good start might be a soft limit of c.$5 and a hard limit of c.$10 - you can a Resources: - [OpenAI Forum](https://community.openai.com/t/how-to-set-a-price-limit/13086) -### How much does it cost? Is it worth paying for? +## How much does it cost? Is it worth paying for? If you use a local model (eg, with Ollama), it's free. If you use any commercial APIs (eg, OpenAI), you will likely pay per "token" (a sub-word unit). @@ -83,6 +119,33 @@ Assuming the price per call was $0.0001, you'd pay 2 cents for the job and save Resources: - [OpenAI Pricing per 1000 tokens](https://openai.com/pricing) +## How to try new OpenAI models if I'm not Tier 5 customer? + +As of September 2024, you cannot access the new o1 models via API unless you're a Tier 5 customer. + +Fortunately, you can use OpenRouter to access these new models. + +1) Get your API key from [OpenRouter](https://openrouter.ai/keys) +2) Add some minimum [Credits](https://openrouter.ai/credits) to the account (eg, $5). +3) Set it as an environment variable (or use local preferences): `ENV["OPENROUTER_API_KEY"] = ""` +4) Use the model aliases with `or` prefix, eg, `oro1` for o1-preview or `oro1m` for o1-mini. + +Example: +```julia +# Let's use o1-preview model hosted on OpenRouter ("or" prefix) +msg = aigenerate("What is the meaning of life?"; model="oro1") +``` + +Note: There are some quirks for the o1 models. +For example, the new o1 series does NOT support `SystemMessage` yet, so OpenRouter does some tricks (likely converting them to normal user messages). +To be in control of this behavior and have comparable behavior to the native OpenAI API, you can use kwarg `no_system_message=true` in `aigenerate` to ensure OpenRouter does not do any tricks. + +Example: +```julia +# Let's use o1-mini and disable adding automatic system message +msg = aigenerate("What is the meaning of life?"; model="oro1m", no_system_message=true) +``` + ## Configuring the Environment Variable for API Key This is a guide for OpenAI's API key, but it works for any other API key you might need (eg, `MISTRALAI_API_KEY` for MistralAI API). @@ -111,7 +174,7 @@ Resources: You can also set the API key in `LocalPreferences.toml`, so it persists across sessions and projects. -Use: `PromptingTools.set_preferences!("OPENAI_API_KEY"="your-api-key")` +Use: `PromptingTools.set_preferences!("OPENAI_API_KEY"=>"your-api-key")` To double-check, run `PromptingTools.get_preferences("OPENAI_API_KEY")` and you should see your key! @@ -166,6 +229,19 @@ There are three ways how you can customize your workflows (especially when you u 2) Register your model and its associated schema (`PT.register_model!(; name="123", schema=PT.OllamaSchema())`). You won't have to specify the schema anymore only the model name. See [Working with Ollama](#working-with-ollama) for more information. 3) Override your default model (`PT.MODEL_CHAT`) and schema (`PT.PROMPT_SCHEMA`). It can be done persistently with Preferences, eg, `PT.set_preferences!("PROMPT_SCHEMA" => "OllamaSchema", "MODEL_CHAT"=>"llama2")`. +## Using Custom API Providers like Azure or Databricks + +Several providers are directly supported (eg, Databricks), check the available prompt schemas (eg, `subtypes(PT.AbstractOpenAISchema)`). + +If you need a custom URL or a few keyword parameters, refer to the implementation of DatabricksOpenAISchema. +You effectively need to create your own prompt schema (`struct MySchema <: PT.AbstractOpenAISchema`) and override the OpenAI.jl behavior. 
+
 ## How to have Multi-turn Conversations?
 
 Let's say you would like to respond back to a model's response. How to do it?
@@ -201,134 +277,289 @@ conversation = aigenerate("What's my name?"; return_all=true, conversation)
 ```
 
 Notice that the last message is the response to the second request, but with `return_all=true` we can see the whole conversation from the beginning.
 
-## Explain What Happens Under the Hood
+## How to have typed responses?
+
+Our responses are always in `AbstractMessage` types to ensure we can also handle downstream processing, error handling, and self-healing code (see `airetry!`).
 
-4 Key Concepts/Objects:
-- Schemas -> object of type `AbstractPromptSchema` that determines which methods are called and, hence, what providers/APIs are used
-- Prompts -> the information you want to convey to the AI model
-- Messages -> the basic unit of communication between the user and the AI model (eg, `UserMessage` vs `AIMessage`)
-- Prompt Templates -> re-usable "prompts" with placeholders that you can replace with your inputs at the time of making the request
+A good use case for a typed response is when you have a complicated control flow and would like to group and handle certain outcomes differently. You can easily do it as an extra step after the response is received.
 
-When you call `aigenerate`, roughly the following happens: `render` -> `UserMessage`(s) -> `render` -> `OpenAI.create_chat` -> ... -> `AIMessage`.
+Trivially, we can use `aiclassify` for Bool statements, eg,
+```julia
+# We can do either
+mybool = tryparse(Bool, aiclassify("Is two plus two four?")) isa Bool # true
+
+# or simply check equality
+msg = aiclassify("Is two plus two four?") # true
+mybool = msg.content == "true"
+```
+
+Now a more complicated example with multiple categories mapping to an enum:
+```julia
+choices = [("A", "any animal or creature"), ("P", "for any plant or tree"), ("O", "for everything else")]
 
-We'll deep dive into an example in the end.
+# Set up the return types we want
+@enum Categories A P O
+string_to_category = Dict("A" => A, "P" => P, "O" => O)
 
-### Schemas
+# Run an example
+input = "spider"
+msg = aiclassify(:InputClassifier; choices, input)
 
-For your "message" to reach an AI model, it needs to be formatted and sent to the right place.
+mytype = string_to_category[msg.content] # A (for animal)
+```
+How does it work? `aiclassify` guarantees to output one of our choices (and it handles some of the common quirks)!
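+
+If you find yourself repeating this pattern, you can wrap it in a tiny helper. The snippet below is only an illustrative sketch (it is not part of the package); it assumes the `choices`, the `Categories` enum, and the `string_to_category` dictionary from the example above are already defined, and it falls back to the catch-all category `O` for anything unexpected:
+
+```julia
+# hypothetical convenience wrapper around the enum example above
+function classify_item(input::AbstractString)
+    msg = aiclassify(:InputClassifier; choices, input)
+    # `get` falls back to the catch-all category `O` if the content is unexpected
+    return get(string_to_category, msg.content, O)
+end
+
+classify_item("spider")   # A
+classify_item("granite")  # O
+```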
 
-We leverage the multiple dispatch around the "schemas" to pick the right logic.
-All schemas are subtypes of `AbstractPromptSchema` and there are many subtypes, eg, `OpenAISchema <: AbstractOpenAISchema <:AbstractPromptSchema`.
+How would we achieve the same with `aigenerate` and an arbitrary struct?
+We need to use the "lazy" `AIGenerate` struct and `airetry!` to ensure we get the response, and then we can process it further.
 
-For example, if you provide `schema = OpenAISchema()`, the system knows that:
-- it will have to format any user inputs to OpenAI's "message specification" (a vector of dictionaries, see their API documentation). Function `render(OpenAISchema(),...)` will take care of the rendering.
-- it will have to send the message to OpenAI's API. We will use the amazing `OpenAI.jl` package to handle the communication.
+`AIGenerate` has two fields you should know about:
+- `conversation` - eg, the vector of "messages" in the current conversation (same as what you get from `aigenerate` with `return_all=true`)
+- `success` - a boolean flag if the request was successful AND if it passed any subsequent `airetry!` calls
 
-### Prompts
+Let's mimic a case where our "program" should return one of three types: `SmallInt`, `LargeInt`, `FailedResponse`.
 
-Prompt is loosely the information you want to convey to the AI model. It can be a question, a statement, or a command. It can have instructions or some context, eg, previous conversation.
+We first need to define our custom types:
+```julia
 
-You need to remember that Large Language Models (LLMs) are **stateless**. They don't remember the previous conversation/request, so you need to provide the whole history/context every time (similar to how REST APIs work).
+# not needed, just to show a fully typed example
+abstract type MyAbstractResponse end
+struct SmallInt <: MyAbstractResponse
+    number::Int
+end
+struct LargeInt <: MyAbstractResponse
+    number::Int
+end
+struct FailedResponse <: MyAbstractResponse
+    content::String
+end
+```
 
-Prompts that we send to the LLMs are effectively a sequence of messages (`<:AbstractMessage`).
+Let's define our "program" as a function to be cleaner. Notice that we use `AIGenerate` and `airetry!` to ensure we get the response, and then we can process it further.
 
-### Messages
+```julia
+using PromptingTools.Experimental.AgentTools
+
+function give_me_number(prompt::String)::MyAbstractResponse
+    # Generate the response
+    response = AIGenerate(prompt; config=RetryConfig(; max_retries=2)) |> run!
+
+    # Check if it's parseable as Int; if not, send it back to be fixed
+    # syntax: airetry!(CONDITION-TO-CHECK, AICALL-OBJECT, FEEDBACK-TO-MODEL)
+    airetry!(x -> tryparse(Int, last_output(x)) |> !isnothing, response, "Wrong output format! Answer with digits and nothing else. The number is:")
+
+    if response.success != true
+        ## we failed to generate a parseable integer
+        return FailedResponse("I failed to get the response. Last output: $(last_output(response))")
+    end
+    number = tryparse(Int, last_output(response))
+    return number < 1000 ? SmallInt(number) : LargeInt(number)
+end
+
+give_me_number("How many car seats are in Porsche 911T?")
+## [ Info: Condition not met. Retrying...
+## [ Info: Condition not met. Retrying...
+## SmallInt(2)
+```
 
-Messages are the basic unit of communication between the user and the AI model.
+We ultimately received our custom type `SmallInt` with the number of car seats in the Porsche 911T (I hope it's correct!).
 
-There are 5 main types of messages (`<:AbstractMessage`):
+If you want to access the full conversation history (all the attempts and feedback), simply output the `response` object and explore `response.conversation`.
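+
+For example, the following illustrative sketch (assuming the definitions and the `using PromptingTools.Experimental.AgentTools` import from the example above) shows a few handy fields and helpers to inspect:
+
+```julia
+response = AIGenerate("Give me a number between 1 and 10. Answer with digits only.";
+    config = RetryConfig(; max_retries = 2)) |> run!
+airetry!(x -> tryparse(Int, last_output(x)) |> !isnothing, response,
+    "Answer with digits and nothing else!")
+
+last_output(response)          # the final string output of the model
+response.success               # did the request (and the airetry! check) succeed?
+length(response.conversation)  # number of messages exchanged, feedback included
+response.conversation          # the full message history, including any feedback
+```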
 
-- `SystemMessage` - this contains information about the "system", eg, how it should behave, format its output, etc. (eg, `You're a world-class Julia programmer. You write brief and concise code.)
-- `UserMessage` - the information "from the user", ie, your question/statement/task
-- `UserMessageWithImages` - the same as `UserMessage`, but with images (URLs or Base64-encoded images)
-- `AIMessage` - the response from the AI model, when the "output" is text
-- `DataMessage` - the response from the AI model, when the "output" is data, eg, embeddings with `aiembed` or user-defined structs with `aiextract`
+## How to quickly create a prompt template?
 
-### Prompt Templates
+Many times, you will want to create a prompt template that you can reuse with different inputs (eg, to create templates for AIHelpMe or LLMTextAnalysis).
 
-We want to have re-usable "prompts", so we provide you with a system to retrieve pre-defined prompts with placeholders (eg, `{{name}}`) that you can replace with your inputs at the time of making the request.
+Previously, you would have to create a vector of `SystemMessage` and `UserMessage` objects and then save it to disk and reload it.
+Now, you can use the `create_template` function to do it for you. It's designed for quick prototyping, so it skips the serialization step and loads it directly into the template store (ie, you can use it like any other template - try the `aitemplates()` search).
 
-"AI Templates" as we call them (`AITemplate`) are usually a vector of `SystemMessage` and a `UserMessage` with specific purpose/task.
+The syntax is simple: `create_template(;user=, system=, load_as=