From ac751a9556679ccf953f41545af7a14b922278ed Mon Sep 17 00:00:00 2001
From: J S <49557684+svilupp@users.noreply.github.com>
Date: Mon, 26 Feb 2024 20:47:34 +0000
Subject: [PATCH 001/115] Revert "update deps"

This reverts commit ff4aee096a018010cea8859bd17ae7faee80144a.
---
 CHANGELOG.md | 5 +----
 Project.toml | 3 +++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 48d9e4c66..d2d831fc6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [0.13.0]

 ### Added
-- Added initial support for Google Gemini models for `aigenerate` (requires environment variable `GOOGLE_API_KEY` and package [GoogleGenAI.jl](https://github.com/tylerjthomas9/GoogleGenAI.jl) to be loaded). It must be imported explicitly because it's not registered yet.
+- Added initial support for Google Gemini models for `aigenerate` (requires environment variable `GOOGLE_API_KEY` and package [GoogleGenAI.jl](https://github.com/tylerjthomas9/GoogleGenAI.jl) to be loaded).
 - Added a utility to compare any two string sequences (and other iterators)`length_longest_common_subsequence`. It can be used to fuzzy match strings (eg, detecting context/sources in an AI-generated response or fuzzy matching AI response to some preset categories). See the docstring for more information `?length_longest_common_subsequence`.
 - Rewrite of `aiclassify` to classify into an arbitrary list of categories (including with descriptions). It's a quick and easy option for "routing" and similar use cases, as it exploits the logit bias trick and outputs only 1 token. Currently, only `OpenAISchema` is supported. See `?aiclassify` for more information.
 - Initial support for multiple completions in one request for OpenAI-compatible API servers. Set via API kwarg `n=5` and it will request 5 completions in one request, saving the network communication time and paying the prompt tokens only once. It's useful for majority voting, diversity, or challenging agentic workflows.
@@ -30,9 +30,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## Updated
 - Updated names of endpoints and prices of Mistral.ai models as per the [latest announcement](https://mistral.ai/technology/#models) and [pricing](https://docs.mistral.ai/platform/pricing/). Eg, `mistral-small` -> `mistral-small-latest`. In addition, the latest Mistral model has been added `mistral-large-latest` (aliased as `mistral-large` and `mistrall`, same for the others). `mistral-small-latest` and `mistral-large-latest` now support function calling, which means they will work with `aiextract` (You need to explicitly provide `tool_choice`, see the docs `?aiextract`).

-## Removed
-- Removed package extension for GoogleGenAI.jl, as it's not yet registered. Users must load the code manually for now.
-
 ## [0.12.0]

 ### Added
diff --git a/Project.toml b/Project.toml
index 95dc8ce2d..d774f22bc 100644
--- a/Project.toml
+++ b/Project.toml
@@ -17,11 +17,13 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

 [weakdeps]
+GoogleGenAI = "903d41d1-eaca-47dd-943b-fee3930375ab"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

 [extensions]
+GoogleGenAIPromptingToolsExt = ["GoogleGenAI"]
 MarkdownPromptingToolsExt = ["Markdown"]
 RAGToolsExperimentalExt = ["SparseArrays", "LinearAlgebra"]

@@ -29,6 +31,7 @@ RAGToolsExperimentalExt = ["SparseArrays", "LinearAlgebra"]
 AbstractTrees = "0.4"
 Aqua = "0.7"
 Base64 = "<0.0.1, 1"
+GoogleGenAI = "0.1.0"
 HTTP = "1"
 JSON3 = "1"
 LinearAlgebra = "<0.0.1, 1"

From 423faefee48c0834fce9836aad2a7845f41475ea Mon Sep 17 00:00:00 2001
From: J S <49557684+svilupp@users.noreply.github.com>
Date: Mon, 26 Feb 2024 20:49:36 +0000
Subject: [PATCH 002/115] remove GoogleGenAI (#83)

---
 CHANGELOG.md | 5 ++++-
 Project.toml | 3 ---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d2d831fc6..c841ce728 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [0.13.0]

 ### Added
-- Added initial support for Google Gemini models for `aigenerate` (requires environment variable `GOOGLE_API_KEY` and package [GoogleGenAI.jl](https://github.com/tylerjthomas9/GoogleGenAI.jl) to be loaded).
+- Added initial support for Google Gemini models for `aigenerate` (requires environment variable `GOOGLE_API_KEY` and package [GoogleGenAI.jl](https://github.com/tylerjthomas9/GoogleGenAI.jl) to be loaded). It must be added explicitly as it is not yet registered.
 - Added a utility to compare any two string sequences (and other iterators)`length_longest_common_subsequence`. It can be used to fuzzy match strings (eg, detecting context/sources in an AI-generated response or fuzzy matching AI response to some preset categories). See the docstring for more information `?length_longest_common_subsequence`.
 - Rewrite of `aiclassify` to classify into an arbitrary list of categories (including with descriptions). It's a quick and easy option for "routing" and similar use cases, as it exploits the logit bias trick and outputs only 1 token. Currently, only `OpenAISchema` is supported. See `?aiclassify` for more information.
 - Initial support for multiple completions in one request for OpenAI-compatible API servers. Set via API kwarg `n=5` and it will request 5 completions in one request, saving the network communication time and paying the prompt tokens only once. It's useful for majority voting, diversity, or challenging agentic workflows.
@@ -30,6 +30,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## Updated
 - Updated names of endpoints and prices of Mistral.ai models as per the [latest announcement](https://mistral.ai/technology/#models) and [pricing](https://docs.mistral.ai/platform/pricing/). Eg, `mistral-small` -> `mistral-small-latest`. In addition, the latest Mistral model has been added `mistral-large-latest` (aliased as `mistral-large` and `mistrall`, same for the others). `mistral-small-latest` and `mistral-large-latest` now support function calling, which means they will work with `aiextract` (You need to explicitly provide `tool_choice`, see the docs `?aiextract`).
+## Removed +- Removed package extension for GoogleGenAI.jl, as it's not yet registered. Users must load the code manually for now. + ## [0.12.0] ### Added diff --git a/Project.toml b/Project.toml index d774f22bc..95dc8ce2d 100644 --- a/Project.toml +++ b/Project.toml @@ -17,13 +17,11 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [weakdeps] -GoogleGenAI = "903d41d1-eaca-47dd-943b-fee3930375ab" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [extensions] -GoogleGenAIPromptingToolsExt = ["GoogleGenAI"] MarkdownPromptingToolsExt = ["Markdown"] RAGToolsExperimentalExt = ["SparseArrays", "LinearAlgebra"] @@ -31,7 +29,6 @@ RAGToolsExperimentalExt = ["SparseArrays", "LinearAlgebra"] AbstractTrees = "0.4" Aqua = "0.7" Base64 = "<0.0.1, 1" -GoogleGenAI = "0.1.0" HTTP = "1" JSON3 = "1" LinearAlgebra = "<0.0.1, 1" From 320b9b78338d45b9991384f600ca878cb7cb9ea3 Mon Sep 17 00:00:00 2001 From: J S <49557684+svilupp@users.noreply.github.com> Date: Tue, 27 Feb 2024 09:28:01 +0000 Subject: [PATCH 003/115] fix docs --- src/Experimental/AgentTools/retry.jl | 39 ++++++++++++++++------------ 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/src/Experimental/AgentTools/retry.jl b/src/Experimental/AgentTools/retry.jl index 7e2099258..27dac99ce 100644 --- a/src/Experimental/AgentTools/retry.jl +++ b/src/Experimental/AgentTools/retry.jl @@ -4,17 +4,16 @@ verbose::Bool = true, throw::Bool = false, evaluate_all::Bool = true, feedback_expensive::Bool = false, max_retries::Union{Nothing, Int} = nothing, retry_delay::Union{Nothing, Int} = nothing) -Evaluates the condition `f_cond` on the `aicall` object (eg, we evaluate `f_cond(aicall) -> Bool`). -If the condition is not met, it will return the best sample to retry from and provide `feedback` to `aicall`. That's why it's mutating. -It will retry running the `aicall` `max_retries` times. -If `throw` is `true`, it will throw an error if the function does not return `true` after `max_retries` retries. +Evaluates the condition `f_cond` on the `aicall` object. +If the condition is not met, it will return the best sample to retry from and provide `feedback` (string or function) to `aicall`. That's why it's mutating. +It will retry maximum `max_retries` times, with `throw=true`, an error will be thrown if the condition is not met after `max_retries` retries. -If feedback is provided (not empty), it will be append it to the conversation before the retry. -If a function is provided, it must accept the `aicall` object as the only argument and return a string. +Function signatures +- `f_cond(aicall::AICallBlock) -> Bool`, ie, it must accept the aicall object and return a boolean value. +- `feedback` can be a string or `feedback(aicall::AICallBlock) -> String`, ie, it must accept the aicall object and return a string. -Function `f_cond` is expected to accept the `aicall` object as the only argument. -It must return a boolean value, which indicates whether the condition is met. -You can leverage the `last_message`, `last_output`, and `AICode` functions to access the last message, last output and code blocks in the conversation, respectively. +You can leverage the `last_message`, `last_output`, and `AICode` functions to access the last message, last output and execute code blocks in the conversation, respectively. +See examples below. 
# Good Use Cases - Retry with API failures/drops (add `retry_delay=2` to wait 2s between retries) @@ -62,6 +61,7 @@ run!(out) # fails airetry!(isvalid, out; retry_delay = 2, max_retries = 2) ``` + If you provide arguments to the aicall, we try to honor them as much as possible in the following calls, eg, set low verbosity ```julia @@ -71,6 +71,7 @@ run!(out) # No info message, you just see `success = false` in the properties of the AICall ``` + Let's show a toy example to demonstrate the runtime checks / guardrails for the model output. We'll play a color guessing game (I'm thinking "yellow"): @@ -84,25 +85,30 @@ out = AIGenerate( config = RetryConfig(; n_samples = 2), api_kwargs = (; n = 2)) run!(out) + ## Check that the output is 1 word only, third argument is the feedback that will be provided if the condition fails ## Notice: functions operate on `aicall` as the only argument. We can use utilities like `last_output` and `last_message` to access the last message and output in the conversation. airetry!(x -> length(split(last_output(x), r" |\\.")) == 1, out, "You must answer with 1 word only.") + ## Let's ensure that the output is in lowercase - simple and short airetry!(x -> all(islowercase, last_output(x)), out, "You must answer in lowercase.") # [ Info: Condition not met. Retrying... + ## Let's add final hint - it took us 2 retries airetry!(x -> startswith(last_output(x), "y"), out, "It starts with \"y\"") # [ Info: Condition not met. Retrying... # [ Info: Condition not met. Retrying... + ## We end up with the correct answer last_output(out) # Output: "yellow" ``` + Let's explore how we got here. We save the various attempts in a "tree" (SampleNode object) You can access it in `out.samples`, which is the ROOT of the tree (top level). @@ -169,8 +175,9 @@ Note: `airetry!` will attempt to fix the model `max_retries` times. If you set `throw=true`, it will throw an ErrorException if the condition is not met after `max_retries` retries. + +Let's define a mini program to guess the number and use `airetry!` to guide the model to the correct answer: ```julia -# Let's define a mini program to guess the number \"\"\" llm_guesser() @@ -264,7 +271,7 @@ end ``` Note that if there are multiple "branches" the model will see only the feedback of its own and its ancestors not the other "branches". -If you want to show all object, set `n_samples=1`, so all fixing happens sequantially and model sees all feedback (less powerful if model falls into a bad state). +If you wanted to provide ALL feedback, set `RetryConfig(; n_samples=1)` to remove any "branching". It fixing will be done sequentially in one conversation and the model will see all feedback (less powerful if the model falls into a bad state). Alternatively, you can tweak the feedback function. 
# See Also @@ -474,25 +481,25 @@ Adds formatted feedback to the `conversation` based on the `sample` node feedbac sample = SampleNode(; data = nothing, feedback = "Feedback X") conversation = [PT.UserMessage("I say hi!"), PT.AIMessage(; content = "I say hi!")] conversation = AT.add_feedback!(conversation, sample) -conversation[end].content == "### Feedback from Evaluator\nFeedback X\n" +conversation[end].content == "### Feedback from Evaluator\\nFeedback X\\n" Inplace feedback: ```julia conversation = [PT.UserMessage("I say hi!"), PT.AIMessage(; content = "I say hi!")] conversation = AT.add_feedback!(conversation, sample; feedback_inplace = true) -conversation[end].content == "I say hi!\n\n### Feedback from Evaluator\nFeedback X\n" +conversation[end].content == "I say hi!\\n\\n### Feedback from Evaluator\\nFeedback X\\n" ``` Sample with ancestors with feedback: ```julia -sample_p = SampleNode(; data = nothing, feedback = "\nFeedback X") +sample_p = SampleNode(; data = nothing, feedback = "\\nFeedback X") sample = expand!(sample_p, nothing) -sample.feedback = "\nFeedback Y" +sample.feedback = "\\nFeedback Y" conversation = [PT.UserMessage("I say hi!"), PT.AIMessage(; content = "I say hi!")] conversation = AT.add_feedback!(conversation, sample) conversation[end].content == -"### Feedback from Evaluator\n\nFeedback X\n----------\n\nFeedback Y\n" +"### Feedback from Evaluator\\n\\nFeedback X\\n----------\\n\\nFeedback Y\\n" ``` """ function add_feedback!(conversation::AbstractVector{<:PT.AbstractMessage}, From 18e1b7125bc4d6915e8d72f7df34ce5942868d32 Mon Sep 17 00:00:00 2001 From: J S <49557684+svilupp@users.noreply.github.com> Date: Wed, 28 Feb 2024 21:20:05 +0000 Subject: [PATCH 004/115] Templating utilities (#84) --- CHANGELOG.md | 3 + Project.toml | 2 +- docs/make.jl | 9 +- docs/src/frequently_asked_questions.md | 172 +++++------- docs/src/getting_started.md | 4 + docs/src/how_it_works.md | 367 +++++++++++++++++++++++++ src/PromptingTools.jl | 3 +- src/templates.jl | 239 ++++++++++++---- test/templates.jl | 40 ++- test/utils.jl | 6 +- 10 files changed, 668 insertions(+), 177 deletions(-) create mode 100644 docs/src/how_it_works.md diff --git a/CHANGELOG.md b/CHANGELOG.md index c841ce728..a592a8a17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Added a new documentation section "How it works" to explain the inner workings of the package. It's a work in progress, but it should give you a good idea of what's happening under the hood. +- Improved template loading, so if you load your custom templates once with `load_templates!("my/template/folder)`, it will remember your folder for all future re-loads. +- Added convenience function `create_template` to create templates on the fly without having to deal with `PT.UserMessage` etc. See `?create_template` for more information. 
### Fixed diff --git a/Project.toml b/Project.toml index 95dc8ce2d..da0c0e797 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PromptingTools" uuid = "670122d1-24a8-4d70-bfce-740807c42192" authors = ["J S @svilupp and contributors"] -version = "0.13.0" +version = "0.14.0-DEV" [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" diff --git a/docs/make.jl b/docs/make.jl index e5ca9f168..eeb1cc6d4 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -15,7 +15,7 @@ makedocs(; modules = [ PromptingTools, PromptingTools.Experimental.RAGTools, - PromptingTools.Experimental.AgentTools, + PromptingTools.Experimental.AgentTools ], authors = "J S <49557684+svilupp@users.noreply.github.com> and contributors", repo = "https://github.com/svilupp/PromptingTools.jl/blob/{commit}{path}#{line}", @@ -30,13 +30,14 @@ makedocs(; pages = [ "Home" => "index.md", "Getting Started" => "getting_started.md", + "How It Works" => "how_it_works.md", "Examples" => [ "Various examples" => "examples/readme_examples.md", "Using AITemplates" => "examples/working_with_aitemplates.md", "Local models with Ollama.ai" => "examples/working_with_ollama.md", "Google AIStudio" => "examples/working_with_google_ai_studio.md", "Custom APIs (Mistral, Llama.cpp)" => "examples/working_with_custom_apis.md", - "Building RAG Application" => "examples/building_RAG.md", + "Building RAG Application" => "examples/building_RAG.md" ], "F.A.Q." => "frequently_asked_questions.md", "Reference" => [ @@ -44,8 +45,8 @@ makedocs(; "Experimental Modules" => "reference_experimental.md", "RAGTools" => "reference_ragtools.md", "AgentTools" => "reference_agenttools.md", - "APITools" => "reference_apitools.md", - ], + "APITools" => "reference_apitools.md" + ] ]) deploydocs(; diff --git a/docs/src/frequently_asked_questions.md b/docs/src/frequently_asked_questions.md index 5f34172c3..923b3e915 100644 --- a/docs/src/frequently_asked_questions.md +++ b/docs/src/frequently_asked_questions.md @@ -201,134 +201,90 @@ conversation = aigenerate("What's my name?"; return_all=true, conversation) ``` Notice that the last message is the response to the second request, but with `return_all=true` we can see the whole conversation from the beginning. -## Explain What Happens Under the Hood +## How to have typed responses? -4 Key Concepts/Objects: -- Schemas -> object of type `AbstractPromptSchema` that determines which methods are called and, hence, what providers/APIs are used -- Prompts -> the information you want to convey to the AI model -- Messages -> the basic unit of communication between the user and the AI model (eg, `UserMessage` vs `AIMessage`) -- Prompt Templates -> re-usable "prompts" with placeholders that you can replace with your inputs at the time of making the request +Our responses are always in `AbstractMessage` types to ensure we can also handle downstream processing, error handling, and self-healing code (see `airetry!`). -When you call `aigenerate`, roughly the following happens: `render` -> `UserMessage`(s) -> `render` -> `OpenAI.create_chat` -> ... -> `AIMessage`. - -We'll deep dive into an example in the end. - -### Schemas - -For your "message" to reach an AI model, it needs to be formatted and sent to the right place. - -We leverage the multiple dispatch around the "schemas" to pick the right logic. -All schemas are subtypes of `AbstractPromptSchema` and there are many subtypes, eg, `OpenAISchema <: AbstractOpenAISchema <:AbstractPromptSchema`. 
- -For example, if you provide `schema = OpenAISchema()`, the system knows that: -- it will have to format any user inputs to OpenAI's "message specification" (a vector of dictionaries, see their API documentation). Function `render(OpenAISchema(),...)` will take care of the rendering. -- it will have to send the message to OpenAI's API. We will use the amazing `OpenAI.jl` package to handle the communication. - -### Prompts - -Prompt is loosely the information you want to convey to the AI model. It can be a question, a statement, or a command. It can have instructions or some context, eg, previous conversation. - -You need to remember that Large Language Models (LLMs) are **stateless**. They don't remember the previous conversation/request, so you need to provide the whole history/context every time (similar to how REST APIs work). - -Prompts that we send to the LLMs are effectively a sequence of messages (`<:AbstractMessage`). - -### Messages - -Messages are the basic unit of communication between the user and the AI model. - -There are 5 main types of messages (`<:AbstractMessage`): - -- `SystemMessage` - this contains information about the "system", eg, how it should behave, format its output, etc. (eg, `You're a world-class Julia programmer. You write brief and concise code.) -- `UserMessage` - the information "from the user", ie, your question/statement/task -- `UserMessageWithImages` - the same as `UserMessage`, but with images (URLs or Base64-encoded images) -- `AIMessage` - the response from the AI model, when the "output" is text -- `DataMessage` - the response from the AI model, when the "output" is data, eg, embeddings with `aiembed` or user-defined structs with `aiextract` - -### Prompt Templates - -We want to have re-usable "prompts", so we provide you with a system to retrieve pre-defined prompts with placeholders (eg, `{{name}}`) that you can replace with your inputs at the time of making the request. - -"AI Templates" as we call them (`AITemplate`) are usually a vector of `SystemMessage` and a `UserMessage` with specific purpose/task. - -For example, the template `:AssistantAsk` is defined loosely as: +A good use case for a typed response is when you have a complicated control flow and would like to group and handle certain outcomes differently. You can easily do it as an extra step after the response is received. +Trivially, we can use `aiclassifier` for Bool statements, eg, ```julia - template = [SystemMessage("You are a world-class AI assistant. Your communication is brief and concise. You're precise and answer only when you're confident in the high quality of your answer."), - UserMessage("# Question\n\n{{ask}}")] -``` - -Notice that we have a placeholder `ask` (`{{ask}}`) that you can replace with your question without having to re-write the generic system instructions. - -When you provide a Symbol (eg, `:AssistantAsk`) to ai* functions, thanks to the multiple dispatch, it recognizes that it's an `AITemplate(:AssistantAsk)` and looks it up. +# We can do either +mybool = tryparse(Bool, aiclassify("Is two plus two four?")) isa Bool # true -You can discover all available templates with `aitemplates("some keyword")` or just see the details of some template `aitemplates(:AssistantAsk)`. 
- -### Walkthrough Example - -```julia -using PromptingTools -const PT = PromptingTools - -# Let's say this is our ask -msg = aigenerate(:AssistantAsk; ask="What is the capital of France?") - -# it is effectively the same as: -msg = aigenerate(PT.OpenAISchema(), PT.AITemplate(:AssistantAsk); ask="What is the capital of France?", model="gpt3t") +# or simply check equality +msg = aiclassify("Is two plus two four?") # true +mybool = msg.content == "true" ``` -There is no `model` provided, so we use the default `PT.MODEL_CHAT` (effectively GPT3.5-Turbo). Then we look it up in `PT.MDOEL_REGISTRY` and use the associated schema for it (`OpenAISchema` in this case). - -The next step is to render the template, replace the placeholders and render it for the OpenAI model. - +Now a more complicated example with multiple categories mapping to an enum: ```julia -# Let's remember out schema -schema = PT.OpenAISchema() -ask = "What is the capital of France?" -``` +choices = [("A", "any animal or creature"), ("P", "for any plant or tree"), ("O", "for everything else")] -First, we obtain the template (no placeholder replacement yet) and "expand it" -```julia -template_rendered = PT.render(schema, AITemplate(:AssistantAsk); ask) -``` +# Set up the return types we want +@enum Categories A P O +string_to_category = Dict("A" => A, "P" => P,"O" => O) -```plaintext -2-element Vector{PromptingTools.AbstractChatMessage}: - PromptingTools.SystemMessage("You are a world-class AI assistant. Your communication is brief and concise. You're precise and answer only when you're confident in the high quality of your answer.") - PromptingTools.UserMessage{String}("# Question\n\n{{ask}}", [:ask], :usermessage) -``` +# Run an example +input = "spider" +msg = aiclassify(:InputClassifier; choices, input) -Second, we replace the placeholders -```julia -rendered_for_api = PT.render(schema, template_rendered; ask) -``` - -```plaintext -2-element Vector{Dict{String, Any}}: - Dict("role" => "system", "content" => "You are a world-class AI assistant. Your communication is brief and concise. You're precise and answer only when you're confident in the high quality of your answer.") - Dict("role" => "user", "content" => "# Question\n\nWhat is the capital of France?") +mytype = string_to_category[msg.content] # A (for animal) ``` +How does it work? `aiclassify` guarantees to output one of our choices (and it handles some of the common quirks)! -Notice that the placeholders are only replaced in the second step. The final output here is a vector of messages with "role" and "content" keys, which is the format required by the OpenAI API. +How would we achieve the same with `aigenerate` and arbitrary struct? +We need to use the "lazy" `AIGenerate` struct and `airetry!` to ensure we get the response and then we can process it further. -As a side note, under the hood, the second step is done in two steps: +`AIGenerate` has two fields you should know about: +- `conversation` - eg, the vector of "messages" in the current conversation (same as what you get from `aigenerate` with `return_all=true`) +- `success` - a boolean flag if the request was successful AND if it passed any subsequent `airetry!` calls -- replace the placeholders `messages_rendered = PT.render(PT.NoSchema(), template_rendered; ask)` -> returns a vector of Messages! 
-- then, we convert the messages to the format required by the provider/schema `PT.render(schema, messages_rendered)` -> returns the OpenAI formatted messages - - -Next, we send the above `rendered_for_api` to the OpenAI API and get the response back. +Let's mimic a case where our "program" should return one of three types: `SmallInt`, `LargeInt`, `FailedResponse`. +We first need to define our custom types: ```julia -using OpenAI -OpenAI.create_chat(api_key, model, rendered_for_api) + +# not needed, just to show a fully typed example +abstract type MyAbstractResponse end +struct SmallInt <: MyAbstractResponse + number::Int +end +struct LargeInt <: MyAbstractResponse + number::Int +end +struct FailedResponse <: MyAbstractResponse + content::String +end ``` -The last step is to take the JSON response from the API and convert it to the `AIMessage` object. +Let's define our "program" as a function to be cleaner. Notice that we use `AIGenerate` and `airetry!` to ensure we get the response and then we can process it further. ```julia -# simplification for educational purposes -msg = AIMessage(; content = r.response[:choices][1][:message][:content]) +using PromptingTools.Experimental.AgentTools + +function give_me_number(prompt::String)::MyAbstractResponse + # Generate the response + response = AIGenerate(prompt; config=RetryConfig(;max_retries=2)) |> run! + + # Check if it's parseable as Int, if not, send back to be fixed + # syntax: airetry!(CONDITION-TO-CHECK, , FEEDBACK-TO-MODEL) + airetry!(x->tryparse(Int,last_output(x))|>!isnothing, response, "Wrong output format! Answer with digits and nothing else. The number is:") + + if response.success != true + ## we failed to generate a parseable integer + return FailedResponse("I failed to get the response. Last output: $(last_output(response))") + end + number = tryparse(Int,last_output(response)) + return number < 1000 ? SmallInt(number) : LargeInt(number) +end + +give_me_number("How many car seats are in Porsche 911T?") +## [ Info: Condition not met. Retrying... +## [ Info: Condition not met. Retrying... +## SmallInt(2) ``` -In practice, there are more fields we extract, so we define a utility for it: `PT.response_to_message`. Especially, since with parameter `n`, you can request multiple AI responses at once, so we want to re-use our response processing logic. -That's it! I hope you've learned something new about how PromptingTools.jl works under the hood. \ No newline at end of file +We ultimately received our custom type `SmallInt` with the number of car seats in the Porsche 911T (I hope it's correct!). + +If you want to access the full conversation history (all the attempts and feedback), simply output the `response` object and explore `response.conversation`. \ No newline at end of file diff --git a/docs/src/getting_started.md b/docs/src/getting_started.md index bb25e667c..c445ca7d1 100644 --- a/docs/src/getting_started.md +++ b/docs/src/getting_started.md @@ -1,3 +1,7 @@ +```@meta +CurrentModule = PromptingTools +``` + # Getting Started ## Prerequisites diff --git a/docs/src/how_it_works.md b/docs/src/how_it_works.md new file mode 100644 index 000000000..ad9a4dec0 --- /dev/null +++ b/docs/src/how_it_works.md @@ -0,0 +1,367 @@ +```@meta +CurrentModule = PromptingTools +``` + +# How It Works + +This is an advanced section that explains how PromptingTools.jl works under the hood. It is not necessary to understand this to use the package, but it can be helpful for debugging and understanding the limitations of the package. 
+ +We'll start with the key concepts and then walk through an example of `aigenerate` to see how it all fits together. + +## Key Concepts + +5 Key Concepts (/Objects): +- API/Model Providers -> The method that gives you access to Large Language Models (LLM), it can be an API (eg, OpenAI) or a locally-hosted application (eg, Llama.cpp or Ollama) +- Schemas -> object of type `AbstractPromptSchema` that determines which methods are called and, hence, what providers/APIs are used +- Prompts -> the information you want to convey to the AI model +- Messages -> the basic unit of communication between the user and the AI model (eg, `UserMessage` vs `AIMessage`) +- Prompt Templates -> re-usable "prompts" with placeholders that you can replace with your inputs at the time of making the request + +When you call `aigenerate`, roughly the following happens: `render` -> `UserMessage`(s) -> `render` -> `OpenAI.create_chat` -> ... -> `AIMessage`. + +### API/Model Providers + +You can think of "API/Model Providers" as the method that gives you access to Large Language Models (LLM). It can be an API (eg, OpenAI) or a locally-hosted application (eg, Llama.cpp or Ollama). + +You interact with them via the `schema` object, which is a subtype of `AbstractPromptSchema`, +eg, there is an `OpenAISchema` for the provider "OpenAI" and its supertype `AbstractOpenAISchema` is for all other providers that mimic the OpenAI API. + +### Schemas + +For your "message" to reach an AI model, it needs to be formatted and sent to the right place (-> provider!). + +We leverage the multiple dispatch around the "schemas" to pick the right logic. +All schemas are subtypes of `AbstractPromptSchema` and there are many subtypes, eg, `OpenAISchema <: AbstractOpenAISchema <:AbstractPromptSchema`. + +For example, if you provide `schema = OpenAISchema()`, the system knows that: +- it will have to format any user inputs to OpenAI's "message specification" (a vector of dictionaries, see their API documentation). Function `render(OpenAISchema(),...)` will take care of the rendering. +- it will have to send the message to OpenAI's API. We will use the amazing `OpenAI.jl` package to handle the communication. + +### Prompts + +Prompt is loosely the information you want to convey to the AI model. It can be a question, a statement, or a command. It can have instructions or some context, eg, previous conversation. + +You need to remember that Large Language Models (LLMs) are **stateless**. They don't remember the previous conversation/request, so you need to provide the whole history/context every time (similar to how REST APIs work). + +Prompts that we send to the LLMs are effectively a sequence of messages (`<:AbstractMessage`). + +### Messages + +Messages are the basic unit of communication between the user and the AI model. + +There are 5 main types of messages (`<:AbstractMessage`): + +- `SystemMessage` - this contains information about the "system", eg, how it should behave, format its output, etc. (eg, `You're a world-class Julia programmer. You write brief and concise code.) 
+- `UserMessage` - the information "from the user", ie, your question/statement/task +- `UserMessageWithImages` - the same as `UserMessage`, but with images (URLs or Base64-encoded images) +- `AIMessage` - the response from the AI model, when the "output" is text +- `DataMessage` - the response from the AI model, when the "output" is data, eg, embeddings with `aiembed` or user-defined structs with `aiextract` + +### Prompt Templates + +We want to have re-usable "prompts", so we provide you with a system to retrieve pre-defined prompts with placeholders (eg, `{{name}}`) that you can replace with your inputs at the time of making the request. + +"AI Templates" as we call them (`AITemplate`) are usually a vector of `SystemMessage` and a `UserMessage` with specific purpose/task. + +For example, the template `:AssistantAsk` is defined loosely as: + +```julia + template = [SystemMessage("You are a world-class AI assistant. Your communication is brief and concise. You're precise and answer only when you're confident in the high quality of your answer."), + UserMessage("# Question\n\n{{ask}}")] +``` + +Notice that we have a placeholder `ask` (`{{ask}}`) that you can replace with your question without having to re-write the generic system instructions. + +When you provide a Symbol (eg, `:AssistantAsk`) to ai* functions, thanks to the multiple dispatch, it recognizes that it's an `AITemplate(:AssistantAsk)` and looks it up. + +You can discover all available templates with `aitemplates("some keyword")` or just see the details of some template `aitemplates(:AssistantAsk)`. + +## ai* Functions + +The above steps are implemented in the `ai*` functions, eg, `aigenerate`, `aiembed`, `aiextract`, etc. They all have the same basic structure: + +`ai*(,; )`, + +but they differ in purpose: + +- `aigenerate` is the general-purpose function to generate any text response with LLMs, ie, it returns `AIMessage` with field `:content` containing the generated text (eg, `ans.content isa AbstractString`) +- `aiembed` is designed to extract embeddings from the AI model's response, ie, it returns `DataMessage` with field `:content` containing the embeddings (eg, `ans.content isa AbstractArray`) +- `aiextract` is designed to extract structured data from the AI model's response and return them as a Julia struct (eg, if we provide `return_type=Food`, we get `ans.content isa Food`). You need to define the return type first and then provide it as a keyword argument. +- `aiclassify` is designed to classify the input text into (or simply respond within) a set of discrete `choices` provided by the user. It can be very useful as an LLM Judge or a router for RAG systems, as it uses the "logit bias trick" and generates exactly 1 token. It returns `AIMessage` with field `:content`, but the `:content` can be only one of the provided `choices` (eg, `ans.content in choices`) +- `aiscan` is for working with images and vision-enabled models (as an input), but it returns `AIMessage` with field `:content` containing the generated text (eg, `ans.content isa AbstractString`) similar to `aigenerate`. +- `aitemplates` is a helper function to discover available templates and see their details (eg, `aitemplates("some keyword")` or `aitemplates(:AssistantAsk)`) + +In addition to the above list, you can also use the **"lazy" counterparts** of these functions from the experimental AgentTools module. +```julia +using PromptingTools.Experimental.AgentTools +``` + +For example, `AIGenerate()` will create a lazy instance of `aigenerate`. 
It is an instance of `AICall` with `aigenerate` as its ai function. +It uses exactly the same arguments and keyword arguments as `aigenerate` (see `?aigenerate` for details). + +"lazy" refers to the fact that it does NOT generate any output when instantiated (only when `run!` is called). + +Or said differently, the `AICall` struct and all its flavors (`AIGenerate`, ...) are designed to facilitate a deferred execution model (lazy evaluation) for AI functions that interact with a Language Learning Model (LLM). It stores the necessary information for an AI call and executes the underlying AI function only when supplied with a `UserMessage` or when the `run!` method is applied. + +This approach allows us to remember user inputs and trigger the LLM call repeatedly if needed, which enables automatic fixing (see `?airetry!`). + +Example: +```julia +result = AIGenerate(:JuliaExpertAsk; ask="xyz", model="abc", api_kwargs=(; temperature=0.1)) +result |> run! + +# Is equivalent to +result = aigenerate(:JuliaExpertAsk; ask="xyz", model="abc", api_kwargs=(; temperature=0.1), return_all=true) +# The only difference is that we default to `return_all=true` with lazy types because we have a dedicated `conversation` field, which makes it much easier +``` + +Lazy AI calls and self-healing mechanisms unlock much more robust and useful LLM workflows! + +## Walkthroughs + +### Walkthrough Example for `aigenerate` + +```julia +using PromptingTools +const PT = PromptingTools + +# Let's say this is our ask +msg = aigenerate(:AssistantAsk; ask="What is the capital of France?") + +# it is effectively the same as: +msg = aigenerate(PT.OpenAISchema(), PT.AITemplate(:AssistantAsk); ask="What is the capital of France?", model="gpt3t") +``` + +There is no `model` provided, so we use the default `PT.MODEL_CHAT` (effectively GPT3.5-Turbo). Then we look it up in `PT.MDOEL_REGISTRY` and use the associated schema for it (`OpenAISchema` in this case). + +The next step is to render the template, replace the placeholders and render it for the OpenAI model. + +```julia +# Let's remember out schema +schema = PT.OpenAISchema() +ask = "What is the capital of France?" +``` + +First, we obtain the template (no placeholder replacement yet) and "expand it" +```julia +template_rendered = PT.render(schema, AITemplate(:AssistantAsk); ask) +``` + +```plaintext +2-element Vector{PromptingTools.AbstractChatMessage}: + PromptingTools.SystemMessage("You are a world-class AI assistant. Your communication is brief and concise. You're precise and answer only when you're confident in the high quality of your answer.") + PromptingTools.UserMessage{String}("# Question\n\n{{ask}}", [:ask], :usermessage) +``` + +Second, we replace the placeholders +```julia +rendered_for_api = PT.render(schema, template_rendered; ask) +``` + +```plaintext +2-element Vector{Dict{String, Any}}: + Dict("role" => "system", "content" => "You are a world-class AI assistant. Your communication is brief and concise. You're precise and answer only when you're confident in the high quality of your answer.") + Dict("role" => "user", "content" => "# Question\n\nWhat is the capital of France?") +``` + +Notice that the placeholders are only replaced in the second step. The final output here is a vector of messages with "role" and "content" keys, which is the format required by the OpenAI API. 
+ +As a side note, under the hood, the second step is done in two sub-steps: + +- replace the placeholders `messages_rendered = PT.render(PT.NoSchema(), template_rendered; ask)` -> returns a vector of Messages! +- then, we convert the messages to the format required by the provider/schema `PT.render(schema, messages_rendered)` -> returns the OpenAI formatted messages + +Next, we send the above `rendered_for_api` to the OpenAI API and get the response back. + +```julia +using OpenAI +OpenAI.create_chat(api_key, model, rendered_for_api) +``` + +The last step is to take the JSON response from the API and convert it to the `AIMessage` object. + +```julia +# simplification for educational purposes +msg = AIMessage(; content = r.response[:choices][1][:message][:content]) +``` + +In practice, there are more fields we extract, so we define a utility for it: `PT.response_to_message`. Especially, since with parameter `n`, you can request multiple AI responses at once, so we want to re-use our response processing logic. + +That's it! I hope you've learned something new about how PromptingTools.jl works under the hood. + +## Walkthrough Example for `aiextract` + +Whereas `aigenerate` is a general-purpose function to generate any text response with LLMs, `aiextract` is designed to extract structured data from the AI model's response and return them as a Julia struct. + +It's a bit more complicated than `aigenerate` because it needs to handle the JSON schema of the return type (= our struct). + +Let's define a toy example of a struct and see how `aiextract` works under the hood. +```julia +using PromptingTools +const PT = PromptingTools + +""" +Extract the name of the food from the sentence. Extract any provided adjectives for the food as well. + +Example: "I am eating a crunchy bread." -> Food("bread", ["crunchy"]) +""" +struct Food + name::String # required field! + adjectives::Union{Nothing,Vector{String}} # not required because `Nothing` is allowed +end + +msg = aiextract("I just ate a delicious and juicy apple."; return_type=Food) +msg.content +# Food("apple", ["delicious", "juicy"]) +``` + +You can see that we sent a prompt to the AI model and it returned a `Food` object. +We provided some light guidance as a docstring of the return type, but the AI model did the heavy lifting. + +`aiextract` leverages native "function calling" (supported by OpenAI, Fireworks, Together, and many others). + +We encode the user-provided `return_type` into the corresponding JSON schema and create the payload as per the specifications of the provider. + +Let's how that's done: +```julia +sig = PT.function_call_signature(Food) +## Dict{String, Any} with 3 entries: +## "name" => "Food_extractor" +## "parameters" => Dict{String, Any}("properties"=>Dict{String, Any}("name"=>Dict("type"=>"string"), "adjectives"=>Dict{String, … +## "description" => "Extract the food from the sentence. Extract any provided adjectives for the food as well.\n\nExample: " +``` +You can see that we capture the field names and types in `parameters` and the description in `description` key. + +Furthermore, if we zoom in on the "parameter" field, you can see that we encode not only the names and types but also whether the fields are required (ie, do they allow `Nothing`) +You can see below that the field `adjectives` accepts `Nothing`, so it's not required. Only the `name` field is required. 
+```julia +sig["parameters"] +## Dict{String, Any} with 3 entries: +## "properties" => Dict{String, Any}("name"=>Dict("type"=>"string"), "adjectives"=>Dict{String, Any}("items"=>Dict("type"=>"strin… +## "required" => ["name"] +## "type" => "object" +``` + +For `aiextract`, the signature is provided to the API provider via `tools` parameter, eg, + +`api_kwargs = (; tools = [Dict(:type => "function", :function => sig)])` + +Optionally, we can provide also `tool_choice` parameter to specify which tool to use if we provided multiple (differs across providers). + +When the message is returned, we extract the JSON object in the response and decode it into Julia object via `JSON3.read(obj, Food)`. For example, +```julia +model_response = Dict(:tool_calls => [Dict(:function => Dict(:arguments => JSON3.write(Dict("name" => "apple", "adjectives" => ["delicious", "juicy"]))))]) +food = JSON3.read(model_response[:tool_calls][1][:function][:arguments], Food) +# Output: Food("apple", ["delicious", "juicy"]) +``` + +This is why you can sometimes have errors when you use abstract types in your `return_type` -> to enable that, you would need to set the right `StructTypes` behavior for your abstract type (see the JSON3.jl documentation for more details on how to do that). + +It works quite well for concrete types and "vanilla" structs, though. + +Unfortunately, function calling is generally NOT supported by locally-hosted / open-source models, +so let's try to build a workaround with `aigenerate` + +You need to pick a bigger / more powerful model, as it's NOT an easy task to output a correct JSON specification. +My laptop isn't too powerful and I don't like waiting, so I'm going to use Mixtral model hosted on Together.ai (you get \$25 credit when you join)! + +```julia +model = "tmixtral" # tmixtral is an alias for "mistralai/Mixtral-8x7B-Instruct-v0.1" on Together.ai and it automatically sets `schema = TogetherOpenAISchema()` +``` + +We'll add the signature to the prompt and we'll request the JSON output in two places - in the prompt and in the `api_kwargs` (to ensure that the model outputs the JSON via "grammar") +NOTE: You can write much better and more specific prompt if you have a specific task / return type in mind + you should make sure that the prompt + struct description make sense together! + +Let's define a prompt and `return_type`. Notice that we add several placeholders (eg, `{{description}}`) to fill with user inputs later. +```julia +prompt = """ +You're a world-class data extraction engine. + +Your task is to extract information formatted as per the user provided schema. +You MUST response in JSON format. + +**Example:** +--------- +Description: "Extract the Car from the sentence. Extract the corresponding brand and model as well." +Input: "I drive a black Porsche 911 Turbo." +Schema: "{\"properties\":{\"model\":{\"type\":\"string\"},\"brand\":{\"type\":\"string\"}},\"required\":[\"brand\",\"model\"],\"type\":\"object\"}" +Output: "{\"model\":\"911 Turbo\",\"brand\":\"Porsche\"}" +--------- + +**User Request:** +Description: {{description}} +Input: {{input}} +Schema: {{signature}} +Output: + +You MUST OUTPUT in JSON format. +""" +``` + +We need to extract the "signature of our `return_type` and put it in the right placeholders. +Let's generate now! 
+```julia +sig = PT.function_call_signature(Food) +result = aigenerate(prompt; input="I just ate a delicious and juicy apple.", + schema=JSON3.write(sig["parameters"]), description=sig["description"], + ## We provide the JSON output requirement as per API docs: https://docs.together.ai/docs/json-mode + model, api_kwargs=(; response_format=Dict("type" => "json_object"), temperature=0.2), return_all=true) +result[end].content +## "{\n \"adjectives\": [\"delicious\", \"juicy\"],\n \"food\": \"apple\"\n}" +``` + +We're using a smaller model, so the output is not perfect. +Let's try to load into our object: +```julia +obj = JSON3.read(result[end].content, Food) +# Output: ERROR: MethodError: Cannot `convert` an object of type Nothing to an object of type String +``` + +Unfortunately, we get an error because the model mixed up the key "name" for "food", so it cannot be parsed. + +Fortunately, we can do better and use automatic fixing! +All we need to do is to change from `aigenerate` -> `AIGenerate` (and use `airetry!`) + +The signature of `AIGenerate` is identical to `aigenerate` with the exception of `config` field, where we can influence the future `retry` behaviour. +```julia +result = AIGenerate(prompt; input="I just ate a delicious and juicy apple.", + schema=JSON3.write(sig["parameters"]), description=sig["description"], + ## We provide the JSON output requirement as per API docs: https://docs.together.ai/docs/json-mode + model, api_kwargs=(; response_format=Dict("type" => "json_object"), temperature=0.2), + ## limit the number of retries, default is 10 rounds + config=RetryConfig(; max_retries=3)) +run!(result) # run! triggers the generation step (to have some AI output to check) +``` + +Let's set up a retry mechanism with some practical feedback. We'll leverage `airetry!` to automatically retry the request and provide feedback to the model. +Think of `airetry!` as `@assert` on steroids: + +`@assert CONDITION MESSAGE` → `airetry! CONDITION MESSAGE` + +The main benefits of `airetry!` are: +- It can retry automatically, not just throw an error +- It manages the "conversation’ (list of messages) for you, including adding user-provided feedback to help generate better output + +```julia +feedback = "The output is not in the correct format. The keys should be $(join([string("\"$f\"") for f in fieldnames(Food)],", "))." +# We use do-syntax with provide the `CONDITION` (it must return Bool) +airetry!(result, feedback) do conv + ## try to convert + obj = try + JSON3.read(last_output(conv), Food) + catch e + ## you could save the error and provide as feedback (eg, into a slot in the `:memory` field of the AICall object) + e + end + ## Check if the conversion was successful; if it's `false`, it will retry + obj isa Food # -> Bool +end +food = JSON3.read(last_output(result), Food) +## [ Info: Condition not met. Retrying... +## Output: Food("apple", ["delicious", "juicy"]) +``` + +It took 1 retry (see `result.config.retries`) and we have the correct output from an open-source model! + +If you're interested in the `result` object, it's a struct (`AICall`) with a field `conversation`, which holds the conversation up to this point. +AIGenerate is an alias for AICall using `aigenerate` function. See `?AICall` (the underlying struct type) for more details on the fields and methods available. 
\ No newline at end of file diff --git a/src/PromptingTools.jl b/src/PromptingTools.jl index 8b0e96700..a4b0999f9 100644 --- a/src/PromptingTools.jl +++ b/src/PromptingTools.jl @@ -23,7 +23,7 @@ const RESERVED_KWARGS = [ :image_url, :image_path, :image_detail, - :model, + :model ] # export replace_words, split_by_length, call_cost, auth_header # for debugging only @@ -45,6 +45,7 @@ include("messages.jl") export aitemplates, AITemplate include("templates.jl") +const TEMPLATE_PATH = String[joinpath(@__DIR__, "..", "templates")] const TEMPLATE_STORE = Dict{Symbol, Any}() const TEMPLATE_METADATA = Vector{AITemplateMetadata}() diff --git a/src/templates.jl b/src/templates.jl index c9b82f313..4a7efd852 100644 --- a/src/templates.jl +++ b/src/templates.jl @@ -128,66 +128,107 @@ Removes all templates from `TEMPLATE_STORE` and `TEMPLATE_METADATA`. remove_templates!(; store = TEMPLATE_STORE, metadata_store = TEMPLATE_METADATA) = (empty!(store); empty!(metadata_store); nothing) """ - load_templates!(; remove_templates::Bool=true) + load_templates!(dir_templates::Union{String, Nothing} = nothing; + remember_path::Bool = true, + remove_templates::Bool = isnothing(dir_templates), + store::Dict{Symbol, <:Any} = TEMPLATE_STORE, + metadata_store::Vector{<:AITemplateMetadata} = TEMPLATE_METADATA) Loads templates from folder `templates/` in the package root and stores them in `TEMPLATE_STORE` and `TEMPLATE_METADATA`. Note: Automatically removes any existing templates and metadata from `TEMPLATE_STORE` and `TEMPLATE_METADATA` if `remove_templates=true`. + +# Arguments +- `dir_templates::Union{String, Nothing}`: The directory path to load templates from. If `nothing`, uses the default list of paths. It usually used only once "to register" a new template storage. +- `remember_path::Bool=true`: If true, remembers the path for future refresh (in `TEMPLATE_PATH`). +- `remove_templates::Bool=isnothing(dir_templates)`: If true, removes any existing templates and metadata from `store` and `metadata_store`. +- `store::Dict{Symbol, <:Any}=TEMPLATE_STORE`: The store to load the templates into. +- `metadata_store::Vector{<:AITemplateMetadata}=TEMPLATE_METADATA`: The metadata store to load the metadata into. + +# Example + +Load the default templates: +```julia +PT.load_templates!() # no path needed +``` + +Load templates from a new custom path: +```julia +PT.load_templates!("path/to/templates") # we will remember this path for future refresh +``` + +If you want to now refresh the default templates and the new path, just call `load_templates!()` without any arguments. """ -function load_templates!(dir_templates::String = joinpath(@__DIR__, "..", "templates"); - remove_templates::Bool = true, +function load_templates!(dir_templates::Union{String, Nothing} = nothing; + remember_path::Bool = true, + remove_templates::Bool = isnothing(dir_templates), store::Dict{Symbol, <:Any} = TEMPLATE_STORE, - metadata_store::Vector{<:AITemplateMetadata} = TEMPLATE_METADATA,) + metadata_store::Vector{<:AITemplateMetadata} = TEMPLATE_METADATA) + ## Init + global TEMPLATE_PATH + @assert isnothing(dir_templates)||isdir(dir_templates) "Invalid directory path provided! ($dir_templates)" + + # If no path is provided, use the default list + load_paths = isnothing(dir_templates) ? 
TEMPLATE_PATH : [dir_templates] # first remove any old templates and their metadata remove_templates && remove_templates!(; store, metadata_store) - # recursively load all templates from the `templates` folder - for (root, dirs, files) in walkdir(dir_templates) - for file in files - if endswith(file, ".json") - template_name = Symbol(split(basename(file), ".")[begin]) - template, metadata_msgs = load_template(joinpath(root, file)) - # add to store - if haskey(store, template_name) - @warn("Template $(template_name) already exists, overwriting! Metadata will be duplicated.") - end - store[template_name] = template - - # prepare the metadata - wordcount = 0 - system_preview = "" - user_preview = "" - variables = Symbol[] - for i in eachindex(template) - msg = template[i] - wordcount += length(msg.content) - if hasproperty(msg, :variables) - append!(variables, msg.variables) + # remember the path for future refresh + if remember_path && !isnothing(dir_templates) + if !(dir_templates in TEMPLATE_PATH) + push!(TEMPLATE_PATH, dir_templates) + end + end + + # recursively load all templates from the `load_paths` + for template_path in load_paths + for (root, dirs, files) in walkdir(template_path) + for file in files + if endswith(file, ".json") + template_name = Symbol(split(basename(file), ".")[begin]) + template, metadata_msgs = load_template(joinpath(root, file)) + # add to store + if haskey(store, template_name) + @warn("Template $(template_name) already exists, overwriting! Metadata will be duplicated.") end - # truncate previews to 100 characters - if msg isa SystemMessage && length(system_preview) < 100 - system_preview *= first(msg.content, 100) - elseif msg isa UserMessage && length(user_preview) < 100 - user_preview *= first(msg.content, 100) + store[template_name] = template + + # prepare the metadata + wordcount = 0 + system_preview = "" + user_preview = "" + variables = Symbol[] + for i in eachindex(template) + msg = template[i] + wordcount += length(msg.content) + if hasproperty(msg, :variables) + append!(variables, msg.variables) + end + # truncate previews to 100 characters + if msg isa SystemMessage && length(system_preview) < 100 + system_preview *= first(msg.content, 100) + elseif msg isa UserMessage && length(user_preview) < 100 + user_preview *= first(msg.content, 100) + end end + if !isempty(metadata_msgs) + # use the first metadata message found if available + meta = first(metadata_msgs) + metadata = AITemplateMetadata(; name = template_name, + meta.description, meta.version, meta.source, + wordcount, + system_preview = first(system_preview, 100), + user_preview = first(user_preview, 100), + variables = unique(variables)) + else + metadata = AITemplateMetadata(; name = template_name, + wordcount, + system_preview = first(system_preview, 100), + user_preview = first(user_preview, 100), + variables = unique(variables)) + end + # add metadata to store + push!(metadata_store, metadata) end - if !isempty(metadata_msgs) - # use the first metadata message found if available - meta = first(metadata_msgs) - metadata = AITemplateMetadata(; name = template_name, - meta.description, meta.version, meta.source, - wordcount, - system_preview = first(system_preview, 100), - user_preview = first(user_preview, 100), - variables = unique(variables)) - else - metadata = AITemplateMetadata(; name = template_name, - wordcount, - system_preview = first(system_preview, 100), - user_preview = first(user_preview, 100), - variables = unique(variables)) - end - # add metadata to store - 
push!(metadata_store, metadata) end end end @@ -249,7 +290,8 @@ function aitemplates(query_name::Symbol; limit::Int = 10, metadata_store::Vector{AITemplateMetadata} = TEMPLATE_METADATA) query_str = lowercase(string(query_name)) - found_templates = filter(x -> occursin(query_str, + found_templates = filter( + x -> occursin(query_str, lowercase(string(x.name))), metadata_store) return first(found_templates, limit) end @@ -258,7 +300,8 @@ function aitemplates(query_key::AbstractString; limit::Int = 10, metadata_store::Vector{AITemplateMetadata} = TEMPLATE_METADATA) query_str = lowercase(query_key) - found_templates = filter(x -> occursin(query_str, lowercase(string(x.name))) || + found_templates = filter( + x -> occursin(query_str, lowercase(string(x.name))) || occursin(query_str, lowercase(string(x.description))), metadata_store) return first(found_templates, limit) @@ -267,13 +310,14 @@ end function aitemplates(query_key::Regex; limit::Int = 10, metadata_store::Vector{AITemplateMetadata} = TEMPLATE_METADATA) - found_templates = filter(x -> occursin(query_key, - string(x.name)) || - occursin(query_key, - x.description) || - occursin(query_key, - x.system_preview) || - occursin(query_key, x.user_preview), + found_templates = filter( + x -> occursin(query_key, + string(x.name)) || + occursin(query_key, + x.description) || + occursin(query_key, + x.system_preview) || + occursin(query_key, x.user_preview), metadata_store) return first(found_templates, limit) end @@ -305,3 +349,82 @@ end function aiscan(schema::AbstractPromptSchema, template::Symbol; kwargs...) aiscan(schema, AITemplate(template); kwargs...) end + +## Utility for creating templates +""" + create_template(; user::AbstractString, system::AbstractString="Act as a helpful AI assistant.") + + create_template(system::AbstractString, user::AbstractString) + +Creates a simple template with a user and system message. Convenience function to prevent writing `[PT.UserMessage(...), ...]` + +# Arguments +- `system::AbstractString`: The system message. Usually defines the personality, style, instructions, output format, etc. +- `user::AbstractString`: The user message. Usually defines the input, query, request, etc. + +Use double handlebar placeholders (eg, `{{name}}`) to define variables that can be replaced by the `kwargs` during the AI call (see example). + +Returns a vector of `SystemMessage` and UserMessage objects. + +# Examples + +Let's generate a quick template for a simple conversation (only one placeholder: name) +```julia +# first system message, then user message (or use kwargs) +tpl=PT.create_template("You must speak like a pirate", "Say hi to {{name}}") + +## 2-element Vector{PromptingTools.AbstractChatMessage}: +## PromptingTools.SystemMessage("You must speak like a pirate") +## PromptingTools.UserMessage("Say hi to {{name}}") +``` + +You can immediately use this template in `ai*` functions: +```julia +aigenerate(tpl; name="Jack Sparrow") +# Output: AIMessage("Arr, me hearty! Best be sending me regards to Captain Jack Sparrow on the salty seas! May his compass always point true to the nearest treasure trove. Yarrr!") +``` + +If you want to save it in your project folder: +```julia +PT.save_template("templates/GreatingPirate.json", tpl; version="1.0") # optionally, add description +``` +It will be saved and accessed under its basename, ie, `GreatingPirate`. 
+ +Now you can load it like all the other templates (provide the template directory): +``` +PT.load_templates!("templates") # it will remember the folder after the first run +# Note: If you save it again, overwrite it, etc., you need to explicitly reload all templates again! +``` + +You can verify that your template is loaded with a quick search for "pirate": +```julia +aitemplates("pirate") + +## 1-element Vector{AITemplateMetadata}: +## PromptingTools.AITemplateMetadata +## name: Symbol GreatingPirate +## description: String "" +## version: String "1.0" +## wordcount: Int64 46 +## variables: Array{Symbol}((1,)) +## system_preview: String "You must speak like a pirate" +## user_preview: String "Say hi to {{name}}" +## source: String "" +``` + +Now you can use it like any other template (notice it's a symbol, so `:GreatingPirate`): +```julia +aigenerate(:GreatingPirate; name="Jack Sparrow") +# Output: AIMessage("Arr, me hearty! Best be sending me regards to Captain Jack Sparrow on the salty seas! May his compass always point true to the nearest treasure trove. Yarrr!") +```` +""" +function create_template( + system::AbstractString, + user::AbstractString) + return [SystemMessage(system), UserMessage(user)] +end +# Kwarg version +function create_template(; + user::AbstractString, system::AbstractString = "Act as a helpful AI assistant.") + create_template(system, user) +end diff --git a/test/templates.jl b/test/templates.jl index 23d613ac7..a9a63ebb2 100644 --- a/test/templates.jl +++ b/test/templates.jl @@ -1,11 +1,12 @@ using PromptingTools: AbstractChatMessage, SystemMessage, UserMessage, MetadataMessage using PromptingTools: render -using PromptingTools: load_templates!, aitemplates +using PromptingTools: load_templates!, aitemplates, create_template using PromptingTools: TestEchoOpenAISchema @testset "Template rendering" begin template = AITemplate(:JudgeIsItTrue) - expected_output = AbstractChatMessage[SystemMessage("You are an impartial AI judge evaluting whether the provided statement is \"true\" or \"false\". Answer \"unknown\" if you cannot decide."), + expected_output = AbstractChatMessage[ + SystemMessage("You are an impartial AI judge evaluting whether the provided statement is \"true\" or \"false\". Answer \"unknown\" if you cannot decide."), UserMessage("# Statement\n\n{{it}}")] @test expected_output == render(PT.PROMPT_SCHEMA, template) @test expected_output == render(template) @@ -32,6 +33,41 @@ end @test length(tmps) >= 1 end +@testset "load_templates!" begin + load_templates!() + PT.TEMPLATE_PATH = PT.TEMPLATE_PATH[[1]] # reset + dir_name = joinpath(tempdir(), "templates") + dir_name in PT.TEMPLATE_PATH + mkpath(dir_name) + load_templates!(dir_name) + @test length(PT.TEMPLATE_PATH) == 2 + @test PT.TEMPLATE_PATH[2] == dir_name + # no more changes + load_templates!(dir_name) + load_templates!(dir_name) + @test length(PT.TEMPLATE_PATH) == 2 + @test PT.TEMPLATE_PATH[2] == dir_name + # reset to normal + PT.TEMPLATE_PATH = PT.TEMPLATE_PATH[[1]] # reset +end + +@testset "create_template" begin + tpl = create_template("You must speak like a pirate", "Say hi to {{name}}") + @test tpl[1].content == "You must speak like a pirate" + @test tpl[1] isa SystemMessage + @test tpl[2].content == "Say hi to {{name}}" + @test tpl[2].variables == [:name] + @test tpl[2] isa UserMessage + + # kwarg constructor + tpl = create_template(; user = "Say hi to {{chef}}") + @test tpl[1].content == "Act as a helpful AI assistant." 
+ @test tpl[1] isa SystemMessage + @test tpl[2].content == "Say hi to {{chef}}" + @test tpl[2].variables == [:chef] + @test tpl[2] isa UserMessage +end + @testset "Templates - Echo aigenerate call" begin # E2E test for aigenerate with rendering template and filling the placeholders template_name = :JudgeIsItTrue diff --git a/test/utils.jl b/test/utils.jl index 5d6961cee..1e4fbbeb3 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -3,7 +3,7 @@ using PromptingTools: _extract_handlebar_variables, call_cost, _report_stats using PromptingTools: _string_to_vector, _encode_local_image using PromptingTools: DataMessage, AIMessage using PromptingTools: push_conversation!, - resize_conversation!, @timeout, preview, auth_header + resize_conversation!, @timeout, preview, auth_header @testset "replace_words" begin words = ["Disney", "Snow White", "Mickey Mouse"] @@ -243,7 +243,7 @@ end PT.SystemMessage("Welcome"), PT.UserMessage("Hello"), PT.AIMessage("World"), - PT.DataMessage(; content = ones(10)), + PT.DataMessage(; content = ones(10)) ] preview_output = preview(conversation) expected_output = Markdown.parse("# System Message\n\nWelcome\n\n---\n\n# User Message\n\nHello\n\n---\n\n# AI Message\n\nWorld\n\n---\n\n# Data Message\n\nData: Vector{Float64} (Size: (10,))\n") @@ -255,7 +255,7 @@ end @test headers == [ "Authorization" => "Bearer ", "Content-Type" => "application/json", - "Accept" => "application/json", + "Accept" => "application/json" ] @test_throws ArgumentError auth_header("") @test length(auth_header(nothing)) == 2 From 520098565f2d29d440d9facf7d5ec98cd986cb60 Mon Sep 17 00:00:00 2001 From: J S <49557684+svilupp@users.noreply.github.com> Date: Thu, 29 Feb 2024 09:24:08 +0000 Subject: [PATCH 005/115] update docs + version (#85) --- CHANGELOG.md | 8 +- Project.toml | 2 +- docs/src/frequently_asked_questions.md | 66 +++++++++++- docs/src/how_it_works.md | 2 + src/templates.jl | 135 +++++++++++++++++-------- test/templates.jl | 9 ++ 6 files changed, 178 insertions(+), 44 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a592a8a17..ddbe50be7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,10 +6,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +### Fixed + +## [0.14.0] + ### Added - Added a new documentation section "How it works" to explain the inner workings of the package. It's a work in progress, but it should give you a good idea of what's happening under the hood. - Improved template loading, so if you load your custom templates once with `load_templates!("my/template/folder)`, it will remember your folder for all future re-loads. -- Added convenience function `create_template` to create templates on the fly without having to deal with `PT.UserMessage` etc. See `?create_template` for more information. +- Added convenience function `create_template` to create templates on the fly without having to deal with `PT.UserMessage` etc. If you specify the keyword argument `load_as = "MyName"`, the template will be immediately loaded to the template registry. See `?create_template` for more information and examples. 
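
As a quick illustration of the `create_template` + `load_as` workflow described in the entry above, here is a minimal sketch. The template text, the `{{text}}` placeholder, and the registry name `OneSentenceSummary` are invented for this example (they are not part of the patch), and it assumes PromptingTools 0.14.0 where the `load_as` keyword is available:

```julia
using PromptingTools
const PT = PromptingTools

# Build a two-message template and register it under a name in one call.
# "OneSentenceSummary" and the {{text}} placeholder are made up for this sketch;
# `load_as` is assumed to behave as described in the changelog entry above.
tpl = PT.create_template(;
    system = "You are a concise assistant.",
    user = "Summarize the following text in one sentence:\n\n{{text}}",
    load_as = "OneSentenceSummary")

# `tpl` is a vector of SystemMessage/UserMessage, so it can be used directly:
# aigenerate(tpl; text = "PromptingTools.jl simplifies calling LLM APIs from Julia.")

# Thanks to `load_as`, it should also be discoverable and usable by name:
# aitemplates("OneSentenceSummary")
# aigenerate(:OneSentenceSummary; text = "PromptingTools.jl simplifies calling LLM APIs from Julia.")
```

Note that this sketch skips `save_template`; `load_as` only registers the template in memory for the current session.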
### Fixed

diff --git a/Project.toml b/Project.toml
index da0c0e797..f9666002d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "PromptingTools"
 uuid = "670122d1-24a8-4d70-bfce-740807c42192"
 authors = ["J S @svilupp and contributors"]
-version = "0.14.0-DEV"
+version = "0.14.0"
 
 [deps]
 AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
diff --git a/docs/src/frequently_asked_questions.md b/docs/src/frequently_asked_questions.md
index 923b3e915..824d26e5b 100644
--- a/docs/src/frequently_asked_questions.md
+++ b/docs/src/frequently_asked_questions.md
@@ -287,4 +287,68 @@ give_me_number("How many car seats are in Porsche 911T?")
 
 We ultimately received our custom type `SmallInt` with the number of car seats in the Porsche 911T (I hope it's correct!).
 
-If you want to access the full conversation history (all the attempts and feedback), simply output the `response` object and explore `response.conversation`.
\ No newline at end of file
+If you want to access the full conversation history (all the attempts and feedback), simply output the `response` object and explore `response.conversation`.
+
+## How to quickly create a prompt template?
+
+Many times, you will want to create a prompt template that you can reuse with different inputs (eg, to create templates for AIHelpMe or LLMTextAnalysis).
+
+Previously, you would have to create a vector of `SystemMessage` and `UserMessage` objects, save it to disk, and then reload it.
+Now, you can use the `create_template` function to do it for you. It's designed for quick prototyping, so it skips the serialization step and loads the template directly into the template store (ie, you can use it like any other template - try the `aitemplates()` search).
+
+The syntax is simple: `create_template(;user=, system=, load_as=