Add cached prompt costs feature with 50% discount
- Add cached_cost_of_token_prompt field to ModelSpec
- Update register_model! to support cached costs
- Modify call_cost to handle cached prompts
- Add documentation for new features
devin-ai-integration[bot] committed Nov 7, 2024
1 parent 31b1c27 commit e3eb4c1
Showing 2 changed files with 29 additions and 9 deletions.
11 changes: 8 additions & 3 deletions src/user_preferences.jl
@@ -278,8 +278,10 @@ A struct that contains information about a model, such as its name, schema, cost
# Fields
- `name::String`: The name of the model. This is the name that will be used to refer to the model in the `ai*` functions.
- `schema::AbstractPromptSchema`: The schema of the model. This is the schema that will be used to generate prompts for the model, eg, `:OpenAISchema`.
- `cost_of_token_prompt::Float64`: The cost of 1 token in the prompt for this model. This is used to calculate the cost of a prompt.
Note: It is often provided online as cost per 1000 tokens, so make sure to convert it correctly!
- `cached_cost_of_token_prompt::Float64`: The cost of 1 token in a cached prompt. Defaults to 50% of `cost_of_token_prompt`.
This reflects the reduced cost when using prompt caching, as supported by some models.
- `cost_of_token_generation::Float64`: The cost of 1 token generated by this model. This is used to calculate the cost of a generation.
Note: It is often provided online as cost per 1000 tokens, so make sure to convert it correctly!
- `description::String`: A description of the model. This is used to provide more information about the model when it is queried.
@@ -291,6 +293,7 @@ spec = ModelSpec("gpt-3.5-turbo",
0.0015,
0.002,
"GPT-3.5 Turbo is a 175B parameter model and a common default on the OpenAI API.")
```
# register it
PromptingTools.register_model!(spec)
@@ -304,12 +307,12 @@ PromptingTools.register_model!(
cost_of_token_prompt = 0.0015,
cost_of_token_generation = 0.002,
description = "GPT-3.5 Turbo is a 175B parameter model and a common default on the OpenAI API.")
```
"""
@kwdef mutable struct ModelSpec
name::String
schema::Union{AbstractPromptSchema, Nothing} = nothing
cost_of_token_prompt::Float64 = 0.0
cached_cost_of_token_prompt::Float64 = 0.0
cost_of_token_generation::Float64 = 0.0
description::String = ""
end
@@ -332,7 +335,7 @@ Registering a model helps with calculating the costs and automatically selecting
# Arguments
- `name`: The name of the model. This is the name that will be used to refer to the model in the `ai*` functions.
- `schema`: The schema of the model. This is the schema that will be used to generate prompts for the model, eg, `OpenAISchema()`.
- `cost_of_token_prompt`: The cost of a token in the prompt for this model. This is used to calculate the cost of a prompt.
Note: It is often provided online as cost per 1000 tokens, so make sure to convert it correctly!
- `cost_of_token_generation`: The cost of a token generated by this model. This is used to calculate the cost of a generation.
Note: It is often provided online as cost per 1000 tokens, so make sure to convert it correctly!
@@ -342,11 +345,13 @@ function register_model!(registry = MODEL_REGISTRY;
name::String,
schema::Union{AbstractPromptSchema, Nothing} = nothing,
cost_of_token_prompt::Float64 = 0.0,
cached_cost_of_token_prompt::Float64 = cost_of_token_prompt * 0.5,
cost_of_token_generation::Float64 = 0.0,
description::String = "")
spec = ModelSpec(name,
schema,
cost_of_token_prompt,
cached_cost_of_token_prompt,
cost_of_token_generation,
description)
register_model!(spec; registry)
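The keyword-default pattern the diff adds to `register_model!` can be sketched in isolation. This is a minimal, self-contained illustration (no package dependency; `register_sketch` is a hypothetical stand-in mirroring the keyword signature above):

```julia
# Sketch of the default added above: the cached prompt rate falls back
# to 50% of the regular prompt rate unless explicitly overridden.
function register_sketch(; cost_of_token_prompt::Float64 = 0.0,
        cached_cost_of_token_prompt::Float64 = cost_of_token_prompt * 0.5,
        cost_of_token_generation::Float64 = 0.0)
    # Return the resolved rates as a named tuple for inspection.
    return (; cost_of_token_prompt, cached_cost_of_token_prompt,
        cost_of_token_generation)
end

spec = register_sketch(cost_of_token_prompt = 0.0015,
    cost_of_token_generation = 0.002)
spec.cached_cost_of_token_prompt  # 0.00075, i.e. half of 0.0015
```

Note that Julia keyword defaults may reference earlier keywords, which is exactly how the diff derives `cached_cost_of_token_prompt` from `cost_of_token_prompt`; passing the keyword explicitly overrides the 50% default.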
27 changes: 21 additions & 6 deletions src/utils.jl
@@ -386,8 +386,11 @@ If the cost is already calculated (in `msg.cost`), it will not be re-calculated.
is not found in `MODEL_REGISTRY`, default costs are used.
- `cost_of_token_prompt::Number`: The cost per prompt token. Defaults to the cost in `MODEL_REGISTRY`
for the given model, or 0.0 if the model is not found.
- `cached_cost_of_token_prompt::Number`: The cost per cached prompt token. Defaults to 50% of `cost_of_token_prompt`.
This reflects the reduced cost when using prompt caching.
- `cost_of_token_generation::Number`: The cost per generation token. Defaults to the cost in
`MODEL_REGISTRY` for the given model, or 0.0 if the model is not found.
- `is_cached::Bool`: Whether to use cached prompt costs. Defaults to false.
# Returns
- `Number`: The total cost of the call.
@@ -396,10 +399,19 @@ If the cost is already calculated (in `msg.cost`), it will not be re-calculated.
```julia
# Assuming MODEL_REGISTRY is set up with appropriate costs
MODEL_REGISTRY = Dict(
"model1" => (cost_of_token_prompt = 0.05, cost_of_token_generation = 0.10),
"model2" => (cost_of_token_prompt = 0.07, cost_of_token_generation = 0.02)
"model1" => (cost_of_token_prompt = 0.05, cached_cost_of_token_prompt = 0.025, cost_of_token_generation = 0.10),
"model2" => (cost_of_token_prompt = 0.07, cached_cost_of_token_prompt = 0.035, cost_of_token_generation = 0.02)
)
# Calculate cost for normal prompt
cost1 = call_cost(10, 20, "model1") # Uses normal prompt cost
# cost1 = 10 * 0.05 + 20 * 0.10 = 2.5
# Calculate cost for cached prompt
cost2 = call_cost(10, 20, "model1", is_cached=true) # Uses cached prompt cost
# cost2 = 10 * 0.025 + 20 * 0.10 = 2.25 (50% discount on prompt tokens)
```
cost1 = call_cost(10, 20, "model1")
# from message
@@ -410,15 +422,18 @@ cost1 = call_cost(msg1, "model1")
# Using custom token costs
cost2 = call_cost(10, 20, "model3"; cost_of_token_prompt = 0.08, cost_of_token_generation = 0.12)
# cost2 = 10 * 0.08 + 20 * 0.12 = 3.2
```
"""
function call_cost(prompt_tokens::Int, completion_tokens::Int, model::String;
cost_of_token_prompt::Number = get(MODEL_REGISTRY,
model,
(; cost_of_token_prompt = 0.0)).cost_of_token_prompt,
cached_cost_of_token_prompt::Number = get(MODEL_REGISTRY,
model,
(; cached_cost_of_token_prompt = cost_of_token_prompt * 0.5)).cached_cost_of_token_prompt,
cost_of_token_generation::Number = get(MODEL_REGISTRY, model,
(; cost_of_token_generation = 0.0)).cost_of_token_generation)
cost = prompt_tokens * cost_of_token_prompt +
(; cost_of_token_generation = 0.0)).cost_of_token_generation,
is_cached::Bool = false)
cost = (is_cached ? cached_cost_of_token_prompt : cost_of_token_prompt) * prompt_tokens +
completion_tokens * cost_of_token_generation
return cost
end
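The arithmetic in the examples above can be checked with a stand-alone sketch of the same formula (no package dependency; `cost_sketch` is an illustrative stand-in for the new `call_cost` body, with the docstring's "model1" rates as defaults):

```julia
# Mirrors the new `call_cost` logic: cached calls bill prompt tokens at
# the cached rate; generation tokens are always billed at the normal rate.
function cost_sketch(prompt_tokens::Int, completion_tokens::Int;
        cost_of_token_prompt = 0.05,
        cached_cost_of_token_prompt = cost_of_token_prompt * 0.5,
        cost_of_token_generation = 0.10,
        is_cached::Bool = false)
    rate = is_cached ? cached_cost_of_token_prompt : cost_of_token_prompt
    return rate * prompt_tokens +
           completion_tokens * cost_of_token_generation
end

cost_sketch(10, 20)                   # 10 * 0.05  + 20 * 0.10 = 2.5
cost_sketch(10, 20; is_cached = true) # 10 * 0.025 + 20 * 0.10 = 2.25
```

Only the prompt-token term changes under caching, which is why the cached total (2.25) differs from the normal total (2.5) by exactly half the prompt cost.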
@@ -668,4 +683,4 @@ Returns indices of unique items in a vector `inputs`. Access the unique values a
"""
function unique_permutation(inputs::AbstractVector)
return unique(i -> inputs[i], eachindex(inputs))
end
end
