From b2e86a80e02b590c71cfea8fb3c3cc839947023f Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 10 Dec 2024 15:54:40 +0000 Subject: [PATCH] =?UTF-8?q?refactor:=20=E2=99=BB=EF=B8=8F=20replace=20ephe?= =?UTF-8?q?meral=20with=20cacheControl?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../content/docs/reference/scripts/context.md | 6 ++--- packages/core/src/anthropic.ts | 26 ++++++++++--------- packages/core/src/promptdom.ts | 9 +++++-- packages/core/src/types/prompt_template.d.ts | 9 ++++++- .../genaisrc/summarize-cached.genai.mjs | 2 +- 5 files changed, 33 insertions(+), 19 deletions(-) diff --git a/docs/src/content/docs/reference/scripts/context.md b/docs/src/content/docs/reference/scripts/context.md index dd6241cd4..b62f14f02 100644 --- a/docs/src/content/docs/reference/scripts/context.md +++ b/docs/src/content/docs/reference/scripts/context.md @@ -203,12 +203,12 @@ def("FILE", env.files, { sliceSample: 100 }) You can use `cacheControl: "ephemeral"` to specify that the prompt can be cached for a short amount of time, and enable prompt caching optimization, which is supported (differently) by various LLM providers. -```js "ephemeral: true" +```js 'cacheControl("ephemeral")' $`...`.cacheControl("ephemeral") ``` -```js "ephemeral: true" +```js 'cacheControl: "ephemeral"' +def("FILE", env.files, { cacheControl: "ephemeral" }) ``` Read more about [prompt caching](/genaiscript/reference/scripts/prompt-caching). 
diff --git a/packages/core/src/anthropic.ts b/packages/core/src/anthropic.ts index 330fda2af..dd1bec9d1 100644 --- a/packages/core/src/anthropic.ts +++ b/packages/core/src/anthropic.ts @@ -27,6 +27,7 @@ import { ChatCompletionRequestCacheKey, getChatCompletionCache, } from "./chatcache" +import { traceFetchPost } from "./fetch" const convertFinishReason = ( stopReason: Anthropic.Message["stop_reason"] @@ -300,30 +301,31 @@ export const AnthropicChatCompletion: ChatCompletionHandler = async ( let finishReason: ChatCompletionResponse["finishReason"] let usage: ChatCompletionResponse["usage"] | undefined const toolCalls: ChatCompletionToolCall[] = [] + const tools = convertTools(req.tools) + + const mreq = deleteUndefinedValues({ + model, + tools, + messages, + max_tokens: req.max_tokens || ANTHROPIC_MAX_TOKEN, + temperature: req.temperature, + top_p: req.top_p, + stream: true, + }) + trace.detailsFenced("✉️ body", mreq, "json") trace.appendContent("\n") try { const messagesApi = caching ? 
anthropic.beta.promptCaching.messages : anthropic.messages - const stream = messagesApi.stream({ - model, - tools: convertTools(req.tools), - messages, - max_tokens: req.max_tokens || ANTHROPIC_MAX_TOKEN, - temperature: req.temperature, - top_p: req.top_p, - stream: true, - ...headers, - }) - + const stream = messagesApi.stream({ ...mreq, ...headers }) for await (const chunk of stream) { if (cancellationToken?.isCancellationRequested) { finishReason = "cancel" break } - switch (chunk.type) { case "message_start": usage = convertUsage( diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts index 5b3574578..b8596fbb1 100644 --- a/packages/core/src/promptdom.ts +++ b/packages/core/src/promptdom.ts @@ -479,7 +479,9 @@ export function createDefData( options?: DefDataOptions ) { if (data === undefined) return undefined - let { format, headers, priority, ephemeral } = options || {} + let { format, headers, priority, cacheControl } = options || {} + cacheControl = + cacheControl ?? (options?.ephemeral ? "ephemeral" : undefined) if ( !format && Array.isArray(data) && @@ -513,7 +515,10 @@ ${trimNewlines(text)} ${trimNewlines(text)} ` // TODO maxTokens does not work well with data - return createTextNode(value, { priority, ephemeral }) + return createTextNode(value, { + priority, + ephemeral: cacheControl === "ephemeral", + }) } // Function to append a child node to a parent node. diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index 4e78a2849..243740cfa 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -897,6 +897,8 @@ interface FenceOptions extends LineNumberingOptions, FenceFormatOptions { schema?: string } +type PromptCacheControlType = "ephemeral" + interface ContextExpansionOptions { /** * Specifies an maximum of estimated tokens for this entry; after which it will be truncated. 
@@ -916,9 +918,14 @@ interface ContextExpansionOptions { flex?: number /** - * This text is likely to change and will probably break the prefix cache. + * @deprecated use cacheControl instead */ ephemeral?: boolean + + /** + * Caching policy for this text. `ephemeral` means the prefix can be cached for a short amount of time. + */ + cacheControl?: PromptCacheControlType } interface RangeOptions { diff --git a/packages/sample/genaisrc/summarize-cached.genai.mjs b/packages/sample/genaisrc/summarize-cached.genai.mjs index 07dc7713a..9f7964450 100644 --- a/packages/sample/genaisrc/summarize-cached.genai.mjs +++ b/packages/sample/genaisrc/summarize-cached.genai.mjs @@ -9,7 +9,7 @@ script({ ], }) -def("FILE", env.files, { ephemeral: true }) +def("FILE", env.files, { cacheControl: "ephemeral" }) $` Summarize all files in FILE in a single paragraph.