Skip to content

Commit

Permalink
refactor: ♻️ replace ephemeral with cacheControl
Browse files Browse the repository at this point in the history
  • Loading branch information
pelikhan committed Dec 10, 2024
1 parent 58efce0 commit b2e86a8
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 19 deletions.
6 changes: 3 additions & 3 deletions docs/src/content/docs/reference/scripts/context.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,12 +203,12 @@ def("FILE", env.files, { sliceSample: 100 })
You can use `cacheControl: "ephemeral"` to specify that the prompt can be cached
for a short amount of time, and enable prompt caching optimization, which is supported (differently) by various LLM providers.
```js "ephemeral: true"
```js 'cacheControl("ephemeral")'
$`...`.cacheControl("ephemeral")
```
```js "ephemeral: true"
def("FILE", env.files, { ephemeral: true })
```js 'cacheControl: "ephemeral"'
def("FILE", env.files, { cacheControl: "ephemeral" })
```
Read more about [prompt caching](/genaiscript/reference/scripts/prompt-caching).
Expand Down
26 changes: 14 additions & 12 deletions packages/core/src/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import {
ChatCompletionRequestCacheKey,
getChatCompletionCache,
} from "./chatcache"
import { traceFetchPost } from "./fetch"

const convertFinishReason = (
stopReason: Anthropic.Message["stop_reason"]
Expand Down Expand Up @@ -300,30 +301,31 @@ export const AnthropicChatCompletion: ChatCompletionHandler = async (
let finishReason: ChatCompletionResponse["finishReason"]
let usage: ChatCompletionResponse["usage"] | undefined
const toolCalls: ChatCompletionToolCall[] = []
const tools = convertTools(req.tools)

const mreq = deleteUndefinedValues({
model,
tools,
messages,
max_tokens: req.max_tokens || ANTHROPIC_MAX_TOKEN,
temperature: req.temperature,
top_p: req.top_p,
stream: true,
})

trace.detailsFenced("✉️ body", mreq, "json")
trace.appendContent("\n")

try {
const messagesApi = caching
? anthropic.beta.promptCaching.messages
: anthropic.messages
const stream = messagesApi.stream({
model,
tools: convertTools(req.tools),
messages,
max_tokens: req.max_tokens || ANTHROPIC_MAX_TOKEN,
temperature: req.temperature,
top_p: req.top_p,
stream: true,
...headers,
})

const stream = messagesApi.stream({ ...mreq, ...headers })
for await (const chunk of stream) {
if (cancellationToken?.isCancellationRequested) {
finishReason = "cancel"
break
}

switch (chunk.type) {
case "message_start":
usage = convertUsage(
Expand Down
9 changes: 7 additions & 2 deletions packages/core/src/promptdom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,9 @@ export function createDefData(
options?: DefDataOptions
) {
if (data === undefined) return undefined
let { format, headers, priority, ephemeral } = options || {}
let { format, headers, priority, cacheControl } = options || {}
cacheControl =
cacheControl ?? (options?.ephemeral ? "ephemeral" : undefined)
if (
!format &&
Array.isArray(data) &&
Expand Down Expand Up @@ -513,7 +515,10 @@ ${trimNewlines(text)}
${trimNewlines(text)}
`
// TODO maxTokens does not work well with data
return createTextNode(value, { priority, ephemeral })
return createTextNode(value, {
priority,
ephemeral: cacheControl === "ephemeral",
})
}

// Function to append a child node to a parent node.
Expand Down
9 changes: 8 additions & 1 deletion packages/core/src/types/prompt_template.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -897,6 +897,8 @@ interface FenceOptions extends LineNumberingOptions, FenceFormatOptions {
schema?: string
}

type PromptCacheControlType = "ephemeral"

interface ContextExpansionOptions {
/**
* Specifies a maximum of estimated tokens for this entry, after which it will be truncated.
Expand All @@ -916,9 +918,14 @@ interface ContextExpansionOptions {
flex?: number

/**
* This text is likely to change and will probably break the prefix cache.
* @deprecated use cacheControl instead
*/
ephemeral?: boolean

/**
* Caching policy for this text. `ephemeral` means the prefix can be cached for a short amount of time.
*/
cacheControl?: PromptCacheControlType
}

interface RangeOptions {
Expand Down
2 changes: 1 addition & 1 deletion packages/sample/genaisrc/summarize-cached.genai.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ script({
],
})

def("FILE", env.files, { ephemeral: true })
def("FILE", env.files, { cacheControl: "ephemeral" })

$`
Summarize all files in FILE in a single paragraph.
Expand Down

0 comments on commit b2e86a8

Please sign in to comment.