diff --git a/docs/genaisrc/genaiscript.d.ts b/docs/genaisrc/genaiscript.d.ts index 5aabaea966..543f84da2c 100644 --- a/docs/genaisrc/genaiscript.d.ts +++ b/docs/genaisrc/genaiscript.d.ts @@ -282,6 +282,11 @@ interface ScriptRuntimeOptions { * Default value for emitting line numbers in fenced code blocks. */ lineNumbers?: boolean + + /** + * Budget of tokens to apply the prompt flex renderer. + */ + flexTokens?: number } type PromptParameterType = @@ -756,11 +761,20 @@ interface FenceOptions { } interface ContextExpansionOptions { - priority?: number /** * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated. */ maxTokens?: number + /* + * Value that is conceptually similar to a zIndex (higher number == higher priority). + * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context. + */ + priority?: number + /** + * Controls the proportion of tokens allocated from the container's budget to this element. + * It defaults to 1 on all elements. + */ + flex?: number } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { @@ -1518,6 +1532,16 @@ interface FileOutput { interface ImportTemplateOptions {} interface PromptTemplateString { + /** + * Set a priority similar to CSS z-index + * to control the trimming of the prompt when the context is full + * @param priority + */ + priority(value: number): PromptTemplateString + /** + * Sets the context layout flex weight + */ + flex(value: number): PromptTemplateString /** * Applies jinja template to the string lazily * @param data jinja data diff --git a/genaisrc/genaiscript.d.ts b/genaisrc/genaiscript.d.ts index 5aabaea966..543f84da2c 100644 --- a/genaisrc/genaiscript.d.ts +++ b/genaisrc/genaiscript.d.ts @@ -282,6 +282,11 @@ interface ScriptRuntimeOptions { * Default value for emitting line numbers in fenced code blocks. */ lineNumbers?: boolean + + /** + * Budget of tokens to apply the prompt flex renderer. + */ + flexTokens?: number } type PromptParameterType = @@ -756,11 +761,20 @@ interface FenceOptions { } interface ContextExpansionOptions { - priority?: number /** * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated. */ maxTokens?: number + /* + * Value that is conceptually similar to a zIndex (higher number == higher priority). + * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context. + */ + priority?: number + /** + * Controls the proportion of tokens allocated from the container's budget to this element. + * It defaults to 1 on all elements. 
+ */ + flex?: number } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { @@ -1518,6 +1532,16 @@ interface FileOutput { interface ImportTemplateOptions {} interface PromptTemplateString { + /** + * Set a priority similar to CSS z-index + * to control the trimming of the prompt when the context is full + * @param priority + */ + priority(value: number): PromptTemplateString + /** + * Sets the context layout flex weight + */ + flex(value: number): PromptTemplateString /** * Applies jinja template to the string lazily * @param data jinja data diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts index 58883f2b75..2e39591086 100644 --- a/packages/core/src/chat.ts +++ b/packages/core/src/chat.ts @@ -470,16 +470,17 @@ async function processChatMessage( const node = ctx.node checkCancelled(cancellationToken) // expand template - const { errors, prompt } = await renderPromptNode( + const { errors, userPrompt } = await renderPromptNode( options.model, node, { + flexTokens: options.flexTokens, trace, } ) - if (prompt?.trim().length) { - trace.detailsFenced(`💬 message`, prompt, "markdown") - messages.push({ role: "user", content: prompt }) + if (userPrompt?.trim().length) { + trace.detailsFenced(`💬 message`, userPrompt, "markdown") + messages.push({ role: "user", content: userPrompt }) needsNewTurn = true } else trace.item("no message") if (errors?.length) { diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index 0251c83119..6292ab9625 100644 --- a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -227,3 +227,5 @@ export const CONSOLE_COLOR_WARNING = 95 export const CONSOLE_COLOR_ERROR = 91 export const PLAYWRIGHT_DEFAULT_BROWSER = "chromium" +export const MAX_TOKENS_ELLIPSE = "..." +export const ESTIMATE_TOKEN_OVERHEAD = 2 \ No newline at end of file diff --git a/packages/core/src/expander.ts b/packages/core/src/expander.ts index 9d9b34b551..f598e81d40 100644 --- a/packages/core/src/expander.ts +++ b/packages/core/src/expander.ts @@ -73,7 +73,7 @@ export async function callExpander( const node = ctx.node if (provider !== MODEL_PROVIDER_AICI) { const { - prompt, + userPrompt, assistantPrompt, images: imgs, errors, @@ -83,8 +83,11 @@ export async function callExpander( outputProcessors: ops, chatParticipants: cps, fileOutputs: fos, - } = await renderPromptNode(model, node, { trace }) - text = prompt + } = await renderPromptNode(model, node, { + flexTokens: options.flexTokens, + trace, + }) + text = userPrompt assistantText = assistantPrompt images = imgs schemas = schs @@ -175,7 +178,7 @@ export async function expandTemplate( const systems = resolveSystems(prj, template) const systemTemplates = systems.map((s) => prj.getTemplate(s)) // update options - options.lineNumbers = + const lineNumbers = options.lineNumbers ?? template.lineNumbers ?? systemTemplates.some((s) => s?.lineNumbers) @@ -186,7 +189,7 @@ export async function expandTemplate( host.defaultModelOptions.temperature const topP = options.topP ?? normalizeFloat(env.vars["top_p"]) ?? template.topP - const max_tokens = + const maxTokens = options.maxTokens ?? normalizeInt(env.vars["maxTokens"]) ?? normalizeInt(env.vars["max_tokens"]) ?? @@ -197,6 +200,11 @@ export async function expandTemplate( normalizeInt(env.vars["max_tool_calls"]) ?? template.maxToolCalls ?? MAX_TOOL_CALLS + const flexTokens = + options.flexTokens ?? + normalizeInt(env.vars["flexTokens"]) ?? + normalizeInt(env.vars["flex_tokens"]) ?? + template.flexTokens let seed = options.seed ?? 
normalizeInt(env.vars["seed"]) ?? template.seed if (seed !== undefined) seed = seed >> 0 @@ -207,7 +215,16 @@ export async function expandTemplate( trace.startDetails("🧬 prompt") trace.detailsFenced("📓 script source", template.jsSource, "js") - const prompt = await callExpander(prj, template, env, trace, options) + const prompt = await callExpander(prj, template, env, trace, { + ...options, + maxTokens, + maxToolCalls, + flexTokens, + seed, + topP, + temperature, + lineNumbers, + }) const images = prompt.images const schemas = prompt.schemas @@ -339,7 +356,7 @@ ${schemaTs} model, temperature, topP, - max_tokens, + maxTokens, maxToolCalls, seed, responseType, diff --git a/packages/core/src/genaisrc/genaiscript.d.ts b/packages/core/src/genaisrc/genaiscript.d.ts index 5aabaea966..543f84da2c 100644 --- a/packages/core/src/genaisrc/genaiscript.d.ts +++ b/packages/core/src/genaisrc/genaiscript.d.ts @@ -282,6 +282,11 @@ interface ScriptRuntimeOptions { * Default value for emitting line numbers in fenced code blocks. */ lineNumbers?: boolean + + /** + * Budget of tokens to apply the prompt flex renderer. + */ + flexTokens?: number } type PromptParameterType = @@ -756,11 +761,20 @@ interface FenceOptions { } interface ContextExpansionOptions { - priority?: number /** * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated. */ maxTokens?: number + /* + * Value that is conceptually similar to a zIndex (higher number == higher priority). + * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context. + */ + priority?: number + /** + * Controls the proportion of tokens allocated from the container's budget to this element. + * It defaults to 1 on all elements. 
+ */ + flex?: number } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { @@ -1518,6 +1532,16 @@ interface FileOutput { interface ImportTemplateOptions {} interface PromptTemplateString { + /** + * Set a priority similar to CSS z-index + * to control the trimming of the prompt when the context is full + * @param priority + */ + priority(value: number): PromptTemplateString + /** + * Sets the context layout flex weight + */ + flex(value: number): PromptTemplateString /** * Applies jinja template to the string lazily * @param data jinja data diff --git a/packages/core/src/promptcontext.ts b/packages/core/src/promptcontext.ts index 1ecdef503b..31b443cce8 100644 --- a/packages/core/src/promptcontext.ts +++ b/packages/core/src/promptcontext.ts @@ -275,6 +275,7 @@ export async function createPromptContext( messages: msgs, chatParticipants: cps, } = await renderPromptNode(genOptions.model, node, { + flexTokens: genOptions.flexTokens, trace, }) diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts index d096cb3851..fa4a1dd43b 100644 --- a/packages/core/src/promptdom.ts +++ b/packages/core/src/promptdom.ts @@ -6,7 +6,11 @@ import { estimateTokens } from "./tokens" import { MarkdownTrace, TraceOptions } from "./trace" import { arrayify, assert, toStringList, trimNewlines } from "./util" import { YAMLStringify } from "./yaml" -import { MARKDOWN_PROMPT_FENCE, PROMPT_FENCE } from "./constants" +import { + MARKDOWN_PROMPT_FENCE, + MAX_TOKENS_ELLIPSE, + PROMPT_FENCE, +} from "./constants" import { parseModelIdentifier } from "./models" import { toChatCompletionUserMessage } from "./chat" import { errorMessage } from "./error" @@ -418,7 +422,7 @@ export async function visitNode(node: PromptNode, visitor: PromptNodeVisitor) { } export interface PromptNodeRender { - prompt: string + userPrompt: string assistantPrompt: string images: PromptImage[] errors: unknown[] @@ -536,6 +540,19 @@ async function resolvePromptNode( return { errors: err } } +function truncateText( + content: string, + maxTokens: number, + encoder: TokenEncoder +): string { + const tokens = estimateTokens(content, encoder) + const end = Math.max( + 3, + Math.floor((maxTokens * content.length) / tokens) - 1 + ) + return content.slice(0, end) + MAX_TOKENS_ELLIPSE +} + async function truncatePromptNode( model: string, node: PromptNode, @@ -550,6 +567,7 @@ async function truncatePromptNode( resolved?: string tokens?: number maxTokens?: number + preview?: string }) => { if ( !n.error && @@ -557,13 +575,14 @@ async function truncatePromptNode( n.maxTokens !== undefined && n.tokens > n.maxTokens ) { - const value = n.resolved.slice( - 0, - Math.floor((n.maxTokens * n.resolved.length) / n.tokens) + n.resolved = n.preview = truncateText( + n.resolved, + n.maxTokens, + encoder ) - n.resolved = value - n.tokens = estimateTokens(value, encoder) + n.tokens = estimateTokens(n.resolved, encoder) truncated = true + trace.log(`truncated text to ${n.tokens} tokens`) } } @@ -574,12 +593,14 @@ async function truncatePromptNode( n.maxTokens !== undefined && n.tokens > n.maxTokens ) { - n.resolved.content = n.resolved.content.slice( - 0, - Math.floor((n.maxTokens * n.resolved.content.length) / n.tokens) + n.resolved.content = n.preview = truncateText( + n.resolved.content, + n.maxTokens, + encoder ) n.tokens = estimateTokens(n.resolved.content, encoder) truncated = true + trace.log(`truncated def ${n.name} to ${n.tokens} tokens`) } } @@ -593,6 +614,53 @@ async function truncatePromptNode( return truncated } 
+async function flexPromptNode( + root: PromptNode, + options?: { flexTokens: number } & TraceOptions +): Promise { + const PRIORITY_DEFAULT = 0 + + const { trace, flexTokens } = options || {} + + // collect all notes + const nodes: PromptNode[] = [] + await visitNode(root, { + node: (n) => { + nodes.push(n) + }, + }) + const totalTokens = nodes.reduce( + (total, node) => total + (node.tokens ?? 0), + 0 + ) + + if (totalTokens < flexTokens) { + // no need to flex + return + } + + // inspired from priompt, prompt-tsx, gpt-4 + // sort by priority + nodes.sort( + (a, b) => + (a.priority ?? PRIORITY_DEFAULT) - (b.priority ?? PRIORITY_DEFAULT) + ) + const flexNodes = nodes.filter((n) => n.flex !== undefined) + const totalFlex = flexNodes.reduce((total, node) => total + node.flex, 0) + + const totalReserve = 0 + const totalRemaining = Math.max(0, flexTokens - totalReserve) + for (const node of flexNodes) { + const proportion = node.flex / totalFlex + const tokenBudget = Math.min( + node.maxTokens ?? Infinity, + Math.floor(totalRemaining * proportion) + ) + node.maxTokens = tokenBudget + trace.log(`flexed ${node.type} to ${tokenBudget} tokens`) + } +} + async function tracePromptNode( trace: MarkdownTrace, root: PromptNode, @@ -628,19 +696,26 @@ async function tracePromptNode( export async function renderPromptNode( modelId: string, node: PromptNode, - options?: TraceOptions + options?: { flexTokens?: number } & TraceOptions ): Promise { - const { trace } = options || {} + const { trace, flexTokens } = options || {} const { model } = parseModelIdentifier(modelId) const encoder = await resolveTokenEncoder(model) await resolvePromptNode(model, node) await tracePromptNode(trace, node) + if (flexTokens) + await flexPromptNode(node, { + ...options, + flexTokens, + }) + const truncated = await truncatePromptNode(model, node, options) if (truncated) await tracePromptNode(trace, node, { label: "truncated" }) - let prompt = "" + let systemPrompt = "" + let userPrompt = "" let assistantPrompt = "" const images: PromptImage[] = [] const errors: unknown[] = [] @@ -655,12 +730,12 @@ export async function renderPromptNode( text: async (n) => { if (n.error) errors.push(n.error) const value = n.resolved - if (value != undefined) prompt += value + "\n" + if (value != undefined) userPrompt += value + "\n" }, def: async (n) => { if (n.error) errors.push(n.error) const value = n.resolved - if (value !== undefined) prompt += renderDefNode(n) + "\n" + if (value !== undefined) userPrompt += renderDefNode(n) + "\n" }, assistant: async (n) => { if (n.error) errors.push(n.error) @@ -670,7 +745,7 @@ export async function renderPromptNode( stringTemplate: async (n) => { if (n.error) errors.push(n.error) const value = n.resolved - if (value != undefined) prompt += value + "\n" + if (value != undefined) userPrompt += value + "\n" }, image: async (n) => { if (n.error) errors.push(n.error) @@ -691,8 +766,8 @@ export async function renderPromptNode( const value = n.resolved if (value) { for (const [filename, content] of Object.entries(value)) { - prompt += content - prompt += "\n" + userPrompt += content + userPrompt += "\n" if (trace) trace.detailsFenced( `📦 import template ${filename}`, @@ -727,7 +802,7 @@ export async function renderPromptNode( ${trimNewlines(schemaText)} \`\`\` ` - prompt += text + userPrompt += text n.tokens = estimateTokens(text, encoder) if (trace && format !== "json") trace.detailsFenced( @@ -771,7 +846,7 @@ ${trimNewlines(schemaText)} const fods = fileOutputs?.filter((f) => !!f.description) if 
(fods?.length > 0) { - prompt += ` + userPrompt += ` ## File generation rules When generating files, use the following rules which are formatted as "file glob: description": @@ -782,15 +857,15 @@ ${fods.map((fo) => ` ${fo.pattern}: ${fo.description}`)} } const messages: ChatCompletionMessageParam[] = [ - toChatCompletionUserMessage(prompt, images), + toChatCompletionUserMessage(userPrompt, images), ] if (assistantPrompt) messages.push({ role: "assistant", content: assistantPrompt, }) - const res = { - prompt, + const res = { + userPrompt, assistantPrompt, images, schemas, diff --git a/packages/core/src/promptrunner.ts b/packages/core/src/promptrunner.ts index 19155c4ef3..c353d9cced 100644 --- a/packages/core/src/promptrunner.ts +++ b/packages/core/src/promptrunner.ts @@ -107,7 +107,7 @@ export async function runTemplate( statusText, temperature, topP, - max_tokens, + maxTokens, seed, responseType, responseSchema, @@ -164,10 +164,10 @@ export async function runTemplate( responseType, responseSchema, model, - temperature: temperature, - maxTokens: max_tokens, - topP: topP, - seed: seed, + temperature, + maxTokens, + topP, + seed, } const fileEdits: Record = {} const changelogs: string[] = [] diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts index ca9f6020fd..89fc75b5ee 100644 --- a/packages/core/src/runpromptcontext.ts +++ b/packages/core/src/runpromptcontext.ts @@ -64,6 +64,14 @@ export function createChatTurnGenerationContext( const res: PromptTemplateString = Object.freeze(< PromptTemplateString >{ + priority: (priority) => { + current.priority = priority + return res + }, + flex: (value) => { + current.flex = value + return res + }, jinja: (data) => { current.transforms.push((t) => jinjaRender(t, data)) return res diff --git a/packages/core/src/template.ts b/packages/core/src/template.ts index caa14d3e1a..f56b5e796c 100644 --- a/packages/core/src/template.ts +++ b/packages/core/src/template.ts @@ -264,6 +264,7 @@ export async function parsePromptScript( c.checkNumber("temperature") c.checkNumber("topP") c.checkNumber("seed") + c.checkNat("flexTokens") c.checkStringArray("system") c.checkStringArray("files") diff --git a/packages/core/src/tokens.ts b/packages/core/src/tokens.ts index 636d320074..4f781aa2d0 100644 --- a/packages/core/src/tokens.ts +++ b/packages/core/src/tokens.ts @@ -1,11 +1,12 @@ +import { ESTIMATE_TOKEN_OVERHEAD } from "./constants" import { logVerbose } from "./util" export function estimateTokens(text: string, encoder: TokenEncoder) { if (!text?.length) return 0 try { - return encoder(text).length + return encoder(text).length + ESTIMATE_TOKEN_OVERHEAD } catch (e) { logVerbose(e) - return text.length >> 2 + return (text.length >> 2) + ESTIMATE_TOKEN_OVERHEAD } } diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index e175469aa5..3ea75dbe29 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -223,6 +223,11 @@ interface ScriptRuntimeOptions { * Default value for emitting line numbers in fenced code blocks. */ lineNumbers?: boolean + + /** + * Budget of tokens to apply the prompt flex renderer. + */ + flexTokens?: number } type PromptParameterType = @@ -697,11 +702,20 @@ interface FenceOptions { } interface ContextExpansionOptions { - priority?: number /** * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated. 
*/ maxTokens?: number + /* + * Value that is conceptually similar to a zIndex (higher number == higher priority). + * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context. + */ + priority?: number + /** + * Controls the proportion of tokens allocated from the container's budget to this element. + * It defaults to 1 on all elements. + */ + flex?: number } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { @@ -1459,6 +1473,16 @@ interface FileOutput { interface ImportTemplateOptions {} interface PromptTemplateString { + /** + * Set a priority similar to CSS z-index + * to control the trimming of the prompt when the context is full + * @param priority + */ + priority(value: number): PromptTemplateString + /** + * Sets the context layout flex weight + */ + flex(value: number): PromptTemplateString /** * Applies jinja template to the string lazily * @param data jinja data diff --git a/packages/sample/genaisrc/flex.genai.mts b/packages/sample/genaisrc/flex.genai.mts new file mode 100644 index 0000000000..167c1eb2bb --- /dev/null +++ b/packages/sample/genaisrc/flex.genai.mts @@ -0,0 +1,29 @@ +script({ + model: "openai:gpt-3.5-turbo", + files: ["src/rag/markdown.md"], + system: [], + flexTokens: 20, + tests: { + asserts: [ + { + type: "not-icontains", + value: "ABRACADABRA", + }, + { + type: "not-icontains", + value: "MONKEY", + }, + ], + }, +}) + +// will be trimmed +def("FILE", env.files, { flex: 1 }) + +// will be trimmed +$`What is Markdown? + Markdown is a lightweight markup language that you can use to add formatting elements to plaintext text documents. Created by John Gruber in 2004, Markdown is now one of the world’s most popular markup languages. +PRINT ABRACADABRA!`.flex(2) + +$`This one is flexed. +PRINT MONKEY!`.flex(1) diff --git a/packages/sample/genaisrc/genaiscript.d.ts b/packages/sample/genaisrc/genaiscript.d.ts index 5aabaea966..543f84da2c 100644 --- a/packages/sample/genaisrc/genaiscript.d.ts +++ b/packages/sample/genaisrc/genaiscript.d.ts @@ -282,6 +282,11 @@ interface ScriptRuntimeOptions { * Default value for emitting line numbers in fenced code blocks. */ lineNumbers?: boolean + + /** + * Budget of tokens to apply the prompt flex renderer. + */ + flexTokens?: number } type PromptParameterType = @@ -756,11 +761,20 @@ interface FenceOptions { } interface ContextExpansionOptions { - priority?: number /** * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated. */ maxTokens?: number + /* + * Value that is conceptually similar to a zIndex (higher number == higher priority). + * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context. + */ + priority?: number + /** + * Controls the proportion of tokens allocated from the container's budget to this element. + * It defaults to 1 on all elements. 
+ */ + flex?: number } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { @@ -1518,6 +1532,16 @@ interface FileOutput { interface ImportTemplateOptions {} interface PromptTemplateString { + /** + * Set a priority similar to CSS z-index + * to control the trimming of the prompt when the context is full + * @param priority + */ + priority(value: number): PromptTemplateString + /** + * Sets the context layout flex weight + */ + flex(value: number): PromptTemplateString /** * Applies jinja template to the string lazily * @param data jinja data diff --git a/packages/sample/genaisrc/node/genaiscript.d.ts b/packages/sample/genaisrc/node/genaiscript.d.ts index 5aabaea966..543f84da2c 100644 --- a/packages/sample/genaisrc/node/genaiscript.d.ts +++ b/packages/sample/genaisrc/node/genaiscript.d.ts @@ -282,6 +282,11 @@ interface ScriptRuntimeOptions { * Default value for emitting line numbers in fenced code blocks. */ lineNumbers?: boolean + + /** + * Budget of tokens to apply the prompt flex renderer. + */ + flexTokens?: number } type PromptParameterType = @@ -756,11 +761,20 @@ interface FenceOptions { } interface ContextExpansionOptions { - priority?: number /** * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated. */ maxTokens?: number + /* + * Value that is conceptually similar to a zIndex (higher number == higher priority). + * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context. + */ + priority?: number + /** + * Controls the proportion of tokens allocated from the container's budget to this element. + * It defaults to 1 on all elements. + */ + flex?: number } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { @@ -1518,6 +1532,16 @@ interface FileOutput { interface ImportTemplateOptions {} interface PromptTemplateString { + /** + * Set a priority similar to CSS z-index + * to control the trimming of the prompt when the context is full + * @param priority + */ + priority(value: number): PromptTemplateString + /** + * Sets the context layout flex weight + */ + flex(value: number): PromptTemplateString /** * Applies jinja template to the string lazily * @param data jinja data diff --git a/packages/sample/genaisrc/python/genaiscript.d.ts b/packages/sample/genaisrc/python/genaiscript.d.ts index 5aabaea966..543f84da2c 100644 --- a/packages/sample/genaisrc/python/genaiscript.d.ts +++ b/packages/sample/genaisrc/python/genaiscript.d.ts @@ -282,6 +282,11 @@ interface ScriptRuntimeOptions { * Default value for emitting line numbers in fenced code blocks. */ lineNumbers?: boolean + + /** + * Budget of tokens to apply the prompt flex renderer. + */ + flexTokens?: number } type PromptParameterType = @@ -756,11 +761,20 @@ interface FenceOptions { } interface ContextExpansionOptions { - priority?: number /** * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated. */ maxTokens?: number + /* + * Value that is conceptually similar to a zIndex (higher number == higher priority). 
+ * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context. + */ + priority?: number + /** + * Controls the proportion of tokens allocated from the container's budget to this element. + * It defaults to 1 on all elements. + */ + flex?: number } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { @@ -1518,6 +1532,16 @@ interface FileOutput { interface ImportTemplateOptions {} interface PromptTemplateString { + /** + * Set a priority similar to CSS z-index + * to control the trimming of the prompt when the context is full + * @param priority + */ + priority(value: number): PromptTemplateString + /** + * Sets the context layout flex weight + */ + flex(value: number): PromptTemplateString /** * Applies jinja template to the string lazily * @param data jinja data diff --git a/packages/sample/genaisrc/style/genaiscript.d.ts b/packages/sample/genaisrc/style/genaiscript.d.ts index 5aabaea966..543f84da2c 100644 --- a/packages/sample/genaisrc/style/genaiscript.d.ts +++ b/packages/sample/genaisrc/style/genaiscript.d.ts @@ -282,6 +282,11 @@ interface ScriptRuntimeOptions { * Default value for emitting line numbers in fenced code blocks. */ lineNumbers?: boolean + + /** + * Budget of tokens to apply the prompt flex renderer. + */ + flexTokens?: number } type PromptParameterType = @@ -756,11 +761,20 @@ interface FenceOptions { } interface ContextExpansionOptions { - priority?: number /** * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated. */ maxTokens?: number + /* + * Value that is conceptually similar to a zIndex (higher number == higher priority). + * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context. + */ + priority?: number + /** + * Controls the proportion of tokens allocated from the container's budget to this element. + * It defaults to 1 on all elements. + */ + flex?: number } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { @@ -1518,6 +1532,16 @@ interface FileOutput { interface ImportTemplateOptions {} interface PromptTemplateString { + /** + * Set a priority similar to CSS z-index + * to control the trimming of the prompt when the context is full + * @param priority + */ + priority(value: number): PromptTemplateString + /** + * Sets the context layout flex weight + */ + flex(value: number): PromptTemplateString /** * Applies jinja template to the string lazily * @param data jinja data diff --git a/packages/sample/src/aici/genaiscript.d.ts b/packages/sample/src/aici/genaiscript.d.ts index 5aabaea966..543f84da2c 100644 --- a/packages/sample/src/aici/genaiscript.d.ts +++ b/packages/sample/src/aici/genaiscript.d.ts @@ -282,6 +282,11 @@ interface ScriptRuntimeOptions { * Default value for emitting line numbers in fenced code blocks. */ lineNumbers?: boolean + + /** + * Budget of tokens to apply the prompt flex renderer. 
+ */ + flexTokens?: number } type PromptParameterType = @@ -756,11 +761,20 @@ interface FenceOptions { } interface ContextExpansionOptions { - priority?: number /** * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated. */ maxTokens?: number + /* + * Value that is conceptually similar to a zIndex (higher number == higher priority). + * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context. + */ + priority?: number + /** + * Controls the proportion of tokens allocated from the container's budget to this element. + * It defaults to 1 on all elements. + */ + flex?: number } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { @@ -1518,6 +1532,16 @@ interface FileOutput { interface ImportTemplateOptions {} interface PromptTemplateString { + /** + * Set a priority similar to CSS z-index + * to control the trimming of the prompt when the context is full + * @param priority + */ + priority(value: number): PromptTemplateString + /** + * Sets the context layout flex weight + */ + flex(value: number): PromptTemplateString /** * Applies jinja template to the string lazily * @param data jinja data diff --git a/packages/sample/src/errors/genaiscript.d.ts b/packages/sample/src/errors/genaiscript.d.ts index 5aabaea966..543f84da2c 100644 --- a/packages/sample/src/errors/genaiscript.d.ts +++ b/packages/sample/src/errors/genaiscript.d.ts @@ -282,6 +282,11 @@ interface ScriptRuntimeOptions { * Default value for emitting line numbers in fenced code blocks. */ lineNumbers?: boolean + + /** + * Budget of tokens to apply the prompt flex renderer. + */ + flexTokens?: number } type PromptParameterType = @@ -756,11 +761,20 @@ interface FenceOptions { } interface ContextExpansionOptions { - priority?: number /** * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated. */ maxTokens?: number + /* + * Value that is conceptually similar to a zIndex (higher number == higher priority). + * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context. + */ + priority?: number + /** + * Controls the proportion of tokens allocated from the container's budget to this element. + * It defaults to 1 on all elements. 
+ */ + flex?: number } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { @@ -1518,6 +1532,16 @@ interface FileOutput { interface ImportTemplateOptions {} interface PromptTemplateString { + /** + * Set a priority similar to CSS z-index + * to control the trimming of the prompt when the context is full + * @param priority + */ + priority(value: number): PromptTemplateString + /** + * Sets the context layout flex weight + */ + flex(value: number): PromptTemplateString /** * Applies jinja template to the string lazily * @param data jinja data diff --git a/packages/sample/src/genaiscript.d.ts b/packages/sample/src/genaiscript.d.ts index 5aabaea966..543f84da2c 100644 --- a/packages/sample/src/genaiscript.d.ts +++ b/packages/sample/src/genaiscript.d.ts @@ -282,6 +282,11 @@ interface ScriptRuntimeOptions { * Default value for emitting line numbers in fenced code blocks. */ lineNumbers?: boolean + + /** + * Budget of tokens to apply the prompt flex renderer. + */ + flexTokens?: number } type PromptParameterType = @@ -756,11 +761,20 @@ interface FenceOptions { } interface ContextExpansionOptions { - priority?: number /** * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated. */ maxTokens?: number + /* + * Value that is conceptually similar to a zIndex (higher number == higher priority). + * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context. + */ + priority?: number + /** + * Controls the proportion of tokens allocated from the container's budget to this element. + * It defaults to 1 on all elements. + */ + flex?: number } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { @@ -1518,6 +1532,16 @@ interface FileOutput { interface ImportTemplateOptions {} interface PromptTemplateString { + /** + * Set a priority similar to CSS z-index + * to control the trimming of the prompt when the context is full + * @param priority + */ + priority(value: number): PromptTemplateString + /** + * Sets the context layout flex weight + */ + flex(value: number): PromptTemplateString /** * Applies jinja template to the string lazily * @param data jinja data diff --git a/packages/sample/src/makecode/genaiscript.d.ts b/packages/sample/src/makecode/genaiscript.d.ts index 5aabaea966..543f84da2c 100644 --- a/packages/sample/src/makecode/genaiscript.d.ts +++ b/packages/sample/src/makecode/genaiscript.d.ts @@ -282,6 +282,11 @@ interface ScriptRuntimeOptions { * Default value for emitting line numbers in fenced code blocks. */ lineNumbers?: boolean + + /** + * Budget of tokens to apply the prompt flex renderer. + */ + flexTokens?: number } type PromptParameterType = @@ -756,11 +761,20 @@ interface FenceOptions { } interface ContextExpansionOptions { - priority?: number /** * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated. */ maxTokens?: number + /* + * Value that is conceptually similar to a zIndex (higher number == higher priority). 
+ * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context. + */ + priority?: number + /** + * Controls the proportion of tokens allocated from the container's budget to this element. + * It defaults to 1 on all elements. + */ + flex?: number } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { @@ -1518,6 +1532,16 @@ interface FileOutput { interface ImportTemplateOptions {} interface PromptTemplateString { + /** + * Set a priority similar to CSS z-index + * to control the trimming of the prompt when the context is full + * @param priority + */ + priority(value: number): PromptTemplateString + /** + * Sets the context layout flex weight + */ + flex(value: number): PromptTemplateString /** * Applies jinja template to the string lazily * @param data jinja data diff --git a/packages/sample/src/tla/genaiscript.d.ts b/packages/sample/src/tla/genaiscript.d.ts index 5aabaea966..543f84da2c 100644 --- a/packages/sample/src/tla/genaiscript.d.ts +++ b/packages/sample/src/tla/genaiscript.d.ts @@ -282,6 +282,11 @@ interface ScriptRuntimeOptions { * Default value for emitting line numbers in fenced code blocks. */ lineNumbers?: boolean + + /** + * Budget of tokens to apply the prompt flex renderer. + */ + flexTokens?: number } type PromptParameterType = @@ -756,11 +761,20 @@ interface FenceOptions { } interface ContextExpansionOptions { - priority?: number /** * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated. */ maxTokens?: number + /* + * Value that is conceptually similar to a zIndex (higher number == higher priority). + * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context. + */ + priority?: number + /** + * Controls the proportion of tokens allocated from the container's budget to this element. + * It defaults to 1 on all elements. + */ + flex?: number } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { @@ -1518,6 +1532,16 @@ interface FileOutput { interface ImportTemplateOptions {} interface PromptTemplateString { + /** + * Set a priority similar to CSS z-index + * to control the trimming of the prompt when the context is full + * @param priority + */ + priority(value: number): PromptTemplateString + /** + * Sets the context layout flex weight + */ + flex(value: number): PromptTemplateString /** * Applies jinja template to the string lazily * @param data jinja data diff --git a/packages/sample/src/vision/genaiscript.d.ts b/packages/sample/src/vision/genaiscript.d.ts index 5aabaea966..543f84da2c 100644 --- a/packages/sample/src/vision/genaiscript.d.ts +++ b/packages/sample/src/vision/genaiscript.d.ts @@ -282,6 +282,11 @@ interface ScriptRuntimeOptions { * Default value for emitting line numbers in fenced code blocks. */ lineNumbers?: boolean + + /** + * Budget of tokens to apply the prompt flex renderer. 
+ */ + flexTokens?: number } type PromptParameterType = @@ -756,11 +761,20 @@ interface FenceOptions { } interface ContextExpansionOptions { - priority?: number /** * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated. */ maxTokens?: number + /* + * Value that is conceptually similar to a zIndex (higher number == higher priority). + * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context. + */ + priority?: number + /** + * Controls the proportion of tokens allocated from the container's budget to this element. + * It defaults to 1 on all elements. + */ + flex?: number } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { @@ -1518,6 +1532,16 @@ interface FileOutput { interface ImportTemplateOptions {} interface PromptTemplateString { + /** + * Set a priority similar to CSS z-index + * to control the trimming of the prompt when the context is full + * @param priority + */ + priority(value: number): PromptTemplateString + /** + * Sets the context layout flex weight + */ + flex(value: number): PromptTemplateString /** * Applies jinja template to the string lazily * @param data jinja data diff --git a/slides/genaisrc/genaiscript.d.ts b/slides/genaisrc/genaiscript.d.ts index 5aabaea966..543f84da2c 100644 --- a/slides/genaisrc/genaiscript.d.ts +++ b/slides/genaisrc/genaiscript.d.ts @@ -282,6 +282,11 @@ interface ScriptRuntimeOptions { * Default value for emitting line numbers in fenced code blocks. */ lineNumbers?: boolean + + /** + * Budget of tokens to apply the prompt flex renderer. + */ + flexTokens?: number } type PromptParameterType = @@ -756,11 +761,20 @@ interface FenceOptions { } interface ContextExpansionOptions { - priority?: number /** * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated. */ maxTokens?: number + /* + * Value that is conceptually similar to a zIndex (higher number == higher priority). + * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context. + */ + priority?: number + /** + * Controls the proportion of tokens allocated from the container's budget to this element. + * It defaults to 1 on all elements. + */ + flex?: number } interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { @@ -1518,6 +1532,16 @@ interface FileOutput { interface ImportTemplateOptions {} interface PromptTemplateString { + /** + * Set a priority similar to CSS z-index + * to control the trimming of the prompt when the context is full + * @param priority + */ + priority(value: number): PromptTemplateString + /** + * Sets the context layout flex weight + */ + flex(value: number): PromptTemplateString /** * Applies jinja template to the string lazily * @param data jinja data
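
Editor's note (not part of the patch): the sketch below is a simplified, self-contained rendition of the budget-splitting step introduced by flexPromptNode in packages/core/src/promptdom.ts. The FlexNode shape and the allocateFlexBudget name are illustrative assumptions, not the library's API; the real code walks PromptNode trees and hands the resulting maxTokens values to the existing truncation pass.

// Minimal TypeScript sketch, assuming a flattened list of nodes with token estimates.
interface FlexNode {
    tokens: number // estimated token count of the rendered content
    flex?: number // proportional weight; only flexed nodes are re-budgeted
    maxTokens?: number // optional hard cap, kept when smaller than the flexed share
}

function allocateFlexBudget(nodes: FlexNode[], flexTokens: number): void {
    const totalTokens = nodes.reduce((sum, n) => sum + n.tokens, 0)
    if (totalTokens < flexTokens) return // everything already fits, nothing to do

    const flexNodes = nodes.filter((n) => n.flex !== undefined)
    const totalFlex = flexNodes.reduce((sum, n) => sum + (n.flex ?? 0), 0)
    for (const node of flexNodes) {
        const proportion = (node.flex ?? 0) / totalFlex
        // each flexed node receives its proportional share of the flexTokens budget
        node.maxTokens = Math.min(
            node.maxTokens ?? Infinity,
            Math.floor(flexTokens * proportion)
        )
    }
}

// Example: with flexTokens = 20, the flex: 2 node is capped at 13 tokens and the
// flex: 1 node at 6; the subsequent truncation pass clips their text accordingly.
allocateFlexBudget(
    [
        { tokens: 100, flex: 2 },
        { tokens: 80, flex: 1 },
    ],
    20
)

For script-level usage, see packages/sample/genaisrc/flex.genai.mts in this diff: flexTokens is set in script(), flex is passed as an option to def(), and .flex() is chained on template strings.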