From 786833a905c8625d268feb7c5e778a276c80ed9b Mon Sep 17 00:00:00 2001
From: Peli de Halleux <pelikhan@users.noreply.github.com>
Date: Fri, 6 Sep 2024 15:28:01 +0000
Subject: [PATCH 1/7] Add ellipse on token limit exceed and create new script
 file in sample package

---
 packages/core/src/constants.ts          |  1 +
 packages/core/src/promptdom.ts          | 23 ++++++++++++++---------
 packages/sample/genaisrc/flex.genai.mts | 18 ++++++++++++++++++
 3 files changed, 33 insertions(+), 9 deletions(-)
 create mode 100644 packages/sample/genaisrc/flex.genai.mts

diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts
index 0251c83119..20b5b9ca92 100644
--- a/packages/core/src/constants.ts
+++ b/packages/core/src/constants.ts
@@ -227,3 +227,4 @@ export const CONSOLE_COLOR_WARNING = 95
 export const CONSOLE_COLOR_ERROR = 91
 
 export const PLAYWRIGHT_DEFAULT_BROWSER = "chromium"
+export const MAX_TOKENS_ELLIPSE = "..."
diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts
index d096cb3851..a75de7298e 100644
--- a/packages/core/src/promptdom.ts
+++ b/packages/core/src/promptdom.ts
@@ -6,7 +6,11 @@ import { estimateTokens } from "./tokens"
 import { MarkdownTrace, TraceOptions } from "./trace"
 import { arrayify, assert, toStringList, trimNewlines } from "./util"
 import { YAMLStringify } from "./yaml"
-import { MARKDOWN_PROMPT_FENCE, PROMPT_FENCE } from "./constants"
+import {
+    MARKDOWN_PROMPT_FENCE,
+    MAX_TOKENS_ELLIPSE,
+    PROMPT_FENCE,
+} from "./constants"
 import { parseModelIdentifier } from "./models"
 import { toChatCompletionUserMessage } from "./chat"
 import { errorMessage } from "./error"
@@ -550,6 +554,7 @@ async function truncatePromptNode(
         resolved?: string
         tokens?: number
         maxTokens?: number
+        preview?: string
     }) => {
         if (
             !n.error &&
@@ -557,11 +562,9 @@ async function truncatePromptNode(
             n.maxTokens !== undefined &&
             n.tokens > n.maxTokens
         ) {
-            const value = n.resolved.slice(
-                0,
-                Math.floor((n.maxTokens * n.resolved.length) / n.tokens)
-            )
-            n.resolved = value
+            const end = Math.floor((n.maxTokens * n.resolved.length) / n.tokens)
+            const value = n.resolved.slice(0, end) + MAX_TOKENS_ELLIPSE
+            n.resolved = n.preview = value
             n.tokens = estimateTokens(value, encoder)
             truncated = true
         }
@@ -574,11 +577,13 @@ async function truncatePromptNode(
             n.maxTokens !== undefined &&
             n.tokens > n.maxTokens
         ) {
-            n.resolved.content = n.resolved.content.slice(
-                0,
-                Math.floor((n.maxTokens * n.resolved.content.length) / n.tokens)
+            const end = Math.floor(
+                (n.maxTokens * n.resolved.content.length) / n.tokens
             )
+            n.resolved.content =
+                n.resolved.content.slice(0, end) + MAX_TOKENS_ELLIPSE
             n.tokens = estimateTokens(n.resolved.content, encoder)
+            n.preview = n.resolved.content
             truncated = true
         }
     }
diff --git a/packages/sample/genaisrc/flex.genai.mts b/packages/sample/genaisrc/flex.genai.mts
new file mode 100644
index 0000000000..5bd2905994
--- /dev/null
+++ b/packages/sample/genaisrc/flex.genai.mts
@@ -0,0 +1,18 @@
+script({
+    files: ["src/rag/markdown.md"],
+})
+
+// will be trimmed
+def("FILE", env.files, { maxTokens: 5 })
+
+// will be trimmed
+$`What is Markdown?
+ Markdown is a lightweight markup language that you can use to add formatting elements to plaintext text documents. Created by John Gruber in 2004, Markdown is now one of the world’s most popular markup languages. 
+
+Using Markdown is different than using a WYSIWYG editor. In an application like Microsoft Word, you click buttons to format words and phrases, and the changes are visible immediately. Markdown isn’t like that. When you create a Markdown-formatted file, you add Markdown syntax to the text to indicate which words and phrases should look different.
+
+For example, to denote a heading, you add a number sign before it (e.g., # Heading One). Or to make a phrase bold, you add two asterisks before and after it (e.g., **this text is bold**). It may take a while to get used to seeing Markdown syntax in your text, especially if you’re accustomed to WYSIWYG applications. The screenshot below shows a Markdown file displayed in the Visual Studio Code text editor....
+
+PRINT ABRACADABRA!
+
+`.maxTokens(5)

From fa0aaae1bc1f280d1c874bd34fb9bccba3d5e6d0 Mon Sep 17 00:00:00 2001
From: pelikhan <jhalleux@microsoft.com>
Date: Fri, 6 Sep 2024 09:22:30 -0700
Subject: [PATCH 2/7] Add flex layout options and priority setting in
 PromptTemplateString for better prompt rendering control

---
 docs/genaisrc/genaiscript.d.ts                | 38 ++++++++++++++++++-
 genaisrc/genaiscript.d.ts                     | 38 ++++++++++++++++++-
 packages/core/src/genaisrc/genaiscript.d.ts   | 38 ++++++++++++++++++-
 packages/core/src/runpromptcontext.ts         | 11 ++++++
 packages/core/src/types/prompt_template.d.ts  | 38 ++++++++++++++++++-
 packages/sample/genaisrc/genaiscript.d.ts     | 38 ++++++++++++++++++-
 .../sample/genaisrc/node/genaiscript.d.ts     | 38 ++++++++++++++++++-
 .../sample/genaisrc/python/genaiscript.d.ts   | 38 ++++++++++++++++++-
 .../sample/genaisrc/style/genaiscript.d.ts    | 38 ++++++++++++++++++-
 packages/sample/src/aici/genaiscript.d.ts     | 38 ++++++++++++++++++-
 packages/sample/src/errors/genaiscript.d.ts   | 38 ++++++++++++++++++-
 packages/sample/src/genaiscript.d.ts          | 38 ++++++++++++++++++-
 packages/sample/src/makecode/genaiscript.d.ts | 38 ++++++++++++++++++-
 packages/sample/src/tla/genaiscript.d.ts      | 38 ++++++++++++++++++-
 packages/sample/src/vision/genaiscript.d.ts   | 38 ++++++++++++++++++-
 slides/genaisrc/genaiscript.d.ts              | 38 ++++++++++++++++++-
 16 files changed, 566 insertions(+), 15 deletions(-)

diff --git a/docs/genaisrc/genaiscript.d.ts b/docs/genaisrc/genaiscript.d.ts
index 5aabaea966..92c6bb827f 100644
--- a/docs/genaisrc/genaiscript.d.ts
+++ b/docs/genaisrc/genaiscript.d.ts
@@ -756,11 +756,32 @@ interface FenceOptions {
 }
 
 interface ContextExpansionOptions {
-    priority?: number
     /**
      * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated.
      */
     maxTokens?: number
+    /**
+     * Value that is conceptually similar to a zIndex (higher number == higher priority).
+     * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
+     */
+    priority?: number
+    /**
+     * Allows an element to use the remainder of its parent's token budget when it's rendered.
+     */
+    flexGrow?: number
+    /**
+     * Controls the proportion of tokens allocated from the container's budget to this element.
+     * It defaults to 1 on all elements.
+     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
+     */
+    flexBasis?: number
+    /**
+     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
+     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexReserve={30} /></>,
+     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
+     * This is only useful in conjunction with flexGrow.
+     */
+    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1518,6 +1539,21 @@ interface FileOutput {
 interface ImportTemplateOptions {}
 
 interface PromptTemplateString {
+    /**
+     * Set a priority similar to CSS z-index
+     * to control the trimming of the prompt when the context is full
+     * @param value priority value (higher number == higher priority)
+     */
+    priority(value: number): PromptTemplateString
+    /**
+     * Sets the context layout flex weight
+     * @param options flex layout settings (grow, reserve, basis)
+     */
+    flex(options: {
+        grow?: number
+        reserve?: number
+        basis?: number
+    }): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/genaisrc/genaiscript.d.ts b/genaisrc/genaiscript.d.ts
index 5aabaea966..92c6bb827f 100644
--- a/genaisrc/genaiscript.d.ts
+++ b/genaisrc/genaiscript.d.ts
@@ -756,11 +756,32 @@ interface FenceOptions {
 }
 
 interface ContextExpansionOptions {
-    priority?: number
     /**
      * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated.
      */
     maxTokens?: number
+    /**
+     * Value that is conceptually similar to a zIndex (higher number == higher priority).
+     * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
+     */
+    priority?: number
+    /**
+     * Allows an element to use the remainder of its parent's token budget when it's rendered.
+     */
+    flexGrow?: number
+    /**
+     * Controls the proportion of tokens allocated from the container's budget to this element.
+     * It defaults to 1 on all elements.
+     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
+     */
+    flexBasis?: number
+    /**
+     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
+     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexReserve={30} /></>,
+     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
+     * This is only useful in conjunction with flexGrow.
+     */
+    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1518,6 +1539,21 @@ interface FileOutput {
 interface ImportTemplateOptions {}
 
 interface PromptTemplateString {
+    /**
+     * Set a priority similar to CSS z-index
+     * to control the trimming of the prompt when the context is full
+     * @param value priority value (higher number == higher priority)
+     */
+    priority(value: number): PromptTemplateString
+    /**
+     * Sets the context layout flex weight
+     * @param options flex layout settings (grow, reserve, basis)
+     */
+    flex(options: {
+        grow?: number
+        reserve?: number
+        basis?: number
+    }): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/core/src/genaisrc/genaiscript.d.ts b/packages/core/src/genaisrc/genaiscript.d.ts
index 5aabaea966..92c6bb827f 100644
--- a/packages/core/src/genaisrc/genaiscript.d.ts
+++ b/packages/core/src/genaisrc/genaiscript.d.ts
@@ -756,11 +756,32 @@ interface FenceOptions {
 }
 
 interface ContextExpansionOptions {
-    priority?: number
     /**
      * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated.
      */
     maxTokens?: number
+    /**
+     * Value that is conceptually similar to a zIndex (higher number == higher priority).
+     * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
+     */
+    priority?: number
+    /**
+     * Allows an element to use the remainder of its parent's token budget when it's rendered.
+     */
+    flexGrow?: number
+    /**
+     * Controls the proportion of tokens allocated from the container's budget to this element.
+     * It defaults to 1 on all elements.
+     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
+     */
+    flexBasis?: number
+    /**
+     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
+     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexReserve={30} /></>,
+     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
+     * This is only useful in conjunction with flexGrow.
+     */
+    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1518,6 +1539,21 @@ interface FileOutput {
 interface ImportTemplateOptions {}
 
 interface PromptTemplateString {
+    /**
+     * Set a priority similar to CSS z-index
+     * to control the trimming of the prompt when the context is full
+     * @param value priority value (higher number == higher priority)
+     */
+    priority(value: number): PromptTemplateString
+    /**
+     * Sets the context layout flex weight
+     * @param options flex layout settings (grow, reserve, basis)
+     */
+    flex(options: {
+        grow?: number
+        reserve?: number
+        basis?: number
+    }): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts
index ca9f6020fd..7148dac30f 100644
--- a/packages/core/src/runpromptcontext.ts
+++ b/packages/core/src/runpromptcontext.ts
@@ -64,6 +64,17 @@ export function createChatTurnGenerationContext(
             const res: PromptTemplateString = Object.freeze(<
                 PromptTemplateString
             >{
+                priority: (priority) => {
+                    current.priority = priority
+                    return res
+                },
+                flex: (options) => {
+                    const { grow, basis, reserve } = options
+                    if (grow !== undefined) current.flexGrow = grow
+                    if (basis !== undefined) current.flexBasis = basis
+                    if (reserve !== undefined) current.flexReserve = reserve
+                    return res
+                },
                 jinja: (data) => {
                     current.transforms.push((t) => jinjaRender(t, data))
                     return res
diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts
index e175469aa5..7bd46bc2ce 100644
--- a/packages/core/src/types/prompt_template.d.ts
+++ b/packages/core/src/types/prompt_template.d.ts
@@ -697,11 +697,32 @@ interface FenceOptions {
 }
 
 interface ContextExpansionOptions {
-    priority?: number
     /**
      * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated.
      */
     maxTokens?: number
+    /**
+     * Value that is conceptually similar to a zIndex (higher number == higher priority).
+     * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
+     */
+    priority?: number
+    /**
+     * Allows an element to use the remainder of its parent's token budget when it's rendered.
+     */
+    flexGrow?: number
+    /**
+     * Controls the proportion of tokens allocated from the container's budget to this element.
+     * It defaults to 1 on all elements.
+     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
+     */
+    flexBasis?: number
+    /**
+     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
+     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexReserve={30} /></>,
+     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
+     * This is only useful in conjunction with flexGrow.
+     */
+    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1459,6 +1480,21 @@ interface FileOutput {
 interface ImportTemplateOptions {}
 
 interface PromptTemplateString {
+    /**
+     * Set a priority similar to CSS z-index
+     * to control the trimming of the prompt when the context is full
+     * @param value priority value (higher number == higher priority)
+     */
+    priority(value: number): PromptTemplateString
+    /**
+     * Sets the context layout flex weight
+     * @param options flex layout settings (grow, reserve, basis)
+     */
+    flex(options: {
+        grow?: number
+        reserve?: number
+        basis?: number
+    }): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/genaisrc/genaiscript.d.ts b/packages/sample/genaisrc/genaiscript.d.ts
index 5aabaea966..92c6bb827f 100644
--- a/packages/sample/genaisrc/genaiscript.d.ts
+++ b/packages/sample/genaisrc/genaiscript.d.ts
@@ -756,11 +756,32 @@ interface FenceOptions {
 }
 
 interface ContextExpansionOptions {
-    priority?: number
     /**
      * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated.
      */
     maxTokens?: number
+    /**
+     * Value that is conceptually similar to a zIndex (higher number == higher priority).
+     * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
+     */
+    priority?: number
+    /**
+     * Allows an element to use the remainder of its parent's token budget when it's rendered.
+     */
+    flexGrow?: number
+    /**
+     * Controls the proportion of tokens allocated from the container's budget to this element.
+     * It defaults to 1 on all elements.
+     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
+     */
+    flexBasis?: number
+    /**
+     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
+     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexReserve={30} /></>,
+     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
+     * This is only useful in conjunction with flexGrow.
+     */
+    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1518,6 +1539,21 @@ interface FileOutput {
 interface ImportTemplateOptions {}
 
 interface PromptTemplateString {
+    /**
+     * Set a priority similar to CSS z-index
+     * to control the trimming of the prompt when the context is full
+     * @param value priority value (higher number == higher priority)
+     */
+    priority(value: number): PromptTemplateString
+    /**
+     * Sets the context layout flex weight
+     * @param options flex layout settings (grow, reserve, basis)
+     */
+    flex(options: {
+        grow?: number
+        reserve?: number
+        basis?: number
+    }): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/genaisrc/node/genaiscript.d.ts b/packages/sample/genaisrc/node/genaiscript.d.ts
index 5aabaea966..92c6bb827f 100644
--- a/packages/sample/genaisrc/node/genaiscript.d.ts
+++ b/packages/sample/genaisrc/node/genaiscript.d.ts
@@ -756,11 +756,32 @@ interface FenceOptions {
 }
 
 interface ContextExpansionOptions {
-    priority?: number
     /**
      * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated.
      */
     maxTokens?: number
+    /**
+     * Value that is conceptually similar to a zIndex (higher number == higher priority).
+     * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
+     */
+    priority?: number
+    /**
+     * Allows an element to use the remainder of its parent's token budget when it's rendered.
+     */
+    flexGrow?: number
+    /**
+     * Controls the proportion of tokens allocated from the container's budget to this element.
+     * It defaults to 1 on all elements.
+     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
+     */
+    flexBasis?: number
+    /**
+     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
+     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexReserve={30} /></>,
+     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
+     * This is only useful in conjunction with flexGrow.
+     */
+    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1518,6 +1539,21 @@ interface FileOutput {
 interface ImportTemplateOptions {}
 
 interface PromptTemplateString {
+    /**
+     * Set a priority similar to CSS z-index
+     * to control the trimming of the prompt when the context is full
+     * @param value priority value (higher number == higher priority)
+     */
+    priority(value: number): PromptTemplateString
+    /**
+     * Sets the context layout flex weight
+     * @param options flex layout settings (grow, reserve, basis)
+     */
+    flex(options: {
+        grow?: number
+        reserve?: number
+        basis?: number
+    }): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/genaisrc/python/genaiscript.d.ts b/packages/sample/genaisrc/python/genaiscript.d.ts
index 5aabaea966..92c6bb827f 100644
--- a/packages/sample/genaisrc/python/genaiscript.d.ts
+++ b/packages/sample/genaisrc/python/genaiscript.d.ts
@@ -756,11 +756,32 @@ interface FenceOptions {
 }
 
 interface ContextExpansionOptions {
-    priority?: number
     /**
      * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated.
      */
     maxTokens?: number
+    /**
+     * Value that is conceptually similar to a zIndex (higher number == higher priority).
+     * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
+     */
+    priority?: number
+    /**
+     * Allows an element to use the remainder of its parent's token budget when it's rendered.
+     */
+    flexGrow?: number
+    /**
+     * Controls the proportion of tokens allocated from the container's budget to this element.
+     * It defaults to 1 on all elements.
+     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
+     */
+    flexBasis?: number
+    /**
+     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
+     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexReserve={30} /></>,
+     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
+     * This is only useful in conjunction with flexGrow.
+     */
+    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1518,6 +1539,21 @@ interface FileOutput {
 interface ImportTemplateOptions {}
 
 interface PromptTemplateString {
+    /**
+     * Set a priority similar to CSS z-index
+     * to control the trimming of the prompt when the context is full
+     * @param value priority value (higher number == higher priority)
+     */
+    priority(value: number): PromptTemplateString
+    /**
+     * Sets the context layout flex weight
+     * @param options flex layout settings (grow, reserve, basis)
+     */
+    flex(options: {
+        grow?: number
+        reserve?: number
+        basis?: number
+    }): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/genaisrc/style/genaiscript.d.ts b/packages/sample/genaisrc/style/genaiscript.d.ts
index 5aabaea966..92c6bb827f 100644
--- a/packages/sample/genaisrc/style/genaiscript.d.ts
+++ b/packages/sample/genaisrc/style/genaiscript.d.ts
@@ -756,11 +756,32 @@ interface FenceOptions {
 }
 
 interface ContextExpansionOptions {
-    priority?: number
     /**
      * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated.
      */
     maxTokens?: number
+    /**
+     * Value that is conceptually similar to a zIndex (higher number == higher priority).
+     * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
+     */
+    priority?: number
+    /**
+     * Allows an element to use the remainder of its parent's token budget when it's rendered.
+     */
+    flexGrow?: number
+    /**
+     * Controls the proportion of tokens allocated from the container's budget to this element.
+     * It defaults to 1 on all elements.
+     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
+     */
+    flexBasis?: number
+    /**
+     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
+     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexReserve={30} /></>,
+     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
+     * This is only useful in conjunction with flexGrow.
+     */
+    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1518,6 +1539,21 @@ interface FileOutput {
 interface ImportTemplateOptions {}
 
 interface PromptTemplateString {
+    /**
+     * Set a priority similar to CSS z-index
+     * to control the trimming of the prompt when the context is full
+     * @param value priority value (higher number == higher priority)
+     */
+    priority(value: number): PromptTemplateString
+    /**
+     * Sets the context layout flex weight
+     * @param options flex layout settings (grow, reserve, basis)
+     */
+    flex(options: {
+        grow?: number
+        reserve?: number
+        basis?: number
+    }): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/src/aici/genaiscript.d.ts b/packages/sample/src/aici/genaiscript.d.ts
index 5aabaea966..92c6bb827f 100644
--- a/packages/sample/src/aici/genaiscript.d.ts
+++ b/packages/sample/src/aici/genaiscript.d.ts
@@ -756,11 +756,32 @@ interface FenceOptions {
 }
 
 interface ContextExpansionOptions {
-    priority?: number
     /**
      * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated.
      */
     maxTokens?: number
+    /**
+     * Value that is conceptually similar to a zIndex (higher number == higher priority).
+     * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
+     */
+    priority?: number
+    /**
+     * Allows an element to use the remainder of its parent's token budget when it's rendered.
+     */
+    flexGrow?: number
+    /**
+     * Controls the proportion of tokens allocated from the container's budget to this element.
+     * It defaults to 1 on all elements.
+     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
+     */
+    flexBasis?: number
+    /**
+     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
+     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexReserve={30} /></>,
+     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
+     * This is only useful in conjunction with flexGrow.
+     */
+    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1518,6 +1539,21 @@ interface FileOutput {
 interface ImportTemplateOptions {}
 
 interface PromptTemplateString {
+    /**
+     * Set a priority similar to CSS z-index
+     * to control the trimming of the prompt when the context is full
+     * @param value priority value (higher number == higher priority)
+     */
+    priority(value: number): PromptTemplateString
+    /**
+     * Sets the context layout flex weight
+     * @param options flex layout settings (grow, reserve, basis)
+     */
+    flex(options: {
+        grow?: number
+        reserve?: number
+        basis?: number
+    }): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/src/errors/genaiscript.d.ts b/packages/sample/src/errors/genaiscript.d.ts
index 5aabaea966..92c6bb827f 100644
--- a/packages/sample/src/errors/genaiscript.d.ts
+++ b/packages/sample/src/errors/genaiscript.d.ts
@@ -756,11 +756,32 @@ interface FenceOptions {
 }
 
 interface ContextExpansionOptions {
-    priority?: number
     /**
      * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated.
      */
     maxTokens?: number
+    /*
+     * Value that is conceptually similar to a zIndex (higher number == higher priority).
+     * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
+     */
+    priority?: number
+    /**
+     * Allows an element to use the remainder of its parent's token budget when it's rendered.
+     */
+    flexGrow?: number
+    /**
+     * Controls the proportion of tokens allocated from the container's budget to this element.
+     * It defaults to 1 on all elements.
+     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
+     */
+    flexBasis?: number
+    /**
+     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
+     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
+     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
+     * This is only useful in conjunction with flexGrow.
+     */
+    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1518,6 +1539,21 @@ interface FileOutput {
 interface ImportTemplateOptions {}
 
 interface PromptTemplateString {
+    /**
+     * Set a priority similar to CSS z-index
+     * to control the trimming of the prompt when the context is full
+     * @param priority
+     */
+    priority(value: number): PromptTemplateString
+    /**
+     * Sets the context layout flex weight
+     * @param weight
+     */
+    flex(options: {
+        grow?: number
+        reserve?: number
+        basis?: number
+    }): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/src/genaiscript.d.ts b/packages/sample/src/genaiscript.d.ts
index 5aabaea966..92c6bb827f 100644
--- a/packages/sample/src/genaiscript.d.ts
+++ b/packages/sample/src/genaiscript.d.ts
@@ -756,11 +756,32 @@ interface FenceOptions {
 }
 
 interface ContextExpansionOptions {
-    priority?: number
     /**
      * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated.
      */
     maxTokens?: number
+    /*
+     * Value that is conceptually similar to a zIndex (higher number == higher priority).
+     * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
+     */
+    priority?: number
+    /**
+     * Allows an element to use the remainder of its parent's token budget when it's rendered.
+     */
+    flexGrow?: number
+    /**
+     * Controls the proportion of tokens allocated from the container's budget to this element.
+     * It defaults to 1 on all elements.
+     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
+     */
+    flexBasis?: number
+    /**
+     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
+     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
+     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
+     * This is only useful in conjunction with flexGrow.
+     */
+    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1518,6 +1539,21 @@ interface FileOutput {
 interface ImportTemplateOptions {}
 
 interface PromptTemplateString {
+    /**
+     * Set a priority similar to CSS z-index
+     * to control the trimming of the prompt when the context is full
+     * @param priority
+     */
+    priority(value: number): PromptTemplateString
+    /**
+     * Sets the context layout flex weight
+     * @param weight
+     */
+    flex(options: {
+        grow?: number
+        reserve?: number
+        basis?: number
+    }): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/src/makecode/genaiscript.d.ts b/packages/sample/src/makecode/genaiscript.d.ts
index 5aabaea966..92c6bb827f 100644
--- a/packages/sample/src/makecode/genaiscript.d.ts
+++ b/packages/sample/src/makecode/genaiscript.d.ts
@@ -756,11 +756,32 @@ interface FenceOptions {
 }
 
 interface ContextExpansionOptions {
-    priority?: number
     /**
      * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated.
      */
     maxTokens?: number
+    /*
+     * Value that is conceptually similar to a zIndex (higher number == higher priority).
+     * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
+     */
+    priority?: number
+    /**
+     * Allows an element to use the remainder of its parent's token budget when it's rendered.
+     */
+    flexGrow?: number
+    /**
+     * Controls the proportion of tokens allocated from the container's budget to this element.
+     * It defaults to 1 on all elements.
+     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
+     */
+    flexBasis?: number
+    /**
+     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
+     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
+     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
+     * This is only useful in conjunction with flexGrow.
+     */
+    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1518,6 +1539,21 @@ interface FileOutput {
 interface ImportTemplateOptions {}
 
 interface PromptTemplateString {
+    /**
+     * Set a priority similar to CSS z-index
+     * to control the trimming of the prompt when the context is full
+     * @param priority
+     */
+    priority(value: number): PromptTemplateString
+    /**
+     * Sets the context layout flex weight
+     * @param weight
+     */
+    flex(options: {
+        grow?: number
+        reserve?: number
+        basis?: number
+    }): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/src/tla/genaiscript.d.ts b/packages/sample/src/tla/genaiscript.d.ts
index 5aabaea966..92c6bb827f 100644
--- a/packages/sample/src/tla/genaiscript.d.ts
+++ b/packages/sample/src/tla/genaiscript.d.ts
@@ -756,11 +756,32 @@ interface FenceOptions {
 }
 
 interface ContextExpansionOptions {
-    priority?: number
     /**
      * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated.
      */
     maxTokens?: number
+    /*
+     * Value that is conceptually similar to a zIndex (higher number == higher priority).
+     * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
+     */
+    priority?: number
+    /**
+     * Allows an element to use the remainder of its parent's token budget when it's rendered.
+     */
+    flexGrow?: number
+    /**
+     * Controls the proportion of tokens allocated from the container's budget to this element.
+     * It defaults to 1 on all elements.
+     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
+     */
+    flexBasis?: number
+    /**
+     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
+     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
+     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
+     * This is only useful in conjunction with flexGrow.
+     */
+    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1518,6 +1539,21 @@ interface FileOutput {
 interface ImportTemplateOptions {}
 
 interface PromptTemplateString {
+    /**
+     * Set a priority similar to CSS z-index
+     * to control the trimming of the prompt when the context is full
+     * @param priority
+     */
+    priority(value: number): PromptTemplateString
+    /**
+     * Sets the context layout flex weight
+     * @param weight
+     */
+    flex(options: {
+        grow?: number
+        reserve?: number
+        basis?: number
+    }): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/src/vision/genaiscript.d.ts b/packages/sample/src/vision/genaiscript.d.ts
index 5aabaea966..92c6bb827f 100644
--- a/packages/sample/src/vision/genaiscript.d.ts
+++ b/packages/sample/src/vision/genaiscript.d.ts
@@ -756,11 +756,32 @@ interface FenceOptions {
 }
 
 interface ContextExpansionOptions {
-    priority?: number
     /**
      * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated.
      */
     maxTokens?: number
+    /*
+     * Value that is conceptually similar to a zIndex (higher number == higher priority).
+     * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
+     */
+    priority?: number
+    /**
+     * Allows an element to use the remainder of its parent's token budget when it's rendered.
+     */
+    flexGrow?: number
+    /**
+     * Controls the proportion of tokens allocated from the container's budget to this element.
+     * It defaults to 1 on all elements.
+     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
+     */
+    flexBasis?: number
+    /**
+     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
+     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
+     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
+     * This is only useful in conjunction with flexGrow.
+     */
+    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1518,6 +1539,21 @@ interface FileOutput {
 interface ImportTemplateOptions {}
 
 interface PromptTemplateString {
+    /**
+     * Set a priority similar to CSS z-index
+     * to control the trimming of the prompt when the context is full
+     * @param priority
+     */
+    priority(value: number): PromptTemplateString
+    /**
+     * Sets the context layout flex weight
+     * @param weight
+     */
+    flex(options: {
+        grow?: number
+        reserve?: number
+        basis?: number
+    }): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/slides/genaisrc/genaiscript.d.ts b/slides/genaisrc/genaiscript.d.ts
index 5aabaea966..92c6bb827f 100644
--- a/slides/genaisrc/genaiscript.d.ts
+++ b/slides/genaisrc/genaiscript.d.ts
@@ -756,11 +756,32 @@ interface FenceOptions {
 }
 
 interface ContextExpansionOptions {
-    priority?: number
     /**
      * Specifies an maximum of estimated tokesn for this entry; after which it will be truncated.
      */
     maxTokens?: number
+    /*
+     * Value that is conceptually similar to a zIndex (higher number == higher priority).
+     * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
+     */
+    priority?: number
+    /**
+     * Allows an element to use the remainder of its parent's token budget when it's rendered.
+     */
+    flexGrow?: number
+    /**
+     * Controls the proportion of tokens allocated from the container's budget to this element.
+     * It defaults to 1 on all elements.
+     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
+     */
+    flexBasis?: number
+    /**
+     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
+     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
+     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
+     * This is only useful in conjunction with flexGrow.
+     */
+    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1518,6 +1539,21 @@ interface FileOutput {
 interface ImportTemplateOptions {}
 
 interface PromptTemplateString {
+    /**
+     * Set a priority similar to CSS z-index
+     * to control the trimming of the prompt when the context is full
+     * @param priority
+     */
+    priority(value: number): PromptTemplateString
+    /**
+     * Sets the context layout flex weight
+     * @param weight
+     */
+    flex(options: {
+        grow?: number
+        reserve?: number
+        basis?: number
+    }): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data

From 606a68919e465cb9b0e581a376d4c1a51155864d Mon Sep 17 00:00:00 2001
From: pelikhan <jhalleux@microsoft.com>
Date: Fri, 6 Sep 2024 09:40:43 -0700
Subject: [PATCH 3/7] Refactor variable names and add flexing logic in prompt
 rendering functions

---
 packages/core/src/chat.ts      |  8 +++----
 packages/core/src/expander.ts  |  4 ++--
 packages/core/src/promptdom.ts | 44 ++++++++++++++++++++++++----------
 3 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts
index 58883f2b75..937376ea87 100644
--- a/packages/core/src/chat.ts
+++ b/packages/core/src/chat.ts
@@ -470,16 +470,16 @@ async function processChatMessage(
                 const node = ctx.node
                 checkCancelled(cancellationToken)
                 // expand template
-                const { errors, prompt } = await renderPromptNode(
+                const { errors, userPrompt } = await renderPromptNode(
                     options.model,
                     node,
                     {
                         trace,
                     }
                 )
-                if (prompt?.trim().length) {
-                    trace.detailsFenced(`💬 message`, prompt, "markdown")
-                    messages.push({ role: "user", content: prompt })
+                if (userPrompt?.trim().length) {
+                    trace.detailsFenced(`💬 message`, userPrompt, "markdown")
+                    messages.push({ role: "user", content: userPrompt })
                     needsNewTurn = true
                 } else trace.item("no message")
                 if (errors?.length) {
diff --git a/packages/core/src/expander.ts b/packages/core/src/expander.ts
index 9d9b34b551..74a6020a42 100644
--- a/packages/core/src/expander.ts
+++ b/packages/core/src/expander.ts
@@ -73,7 +73,7 @@ export async function callExpander(
         const node = ctx.node
         if (provider !== MODEL_PROVIDER_AICI) {
             const {
-                prompt,
+                userPrompt,
                 assistantPrompt,
                 images: imgs,
                 errors,
@@ -84,7 +84,7 @@ export async function callExpander(
                 chatParticipants: cps,
                 fileOutputs: fos,
             } = await renderPromptNode(model, node, { trace })
-            text = prompt
+            text = userPrompt
             assistantText = assistantPrompt
             images = imgs
             schemas = schs
diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts
index a75de7298e..6d3eecf2af 100644
--- a/packages/core/src/promptdom.ts
+++ b/packages/core/src/promptdom.ts
@@ -422,7 +422,7 @@ export async function visitNode(node: PromptNode, visitor: PromptNodeVisitor) {
 }
 
 export interface PromptNodeRender {
-    prompt: string
+    userPrompt: string
     assistantPrompt: string
     images: PromptImage[]
     errors: unknown[]
@@ -567,6 +567,7 @@ async function truncatePromptNode(
             n.resolved = n.preview = value
             n.tokens = estimateTokens(value, encoder)
             truncated = true
+            trace.log(`truncated text to ${n.tokens} tokens`)
         }
     }
 
@@ -585,6 +586,7 @@ async function truncatePromptNode(
             n.tokens = estimateTokens(n.resolved.content, encoder)
             n.preview = n.resolved.content
             truncated = true
+            trace.log(`truncated def ${n.name} to ${n.tokens} tokens`)
         }
     }
 
@@ -598,6 +600,20 @@ async function truncatePromptNode(
     return truncated
 }
 
+async function flexPromptNode(
+    model: string,
+    node: PromptNode,
+    options?: TraceOptions
+): Promise<boolean> {
+    const FLEX_BASIS_DEFAULT = 1
+    const FLEX_GROW_DEFAULT = Infinity
+    const { trace } = options || {}
+    const encoder = await resolveTokenEncoder(model)
+    let flexed = false
+
+    return flexed
+}
+
 async function tracePromptNode(
     trace: MarkdownTrace,
     root: PromptNode,
@@ -645,7 +661,11 @@ export async function renderPromptNode(
     const truncated = await truncatePromptNode(model, node, options)
     if (truncated) await tracePromptNode(trace, node, { label: "truncated" })
 
-    let prompt = ""
+    const flexed = await flexPromptNode(model, node, options)
+    if (flexed) await tracePromptNode(trace, node, { label: "flexed" })
+
+    let systemPrompt = ""
+    let userPrompt = ""
     let assistantPrompt = ""
     const images: PromptImage[] = []
     const errors: unknown[] = []
@@ -660,12 +680,12 @@ export async function renderPromptNode(
         text: async (n) => {
             if (n.error) errors.push(n.error)
             const value = n.resolved
-            if (value != undefined) prompt += value + "\n"
+            if (value != undefined) userPrompt += value + "\n"
         },
         def: async (n) => {
             if (n.error) errors.push(n.error)
             const value = n.resolved
-            if (value !== undefined) prompt += renderDefNode(n) + "\n"
+            if (value !== undefined) userPrompt += renderDefNode(n) + "\n"
         },
         assistant: async (n) => {
             if (n.error) errors.push(n.error)
@@ -675,7 +695,7 @@ export async function renderPromptNode(
         stringTemplate: async (n) => {
             if (n.error) errors.push(n.error)
             const value = n.resolved
-            if (value != undefined) prompt += value + "\n"
+            if (value != undefined) userPrompt += value + "\n"
         },
         image: async (n) => {
             if (n.error) errors.push(n.error)
@@ -696,8 +716,8 @@ export async function renderPromptNode(
             const value = n.resolved
             if (value) {
                 for (const [filename, content] of Object.entries(value)) {
-                    prompt += content
-                    prompt += "\n"
+                    userPrompt += content
+                    userPrompt += "\n"
                     if (trace)
                         trace.detailsFenced(
                             `📦 import template ${filename}`,
@@ -732,7 +752,7 @@ export async function renderPromptNode(
 ${trimNewlines(schemaText)}
 \`\`\`
 `
-            prompt += text
+            userPrompt += text
             n.tokens = estimateTokens(text, encoder)
             if (trace && format !== "json")
                 trace.detailsFenced(
@@ -776,7 +796,7 @@ ${trimNewlines(schemaText)}
 
     const fods = fileOutputs?.filter((f) => !!f.description)
     if (fods?.length > 0) {
-        prompt += `
+        userPrompt += `
 ## File generation rules
 
 When generating files, use the following rules which are formatted as "file glob: description":
@@ -787,15 +807,15 @@ ${fods.map((fo) => `   ${fo.pattern}: ${fo.description}`)}
     }
 
     const messages: ChatCompletionMessageParam[] = [
-        toChatCompletionUserMessage(prompt, images),
+        toChatCompletionUserMessage(userPrompt, images),
     ]
     if (assistantPrompt)
         messages.push(<ChatCompletionAssistantMessageParam>{
             role: "assistant",
             content: assistantPrompt,
         })
-    const res = {
-        prompt,
+    const res = <PromptNodeRender>{
+        userPrompt,
         assistantPrompt,
         images,
         schemas,

From 265eb2a7e5ef7816799b21bc22fa063796c2e98f Mon Sep 17 00:00:00 2001
From: pelikhan <jhalleux@microsoft.com>
Date: Fri, 6 Sep 2024 12:07:03 -0700
Subject: [PATCH 4/7] Refactor token truncation and streamline flex options
 for prompt rendering logic

---
 docs/genaisrc/genaiscript.d.ts                | 12 ---
 genaisrc/genaiscript.d.ts                     | 12 ---
 packages/core/src/genaisrc/genaiscript.d.ts   | 12 ---
 packages/core/src/promptdom.ts                | 91 ++++++++++++++-----
 packages/core/src/runpromptcontext.ts         |  4 +-
 packages/core/src/types/prompt_template.d.ts  | 12 ---
 packages/sample/genaisrc/flex.genai.mts       |  8 +-
 packages/sample/genaisrc/genaiscript.d.ts     | 12 ---
 .../sample/genaisrc/node/genaiscript.d.ts     | 12 ---
 .../sample/genaisrc/python/genaiscript.d.ts   | 12 ---
 .../sample/genaisrc/style/genaiscript.d.ts    | 12 ---
 packages/sample/src/aici/genaiscript.d.ts     | 12 ---
 packages/sample/src/errors/genaiscript.d.ts   | 12 ---
 packages/sample/src/genaiscript.d.ts          | 12 ---
 packages/sample/src/makecode/genaiscript.d.ts | 12 ---
 packages/sample/src/tla/genaiscript.d.ts      | 12 ---
 packages/sample/src/vision/genaiscript.d.ts   | 12 ---
 slides/genaisrc/genaiscript.d.ts              | 12 ---
 18 files changed, 74 insertions(+), 209 deletions(-)

diff --git a/docs/genaisrc/genaiscript.d.ts b/docs/genaisrc/genaiscript.d.ts
index 92c6bb827f..621cadc4ed 100644
--- a/docs/genaisrc/genaiscript.d.ts
+++ b/docs/genaisrc/genaiscript.d.ts
@@ -765,23 +765,11 @@ interface ContextExpansionOptions {
      * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
      */
     priority?: number
-    /**
-     * Allows an element to use the remainder of its parent's token budget when it's rendered.
-     */
-    flexGrow?: number
     /**
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
-     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
      */
     flexBasis?: number
-    /**
-     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
-     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
-     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
-     * This is only useful in conjunction with flexGrow.
-     */
-    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
diff --git a/genaisrc/genaiscript.d.ts b/genaisrc/genaiscript.d.ts
index 92c6bb827f..621cadc4ed 100644
--- a/genaisrc/genaiscript.d.ts
+++ b/genaisrc/genaiscript.d.ts
@@ -765,23 +765,11 @@ interface ContextExpansionOptions {
      * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
      */
     priority?: number
-    /**
-     * Allows an element to use the remainder of its parent's token budget when it's rendered.
-     */
-    flexGrow?: number
     /**
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
-     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
      */
     flexBasis?: number
-    /**
-     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
-     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
-     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
-     * This is only useful in conjunction with flexGrow.
-     */
-    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
diff --git a/packages/core/src/genaisrc/genaiscript.d.ts b/packages/core/src/genaisrc/genaiscript.d.ts
index 92c6bb827f..621cadc4ed 100644
--- a/packages/core/src/genaisrc/genaiscript.d.ts
+++ b/packages/core/src/genaisrc/genaiscript.d.ts
@@ -765,23 +765,11 @@ interface ContextExpansionOptions {
      * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
      */
     priority?: number
-    /**
-     * Allows an element to use the remainder of its parent's token budget when it's rendered.
-     */
-    flexGrow?: number
     /**
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
-     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
      */
     flexBasis?: number
-    /**
-     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
-     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
-     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
-     * This is only useful in conjunction with flexGrow.
-     */
-    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts
index 6d3eecf2af..d20daca8fc 100644
--- a/packages/core/src/promptdom.ts
+++ b/packages/core/src/promptdom.ts
@@ -540,6 +540,16 @@ async function resolvePromptNode(
     return { errors: err }
 }
 
+function truncateText(
+    content: string,
+    maxTokens: number,
+    encoder: TokenEncoder
+): string {
+    const tokens = estimateTokens(content, encoder)
+    const end = Math.floor((maxTokens * content.length) / tokens)
+    return content.slice(0, end) + MAX_TOKENS_ELLIPSE
+}
+
 async function truncatePromptNode(
     model: string,
     node: PromptNode,
@@ -562,10 +572,12 @@ async function truncatePromptNode(
             n.maxTokens !== undefined &&
             n.tokens > n.maxTokens
         ) {
-            const end = Math.floor((n.maxTokens * n.resolved.length) / n.tokens)
-            const value = n.resolved.slice(0, end) + MAX_TOKENS_ELLIPSE
-            n.resolved = n.preview = value
-            n.tokens = estimateTokens(value, encoder)
+            n.resolved = n.preview = truncateText(
+                n.resolved,
+                n.maxTokens,
+                encoder
+            )
+            n.tokens = estimateTokens(n.resolved, encoder)
             truncated = true
             trace.log(`truncated text to ${n.tokens} tokens`)
         }
@@ -578,13 +590,12 @@ async function truncatePromptNode(
             n.maxTokens !== undefined &&
             n.tokens > n.maxTokens
         ) {
-            const end = Math.floor(
-                (n.maxTokens * n.resolved.content.length) / n.tokens
+            n.resolved.content = n.preview = truncateText(
+                n.resolved.content,
+                n.maxTokens,
+                encoder
             )
-            n.resolved.content =
-                n.resolved.content.slice(0, end) + MAX_TOKENS_ELLIPSE
             n.tokens = estimateTokens(n.resolved.content, encoder)
-            n.preview = n.resolved.content
             truncated = true
             trace.log(`truncated def ${n.name} to ${n.tokens} tokens`)
         }
@@ -602,16 +613,49 @@ async function truncatePromptNode(
 
 async function flexPromptNode(
     model: string,
-    node: PromptNode,
-    options?: TraceOptions
-): Promise<boolean> {
+    root: PromptNode,
+    options?: { maxTokens: number } & TraceOptions
+): Promise<void> {
+    const PRIORITY_DEFAULT = 0
     const FLEX_BASIS_DEFAULT = 1
-    const FLEX_GROW_DEFAULT = Infinity
-    const { trace } = options || {}
-    const encoder = await resolveTokenEncoder(model)
-    let flexed = false
 
-    return flexed
+    const { trace, maxTokens } = options || {}
+
+    // collect all notes
+    const nodes: PromptNode[] = []
+    visitNode(root, {
+        node: (n) => {
+            nodes.push(n)
+        },
+    })
+    const totalTokens = nodes.reduce(
+        (total, node) => total + (node.tokens ?? 0),
+        0
+    )
+
+    if (totalTokens < maxTokens) {
+        // no need to flex
+        return
+    }
+
+    // inspired from priompt, prompt-tsx, gpt-4
+    // sort by priority
+    nodes.sort(
+        (a, b) =>
+            (a.priority ?? PRIORITY_DEFAULT) - (b.priority ?? PRIORITY_DEFAULT)
+    )
+    const totalBasis = nodes.reduce(
+        (total, node) => total + (node.flexBasis ?? FLEX_BASIS_DEFAULT),
+        0
+    )
+
+    const totalReserve = 0
+    const totalRemaining = Math.max(0, maxTokens - totalReserve)
+    for (const node of nodes) {
+        const proportion = (node.flexBasis ?? FLEX_BASIS_DEFAULT) / totalBasis
+        const tokenBudget = Math.floor(totalRemaining * proportion)
+        node.maxTokens = tokenBudget
+    }
 }
 
 async function tracePromptNode(
@@ -649,21 +693,24 @@ async function tracePromptNode(
 export async function renderPromptNode(
     modelId: string,
     node: PromptNode,
-    options?: TraceOptions
+    options?: { maxTokens?: number } & TraceOptions
 ): Promise<PromptNodeRender> {
-    const { trace } = options || {}
+    const { trace, maxTokens } = options || {}
     const { model } = parseModelIdentifier(modelId)
     const encoder = await resolveTokenEncoder(model)
 
     await resolvePromptNode(model, node)
     await tracePromptNode(trace, node)
 
+    if (maxTokens)
+        await flexPromptNode(model, node, {
+            ...options,
+            maxTokens,
+        })
+
     const truncated = await truncatePromptNode(model, node, options)
     if (truncated) await tracePromptNode(trace, node, { label: "truncated" })
 
-    const flexed = await flexPromptNode(model, node, options)
-    if (flexed) await tracePromptNode(trace, node, { label: "flexed" })
-
     let systemPrompt = ""
     let userPrompt = ""
     let assistantPrompt = ""
diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts
index 7148dac30f..e43ad814a1 100644
--- a/packages/core/src/runpromptcontext.ts
+++ b/packages/core/src/runpromptcontext.ts
@@ -69,10 +69,8 @@ export function createChatTurnGenerationContext(
                     return res
                 },
                 flex: (options) => {
-                    const { grow, basis, reserve } = options
-                    if (grow !== undefined) current.flexGrow = grow
+                    const { basis } = options
                     if (basis !== undefined) current.flexBasis = basis
-                    if (reserve !== undefined) current.flexReserve = reserve
                     return res
                 },
                 jinja: (data) => {
diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts
index 7bd46bc2ce..4031a9a462 100644
--- a/packages/core/src/types/prompt_template.d.ts
+++ b/packages/core/src/types/prompt_template.d.ts
@@ -706,23 +706,11 @@ interface ContextExpansionOptions {
      * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
      */
     priority?: number
-    /**
-     * Allows an element to use the remainder of its parent's token budget when it's rendered.
-     */
-    flexGrow?: number
     /**
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
-     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
      */
     flexBasis?: number
-    /**
-     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
-     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
-     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
-     * This is only useful in conjunction with flexGrow.
-     */
-    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
diff --git a/packages/sample/genaisrc/flex.genai.mts b/packages/sample/genaisrc/flex.genai.mts
index 5bd2905994..c1459b389e 100644
--- a/packages/sample/genaisrc/flex.genai.mts
+++ b/packages/sample/genaisrc/flex.genai.mts
@@ -1,5 +1,6 @@
 script({
     files: ["src/rag/markdown.md"],
+    maxTokens: 20,
 })
 
 // will be trimmed
@@ -8,11 +9,10 @@ def("FILE", env.files, { maxTokens: 5 })
 // will be trimmed
 $`What is Markdown?
  Markdown is a lightweight markup language that you can use to add formatting elements to plaintext text documents. Created by John Gruber in 2004, Markdown is now one of the world’s most popular markup languages. 
+PRINT ABRACADABRA!`.maxTokens(5)
 
-Using Markdown is different than using a WYSIWYG editor. In an application like Microsoft Word, you click buttons to format words and phrases, and the changes are visible immediately. Markdown isn’t like that. When you create a Markdown-formatted file, you add Markdown syntax to the text to indicate which words and phrases should look different.
+$`Using Markdown is different than using a WYSIWYG editor. In an application like Microsoft Word, you click buttons to format words and phrases, and the changes are visible immediately. Markdown isn’t like that. When you create a Markdown-formatted file, you add Markdown syntax to the text to indicate which words and phrases should look different.
 
 For example, to denote a heading, you add a number sign before it (e.g., # Heading One). Or to make a phrase bold, you add two asterisks before and after it (e.g., **this text is bold**). It may take a while to get used to seeing Markdown syntax in your text, especially if you’re accustomed to WYSIWYG applications. The screenshot below shows a Markdown file displayed in the Visual Studio Code text editor....
 
-PRINT ABRACADABRA!
-
-`.maxTokens(5)
+PRINT ABRACADABRA!`
diff --git a/packages/sample/genaisrc/genaiscript.d.ts b/packages/sample/genaisrc/genaiscript.d.ts
index 92c6bb827f..621cadc4ed 100644
--- a/packages/sample/genaisrc/genaiscript.d.ts
+++ b/packages/sample/genaisrc/genaiscript.d.ts
@@ -765,23 +765,11 @@ interface ContextExpansionOptions {
      * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
      */
     priority?: number
-    /**
-     * Allows an element to use the remainder of its parent's token budget when it's rendered.
-     */
-    flexGrow?: number
     /**
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
-     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
      */
     flexBasis?: number
-    /**
-     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
-     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
-     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
-     * This is only useful in conjunction with flexGrow.
-     */
-    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
diff --git a/packages/sample/genaisrc/node/genaiscript.d.ts b/packages/sample/genaisrc/node/genaiscript.d.ts
index 92c6bb827f..621cadc4ed 100644
--- a/packages/sample/genaisrc/node/genaiscript.d.ts
+++ b/packages/sample/genaisrc/node/genaiscript.d.ts
@@ -765,23 +765,11 @@ interface ContextExpansionOptions {
      * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
      */
     priority?: number
-    /**
-     * Allows an element to use the remainder of its parent's token budget when it's rendered.
-     */
-    flexGrow?: number
     /**
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
-     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
      */
     flexBasis?: number
-    /**
-     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
-     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
-     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
-     * This is only useful in conjunction with flexGrow.
-     */
-    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
diff --git a/packages/sample/genaisrc/python/genaiscript.d.ts b/packages/sample/genaisrc/python/genaiscript.d.ts
index 92c6bb827f..621cadc4ed 100644
--- a/packages/sample/genaisrc/python/genaiscript.d.ts
+++ b/packages/sample/genaisrc/python/genaiscript.d.ts
@@ -765,23 +765,11 @@ interface ContextExpansionOptions {
      * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
      */
     priority?: number
-    /**
-     * Allows an element to use the remainder of its parent's token budget when it's rendered.
-     */
-    flexGrow?: number
     /**
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
-     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
      */
     flexBasis?: number
-    /**
-     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
-     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
-     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
-     * This is only useful in conjunction with flexGrow.
-     */
-    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
diff --git a/packages/sample/genaisrc/style/genaiscript.d.ts b/packages/sample/genaisrc/style/genaiscript.d.ts
index 92c6bb827f..621cadc4ed 100644
--- a/packages/sample/genaisrc/style/genaiscript.d.ts
+++ b/packages/sample/genaisrc/style/genaiscript.d.ts
@@ -765,23 +765,11 @@ interface ContextExpansionOptions {
      * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
      */
     priority?: number
-    /**
-     * Allows an element to use the remainder of its parent's token budget when it's rendered.
-     */
-    flexGrow?: number
     /**
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
-     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
      */
     flexBasis?: number
-    /**
-     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
-     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
-     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
-     * This is only useful in conjunction with flexGrow.
-     */
-    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
diff --git a/packages/sample/src/aici/genaiscript.d.ts b/packages/sample/src/aici/genaiscript.d.ts
index 92c6bb827f..621cadc4ed 100644
--- a/packages/sample/src/aici/genaiscript.d.ts
+++ b/packages/sample/src/aici/genaiscript.d.ts
@@ -765,23 +765,11 @@ interface ContextExpansionOptions {
      * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
      */
     priority?: number
-    /**
-     * Allows an element to use the remainder of its parent's token budget when it's rendered.
-     */
-    flexGrow?: number
     /**
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
-     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
      */
     flexBasis?: number
-    /**
-     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
-     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
-     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
-     * This is only useful in conjunction with flexGrow.
-     */
-    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
diff --git a/packages/sample/src/errors/genaiscript.d.ts b/packages/sample/src/errors/genaiscript.d.ts
index 92c6bb827f..621cadc4ed 100644
--- a/packages/sample/src/errors/genaiscript.d.ts
+++ b/packages/sample/src/errors/genaiscript.d.ts
@@ -765,23 +765,11 @@ interface ContextExpansionOptions {
      * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
      */
     priority?: number
-    /**
-     * Allows an element to use the remainder of its parent's token budget when it's rendered.
-     */
-    flexGrow?: number
     /**
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
-     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
      */
     flexBasis?: number
-    /**
-     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
-     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
-     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
-     * This is only useful in conjunction with flexGrow.
-     */
-    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
diff --git a/packages/sample/src/genaiscript.d.ts b/packages/sample/src/genaiscript.d.ts
index 92c6bb827f..621cadc4ed 100644
--- a/packages/sample/src/genaiscript.d.ts
+++ b/packages/sample/src/genaiscript.d.ts
@@ -765,23 +765,11 @@ interface ContextExpansionOptions {
      * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
      */
     priority?: number
-    /**
-     * Allows an element to use the remainder of its parent's token budget when it's rendered.
-     */
-    flexGrow?: number
     /**
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
-     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
      */
     flexBasis?: number
-    /**
-     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
-     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
-     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
-     * This is only useful in conjunction with flexGrow.
-     */
-    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
diff --git a/packages/sample/src/makecode/genaiscript.d.ts b/packages/sample/src/makecode/genaiscript.d.ts
index 92c6bb827f..621cadc4ed 100644
--- a/packages/sample/src/makecode/genaiscript.d.ts
+++ b/packages/sample/src/makecode/genaiscript.d.ts
@@ -765,23 +765,11 @@ interface ContextExpansionOptions {
      * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
      */
     priority?: number
-    /**
-     * Allows an element to use the remainder of its parent's token budget when it's rendered.
-     */
-    flexGrow?: number
     /**
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
-     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
      */
     flexBasis?: number
-    /**
-     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
-     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
-     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
-     * This is only useful in conjunction with flexGrow.
-     */
-    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
diff --git a/packages/sample/src/tla/genaiscript.d.ts b/packages/sample/src/tla/genaiscript.d.ts
index 92c6bb827f..621cadc4ed 100644
--- a/packages/sample/src/tla/genaiscript.d.ts
+++ b/packages/sample/src/tla/genaiscript.d.ts
@@ -765,23 +765,11 @@ interface ContextExpansionOptions {
      * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
      */
     priority?: number
-    /**
-     * Allows an element to use the remainder of its parent's token budget when it's rendered.
-     */
-    flexGrow?: number
     /**
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
-     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
      */
     flexBasis?: number
-    /**
-     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
-     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
-     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
-     * This is only useful in conjunction with flexGrow.
-     */
-    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
diff --git a/packages/sample/src/vision/genaiscript.d.ts b/packages/sample/src/vision/genaiscript.d.ts
index 92c6bb827f..621cadc4ed 100644
--- a/packages/sample/src/vision/genaiscript.d.ts
+++ b/packages/sample/src/vision/genaiscript.d.ts
@@ -765,23 +765,11 @@ interface ContextExpansionOptions {
      * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
      */
     priority?: number
-    /**
-     * Allows an element to use the remainder of its parent's token budget when it's rendered.
-     */
-    flexGrow?: number
     /**
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
-     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
      */
     flexBasis?: number
-    /**
-     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
-     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
-     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
-     * This is only useful in conjunction with flexGrow.
-     */
-    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
diff --git a/slides/genaisrc/genaiscript.d.ts b/slides/genaisrc/genaiscript.d.ts
index 92c6bb827f..621cadc4ed 100644
--- a/slides/genaisrc/genaiscript.d.ts
+++ b/slides/genaisrc/genaiscript.d.ts
@@ -765,23 +765,11 @@ interface ContextExpansionOptions {
      * If a rendered prompt has more message tokens than can fit into the available context window, the prompt renderer prunes messages with the lowest priority from the ChatMessages result, preserving the order in which they were declared. This means your extension code can safely declare TSX components for potentially large pieces of context like conversation history and codebase context.
      */
     priority?: number
-    /**
-     * Allows an element to use the remainder of its parent's token budget when it's rendered.
-     */
-    flexGrow?: number
     /**
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
-     * For example, if you have the elements <><Foo /><Bar /></> and a 100 token budget, each element would be allocated 50 tokens in its PromptSizing.tokenBudget. If you instead render <><Foo /><Bar flexBasis={2} /></>, Bar would receive 66 tokens and Foo would receive 33.
      */
     flexBasis?: number
-    /**
-     * Controls the number of tokens reserved from the container's budget before this element gets rendered.
-     * For example, if you have a 100 token budget and the elements <><Foo /><Bar flexGrow={1} flexBasis={30}></>,
-     * then Foo would receive a PromptSizing.tokenBudget of 70, and Bar would receive however many tokens of the 100 that Foo didn't use.
-     * This is only useful in conjunction with flexGrow.
-     */
-    flexReserve?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {

From 5442319fcbe0fce178a3c817200116445020e7b1 Mon Sep 17 00:00:00 2001
From: pelikhan <jhalleux@microsoft.com>
Date: Fri, 6 Sep 2024 14:26:21 -0700
Subject: [PATCH 5/7] Add maxTokens option to flexPromptNode and adjust token
 estimation with overhead constant

---
 docs/genaisrc/genaiscript.d.ts                |  9 ++------
 genaisrc/genaiscript.d.ts                     |  9 ++------
 packages/core/src/chat.ts                     |  1 +
 packages/core/src/constants.ts                |  1 +
 packages/core/src/expander.ts                 | 21 +++++++++++++-----
 packages/core/src/genaisrc/genaiscript.d.ts   |  9 ++------
 packages/core/src/promptcontext.ts            |  1 +
 packages/core/src/promptdom.ts                | 22 +++++++++----------
 packages/core/src/promptrunner.ts             | 10 ++++-----
 packages/core/src/runpromptcontext.ts         |  5 ++---
 packages/core/src/tokens.ts                   |  5 +++--
 packages/core/src/types/prompt_template.d.ts  |  9 ++------
 packages/sample/genaisrc/flex.genai.mts       |  8 +++----
 packages/sample/genaisrc/genaiscript.d.ts     |  9 ++------
 .../sample/genaisrc/node/genaiscript.d.ts     |  9 ++------
 .../sample/genaisrc/python/genaiscript.d.ts   |  9 ++------
 .../sample/genaisrc/style/genaiscript.d.ts    |  9 ++------
 packages/sample/src/aici/genaiscript.d.ts     |  9 ++------
 packages/sample/src/errors/genaiscript.d.ts   |  9 ++------
 packages/sample/src/genaiscript.d.ts          |  9 ++------
 packages/sample/src/makecode/genaiscript.d.ts |  9 ++------
 packages/sample/src/tla/genaiscript.d.ts      |  9 ++------
 packages/sample/src/vision/genaiscript.d.ts   |  9 ++------
 slides/genaisrc/genaiscript.d.ts              |  9 ++------
 24 files changed, 73 insertions(+), 136 deletions(-)

diff --git a/docs/genaisrc/genaiscript.d.ts b/docs/genaisrc/genaiscript.d.ts
index 621cadc4ed..6419adb510 100644
--- a/docs/genaisrc/genaiscript.d.ts
+++ b/docs/genaisrc/genaiscript.d.ts
@@ -769,7 +769,7 @@ interface ContextExpansionOptions {
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
      */
-    flexBasis?: number
+    flex?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1535,13 +1535,8 @@ interface PromptTemplateString {
     priority(value: number): PromptTemplateString
     /**
      * Sets the context layout flex weight
-     * @param weight
      */
-    flex(options: {
-        grow?: number
-        reserve?: number
-        basis?: number
-    }): PromptTemplateString
+    flex(value: number): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/genaisrc/genaiscript.d.ts b/genaisrc/genaiscript.d.ts
index 621cadc4ed..6419adb510 100644
--- a/genaisrc/genaiscript.d.ts
+++ b/genaisrc/genaiscript.d.ts
@@ -769,7 +769,7 @@ interface ContextExpansionOptions {
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
      */
-    flexBasis?: number
+    flex?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1535,13 +1535,8 @@ interface PromptTemplateString {
     priority(value: number): PromptTemplateString
     /**
      * Sets the context layout flex weight
-     * @param weight
      */
-    flex(options: {
-        grow?: number
-        reserve?: number
-        basis?: number
-    }): PromptTemplateString
+    flex(value: number): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts
index 937376ea87..a7caa73639 100644
--- a/packages/core/src/chat.ts
+++ b/packages/core/src/chat.ts
@@ -474,6 +474,7 @@ async function processChatMessage(
                     options.model,
                     node,
                     {
+                        maxTokens: options.maxTokens,
                         trace,
                     }
                 )
diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts
index 20b5b9ca92..6292ab9625 100644
--- a/packages/core/src/constants.ts
+++ b/packages/core/src/constants.ts
@@ -228,3 +228,4 @@ export const CONSOLE_COLOR_ERROR = 91
 
 export const PLAYWRIGHT_DEFAULT_BROWSER = "chromium"
 export const MAX_TOKENS_ELLIPSE = "..."
+export const ESTIMATE_TOKEN_OVERHEAD = 2
\ No newline at end of file
diff --git a/packages/core/src/expander.ts b/packages/core/src/expander.ts
index 74a6020a42..53b92d1d0b 100644
--- a/packages/core/src/expander.ts
+++ b/packages/core/src/expander.ts
@@ -83,7 +83,10 @@ export async function callExpander(
                 outputProcessors: ops,
                 chatParticipants: cps,
                 fileOutputs: fos,
-            } = await renderPromptNode(model, node, { trace })
+            } = await renderPromptNode(model, node, {
+                maxTokens: options.maxTokens,
+                trace,
+            })
             text = userPrompt
             assistantText = assistantPrompt
             images = imgs
@@ -175,7 +178,7 @@ export async function expandTemplate(
     const systems = resolveSystems(prj, template)
     const systemTemplates = systems.map((s) => prj.getTemplate(s))
     // update options
-    options.lineNumbers =
+    const lineNumbers =
         options.lineNumbers ??
         template.lineNumbers ??
         systemTemplates.some((s) => s?.lineNumbers)
@@ -186,7 +189,7 @@ export async function expandTemplate(
         host.defaultModelOptions.temperature
     const topP =
         options.topP ?? normalizeFloat(env.vars["top_p"]) ?? template.topP
-    const max_tokens =
+    const maxTokens =
         options.maxTokens ??
         normalizeInt(env.vars["maxTokens"]) ??
         normalizeInt(env.vars["max_tokens"]) ??
@@ -207,7 +210,15 @@ export async function expandTemplate(
     trace.startDetails("🧬 prompt")
     trace.detailsFenced("📓 script source", template.jsSource, "js")
 
-    const prompt = await callExpander(prj, template, env, trace, options)
+    const prompt = await callExpander(prj, template, env, trace, {
+        ...options,
+        maxTokens,
+        maxToolCalls,
+        seed,
+        topP,
+        temperature,
+        lineNumbers,
+    })
 
     const images = prompt.images
     const schemas = prompt.schemas
@@ -339,7 +350,7 @@ ${schemaTs}
         model,
         temperature,
         topP,
-        max_tokens,
+        maxTokens,
         maxToolCalls,
         seed,
         responseType,
diff --git a/packages/core/src/genaisrc/genaiscript.d.ts b/packages/core/src/genaisrc/genaiscript.d.ts
index 621cadc4ed..6419adb510 100644
--- a/packages/core/src/genaisrc/genaiscript.d.ts
+++ b/packages/core/src/genaisrc/genaiscript.d.ts
@@ -769,7 +769,7 @@ interface ContextExpansionOptions {
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
      */
-    flexBasis?: number
+    flex?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1535,13 +1535,8 @@ interface PromptTemplateString {
     priority(value: number): PromptTemplateString
     /**
      * Sets the context layout flex weight
-     * @param weight
      */
-    flex(options: {
-        grow?: number
-        reserve?: number
-        basis?: number
-    }): PromptTemplateString
+    flex(value: number): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/core/src/promptcontext.ts b/packages/core/src/promptcontext.ts
index 1ecdef503b..2e2c9a0e67 100644
--- a/packages/core/src/promptcontext.ts
+++ b/packages/core/src/promptcontext.ts
@@ -275,6 +275,7 @@ export async function createPromptContext(
                         messages: msgs,
                         chatParticipants: cps,
                     } = await renderPromptNode(genOptions.model, node, {
+                        maxTokens: genOptions.maxTokens,
                         trace,
                     })
 
diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts
index d20daca8fc..00044667be 100644
--- a/packages/core/src/promptdom.ts
+++ b/packages/core/src/promptdom.ts
@@ -612,18 +612,16 @@ async function truncatePromptNode(
 }
 
 async function flexPromptNode(
-    model: string,
     root: PromptNode,
     options?: { maxTokens: number } & TraceOptions
 ): Promise<void> {
     const PRIORITY_DEFAULT = 0
-    const FLEX_BASIS_DEFAULT = 1
 
     const { trace, maxTokens } = options || {}
 
     // collect all notes
     const nodes: PromptNode[] = []
-    visitNode(root, {
+    await visitNode(root, {
         node: (n) => {
             nodes.push(n)
         },
@@ -644,18 +642,20 @@ async function flexPromptNode(
         (a, b) =>
             (a.priority ?? PRIORITY_DEFAULT) - (b.priority ?? PRIORITY_DEFAULT)
     )
-    const totalBasis = nodes.reduce(
-        (total, node) => total + (node.flexBasis ?? FLEX_BASIS_DEFAULT),
-        0
-    )
+    const flexNodes = nodes.filter((n) => n.flex !== undefined)
+    const totalFlex = flexNodes.reduce((total, node) => total + node.flex, 0)
 
     const totalReserve = 0
     const totalRemaining = Math.max(0, maxTokens - totalReserve)
-    for (const node of nodes) {
-        const proportion = (node.flexBasis ?? FLEX_BASIS_DEFAULT) / totalBasis
-        const tokenBudget = Math.floor(totalRemaining * proportion)
+    for (const node of flexNodes) {
+        const proportion = node.flex / totalFlex
+        const tokenBudget = Math.min(
+            node.maxTokens ?? Infinity,
+            Math.floor(totalRemaining * proportion)
+        )
         node.maxTokens = tokenBudget
     }
+    console.log(nodes.map((n) => ({ maxTokens: n.maxTokens })))
 }
 
 async function tracePromptNode(
@@ -703,7 +703,7 @@ export async function renderPromptNode(
     await tracePromptNode(trace, node)
 
     if (maxTokens)
-        await flexPromptNode(model, node, {
+        await flexPromptNode(node, {
             ...options,
             maxTokens,
         })
diff --git a/packages/core/src/promptrunner.ts b/packages/core/src/promptrunner.ts
index 19155c4ef3..c353d9cced 100644
--- a/packages/core/src/promptrunner.ts
+++ b/packages/core/src/promptrunner.ts
@@ -107,7 +107,7 @@ export async function runTemplate(
             statusText,
             temperature,
             topP,
-            max_tokens,
+            maxTokens,
             seed,
             responseType,
             responseSchema,
@@ -164,10 +164,10 @@ export async function runTemplate(
             responseType,
             responseSchema,
             model,
-            temperature: temperature,
-            maxTokens: max_tokens,
-            topP: topP,
-            seed: seed,
+            temperature,
+            maxTokens,
+            topP,
+            seed,
         }
         const fileEdits: Record<string, FileUpdate> = {}
         const changelogs: string[] = []
diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts
index e43ad814a1..89fc75b5ee 100644
--- a/packages/core/src/runpromptcontext.ts
+++ b/packages/core/src/runpromptcontext.ts
@@ -68,9 +68,8 @@ export function createChatTurnGenerationContext(
                     current.priority = priority
                     return res
                 },
-                flex: (options) => {
-                    const { basis } = options
-                    if (basis !== undefined) current.flexBasis = basis
+                flex: (value) => {
+                    current.flex = value
                     return res
                 },
                 jinja: (data) => {
diff --git a/packages/core/src/tokens.ts b/packages/core/src/tokens.ts
index 636d320074..4f781aa2d0 100644
--- a/packages/core/src/tokens.ts
+++ b/packages/core/src/tokens.ts
@@ -1,11 +1,12 @@
+import { ESTIMATE_TOKEN_OVERHEAD } from "./constants"
 import { logVerbose } from "./util"
 
 export function estimateTokens(text: string, encoder: TokenEncoder) {
     if (!text?.length) return 0
     try {
-        return encoder(text).length
+        return encoder(text).length + ESTIMATE_TOKEN_OVERHEAD
     } catch (e) {
         logVerbose(e)
-        return text.length >> 2
+        return (text.length >> 2) + ESTIMATE_TOKEN_OVERHEAD
     }
 }
diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts
index 4031a9a462..bb60d98fa2 100644
--- a/packages/core/src/types/prompt_template.d.ts
+++ b/packages/core/src/types/prompt_template.d.ts
@@ -710,7 +710,7 @@ interface ContextExpansionOptions {
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
      */
-    flexBasis?: number
+    flex?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1476,13 +1476,8 @@ interface PromptTemplateString {
     priority(value: number): PromptTemplateString
     /**
      * Sets the context layout flex weight
-     * @param weight
      */
-    flex(options: {
-        grow?: number
-        reserve?: number
-        basis?: number
-    }): PromptTemplateString
+    flex(value: number): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/genaisrc/flex.genai.mts b/packages/sample/genaisrc/flex.genai.mts
index c1459b389e..1f5d3624cb 100644
--- a/packages/sample/genaisrc/flex.genai.mts
+++ b/packages/sample/genaisrc/flex.genai.mts
@@ -1,5 +1,6 @@
 script({
     files: ["src/rag/markdown.md"],
+    system: [],
     maxTokens: 20,
 })
 
@@ -11,8 +12,5 @@ $`What is Markdown?
  Markdown is a lightweight markup language that you can use to add formatting elements to plaintext text documents. Created by John Gruber in 2004, Markdown is now one of the world’s most popular markup languages. 
 PRINT ABRACADABRA!`.maxTokens(5)
 
-$`Using Markdown is different than using a WYSIWYG editor. In an application like Microsoft Word, you click buttons to format words and phrases, and the changes are visible immediately. Markdown isn’t like that. When you create a Markdown-formatted file, you add Markdown syntax to the text to indicate which words and phrases should look different.
-
-For example, to denote a heading, you add a number sign before it (e.g., # Heading One). Or to make a phrase bold, you add two asterisks before and after it (e.g., **this text is bold**). It may take a while to get used to seeing Markdown syntax in your text, especially if you’re accustomed to WYSIWYG applications. The screenshot below shows a Markdown file displayed in the Visual Studio Code text editor....
-
-PRINT ABRACADABRA!`
+$`This one is not capped.
+PRINT MONKEY!`
diff --git a/packages/sample/genaisrc/genaiscript.d.ts b/packages/sample/genaisrc/genaiscript.d.ts
index 621cadc4ed..6419adb510 100644
--- a/packages/sample/genaisrc/genaiscript.d.ts
+++ b/packages/sample/genaisrc/genaiscript.d.ts
@@ -769,7 +769,7 @@ interface ContextExpansionOptions {
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
      */
-    flexBasis?: number
+    flex?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1535,13 +1535,8 @@ interface PromptTemplateString {
     priority(value: number): PromptTemplateString
     /**
      * Sets the context layout flex weight
-     * @param weight
      */
-    flex(options: {
-        grow?: number
-        reserve?: number
-        basis?: number
-    }): PromptTemplateString
+    flex(value: number): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/genaisrc/node/genaiscript.d.ts b/packages/sample/genaisrc/node/genaiscript.d.ts
index 621cadc4ed..6419adb510 100644
--- a/packages/sample/genaisrc/node/genaiscript.d.ts
+++ b/packages/sample/genaisrc/node/genaiscript.d.ts
@@ -769,7 +769,7 @@ interface ContextExpansionOptions {
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
      */
-    flexBasis?: number
+    flex?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1535,13 +1535,8 @@ interface PromptTemplateString {
     priority(value: number): PromptTemplateString
     /**
      * Sets the context layout flex weight
-     * @param weight
      */
-    flex(options: {
-        grow?: number
-        reserve?: number
-        basis?: number
-    }): PromptTemplateString
+    flex(value: number): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/genaisrc/python/genaiscript.d.ts b/packages/sample/genaisrc/python/genaiscript.d.ts
index 621cadc4ed..6419adb510 100644
--- a/packages/sample/genaisrc/python/genaiscript.d.ts
+++ b/packages/sample/genaisrc/python/genaiscript.d.ts
@@ -769,7 +769,7 @@ interface ContextExpansionOptions {
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
      */
-    flexBasis?: number
+    flex?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1535,13 +1535,8 @@ interface PromptTemplateString {
     priority(value: number): PromptTemplateString
     /**
      * Sets the context layout flex weight
-     * @param weight
      */
-    flex(options: {
-        grow?: number
-        reserve?: number
-        basis?: number
-    }): PromptTemplateString
+    flex(value: number): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/genaisrc/style/genaiscript.d.ts b/packages/sample/genaisrc/style/genaiscript.d.ts
index 621cadc4ed..6419adb510 100644
--- a/packages/sample/genaisrc/style/genaiscript.d.ts
+++ b/packages/sample/genaisrc/style/genaiscript.d.ts
@@ -769,7 +769,7 @@ interface ContextExpansionOptions {
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
      */
-    flexBasis?: number
+    flex?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1535,13 +1535,8 @@ interface PromptTemplateString {
     priority(value: number): PromptTemplateString
     /**
      * Sets the context layout flex weight
-     * @param weight
      */
-    flex(options: {
-        grow?: number
-        reserve?: number
-        basis?: number
-    }): PromptTemplateString
+    flex(value: number): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/src/aici/genaiscript.d.ts b/packages/sample/src/aici/genaiscript.d.ts
index 621cadc4ed..6419adb510 100644
--- a/packages/sample/src/aici/genaiscript.d.ts
+++ b/packages/sample/src/aici/genaiscript.d.ts
@@ -769,7 +769,7 @@ interface ContextExpansionOptions {
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
      */
-    flexBasis?: number
+    flex?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1535,13 +1535,8 @@ interface PromptTemplateString {
     priority(value: number): PromptTemplateString
     /**
      * Sets the context layout flex weight
-     * @param weight
      */
-    flex(options: {
-        grow?: number
-        reserve?: number
-        basis?: number
-    }): PromptTemplateString
+    flex(value: number): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/src/errors/genaiscript.d.ts b/packages/sample/src/errors/genaiscript.d.ts
index 621cadc4ed..6419adb510 100644
--- a/packages/sample/src/errors/genaiscript.d.ts
+++ b/packages/sample/src/errors/genaiscript.d.ts
@@ -769,7 +769,7 @@ interface ContextExpansionOptions {
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
      */
-    flexBasis?: number
+    flex?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1535,13 +1535,8 @@ interface PromptTemplateString {
     priority(value: number): PromptTemplateString
     /**
      * Sets the context layout flex weight
-     * @param weight
      */
-    flex(options: {
-        grow?: number
-        reserve?: number
-        basis?: number
-    }): PromptTemplateString
+    flex(value: number): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/src/genaiscript.d.ts b/packages/sample/src/genaiscript.d.ts
index 621cadc4ed..6419adb510 100644
--- a/packages/sample/src/genaiscript.d.ts
+++ b/packages/sample/src/genaiscript.d.ts
@@ -769,7 +769,7 @@ interface ContextExpansionOptions {
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
      */
-    flexBasis?: number
+    flex?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1535,13 +1535,8 @@ interface PromptTemplateString {
     priority(value: number): PromptTemplateString
     /**
      * Sets the context layout flex weight
-     * @param weight
      */
-    flex(options: {
-        grow?: number
-        reserve?: number
-        basis?: number
-    }): PromptTemplateString
+    flex(value: number): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/src/makecode/genaiscript.d.ts b/packages/sample/src/makecode/genaiscript.d.ts
index 621cadc4ed..6419adb510 100644
--- a/packages/sample/src/makecode/genaiscript.d.ts
+++ b/packages/sample/src/makecode/genaiscript.d.ts
@@ -769,7 +769,7 @@ interface ContextExpansionOptions {
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
      */
-    flexBasis?: number
+    flex?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1535,13 +1535,8 @@ interface PromptTemplateString {
     priority(value: number): PromptTemplateString
     /**
      * Sets the context layout flex weight
-     * @param weight
      */
-    flex(options: {
-        grow?: number
-        reserve?: number
-        basis?: number
-    }): PromptTemplateString
+    flex(value: number): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/src/tla/genaiscript.d.ts b/packages/sample/src/tla/genaiscript.d.ts
index 621cadc4ed..6419adb510 100644
--- a/packages/sample/src/tla/genaiscript.d.ts
+++ b/packages/sample/src/tla/genaiscript.d.ts
@@ -769,7 +769,7 @@ interface ContextExpansionOptions {
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
      */
-    flexBasis?: number
+    flex?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1535,13 +1535,8 @@ interface PromptTemplateString {
     priority(value: number): PromptTemplateString
     /**
      * Sets the context layout flex weight
-     * @param weight
      */
-    flex(options: {
-        grow?: number
-        reserve?: number
-        basis?: number
-    }): PromptTemplateString
+    flex(value: number): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/packages/sample/src/vision/genaiscript.d.ts b/packages/sample/src/vision/genaiscript.d.ts
index 621cadc4ed..6419adb510 100644
--- a/packages/sample/src/vision/genaiscript.d.ts
+++ b/packages/sample/src/vision/genaiscript.d.ts
@@ -769,7 +769,7 @@ interface ContextExpansionOptions {
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
      */
-    flexBasis?: number
+    flex?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1535,13 +1535,8 @@ interface PromptTemplateString {
     priority(value: number): PromptTemplateString
     /**
      * Sets the context layout flex weight
-     * @param weight
      */
-    flex(options: {
-        grow?: number
-        reserve?: number
-        basis?: number
-    }): PromptTemplateString
+    flex(value: number): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data
diff --git a/slides/genaisrc/genaiscript.d.ts b/slides/genaisrc/genaiscript.d.ts
index 621cadc4ed..6419adb510 100644
--- a/slides/genaisrc/genaiscript.d.ts
+++ b/slides/genaisrc/genaiscript.d.ts
@@ -769,7 +769,7 @@ interface ContextExpansionOptions {
      * Controls the proportion of tokens allocated from the container's budget to this element.
      * It defaults to 1 on all elements.
      */
-    flexBasis?: number
+    flex?: number
 }
 
 interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter {
@@ -1535,13 +1535,8 @@ interface PromptTemplateString {
     priority(value: number): PromptTemplateString
     /**
      * Sets the context layout flex weight
-     * @param weight
      */
-    flex(options: {
-        grow?: number
-        reserve?: number
-        basis?: number
-    }): PromptTemplateString
+    flex(value: number): PromptTemplateString
     /**
      * Applies jinja template to the string lazily
      * @param data jinja data

From 2bc4e7b4a6183293ff37bd277f672a53def74906 Mon Sep 17 00:00:00 2001
From: pelikhan <jhalleux@microsoft.com>
Date: Fri, 6 Sep 2024 14:55:41 -0700
Subject: [PATCH 6/7] basic flex

---
 docs/genaisrc/genaiscript.d.ts                |  5 ++++
 genaisrc/genaiscript.d.ts                     |  5 ++++
 packages/core/src/chat.ts                     |  2 +-
 packages/core/src/expander.ts                 |  8 ++++++-
 packages/core/src/genaisrc/genaiscript.d.ts   |  5 ++++
 packages/core/src/promptcontext.ts            |  2 +-
 packages/core/src/promptdom.ts                | 23 +++++++++++--------
 packages/core/src/template.ts                 |  1 +
 packages/core/src/types/prompt_template.d.ts  |  5 ++++
 packages/sample/genaisrc/flex.genai.mts       | 10 ++++----
 packages/sample/genaisrc/genaiscript.d.ts     |  5 ++++
 .../sample/genaisrc/node/genaiscript.d.ts     |  5 ++++
 .../sample/genaisrc/python/genaiscript.d.ts   |  5 ++++
 .../sample/genaisrc/style/genaiscript.d.ts    |  5 ++++
 packages/sample/src/aici/genaiscript.d.ts     |  5 ++++
 packages/sample/src/errors/genaiscript.d.ts   |  5 ++++
 packages/sample/src/genaiscript.d.ts          |  5 ++++
 packages/sample/src/makecode/genaiscript.d.ts |  5 ++++
 packages/sample/src/tla/genaiscript.d.ts      |  5 ++++
 packages/sample/src/vision/genaiscript.d.ts   |  5 ++++
 slides/genaisrc/genaiscript.d.ts              |  5 ++++
 21 files changed, 104 insertions(+), 17 deletions(-)

diff --git a/docs/genaisrc/genaiscript.d.ts b/docs/genaisrc/genaiscript.d.ts
index 6419adb510..543f84da2c 100644
--- a/docs/genaisrc/genaiscript.d.ts
+++ b/docs/genaisrc/genaiscript.d.ts
@@ -282,6 +282,11 @@ interface ScriptRuntimeOptions {
      * Default value for emitting line numbers in fenced code blocks.
      */
     lineNumbers?: boolean
+
+    /**
+     * Budget of tokens to apply the prompt flex renderer.
+     */
+    flexTokens?: number
 }
 
 type PromptParameterType =
diff --git a/genaisrc/genaiscript.d.ts b/genaisrc/genaiscript.d.ts
index 6419adb510..543f84da2c 100644
--- a/genaisrc/genaiscript.d.ts
+++ b/genaisrc/genaiscript.d.ts
@@ -282,6 +282,11 @@ interface ScriptRuntimeOptions {
      * Default value for emitting line numbers in fenced code blocks.
      */
     lineNumbers?: boolean
+
+    /**
+     * Budget of tokens to apply the prompt flex renderer.
+     */
+    flexTokens?: number
 }
 
 type PromptParameterType =
diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts
index a7caa73639..2e39591086 100644
--- a/packages/core/src/chat.ts
+++ b/packages/core/src/chat.ts
@@ -474,7 +474,7 @@ async function processChatMessage(
                     options.model,
                     node,
                     {
-                        maxTokens: options.maxTokens,
+                        flexTokens: options.flexTokens,
                         trace,
                     }
                 )
diff --git a/packages/core/src/expander.ts b/packages/core/src/expander.ts
index 53b92d1d0b..f598e81d40 100644
--- a/packages/core/src/expander.ts
+++ b/packages/core/src/expander.ts
@@ -84,7 +84,7 @@ export async function callExpander(
                 chatParticipants: cps,
                 fileOutputs: fos,
             } = await renderPromptNode(model, node, {
-                maxTokens: options.maxTokens,
+                flexTokens: options.flexTokens,
                 trace,
             })
             text = userPrompt
@@ -200,6 +200,11 @@ export async function expandTemplate(
         normalizeInt(env.vars["max_tool_calls"]) ??
         template.maxToolCalls ??
         MAX_TOOL_CALLS
+    const flexTokens =
+        options.flexTokens ??
+        normalizeInt(env.vars["flexTokens"]) ??
+        normalizeInt(env.vars["flex_tokens"]) ??
+        template.flexTokens
     let seed = options.seed ?? normalizeInt(env.vars["seed"]) ?? template.seed
     if (seed !== undefined) seed = seed >> 0
 
@@ -214,6 +219,7 @@ export async function expandTemplate(
         ...options,
         maxTokens,
         maxToolCalls,
+        flexTokens,
         seed,
         topP,
         temperature,
diff --git a/packages/core/src/genaisrc/genaiscript.d.ts b/packages/core/src/genaisrc/genaiscript.d.ts
index 6419adb510..543f84da2c 100644
--- a/packages/core/src/genaisrc/genaiscript.d.ts
+++ b/packages/core/src/genaisrc/genaiscript.d.ts
@@ -282,6 +282,11 @@ interface ScriptRuntimeOptions {
      * Default value for emitting line numbers in fenced code blocks.
      */
     lineNumbers?: boolean
+
+    /**
+     * Budget of tokens to apply the prompt flex renderer.
+     */
+    flexTokens?: number
 }
 
 type PromptParameterType =
diff --git a/packages/core/src/promptcontext.ts b/packages/core/src/promptcontext.ts
index 2e2c9a0e67..31b443cce8 100644
--- a/packages/core/src/promptcontext.ts
+++ b/packages/core/src/promptcontext.ts
@@ -275,7 +275,7 @@ export async function createPromptContext(
                         messages: msgs,
                         chatParticipants: cps,
                     } = await renderPromptNode(genOptions.model, node, {
-                        maxTokens: genOptions.maxTokens,
+                        flexTokens: genOptions.flexTokens,
                         trace,
                     })
 
diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts
index 00044667be..fa4a1dd43b 100644
--- a/packages/core/src/promptdom.ts
+++ b/packages/core/src/promptdom.ts
@@ -546,7 +546,10 @@ function truncateText(
     encoder: TokenEncoder
 ): string {
     const tokens = estimateTokens(content, encoder)
-    const end = Math.floor((maxTokens * content.length) / tokens)
+    const end = Math.max(
+        3,
+        Math.floor((maxTokens * content.length) / tokens) - 1
+    )
     return content.slice(0, end) + MAX_TOKENS_ELLIPSE
 }
 
@@ -613,11 +616,11 @@ async function truncatePromptNode(
 
 async function flexPromptNode(
     root: PromptNode,
-    options?: { maxTokens: number } & TraceOptions
+    options?: { flexTokens: number } & TraceOptions
 ): Promise<void> {
     const PRIORITY_DEFAULT = 0
 
-    const { trace, maxTokens } = options || {}
+    const { trace, flexTokens } = options || {}
 
     // collect all notes
     const nodes: PromptNode[] = []
@@ -631,7 +634,7 @@ async function flexPromptNode(
         0
     )
 
-    if (totalTokens < maxTokens) {
+    if (totalTokens < flexTokens) {
         // no need to flex
         return
     }
@@ -646,7 +649,7 @@ async function flexPromptNode(
     const totalFlex = flexNodes.reduce((total, node) => total + node.flex, 0)
 
     const totalReserve = 0
-    const totalRemaining = Math.max(0, maxTokens - totalReserve)
+    const totalRemaining = Math.max(0, flexTokens - totalReserve)
     for (const node of flexNodes) {
         const proportion = node.flex / totalFlex
         const tokenBudget = Math.min(
@@ -654,8 +657,8 @@ async function flexPromptNode(
             Math.floor(totalRemaining * proportion)
         )
         node.maxTokens = tokenBudget
+        trace.log(`flexed ${node.type} to ${tokenBudget} tokens`)
     }
-    console.log(nodes.map((n) => ({ maxTokens: n.maxTokens })))
 }
 
 async function tracePromptNode(
@@ -693,19 +696,19 @@ async function tracePromptNode(
 export async function renderPromptNode(
     modelId: string,
     node: PromptNode,
-    options?: { maxTokens?: number } & TraceOptions
+    options?: { flexTokens?: number } & TraceOptions
 ): Promise<PromptNodeRender> {
-    const { trace, maxTokens } = options || {}
+    const { trace, flexTokens } = options || {}
     const { model } = parseModelIdentifier(modelId)
     const encoder = await resolveTokenEncoder(model)
 
     await resolvePromptNode(model, node)
     await tracePromptNode(trace, node)
 
-    if (maxTokens)
+    if (flexTokens)
         await flexPromptNode(node, {
             ...options,
-            maxTokens,
+            flexTokens,
         })
 
     const truncated = await truncatePromptNode(model, node, options)
diff --git a/packages/core/src/template.ts b/packages/core/src/template.ts
index caa14d3e1a..f56b5e796c 100644
--- a/packages/core/src/template.ts
+++ b/packages/core/src/template.ts
@@ -264,6 +264,7 @@ export async function parsePromptScript(
             c.checkNumber("temperature")
             c.checkNumber("topP")
             c.checkNumber("seed")
+            c.checkNat("flexTokens")
 
             c.checkStringArray("system")
             c.checkStringArray("files")
diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts
index bb60d98fa2..3ea75dbe29 100644
--- a/packages/core/src/types/prompt_template.d.ts
+++ b/packages/core/src/types/prompt_template.d.ts
@@ -223,6 +223,11 @@ interface ScriptRuntimeOptions {
      * Default value for emitting line numbers in fenced code blocks.
      */
     lineNumbers?: boolean
+
+    /**
+     * Budget of tokens to apply the prompt flex renderer.
+     */
+    flexTokens?: number
 }
 
 type PromptParameterType =
diff --git a/packages/sample/genaisrc/flex.genai.mts b/packages/sample/genaisrc/flex.genai.mts
index 1f5d3624cb..0221c91741 100644
--- a/packages/sample/genaisrc/flex.genai.mts
+++ b/packages/sample/genaisrc/flex.genai.mts
@@ -1,7 +1,7 @@
 script({
     files: ["src/rag/markdown.md"],
     system: [],
-    maxTokens: 20,
+    flexTokens: 20,
 })
 
 // will be trimmed
@@ -10,7 +10,9 @@ def("FILE", env.files, { maxTokens: 5 })
 // will be trimmed
 $`What is Markdown?
  Markdown is a lightweight markup language that you can use to add formatting elements to plaintext text documents. Created by John Gruber in 2004, Markdown is now one of the world’s most popular markup languages. 
-PRINT ABRACADABRA!`.maxTokens(5)
+PRINT ABRACADABRA!`
+    .maxTokens(5)
+    .flex(1)
 
-$`This one is not capped.
-PRINT MONKEY!`
+$`This one is flexed.
+PRINT MONKEY!`.flex(1)
diff --git a/packages/sample/genaisrc/genaiscript.d.ts b/packages/sample/genaisrc/genaiscript.d.ts
index 6419adb510..543f84da2c 100644
--- a/packages/sample/genaisrc/genaiscript.d.ts
+++ b/packages/sample/genaisrc/genaiscript.d.ts
@@ -282,6 +282,11 @@ interface ScriptRuntimeOptions {
      * Default value for emitting line numbers in fenced code blocks.
      */
     lineNumbers?: boolean
+
+    /**
+     * Token budget to use when applying the prompt flex renderer.
+     */
+    flexTokens?: number
 }
 
 type PromptParameterType =
diff --git a/packages/sample/genaisrc/node/genaiscript.d.ts b/packages/sample/genaisrc/node/genaiscript.d.ts
index 6419adb510..543f84da2c 100644
--- a/packages/sample/genaisrc/node/genaiscript.d.ts
+++ b/packages/sample/genaisrc/node/genaiscript.d.ts
@@ -282,6 +282,11 @@ interface ScriptRuntimeOptions {
      * Default value for emitting line numbers in fenced code blocks.
      */
     lineNumbers?: boolean
+
+    /**
+     * Token budget to use when applying the prompt flex renderer.
+     */
+    flexTokens?: number
 }
 
 type PromptParameterType =
diff --git a/packages/sample/genaisrc/python/genaiscript.d.ts b/packages/sample/genaisrc/python/genaiscript.d.ts
index 6419adb510..543f84da2c 100644
--- a/packages/sample/genaisrc/python/genaiscript.d.ts
+++ b/packages/sample/genaisrc/python/genaiscript.d.ts
@@ -282,6 +282,11 @@ interface ScriptRuntimeOptions {
      * Default value for emitting line numbers in fenced code blocks.
      */
     lineNumbers?: boolean
+
+    /**
+     * Token budget to use when applying the prompt flex renderer.
+     */
+    flexTokens?: number
 }
 
 type PromptParameterType =
diff --git a/packages/sample/genaisrc/style/genaiscript.d.ts b/packages/sample/genaisrc/style/genaiscript.d.ts
index 6419adb510..543f84da2c 100644
--- a/packages/sample/genaisrc/style/genaiscript.d.ts
+++ b/packages/sample/genaisrc/style/genaiscript.d.ts
@@ -282,6 +282,11 @@ interface ScriptRuntimeOptions {
      * Default value for emitting line numbers in fenced code blocks.
      */
     lineNumbers?: boolean
+
+    /**
+     * Token budget to use when applying the prompt flex renderer.
+     */
+    flexTokens?: number
 }
 
 type PromptParameterType =
diff --git a/packages/sample/src/aici/genaiscript.d.ts b/packages/sample/src/aici/genaiscript.d.ts
index 6419adb510..543f84da2c 100644
--- a/packages/sample/src/aici/genaiscript.d.ts
+++ b/packages/sample/src/aici/genaiscript.d.ts
@@ -282,6 +282,11 @@ interface ScriptRuntimeOptions {
      * Default value for emitting line numbers in fenced code blocks.
      */
     lineNumbers?: boolean
+
+    /**
+     * Token budget to use when applying the prompt flex renderer.
+     */
+    flexTokens?: number
 }
 
 type PromptParameterType =
diff --git a/packages/sample/src/errors/genaiscript.d.ts b/packages/sample/src/errors/genaiscript.d.ts
index 6419adb510..543f84da2c 100644
--- a/packages/sample/src/errors/genaiscript.d.ts
+++ b/packages/sample/src/errors/genaiscript.d.ts
@@ -282,6 +282,11 @@ interface ScriptRuntimeOptions {
      * Default value for emitting line numbers in fenced code blocks.
      */
     lineNumbers?: boolean
+
+    /**
+     * Token budget to use when applying the prompt flex renderer.
+     */
+    flexTokens?: number
 }
 
 type PromptParameterType =
diff --git a/packages/sample/src/genaiscript.d.ts b/packages/sample/src/genaiscript.d.ts
index 6419adb510..543f84da2c 100644
--- a/packages/sample/src/genaiscript.d.ts
+++ b/packages/sample/src/genaiscript.d.ts
@@ -282,6 +282,11 @@ interface ScriptRuntimeOptions {
      * Default value for emitting line numbers in fenced code blocks.
      */
     lineNumbers?: boolean
+
+    /**
+     * Token budget to use when applying the prompt flex renderer.
+     */
+    flexTokens?: number
 }
 
 type PromptParameterType =
diff --git a/packages/sample/src/makecode/genaiscript.d.ts b/packages/sample/src/makecode/genaiscript.d.ts
index 6419adb510..543f84da2c 100644
--- a/packages/sample/src/makecode/genaiscript.d.ts
+++ b/packages/sample/src/makecode/genaiscript.d.ts
@@ -282,6 +282,11 @@ interface ScriptRuntimeOptions {
      * Default value for emitting line numbers in fenced code blocks.
      */
     lineNumbers?: boolean
+
+    /**
+     * Token budget to use when applying the prompt flex renderer.
+     */
+    flexTokens?: number
 }
 
 type PromptParameterType =
diff --git a/packages/sample/src/tla/genaiscript.d.ts b/packages/sample/src/tla/genaiscript.d.ts
index 6419adb510..543f84da2c 100644
--- a/packages/sample/src/tla/genaiscript.d.ts
+++ b/packages/sample/src/tla/genaiscript.d.ts
@@ -282,6 +282,11 @@ interface ScriptRuntimeOptions {
      * Default value for emitting line numbers in fenced code blocks.
      */
     lineNumbers?: boolean
+
+    /**
+     * Token budget to use when applying the prompt flex renderer.
+     */
+    flexTokens?: number
 }
 
 type PromptParameterType =
diff --git a/packages/sample/src/vision/genaiscript.d.ts b/packages/sample/src/vision/genaiscript.d.ts
index 6419adb510..543f84da2c 100644
--- a/packages/sample/src/vision/genaiscript.d.ts
+++ b/packages/sample/src/vision/genaiscript.d.ts
@@ -282,6 +282,11 @@ interface ScriptRuntimeOptions {
      * Default value for emitting line numbers in fenced code blocks.
      */
     lineNumbers?: boolean
+
+    /**
+     * Token budget to use when applying the prompt flex renderer.
+     */
+    flexTokens?: number
 }
 
 type PromptParameterType =
diff --git a/slides/genaisrc/genaiscript.d.ts b/slides/genaisrc/genaiscript.d.ts
index 6419adb510..543f84da2c 100644
--- a/slides/genaisrc/genaiscript.d.ts
+++ b/slides/genaisrc/genaiscript.d.ts
@@ -282,6 +282,11 @@ interface ScriptRuntimeOptions {
      * Default value for emitting line numbers in fenced code blocks.
      */
     lineNumbers?: boolean
+
+    /**
+     * Token budget to use when applying the prompt flex renderer.
+     */
+    flexTokens?: number
 }
 
 type PromptParameterType =

From e8c242018a98b194cff173489318e10561e21ccf Mon Sep 17 00:00:00 2001
From: pelikhan <jhalleux@microsoft.com>
Date: Fri, 6 Sep 2024 14:58:29 -0700
Subject: [PATCH 7/7] Add test assertions to the flex sample script

---
 packages/sample/genaisrc/flex.genai.mts | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/packages/sample/genaisrc/flex.genai.mts b/packages/sample/genaisrc/flex.genai.mts
index 0221c91741..167c1eb2bb 100644
--- a/packages/sample/genaisrc/flex.genai.mts
+++ b/packages/sample/genaisrc/flex.genai.mts
@@ -1,18 +1,29 @@
 script({
+    model: "openai:gpt-3.5-turbo",
     files: ["src/rag/markdown.md"],
     system: [],
     flexTokens: 20,
+    tests: {
+        asserts: [
+            {
+                type: "not-icontains",
+                value: "ABRACADABRA",
+            },
+            {
+                type: "not-icontains",
+                value: "MONKEY",
+            },
+        ],
+    },
 })
 
 // will be trimmed
-def("FILE", env.files, { maxTokens: 5 })
+def("FILE", env.files, { flex: 1 })
 
 // will be trimmed
 $`What is Markdown?
  Markdown is a lightweight markup language that you can use to add formatting elements to plaintext text documents. Created by John Gruber in 2004, Markdown is now one of the world’s most popular markup languages. 
-PRINT ABRACADABRA!`
-    .maxTokens(5)
-    .flex(1)
+PRINT ABRACADABRA!`.flex(2)
 
 $`This one is flexed.
 PRINT MONKEY!`.flex(1)