diff --git a/docs/genaisrc/genaiscript.d.ts b/docs/genaisrc/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/docs/genaisrc/genaiscript.d.ts +++ b/docs/genaisrc/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/docs/src/content/docs/reference/scripts/system.mdx b/docs/src/content/docs/reference/scripts/system.mdx index 22711de8ce..9f58d33364 100644 --- a/docs/src/content/docs/reference/scripts/system.mdx +++ b/docs/src/content/docs/reference/scripts/system.mdx @@ -1041,8 +1041,12 @@ defTool( let log = await github.downloadWorkflowJobLog(job_id, { llmify: true, }) - if (parsers.tokens(log) > 1000) - log = "...(truncated, tool long)...\n" + log.slice(-3000) + if ((await tokenizers.count(log)) > 1000) { + log = await tokenizers.truncate(log, 1000, { last: true }) + const annotations = await parsers.annotations(log) + if (annotations.length > 0) + log += "\n\n" + YAML.stringify(annotations) + } return log } ) diff --git a/genaisrc/genaiscript.d.ts b/genaisrc/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/genaisrc/genaiscript.d.ts +++ b/genaisrc/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/auto/genaiscript.d.ts b/packages/auto/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/packages/auto/genaiscript.d.ts +++ b/packages/auto/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/core/src/genaisrc/genaiscript.d.ts b/packages/core/src/genaisrc/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/packages/core/src/genaisrc/genaiscript.d.ts +++ b/packages/core/src/genaisrc/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/core/src/genaisrc/system.github_actions.genai.mjs b/packages/core/src/genaisrc/system.github_actions.genai.mjs index efe2e75c33..143c16711f 100644 --- a/packages/core/src/genaisrc/system.github_actions.genai.mjs +++ b/packages/core/src/genaisrc/system.github_actions.genai.mjs @@ -114,8 +114,12 @@ defTool( let log = await github.downloadWorkflowJobLog(job_id, { llmify: true, }) - if (parsers.tokens(log) > 1000) - log = "...(truncated, tool long)...\n" + log.slice(-3000) + if ((await tokenizers.count(log)) > 1000) { + log = await tokenizers.truncate(log, 1000, { last: true }) + const annotations = await parsers.annotations(log) + if (annotations.length > 0) + log += "\n\n" + YAML.stringify(annotations) + } return log } ) diff --git a/packages/core/src/globals.ts b/packages/core/src/globals.ts index f5712874a1..7b503ed3f0 100644 --- a/packages/core/src/globals.ts +++ b/packages/core/src/globals.ts @@ -16,10 +16,13 @@ import { readText } from "./fs" import { logVerbose } from "./util" import { GitHubClient } from "./github" import { GitClient } from "./git" +import { estimateTokens, truncateTextToTokens } from "./tokens" +import { resolveTokenEncoder } from "./encoders" +import { runtimeHost } from "./host" /** * This file defines global utilities and installs them into the global context. - * It includes functions to parse and stringify various data formats, handle errors, + * It includes functions to parse and stringify various data formats, handle errors, * and manage GitHub and Git clients. The utilities are frozen to prevent modification. */ @@ -118,6 +121,22 @@ export function installGlobals() { // Instantiate Git client glb.git = new GitClient() + glb.tokenizers = Object.freeze({ + count: async (text, options) => { + const encoder = await resolveTokenEncoder( + options?.model || runtimeHost.defaultModelOptions.model + ) + const c = await estimateTokens(text, encoder) + return c + }, + truncate: async (text, maxTokens, options) => { + const encoder = await resolveTokenEncoder( + options?.model || runtimeHost.defaultModelOptions.model + ) + return await truncateTextToTokens(text, maxTokens, encoder, options) + }, + }) + /** * Asynchronous function to fetch text from a URL or file. * Handles both HTTP(S) URLs and local workspace files. diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index d280eab346..6aa3a77140 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -1028,6 +1028,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload diff --git a/packages/core/src/types/prompt_type.d.ts b/packages/core/src/types/prompt_type.d.ts index 9123564bcf..8a9fc2e566 100644 --- a/packages/core/src/types/prompt_type.d.ts +++ b/packages/core/src/types/prompt_type.d.ts @@ -216,6 +216,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/sample/genaisrc/blog/genaiscript.d.ts b/packages/sample/genaisrc/blog/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/packages/sample/genaisrc/blog/genaiscript.d.ts +++ b/packages/sample/genaisrc/blog/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/sample/genaisrc/genaiscript.d.ts b/packages/sample/genaisrc/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/packages/sample/genaisrc/genaiscript.d.ts +++ b/packages/sample/genaisrc/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/sample/genaisrc/node/genaiscript.d.ts b/packages/sample/genaisrc/node/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/packages/sample/genaisrc/node/genaiscript.d.ts +++ b/packages/sample/genaisrc/node/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/sample/genaisrc/python/genaiscript.d.ts b/packages/sample/genaisrc/python/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/packages/sample/genaisrc/python/genaiscript.d.ts +++ b/packages/sample/genaisrc/python/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/sample/genaisrc/style/genaiscript.d.ts b/packages/sample/genaisrc/style/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/packages/sample/genaisrc/style/genaiscript.d.ts +++ b/packages/sample/genaisrc/style/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/sample/src/aici/genaiscript.d.ts b/packages/sample/src/aici/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/packages/sample/src/aici/genaiscript.d.ts +++ b/packages/sample/src/aici/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/sample/src/errors/genaiscript.d.ts b/packages/sample/src/errors/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/packages/sample/src/errors/genaiscript.d.ts +++ b/packages/sample/src/errors/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/sample/src/genaiscript.d.ts b/packages/sample/src/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/packages/sample/src/genaiscript.d.ts +++ b/packages/sample/src/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/sample/src/makecode/genaiscript.d.ts b/packages/sample/src/makecode/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/packages/sample/src/makecode/genaiscript.d.ts +++ b/packages/sample/src/makecode/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/sample/src/tla/genaiscript.d.ts b/packages/sample/src/tla/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/packages/sample/src/tla/genaiscript.d.ts +++ b/packages/sample/src/tla/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/sample/src/vision/genaiscript.d.ts b/packages/sample/src/vision/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/packages/sample/src/vision/genaiscript.d.ts +++ b/packages/sample/src/vision/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/vscode/genaisrc/genaiscript.d.ts b/packages/vscode/genaisrc/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/packages/vscode/genaisrc/genaiscript.d.ts +++ b/packages/vscode/genaisrc/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/slides/genaisrc/genaiscript.d.ts b/slides/genaisrc/genaiscript.d.ts index 3ccc1309b3..a92f8a9285 100644 --- a/slides/genaisrc/genaiscript.d.ts +++ b/slides/genaisrc/genaiscript.d.ts @@ -1106,6 +1106,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3175,6 +3197,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url