diff --git a/.gitignore.genai b/.gitignore.genai index 4da551b07c..4308234bf1 100644 --- a/.gitignore.genai +++ b/.gitignore.genai @@ -1,3 +1,4 @@ +.vscode/* **/genaiscript.d.ts **/yarn.lock THIRD_PARTY_LICENSES.md diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 0284595dd6..a115e8e883 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -32,21 +32,6 @@ "showReuseMessage": false, "clear": true } - }, - { - "label": "rv", - "type": "shell", - "command": "node packages/cli/built/genaiscript.cjs run rv", - "detail": "genai reviewer", - "problemMatcher": "$tsc", - "presentation": { - "echo": true, - "reveal": "always", - "focus": false, - "panel": "shared", - "showReuseMessage": false, - "clear": true - } } ] } diff --git a/README.md b/README.md index d824fd4d58..9e4dee6747 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ The quick brown fox jumps over the lazy dog. Grep or fuzz search [files](https://microsoft.github.io/genaiscript/reference/scripts/files). ```js -const { files } = await workspace.grep(/[a-z][a-z0-9]+/, "**/*.md") +const { files } = await workspace.grep(/[a-z][a-z0-9]+/, { globs: "*.md" }) ``` ### LLM Tools diff --git a/docs/genaisrc/genaiscript.d.ts b/docs/genaisrc/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- a/docs/genaisrc/genaiscript.d.ts +++ b/docs/genaisrc/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = 
OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. 
True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. 
* @param url diff --git a/docs/genaisrc/image-alt-text.genai.js b/docs/genaisrc/image-alt-text.genai.js index 9499da02f5..9a06ecfa13 100644 --- a/docs/genaisrc/image-alt-text.genai.js +++ b/docs/genaisrc/image-alt-text.genai.js @@ -1,7 +1,7 @@ script({ title: "Image Alt Text generator", description: "Generate alt text for images", - model: "openai:gpt-4-turbo-v", + model: "openai:gpt-4o", group: "docs", maxTokens: 4000, temperature: 0, diff --git a/docs/src/components/BuiltinAgents.mdx b/docs/src/components/BuiltinAgents.mdx index 6364956eac..8e9e96bf06 100644 --- a/docs/src/components/BuiltinAgents.mdx +++ b/docs/src/components/BuiltinAgents.mdx @@ -6,6 +6,7 @@ import { LinkCard } from '@astrojs/starlight/components'; ### Builtin Agents + diff --git a/docs/src/components/BuiltinTools.mdx b/docs/src/components/BuiltinTools.mdx index b6a730b7eb..1a7c56f8d7 100644 --- a/docs/src/components/BuiltinTools.mdx +++ b/docs/src/components/BuiltinTools.mdx @@ -9,6 +9,7 @@ import { LinkCard } from '@astrojs/starlight/components'; + @@ -28,6 +29,7 @@ import { LinkCard } from '@astrojs/starlight/components'; + diff --git a/docs/src/components/CreateScript.mdx b/docs/src/components/CreateScript.mdx index da5e46f91f..196982698d 100644 --- a/docs/src/components/CreateScript.mdx +++ b/docs/src/components/CreateScript.mdx @@ -4,16 +4,16 @@ description: Learn how to create a new script using Visual Studio Code or other keywords: create script, VSCode, command palette, CLI, script generation --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import { Tabs, TabItem } from "@astrojs/starlight/components" -Use the `> GenAiScript: Create new script...` command in the [command palette](https://code.visualstudio.com/docs/getstarted/userinterface#_command-palette) +Use the `> GenAiScript: Create new script...` command in the [command palette](https://code.visualstudio.com/docs/getstarted/userinterface#_command-palette) (`Ctrl+Shift+P` on Windows/Linux, `โ‡งโŒ˜P` on Mac) to 
create a new script. -![The command palette and the create script command](../assets/vscode-create-new-script.png) +![A command palette with a search bar showing ">createn" and a suggestion below for "GenAIScript: Create new script..." highlighted in blue.](../assets/vscode-create-new-script.png) diff --git a/docs/src/content/docs/blog/drafts/code-review-with-genai.md b/docs/src/content/docs/blog/drafts/code-review-with-genai.md index 83f6c54a56..92d163b6fb 100644 --- a/docs/src/content/docs/blog/drafts/code-review-with-genai.md +++ b/docs/src/content/docs/blog/drafts/code-review-with-genai.md @@ -6,13 +6,12 @@ tags: ["code review", "development", "programming", "automation"] authors: genaiscript canonical_url: https://microsoft.github.io/genaiscript/blog/code-review-with-genai description: Dive into automated code review processes with GenAI, enhancing - developer efficiency and code quality. - + developer efficiency and code quality. --- ## Introducing "Code Review with GenAI" 🧐 -Have you ever wished for an extra set of eyes while coding? Well, GenAI has got your back! Let's delve into the "Reviewer" script, which automates the code review process, making it a breeze for developers. This powerful script for the GenAIScript platform can be found [here on GitHub](https://github.com/microsoft/genaiscript/blob/main/packages/vscode/genaisrc/rv.genai.mts). +Have you ever wished for an extra set of eyes while coding? Well, GenAI has got your back! Let's delve into the "Reviewer" script, which automates the code review process, making it a breeze for developers. This powerful script for the GenAIScript platform can be found [here on GitHub](https://github.com/microsoft/genaiscript/blob/main/packages/vscode/genaisrc/prr.genai.mts). ### What is the Script About? 
diff --git a/docs/src/content/docs/guides/ask-my-image.mdx b/docs/src/content/docs/guides/ask-my-image.mdx index a39dd33f7a..e320d63e1c 100644 --- a/docs/src/content/docs/guides/ask-my-image.mdx +++ b/docs/src/content/docs/guides/ask-my-image.mdx @@ -16,7 +16,7 @@ The quick-start guide illustrates how to write a GenAIScript that takes input fr ```js script({ title: "Apply a script to an image", - model: "openai:gpt-4-turbo-v", + model: "openai:gpt-4o", }) ``` 4. Use [defImages](/genaiscript/reference/scripts/images/) to ingest the image file into the model context: diff --git a/docs/src/content/docs/guides/search-and-transform.mdx b/docs/src/content/docs/guides/search-and-transform.mdx index 42e745309f..e96038543b 100644 --- a/docs/src/content/docs/guides/search-and-transform.mdx +++ b/docs/src/content/docs/guides/search-and-transform.mdx @@ -69,9 +69,9 @@ that allows to efficiently search for a pattern in files (this is the same searc that powers the Visual Studio Code search). ```js "workspace.grep" -const { pattern, glob } = env.vars +const { pattern, globs } = env.vars const patternRx = new RegExp(pattern, "g") -const { files } = await workspace.grep(patternRx, glob) +const { files } = await workspace.grep(patternRx, { globs }) ``` ## Compute Transforms diff --git a/docs/src/content/docs/index.mdx b/docs/src/content/docs/index.mdx index cdbc3302be..4f2b77f17c 100644 --- a/docs/src/content/docs/index.mdx +++ b/docs/src/content/docs/index.mdx @@ -249,7 +249,7 @@ The quick brown fox jumps over the lazy dog. 
Grep or fuzz search [files](/genaiscript/referen/script/files) ```js wrap -const { files } = await workspace.grep(/[a-z][a-z0-9]+/, "**/*.md") +const { files } = await workspace.grep(/[a-z][a-z0-9]+/, { globs: "*.md" }) ``` diff --git a/docs/src/content/docs/reference/cli/commands.md b/docs/src/content/docs/reference/cli/commands.md index ce2a1e6ce7..65cd28aebd 100644 --- a/docs/src/content/docs/reference/cli/commands.md +++ b/docs/src/content/docs/reference/cli/commands.md @@ -407,30 +407,6 @@ Options: -h, --help display help for command ``` -## `workspace` - -``` -Usage: genaiscript workspace [options] [command] - -Workspace tasks - -Options: - -h, --help display help for command - -Commands: - grep [files...] - help [command] display help for command -``` - -### `workspace grep` - -``` -Usage: genaiscript workspace grep [options] [files...] - -Options: - -h, --help display help for command -``` - ## `info` ``` diff --git a/docs/src/content/docs/reference/scripts/images.md b/docs/src/content/docs/reference/scripts/images.md index f7626da382..a54a4c632e 100644 --- a/docs/src/content/docs/reference/scripts/images.md +++ b/docs/src/content/docs/reference/scripts/images.md @@ -6,7 +6,7 @@ sidebar: order: 10 --- -Images can be added to the prompt for models that support this feature (like `gpt-4-turbo-v`). +Images can be added to the prompt for models that support this feature (like `gpt-4o`). Use the `defImages` function to declare the images. Supported images will vary with models but typically include `PNG`, `JPEG`, `WEBP`, and `GIF`. Both local files and URLs are supported. diff --git a/docs/src/content/docs/reference/scripts/system.mdx b/docs/src/content/docs/reference/scripts/system.mdx index 4502c8f94a..c239c0f815 100644 --- a/docs/src/content/docs/reference/scripts/system.mdx +++ b/docs/src/content/docs/reference/scripts/system.mdx @@ -96,6 +96,58 @@ $`- You are concise. ````` +### `system.agent_docs` + +Agent that can query on the documentation. 
+ + + + + +`````js wrap title="system.agent_docs" +system({ + title: "Agent that can query on the documentation.", +}) + +const docsRoot = env.vars.docsRoot || "docs" +const samplesRoot = env.vars.samplesRoot || "packages/sample/genaisrc/" + +defAgent( + "docs", + "query the documentation", + async (ctx) => { + ctx.$`Your are a helpfull LLM agent that is an expert at Technical documentation. You can provide the best analyzis to any query about the documentation. + + Analyze QUERY and respond with the requested information. + + ## Tools + + The 'md_find_files' can perform a grep search over the documentation files and return the title, description, and filename for each match. + To optimize search, conver the QUERY request into keywords or a regex pattern. + + Try multiple searches if you cannot find relevant files. + + ## Context + + - the documentation is stored in markdown/MDX files in the ${docsRoot} folder + ${samplesRoot ? `- the code samples are stored in the ${samplesRoot} folder` : ""} + ` + }, + { + system: ["system.explanations", "system.github_info"], + tools: [ + "md_find_files", + "md_read_frontmatterm", + "fs_find_files", + "fs_read_file", + ], + maxTokens: 5000, + } +) + +````` + + ### `system.agent_fs` Agent that can find, search or read files to accomplish tasks @@ -151,8 +203,10 @@ defAgent( "query a repository using Git to accomplish tasks. Provide all the context information available to execute git queries.", `Your are a helpfull LLM agent that can use the git tools to query the current repository. Answer the question in QUERY. - - The current repository is the same as github repository.`, - { model, system: ["system.github_info"], tools: ["git"] } + - The current repository is the same as github repository. + - Prefer using diff to compare files rather than listing files. Listing files is only useful when you need to read the content of the files. 
+ `, + { model, system: ["system.git_info", "system.github_info"], tools: ["git"] } ) ````` @@ -177,7 +231,9 @@ defAgent( "github", "query GitHub to accomplish tasks", `Your are a helpfull LLM agent that can query GitHub to accomplish tasks. Answer the question in QUERY. - Prefer diffing job logs rather downloading entire logs which can be very large.`, + - Prefer diffing job logs rather downloading entire logs which can be very large. + - Pull Requests ar a specialized type of issues. + `, { model, system: [ @@ -562,10 +618,15 @@ defTool( }, async (args) => { const { context, filename, otherfilename } = args - context.log(`diff: ${filename} ${filename}`) + context.log(`fs diff ${filename}..${otherfilename}`) + if (filename === otherfilename) return "" + const f = await workspace.readText(filename) const of = await workspace.readText(otherfilename) return parsers.diff(f, of) + }, + { + maxTokens: 20000, } ) @@ -616,7 +677,7 @@ defTool( `ls ${glob} ${pattern ? `| grep ${pattern}` : ""} ${frontmatter ? "--frontmatter" : ""}` ) const res = pattern - ? (await workspace.grep(pattern, glob, { readText: false })).files + ? (await workspace.grep(pattern, { glob, readText: false })).files : await workspace.findFiles(glob, { readText: false }) if (!res?.length) return "No files found." @@ -717,6 +778,9 @@ defTool( content = lines.slice(line_start, line_end).join("\n") } return content + }, + { + maxTokens: 10000, } ) @@ -729,6 +793,7 @@ git read operations Tools to query a git repository. +- tool `git_branch_default`: Gets the default branch using git. - tool `git_branch_current`: Gets the current branch using git. - tool `git_branch_list`: List all branches using git. - tool `git_diff`: Computes file diffs using the git diff command. If the diff is too large, it returns the list of modified/added files. 
@@ -742,6 +807,15 @@ system({ description: "Tools to query a git repository.", }) +defTool( + "git_branch_default", + "Gets the default branch using git.", + {}, + async () => { + return await git.defaultBranch() + } +) + defTool( "git_branch_current", "Gets the current branch using git.", @@ -803,7 +877,8 @@ defTool( ...rest, }) return res - } + }, + { maxTokens: 20000 } ) defTool( @@ -861,6 +936,27 @@ defTool("git_last_tag", "Gets the last tag using git.", {}, async () => { ````` +### `system.git_info` + +Git repository information + + + + + +`````js wrap title="system.git_info" +system({ + title: "Git repository information", +}) + +const branch = await git.branch() +const defaultBranch = await git.defaultBranch() + +$`git: The current branch is ${branch} and the default branch is ${defaultBranch}.` + +````` + + ### `system.github_actions` github workflows @@ -989,8 +1085,12 @@ defTool( let log = await github.downloadWorkflowJobLog(job_id, { llmify: true, }) - if (parsers.tokens(log) > 1000) - log = "...(truncated, tool long)...\n" + log.slice(-3000) + if ((await tokenizers.count(log)) > 1000) { + log = await tokenizers.truncate(log, 1000, { last: true }) + const annotations = await parsers.annotations(log) + if (annotations.length > 0) + log += "\n\n" + YAML.stringify(annotations) + } return log } ) @@ -1106,7 +1206,7 @@ system({ const info = await github.info() if (info?.owner) { - const { auth, owner, repo, baseUrl } = info + const { owner, repo, baseUrl } = info $`- current github repository: ${owner}/${repo}` if (baseUrl) $`- current github base url: ${baseUrl}` } @@ -1448,6 +1548,95 @@ defTool( ````` +### `system.md_find_files` + +Tools to help with documentation tasks + + + +- tool `md_find_files`: Get the file structure of the documentation markdown/MDX files. Retursn filename, title, description for each match. Use pattern to specify a regular expression to search for in the file content. 
+ +`````js wrap title="system.md_find_files" +system({ + title: "Tools to help with documentation tasks", +}) + +const model = (env.vars.mdSummaryModel = "gpt-4o-mini") + +defTool( + "md_find_files", + "Get the file structure of the documentation markdown/MDX files. Retursn filename, title, description for each match. Use pattern to specify a regular expression to search for in the file content.", + { + type: "object", + properties: { + path: { + type: "string", + description: "root path to search for markdown/MDX files", + }, + pattern: { + type: "string", + description: + "regular expression pattern to search for in the file content.", + }, + question: { + type: "string", + description: "Question to ask when computing the summary", + }, + }, + }, + async (args) => { + const { path, pattern, context, question } = args + context.log( + `docs: ls ${path} ${pattern ? `| grep ${pattern}` : ""} --frontmatter ${question ? `--ask ${question}` : ""}` + ) + const matches = pattern + ? (await workspace.grep(pattern, { path, readText: true })).files + : await workspace.findFiles(path + "/**/*.{md,mdx}", { + readText: true, + }) + if (!matches?.length) return "No files found." + const q = await host.promiseQueue(5) + const files = await q.mapAll(matches, async ({ filename, content }) => { + const file = { + filename, + } + try { + const fm = await parsers.frontmatter(content) + if (fm) { + file.title = fm.title + file.description = fm.description + } + const { text: summary } = await runPrompt( + (_) => { + _.def("CONTENT", content, { language: "markdown" }) + _.$`As a professional summarizer, create a concise and comprehensive summary of the provided text, be it an article, post, conversation, or passage, while adhering to these guidelines: + ${question ? `* ${question}` : ""} + * The summary is intended for an LLM, not a human. + * Craft a summary that is detailed, thorough, in-depth, and complex, while maintaining clarity and conciseness. 
+ * Incorporate main ideas and essential information, eliminating extraneous language and focusing on critical aspects. + * Rely strictly on the provided text, without including external information. + * Format the summary in one single paragraph form for easy understanding. Keep it short. + * Generate a list of keywords that are relevant to the text.` + }, + { + label: `summarize ${filename}`, + cache: "md_find_files_summary", + model, + } + ) + file.summary = summary + } catch (e) {} + return file + }) + const res = YAML.stringify(files) + return res + }, + { maxTokens: 20000 } +) + +````` + + ### `system.md_frontmatter` Markdown frontmatter reader diff --git a/docs/src/content/docs/samples/rv.mdx b/docs/src/content/docs/samples/prr.mdx similarity index 95% rename from docs/src/content/docs/samples/rv.mdx rename to docs/src/content/docs/samples/prr.mdx index c048d47fee..815012d666 100644 --- a/docs/src/content/docs/samples/rv.mdx +++ b/docs/src/content/docs/samples/prr.mdx @@ -6,7 +6,7 @@ sidebar: --- import { Code } from "@astrojs/starlight/components" -import source from "../../../../../packages/vscode/genaisrc/rv.genai.mts?raw" +import source from "../../../../../packages/vscode/genaisrc/prr.genai.mts?raw" Let's delve into the "Reviewer" script, which automates the code review process, making it a breeze for developers. 
@@ -14,12 +14,12 @@ Let's delve into the "Reviewer" script, which automates the code review process, ```ts script({ - title: "Reviewer", - description: "Review the current files", + title: "Pull Request Reviewer", + description: "Review the current pull request", model: "openai:gpt-4o", system: ["system.annotations"], tools: ["fs_find_files", "fs_read_text"], - cache: "rv", + cache: "prr", parameters: { errors: { type: "boolean", diff --git a/genaisrc/genaiscript.d.ts b/genaisrc/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- a/genaisrc/genaiscript.d.ts +++ b/genaisrc/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be 
called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. 
+ * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/auto/genaiscript.d.ts b/packages/auto/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- a/packages/auto/genaiscript.d.ts +++ b/packages/auto/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface ToolDefinition extends DefToolOptions { +interface 
ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. 
+ * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 783ac52bb2..376f1652d0 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -43,7 +43,6 @@ import { serializeError, } from "../../core/src/error" // Error handling utilities import { CORE_VERSION, GITHUB_REPO } from "../../core/src/version" // Core version and repository info -import { grep } from "./grep" // Grep functionality import { logVerbose } from "../../core/src/util" // Utility logging import { semverSatisfies } from "../../core/src/semver" // Semantic version checking @@ -339,12 +338,6 @@ export async function cli() { .option("-o, --out ", "output folder") .action(prompty2genaiscript) // Action to convert prompty files - // Define 'workspace' command group for workspace-related tasks - const workspace = program - .command("workspace") - .description("Workspace tasks") - workspace.command("grep").arguments(" [files...]").action(grep) // Action to grep files in workspace - // Define 'info' command group for utility information tasks const info = program.command("info").description("Utility tasks") info.command("help") diff --git a/packages/cli/src/grep.ts b/packages/cli/src/grep.ts deleted file mode 100644 index 77ed31213f..0000000000 --- a/packages/cli/src/grep.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { grepSearch } from "../../core/src/grep" - -/** - * This module exports a function to perform a grep-like search on specified files. 
- */ - -/** - * Asynchronously performs a grep search pattern on given files and outputs the results. - * - * This function takes a search pattern and an array of file paths, and uses the `grepSearch` - * function to find matches. The results are then output to the console. - * - * @param pattern - The search pattern to match within files. - * @param files - An array of file paths to search within. - */ -export async function grep(pattern: string, files: string[]) { - // Perform the search using the grepSearch function and await the result - const res = await grepSearch(pattern, files) - - // Output the filenames from the search results, each on a new line - console.log(res.files.map((f) => f.filename).join("\n")) -} diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts index a5c7df9e3c..fe654acfc6 100644 --- a/packages/cli/src/run.ts +++ b/packages/cli/src/run.ts @@ -522,7 +522,11 @@ export async function runScript( } } } - // final fail + + logVerbose(`genaiscript: ${result.status}`) + stats.log() + if (outTraceFilename) logVerbose(` trace: ${outTraceFilename}`) + if (result.status !== "success" && result.status !== "cancelled") { const msg = errorMessage(result.error) ?? 
@@ -534,8 +538,5 @@ export async function runScript( if (failOnErrors && result.annotations?.some((a) => a.severity === "error")) return fail("error annotations found", ANNOTATION_ERROR_CODE) - logVerbose("genaiscript: done") - stats.log() - if (outTraceFilename) logVerbose(` trace: ${outTraceFilename}`) return { exitCode: 0, result } } diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts index 2173130726..ef3a51611c 100644 --- a/packages/core/src/chat.ts +++ b/packages/core/src/chat.ts @@ -188,12 +188,19 @@ async function runToolCalls( return { tool, args: tu.parameters } }) } else { - const tool = tools.find((f) => f.spec.name === call.name) + let tool = tools.find((f) => f.spec.name === call.name) if (!tool) { logVerbose(JSON.stringify(call, null, 2)) - throw new Error( + logVerbose( `tool ${call.name} not found in ${tools.map((t) => t.spec.name).join(", ")}` ) + tool = { + spec: { + name: call.name, + description: "unknown tool", + }, + impl: async () => "unknown tool", + } } todos = [{ tool, args: callArgs }] } @@ -203,7 +210,7 @@ async function runToolCalls( const { tool, args } = todo const { maxTokens: maxToolContentTokens = MAX_TOOL_CONTENT_TOKENS, - } = tool.spec + } = tool.options || {} const context: ToolCallContext = { log: (txt: string) => { logVerbose(txt) @@ -596,40 +603,20 @@ export async function executeChatSession( } = genOptions traceLanguageModelConnection(trace, genOptions, connectionToken) const tools: ChatCompletionTool[] = toolDefinitions?.length - ? toolDefinitions.map((f) => ({ - type: "function", - function: f.spec as any, - })) + ? 
toolDefinitions.map( + (f) => + { + type: "function", + function: { + name: f.spec.name, + description: f.spec.description, + parameters: f.spec.parameters as any, + }, + } + ) : undefined trace.startDetails(`๐Ÿง  llm chat`) - if (toolDefinitions?.length) - trace.details( - `๐Ÿ› ๏ธ tools`, - dedent`\`\`\`ts - ${toolDefinitions - .map( - (t) => dedent`/** - * ${t.spec.description}${ - t.spec.parameters?.type === "object" - ? Object.entries( - t.spec.parameters.properties || {} - ) - .filter(([, ps]) => ps.description) - .map( - ([pn, ps]) => - `\n * @param ${pn} ${ps.description}` - ) - .join("") - : "" - } - */ - function ${t.spec.name}(${JSONSchemaToFunctionParameters(t.spec.parameters)}) - ` - ) - .join("\n")} - \`\`\` - ` - ) + if (toolDefinitions?.length) trace.detailsFenced(`๐Ÿ› ๏ธ tools`, tools, "yaml") try { let genVars: Record while (true) { diff --git a/packages/core/src/fetch.ts b/packages/core/src/fetch.ts index bcc77ee20f..72609c0001 100644 --- a/packages/core/src/fetch.ts +++ b/packages/core/src/fetch.ts @@ -165,7 +165,8 @@ export function traceFetchPost( ${Object.entries(headers) .map(([k, v]) => `-H "${k}: ${v}"`) .join("\\\n")} \\ --d '${JSON.stringify(body, null, 2).replace(/'/g, "'\\''")}' +-d '${JSON.stringify(body, null, 2).replace(/'/g, "'\\''")}' +--no-buffer ` if (trace) trace.detailsFenced(`โœ‰๏ธ fetch`, cmd, "bash") else logVerbose(cmd) diff --git a/packages/core/src/file.ts b/packages/core/src/file.ts index cf8645b6d2..5ddd110280 100644 --- a/packages/core/src/file.ts +++ b/packages/core/src/file.ts @@ -178,7 +178,8 @@ export async function resolveFileDataUri( } // Read file from local storage else { - bytes = new Uint8Array(await host.readFile(filename)) + const buf = await host.readFile(filename) + bytes = new Uint8Array(buf) } const mime = (await fileTypeFromBuffer(bytes))?.mime diff --git a/packages/core/src/genaisrc/genaiscript.d.ts b/packages/core/src/genaisrc/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- 
a/packages/core/src/genaisrc/genaiscript.d.ts +++ b/packages/core/src/genaisrc/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. 
True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. 
* @param url diff --git a/packages/core/src/genaisrc/system.agent_docs.genai.mjs b/packages/core/src/genaisrc/system.agent_docs.genai.mjs new file mode 100644 index 0000000000..513a3898ea --- /dev/null +++ b/packages/core/src/genaisrc/system.agent_docs.genai.mjs @@ -0,0 +1,39 @@ +system({ + title: "Agent that can query on the documentation.", +}) + +const docsRoot = env.vars.docsRoot || "docs" +const samplesRoot = env.vars.samplesRoot || "packages/sample/genaisrc/" + +defAgent( + "docs", + "query the documentation", + async (ctx) => { + ctx.$`Your are a helpfull LLM agent that is an expert at Technical documentation. You can provide the best analyzis to any query about the documentation. + + Analyze QUERY and respond with the requested information. + + ## Tools + + The 'md_find_files' can perform a grep search over the documentation files and return the title, description, and filename for each match. + To optimize search, conver the QUERY request into keywords or a regex pattern. + + Try multiple searches if you cannot find relevant files. + + ## Context + + - the documentation is stored in markdown/MDX files in the ${docsRoot} folder + ${samplesRoot ? `- the code samples are stored in the ${samplesRoot} folder` : ""} + ` + }, + { + system: ["system.explanations", "system.github_info"], + tools: [ + "md_find_files", + "md_read_frontmatterm", + "fs_find_files", + "fs_read_file", + ], + maxTokens: 5000, + } +) diff --git a/packages/core/src/genaisrc/system.agent_git.genai.mjs b/packages/core/src/genaisrc/system.agent_git.genai.mjs index d3d53161c0..4304e4703c 100644 --- a/packages/core/src/genaisrc/system.agent_git.genai.mjs +++ b/packages/core/src/genaisrc/system.agent_git.genai.mjs @@ -9,6 +9,8 @@ defAgent( "query a repository using Git to accomplish tasks. Provide all the context information available to execute git queries.", `Your are a helpfull LLM agent that can use the git tools to query the current repository. Answer the question in QUERY. 
- - The current repository is the same as github repository.`, - { model, system: ["system.github_info"], tools: ["git"] } + - The current repository is the same as github repository. + - Prefer using diff to compare files rather than listing files. Listing files is only useful when you need to read the content of the files. + `, + { model, system: ["system.git_info", "system.github_info"], tools: ["git"] } ) diff --git a/packages/core/src/genaisrc/system.agent_github.genai.mjs b/packages/core/src/genaisrc/system.agent_github.genai.mjs index f5a1882367..83c44071f9 100644 --- a/packages/core/src/genaisrc/system.agent_github.genai.mjs +++ b/packages/core/src/genaisrc/system.agent_github.genai.mjs @@ -8,7 +8,9 @@ defAgent( "github", "query GitHub to accomplish tasks", `Your are a helpfull LLM agent that can query GitHub to accomplish tasks. Answer the question in QUERY. - Prefer diffing job logs rather downloading entire logs which can be very large.`, + - Prefer diffing job logs rather downloading entire logs which can be very large. + - Pull Requests ar a specialized type of issues. + `, { model, system: [ diff --git a/packages/core/src/genaisrc/system.fs_diff_files.genai.mjs b/packages/core/src/genaisrc/system.fs_diff_files.genai.mjs index b30fd2743e..4b343ffee7 100644 --- a/packages/core/src/genaisrc/system.fs_diff_files.genai.mjs +++ b/packages/core/src/genaisrc/system.fs_diff_files.genai.mjs @@ -5,7 +5,7 @@ system({ defTool( "fs_diff_files", - "Computes a diff between two files.", + "Computes a diff between two different files. 
Use git diff instead to compare versions of a file.", { type: "object", properties: { @@ -24,9 +24,14 @@ defTool( }, async (args) => { const { context, filename, otherfilename } = args - context.log(`diff: ${filename} ${filename}`) + context.log(`fs diff ${filename}..${otherfilename}`) + if (filename === otherfilename) return "" + const f = await workspace.readText(filename) const of = await workspace.readText(otherfilename) return parsers.diff(f, of) + }, + { + maxTokens: 20000, } ) diff --git a/packages/core/src/genaisrc/system.fs_find_files.genai.mjs b/packages/core/src/genaisrc/system.fs_find_files.genai.mjs index a29c880dd7..7c8bce50b7 100644 --- a/packages/core/src/genaisrc/system.fs_find_files.genai.mjs +++ b/packages/core/src/genaisrc/system.fs_find_files.genai.mjs @@ -33,7 +33,7 @@ defTool( `ls ${glob} ${pattern ? `| grep ${pattern}` : ""} ${frontmatter ? "--frontmatter" : ""}` ) const res = pattern - ? (await workspace.grep(pattern, glob, { readText: false })).files + ? (await workspace.grep(pattern, { glob, readText: false })).files : await workspace.findFiles(glob, { readText: false }) if (!res?.length) return "No files found." 
diff --git a/packages/core/src/genaisrc/system.fs_read_file.genai.mjs b/packages/core/src/genaisrc/system.fs_read_file.genai.mjs index a7cb8b96cb..1d837a99df 100644 --- a/packages/core/src/genaisrc/system.fs_read_file.genai.mjs +++ b/packages/core/src/genaisrc/system.fs_read_file.genai.mjs @@ -51,5 +51,8 @@ defTool( content = lines.slice(line_start, line_end).join("\n") } return content + }, + { + maxTokens: 10000, } ) diff --git a/packages/core/src/genaisrc/system.git.genai.mjs b/packages/core/src/genaisrc/system.git.genai.mjs index a92bf3c5cb..73e79e3f2d 100644 --- a/packages/core/src/genaisrc/system.git.genai.mjs +++ b/packages/core/src/genaisrc/system.git.genai.mjs @@ -3,6 +3,15 @@ system({ description: "Tools to query a git repository.", }) +defTool( + "git_branch_default", + "Gets the default branch using git.", + {}, + async () => { + return await git.defaultBranch() + } +) + defTool( "git_branch_current", "Gets the current branch using git.", @@ -64,7 +73,8 @@ defTool( ...rest, }) return res - } + }, + { maxTokens: 20000 } ) defTool( diff --git a/packages/core/src/genaisrc/system.git_info.genai.mjs b/packages/core/src/genaisrc/system.git_info.genai.mjs new file mode 100644 index 0000000000..9c53d8aa4b --- /dev/null +++ b/packages/core/src/genaisrc/system.git_info.genai.mjs @@ -0,0 +1,8 @@ +system({ + title: "Git repository information", +}) + +const branch = await git.branch() +const defaultBranch = await git.defaultBranch() + +$`git: The current branch is ${branch} and the default branch is ${defaultBranch}.` diff --git a/packages/core/src/genaisrc/system.github_actions.genai.mjs b/packages/core/src/genaisrc/system.github_actions.genai.mjs index efe2e75c33..143c16711f 100644 --- a/packages/core/src/genaisrc/system.github_actions.genai.mjs +++ b/packages/core/src/genaisrc/system.github_actions.genai.mjs @@ -114,8 +114,12 @@ defTool( let log = await github.downloadWorkflowJobLog(job_id, { llmify: true, }) - if (parsers.tokens(log) > 1000) - log = 
"...(truncated, tool long)...\n" + log.slice(-3000) + if ((await tokenizers.count(log)) > 1000) { + log = await tokenizers.truncate(log, 1000, { last: true }) + const annotations = await parsers.annotations(log) + if (annotations.length > 0) + log += "\n\n" + YAML.stringify(annotations) + } return log } ) diff --git a/packages/core/src/genaisrc/system.github_info.genai.mjs b/packages/core/src/genaisrc/system.github_info.genai.mjs index 6b82682902..4287e30b3e 100644 --- a/packages/core/src/genaisrc/system.github_info.genai.mjs +++ b/packages/core/src/genaisrc/system.github_info.genai.mjs @@ -4,7 +4,7 @@ system({ const info = await github.info() if (info?.owner) { - const { auth, owner, repo, baseUrl } = info + const { owner, repo, baseUrl } = info $`- current github repository: ${owner}/${repo}` if (baseUrl) $`- current github base url: ${baseUrl}` } diff --git a/packages/core/src/genaisrc/system.md_find_files.genai.mjs b/packages/core/src/genaisrc/system.md_find_files.genai.mjs new file mode 100644 index 0000000000..a9d353944e --- /dev/null +++ b/packages/core/src/genaisrc/system.md_find_files.genai.mjs @@ -0,0 +1,76 @@ +system({ + title: "Tools to help with documentation tasks", +}) + +const model = (env.vars.mdSummaryModel = "gpt-4o-mini") + +defTool( + "md_find_files", + "Get the file structure of the documentation markdown/MDX files. Retursn filename, title, description for each match. Use pattern to specify a regular expression to search for in the file content.", + { + type: "object", + properties: { + path: { + type: "string", + description: "root path to search for markdown/MDX files", + }, + pattern: { + type: "string", + description: + "regular expression pattern to search for in the file content.", + }, + question: { + type: "string", + description: "Question to ask when computing the summary", + }, + }, + }, + async (args) => { + const { path, pattern, context, question } = args + context.log( + `docs: ls ${path} ${pattern ? 
`| grep ${pattern}` : ""} --frontmatter ${question ? `--ask ${question}` : ""}` + ) + const matches = pattern + ? (await workspace.grep(pattern, { path, readText: true })).files + : await workspace.findFiles(path + "/**/*.{md,mdx}", { + readText: true, + }) + if (!matches?.length) return "No files found." + const q = await host.promiseQueue(5) + const files = await q.mapAll(matches, async ({ filename, content }) => { + const file = { + filename, + } + try { + const fm = await parsers.frontmatter(content) + if (fm) { + file.title = fm.title + file.description = fm.description + } + const { text: summary } = await runPrompt( + (_) => { + _.def("CONTENT", content, { language: "markdown" }) + _.$`As a professional summarizer, create a concise and comprehensive summary of the provided text, be it an article, post, conversation, or passage, while adhering to these guidelines: + ${question ? `* ${question}` : ""} + * The summary is intended for an LLM, not a human. + * Craft a summary that is detailed, thorough, in-depth, and complex, while maintaining clarity and conciseness. + * Incorporate main ideas and essential information, eliminating extraneous language and focusing on critical aspects. + * Rely strictly on the provided text, without including external information. + * Format the summary in one single paragraph form for easy understanding. Keep it short. 
+ * Generate a list of keywords that are relevant to the text.` + }, + { + label: `summarize ${filename}`, + cache: "md_find_files_summary", + model, + } + ) + file.summary = summary + } catch (e) {} + return file + }) + const res = YAML.stringify(files) + return res + }, + { maxTokens: 20000 } +) diff --git a/packages/core/src/globals.ts b/packages/core/src/globals.ts index f5712874a1..7b503ed3f0 100644 --- a/packages/core/src/globals.ts +++ b/packages/core/src/globals.ts @@ -16,10 +16,13 @@ import { readText } from "./fs" import { logVerbose } from "./util" import { GitHubClient } from "./github" import { GitClient } from "./git" +import { estimateTokens, truncateTextToTokens } from "./tokens" +import { resolveTokenEncoder } from "./encoders" +import { runtimeHost } from "./host" /** * This file defines global utilities and installs them into the global context. - * It includes functions to parse and stringify various data formats, handle errors, + * It includes functions to parse and stringify various data formats, handle errors, * and manage GitHub and Git clients. The utilities are frozen to prevent modification. */ @@ -118,6 +121,22 @@ export function installGlobals() { // Instantiate Git client glb.git = new GitClient() + glb.tokenizers = Object.freeze({ + count: async (text, options) => { + const encoder = await resolveTokenEncoder( + options?.model || runtimeHost.defaultModelOptions.model + ) + const c = await estimateTokens(text, encoder) + return c + }, + truncate: async (text, maxTokens, options) => { + const encoder = await resolveTokenEncoder( + options?.model || runtimeHost.defaultModelOptions.model + ) + return await truncateTextToTokens(text, maxTokens, encoder, options) + }, + }) + /** * Asynchronous function to fetch text from a URL or file. * Handles both HTTP(S) URLs and local workspace files. 
diff --git a/packages/core/src/grep.ts b/packages/core/src/grep.ts index 9be15ef367..d1b10f4827 100644 --- a/packages/core/src/grep.ts +++ b/packages/core/src/grep.ts @@ -3,13 +3,20 @@ import { runtimeHost } from "./host" import { JSONLTryParse } from "./jsonl" import { resolveFileContent } from "./file" import { uniq } from "es-toolkit" +import { addLineNumbers } from "./liner" +import { arrayify } from "./util" +import { YAMLStringify } from "./yaml" export async function grepSearch( query: string | RegExp, - globs: string[], - options?: TraceOptions & { readText?: boolean } -): Promise<{ files: WorkspaceFile[] }> { + options?: TraceOptions & { + path?: string[] + glob?: string[] + readText?: boolean + } +): Promise<{ files: WorkspaceFile[]; matches: WorkspaceFile[] }> { const { rgPath } = await import("@lvce-editor/ripgrep") + const { path: paths, glob: globs, readText } = options || {} const args: string[] = ["--json", "--multiline", "--context", "3"] if (typeof query === "string") { args.push("--smart-case", query) @@ -17,13 +24,15 @@ export async function grepSearch( if (query.ignoreCase) args.push("--ignore-case") args.push(query.source) } - for (const glob of globs) { - args.push("-g") - args.push(glob) - } + if (globs) + for (const glob of globs) { + args.push("--glob") + args.push(glob.replace(/^\*\*\//, "")) + } + if (paths) args.push(...arrayify(paths)) const res = await runtimeHost.exec(undefined, rgPath, args, options) const resl = JSONLTryParse(res.stdout) as { - type: "match" + type: "match" | "context" | "begin" | "end" data: { path: { text: string @@ -37,7 +46,18 @@ export async function grepSearch( .filter(({ type }) => type === "match") .map(({ data }) => data.path.text) ).map((filename) => { filename }) - if (options?.readText !== false) + const matches = resl + .filter(({ type }) => type === "match") + .map( + ({ data }) => + { + filename: data.path.text, + content: addLineNumbers(data.lines.text.trimEnd(), { + startLine: data.line_number, + 
}), + } + ) + if (readText !== false) for (const file of files) await resolveFileContent(file) - return { files } + return { files, matches } } diff --git a/packages/core/src/liner.ts b/packages/core/src/liner.ts index a4d8dd980c..66b894e0a9 100644 --- a/packages/core/src/liner.ts +++ b/packages/core/src/liner.ts @@ -1,6 +1,7 @@ // This module provides functions to add and remove line numbers from text. // It includes special handling for "diff" formatted text. +import { start } from "repl" import { llmifyDiff, tryParseDiff } from "./diff" /** @@ -11,7 +12,11 @@ import { llmifyDiff, tryParseDiff } from "./diff" * @param language - Optional parameter to specify the text format. Special handling for "diff". * @returns The text with line numbers added, or processed diff text if applicable. */ -export function addLineNumbers(text: string, language?: string) { +export function addLineNumbers( + text: string, + options?: { language?: string; startLine?: number } +) { + const { language, startLine = 1 } = options || {} if (language === "diff" || tryParseDiff(text)) { const diffed = llmifyDiff(text) // Process the text with a special function for diffs if (diffed !== undefined) return diffed // Return processed text if diff handling was successful @@ -19,7 +24,7 @@ export function addLineNumbers(text: string, language?: string) { return text .split("\n") // Split text into lines - .map((line, i) => `[${i + 1}] ${line}`) // Add line numbers in the format "[line_number] " + .map((line, i) => `[${i + startLine}] ${line}`) // Add line numbers in the format "[line_number] " .join("\n") // Join lines back into a single string } diff --git a/packages/core/src/openai.ts b/packages/core/src/openai.ts index 114037188d..a22eab6bed 100644 --- a/packages/core/src/openai.ts +++ b/packages/core/src/openai.ts @@ -7,7 +7,7 @@ import { } from "./constants" import { estimateTokens } from "./tokens" import { ChatCompletionHandler, LanguageModel, LanguageModelInfo } from "./chat" -import { 
RequestError, errorMessage } from "./error" +import { RequestError, errorMessage, serializeError } from "./error" import { createFetch, traceFetchPost } from "./fetch" import { parseModelIdentifier } from "./models" import { JSON5TryParse } from "./json5" @@ -102,6 +102,18 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async ( } let postReq: any = r2 + // stream_options fails in some cases + if (model === "gpt-4-turbo-v") delete r2.stream_options + if ( + req.messages.find( + (msg) => + msg.role === "user" && + typeof msg.content !== "string" && + msg.content.some((c) => c.type === "image_url") + ) + ) + delete r2.stream_options // crash on usage computation + let url = "" const toolCalls: ChatCompletionToolCall[] = [] @@ -180,42 +192,10 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async ( let chatResp = "" let pref = "" let usage: ChatCompletionUsage + let error: SerializedError const decoder = host.createUTF8Decoder() - if (r.body.getReader) { - const reader = r.body.getReader() - while (!cancellationToken?.isCancellationRequested) { - const { done, value } = await reader.read() - if (done) break - doChunk(value) - } - } else { - for await (const value of r.body as any) { - if (cancellationToken?.isCancellationRequested) break - doChunk(value) - } - } - if (cancellationToken?.isCancellationRequested) finishReason = "cancel" - - trace.appendContent("\n\n") - trace.itemValue(`๐Ÿ finish reason`, finishReason) - if (usage) { - trace.itemValue( - `๐Ÿช™ tokens`, - `${usage.total_tokens} total, ${usage.prompt_tokens} prompt, ${usage.completion_tokens} completion` - ) - } - - if (done && finishReason === "stop") - await cacheStore.set( - cachedKey, - { text: chatResp, finishReason }, - { trace } - ) - - return { text: chatResp, toolCalls, finishReason, usage } - - function doChunk(value: Uint8Array) { + const doChunk = (value: Uint8Array) => { // Massage and parse the chunk of data let chunk = decoder.decode(value, { stream: true }) @@ 
-278,6 +258,43 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async ( } pref = chunk } + + try { + if (r.body.getReader) { + const reader = r.body.getReader() + while (!cancellationToken?.isCancellationRequested) { + const { done, value } = await reader.read() + if (done) break + doChunk(value) + } + } else { + for await (const value of r.body as any) { + if (cancellationToken?.isCancellationRequested) break + doChunk(value) + } + } + if (cancellationToken?.isCancellationRequested) finishReason = "cancel" + } catch (e) { + finishReason = "fail" + error = serializeError(e) + } + + trace.appendContent("\n\n") + trace.itemValue(`๐Ÿ finish reason`, finishReason) + if (usage) { + trace.itemValue( + `๐Ÿช™ tokens`, + `${usage.total_tokens} total, ${usage.prompt_tokens} prompt, ${usage.completion_tokens} completion` + ) + } + + if (done && finishReason === "stop") + await cacheStore.set( + cachedKey, + { text: chatResp, finishReason }, + { trace } + ) + return { text: chatResp, toolCalls, finishReason, usage, error } } async function listModels( diff --git a/packages/core/src/promptcontext.ts b/packages/core/src/promptcontext.ts index 0224fbfdc4..94bf7ebcba 100644 --- a/packages/core/src/promptcontext.ts +++ b/packages/core/src/promptcontext.ts @@ -69,18 +69,35 @@ export async function createPromptContext( }) return res }, - grep: async (query, globs, options) => { - // Perform a grep search on specified files + grep: async ( + query, + options: string | WorkspaceGrepOptions, + options2?: WorkspaceGrepOptions + ) => { + if (typeof options === "string") { + const p = runtimeHost.path + .dirname(options) + .replace(/(^|\/)\*\*$/, "") + const g = runtimeHost.path.basename(options) + options = { + path: p, + glob: g, + ...(options2 || {}), + } + } + const { path, glob, ...rest } = options || {} const grepTrace = trace.startTraceDetails( - `๐ŸŒ grep ${HTMLEscape(typeof query === "string" ? 
query : query.source)}` + `๐ŸŒ grep ${HTMLEscape(typeof query === "string" ? query : query.source)} ${glob ? `--glob ${glob}` : ""} ${path || ""}` ) try { - const { files } = await grepSearch(query, arrayify(globs), { + const { files, matches } = await grepSearch(query, { + path: arrayify(path), + glob: arrayify(glob), + ...rest, trace: grepTrace, - ...options, }) - grepTrace.files(files, { model, secrets: env.secrets }) - return { files } + grepTrace.files(matches, { model, secrets: env.secrets }) + return { files, matches } } finally { grepTrace.endDetails() } diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts index e76c4e3086..f9820e9851 100644 --- a/packages/core/src/promptdom.ts +++ b/packages/core/src/promptdom.ts @@ -8,13 +8,10 @@ import { MarkdownTrace, TraceOptions } from "./trace" import { arrayify, assert, toStringList, trimNewlines } from "./util" import { YAMLStringify } from "./yaml" import { - DEDENT_INSPECT_MAX_DEPTH, MARKDOWN_PROMPT_FENCE, - MAX_TOKENS_ELLIPSE, PROMPT_FENCE, TEMPLATE_ARG_DATA_SLICE_SAMPLE, TEMPLATE_ARG_FILE_MAX_TOKENS, - TOKEN_TRUNCATION_THRESHOLD, } from "./constants" import { parseModelIdentifier } from "./models" import { toChatCompletionUserMessage } from "./chat" @@ -30,7 +27,6 @@ import { resolveTokenEncoder } from "./encoders" import { expandFiles } from "./fs" import { interpolateVariables } from "./mustache" import { createDiff } from "./diff" -import { total } from "@tidyjs/tidy" // Definition of the PromptNode interface which is an essential part of the code structure. export interface PromptNode extends ContextExpansionOptions { @@ -127,12 +123,13 @@ export interface PromptSchemaNode extends PromptNode { } // Interface for a function node. 
-export interface PromptToolNode extends PromptNode, DefToolOptions { +export interface PromptToolNode extends PromptNode { type: "tool" name: string // Function name description: string // Description of the function parameters: JSONSchema // Parameters for the function impl: ChatFunctionHandler // Implementation of the function + options?: DefToolOptions } // Interface for a file merge node. @@ -222,7 +219,7 @@ function renderDefNode(def: PromptDefNode): string { : PROMPT_FENCE const norm = (s: string, lang: string) => { s = (s || "").replace(/\n*$/, "") - if (s && lineNumbers) s = addLineNumbers(s, lang) + if (s && lineNumbers) s = addLineNumbers(s, { language: lang }) if (s) s += "\n" return s } @@ -315,13 +312,13 @@ export function createToolNode( assert(!!description) assert(parameters !== undefined) assert(impl !== undefined) - return { - ...(options || {}), + return { type: "tool", name, description: dedent(description), parameters, impl, + options, } } @@ -979,10 +976,15 @@ ${trimNewlines(schemaText)} ) }, tool: (n) => { - const { name, description, parameters, impl: fn } = n + const { name, description, parameters, impl: fn, options } = n tools.push({ - spec: { name, description, parameters }, + spec: { + name, + description, + parameters, + }, impl: fn, + options, }) trace.detailsFenced( `๐Ÿ› ๏ธ tool ${name}`, diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts index b0626c87fa..cb99e166dc 100644 --- a/packages/core/src/runpromptcontext.ts +++ b/packages/core/src/runpromptcontext.ts @@ -20,7 +20,7 @@ import { GenerationOptions } from "./generation" import { promptParametersSchemaToJSONSchema } from "./parameters" import { consoleLogFormat } from "./logging" import { isGlobMatch } from "./glob" -import { arrayify, logVerbose } from "./util" +import { arrayify, logError, logVerbose } from "./util" import { renderShellOutput } from "./chatrender" import { jinjaRender } from "./jinja" import { mustacheRender } from 
"./mustache" @@ -45,7 +45,12 @@ import { import { renderAICI } from "./aici" import { resolveSystems, resolveTools } from "./systems" import { callExpander } from "./expander" -import { isCancelError, NotSupportedError, serializeError } from "./error" +import { + errorMessage, + isCancelError, + NotSupportedError, + serializeError, +} from "./error" import { resolveLanguageModel } from "./lm" import { concurrentLimit } from "./concurrency" import { Project } from "./ast" @@ -245,8 +250,9 @@ export function createChatGenerationContext( | AgenticToolProviderCallback, description: string, parameters: PromptParametersSchema | JSONSchemaObject, - fn: ChatFunctionHandler - ) => void = (name, description, parameters, fn) => { + fn: ChatFunctionHandler, + defOptions?: DefToolOptions + ) => void = (name, description, parameters, fn, defOptions) => { if (name === undefined || name === null) throw new Error("tool name is missing") @@ -255,7 +261,13 @@ export function createChatGenerationContext( promptParametersSchemaToJSONSchema(parameters) appendChild( node, - createToolNode(name, description, parameterSchema, fn) + createToolNode( + name, + description, + parameterSchema, + fn, + defOptions + ) ) } else if ((name as ToolCallback | AgenticToolCallback).impl) { const tool = name as ToolCallback | AgenticToolCallback @@ -265,7 +277,8 @@ export function createChatGenerationContext( tool.spec.name, tool.spec.description, tool.spec.parameters as any, - tool.impl + tool.impl, + defOptions ) ) } else if ((name as AgenticToolProviderCallback).functions) { @@ -277,7 +290,8 @@ export function createChatGenerationContext( tool.spec.name, tool.spec.description, tool.spec.parameters as any, - tool.impl + tool.impl, + defOptions ) ) } @@ -515,8 +529,10 @@ export function createChatGenerationContext( chatParticipants = cps messages.push(...msgs) - if (errors?.length) + if (errors?.length) { + logError(errors.map((err) => errorMessage(err)).join("\n")) throw new Error("errors while running 
prompt") + } } const systemMessage: ChatCompletionSystemMessageParam = { diff --git a/packages/core/src/tokens.ts b/packages/core/src/tokens.ts index fb3bdf746a..abeeb7b1ec 100644 --- a/packages/core/src/tokens.ts +++ b/packages/core/src/tokens.ts @@ -35,12 +35,13 @@ export function truncateTextToTokens( maxTokens: number, encoder: TokenEncoder, options?: { + last?: boolean threshold?: number } ): string { const tokens = estimateTokens(content, encoder) if (tokens <= maxTokens) return content - const { threshold = TOKEN_TRUNCATION_THRESHOLD } = options || {} + const { last, threshold = TOKEN_TRUNCATION_THRESHOLD } = options || {} let left = 0 let right = content.length @@ -48,7 +49,9 @@ export function truncateTextToTokens( while (Math.abs(left - right) > threshold) { const mid = Math.floor((left + right) / 2) - const truncated = content.slice(0, mid) + MAX_TOKENS_ELLIPSE + const truncated = last + ? MAX_TOKENS_ELLIPSE + content.slice(-mid) + : content.slice(0, mid) + MAX_TOKENS_ELLIPSE const truncatedTokens = estimateTokens(truncated, encoder) if (truncatedTokens > maxTokens) { diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index d5fb28f000..34678dfba4 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -415,7 +415,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -536,6 +536,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. 
True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -559,14 +579,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -627,6 +646,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1009,6 +1029,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload diff --git a/packages/core/src/types/prompt_type.d.ts b/packages/core/src/types/prompt_type.d.ts index 9123564bcf..8a9fc2e566 100644 --- a/packages/core/src/types/prompt_type.d.ts +++ b/packages/core/src/types/prompt_type.d.ts @@ -216,6 +216,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. 
* @param url diff --git a/packages/sample/genaisrc/ask-docs.genai.mjs b/packages/sample/genaisrc/ask-docs.genai.mjs new file mode 100644 index 0000000000..4af095e657 --- /dev/null +++ b/packages/sample/genaisrc/ask-docs.genai.mjs @@ -0,0 +1,11 @@ +script({ + tools: [ + "agent_fs", + "agent_git", + "agent_github", + "agent_interpreter", + "agent_docs", + ], +}) + +$`Based on the samples, update the documentation files for 'defTool. Generate updated files to be saved.` diff --git a/packages/sample/genaisrc/blog/genaiscript.d.ts b/packages/sample/genaisrc/blog/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- a/packages/sample/genaisrc/blog/genaiscript.d.ts +++ b/packages/sample/genaisrc/blog/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: 
number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. 
+ * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/sample/genaisrc/describe-image-run-prompt.genai.js b/packages/sample/genaisrc/describe-image-run-prompt.genai.js index c863a3418c..0b82e115d4 100644 --- a/packages/sample/genaisrc/describe-image-run-prompt.genai.js +++ b/packages/sample/genaisrc/describe-image-run-prompt.genai.js @@ -17,7 +17,7 @@ for (const file of env.files) { _.defImages(file, { detail: "low" }) }, { - model: "openai:gpt-4-turbo-v", + model: "openai:gpt-4o", maxTokens: 4000, } ) diff --git a/packages/sample/genaisrc/describe-image.genai.js b/packages/sample/genaisrc/describe-image.genai.js index bd70a65e5e..e1e949480b 100644 --- a/packages/sample/genaisrc/describe-image.genai.js +++ b/packages/sample/genaisrc/describe-image.genai.js @@ -1,6 +1,6 @@ script({ title: "Describe objects in image", - model: "gpt-4-turbo-v", + model: "gpt-4o", group: "vision", maxTokens: 4000, system: [], diff --git a/packages/sample/genaisrc/genaiscript.d.ts b/packages/sample/genaisrc/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- a/packages/sample/genaisrc/genaiscript.d.ts +++ b/packages/sample/genaisrc/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | 
"system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. 
True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. 
* @param url diff --git a/packages/sample/genaisrc/github-agent.genai.mts b/packages/sample/genaisrc/github-agent.genai.mts index 84c3b22193..edb130283b 100644 --- a/packages/sample/genaisrc/github-agent.genai.mts +++ b/packages/sample/genaisrc/github-agent.genai.mts @@ -1,5 +1,11 @@ script({ - tools: ["agent_fs", "agent_git", "agent_github", "agent_interpreter"], + tools: [ + "agent_fs", + "agent_git", + "agent_github", + "agent_interpreter", + "agent_docs", + ], parameters: { workflow: { type: "string" }, // Workflow name failure_run_id: { type: "number" }, // ID of the failed run diff --git a/packages/sample/genaisrc/grep.genai.js b/packages/sample/genaisrc/grep.genai.js deleted file mode 100644 index 004a736194..0000000000 --- a/packages/sample/genaisrc/grep.genai.js +++ /dev/null @@ -1,9 +0,0 @@ -script({ - title: "grep search", - model: "gpt-3.5-turbo", - tests: {}, -}) - -const { files } = await workspace.grep(/defdata/i, "**/*.genai.{js,mjs}") -def("FILE", files, { maxTokens: 1000 }) -$`Summarize FILE'.` diff --git a/packages/sample/genaisrc/grep.genai.mjs b/packages/sample/genaisrc/grep.genai.mjs new file mode 100644 index 0000000000..a538173ed0 --- /dev/null +++ b/packages/sample/genaisrc/grep.genai.mjs @@ -0,0 +1,24 @@ +script({ + title: "grep search", + model: "gpt-4o-mini", + tests: {}, +}) + +let res + +res = await workspace.grep(/defdata/i) +console.log(YAML.stringify(res.matches)) +if (!res.files.length) throw new Error("No files found.") + +res = await workspace.grep(/defdata/i, { + glob: "**/*.genai.{js,mjs}", +}) +console.log(YAML.stringify(res.matches)) +if (!res.files.length) throw new Error("No files found.") + +res = await workspace.grep(/defdata/i, { + path: "docs", + glob: "*.{md,mdx}", +}) +console.log(YAML.stringify(res.matches)) +if (!res.files.length) throw new Error("No files found.") diff --git a/packages/sample/genaisrc/node/genaiscript.d.ts b/packages/sample/genaisrc/node/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- 
a/packages/sample/genaisrc/node/genaiscript.d.ts +++ b/packages/sample/genaisrc/node/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. 
True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. 
* @param url diff --git a/packages/sample/genaisrc/prd-agent.genai.mts b/packages/sample/genaisrc/prd-agent.genai.mts new file mode 100644 index 0000000000..fa6e2b90b3 --- /dev/null +++ b/packages/sample/genaisrc/prd-agent.genai.mts @@ -0,0 +1,25 @@ +script({ + title: "Pull Request Descriptor - Agent", + description: "Generate a pull request description from the git diff", + tools: ["fs", "git_diff", "git_branch_current", "git_branch_default"], + temperature: 0.5, +}) + +$`You are an expert software developer and architect. + +## Task + +1. Compute the code difference between the current branch and the default branch in this repository (use git diff). +2. Describe a high level summary of the code changes. + +## Instructions + +- This description will be used as the pull request description. +- talk like a software engineer +- try to extract the intent of the changes, don't focus on the details +- use bullet points to list the changes +- use emojis to make the description more engaging +- focus on the most important changes +- ignore comments about imports (like added, remove, changed, etc.) +- do NOT add a "pull request description" header +` diff --git a/packages/sample/genaisrc/prd-tools.genai.mts b/packages/sample/genaisrc/prd-tools.genai.mts new file mode 100644 index 0000000000..397b9c4e14 --- /dev/null +++ b/packages/sample/genaisrc/prd-tools.genai.mts @@ -0,0 +1,25 @@ +script({ + title: "Pull Request Descriptor - Agent", + description: "Generate a pull request description from the git diff", + tools: ["fs", "git_diff", "git_branch_current", "git_branch_default"], + temperature: 0.5, +}) + +$`You are an expert software developer and architect. + +## Task + +1. Compute the code different between the current branch and the default branch in this repository. +2. Describe a high level summary of the code changes. + +## Instructions + +- if the diff is too large, diff each file separately +- This description will be used as the pull request description. 
+- talk like a software engineer +- try to extract the intent of the changes, don't focus on the details +- use bullet points to list the changes +- use emojis to make the description more engaging +- focus on the most important changes +- ignore comments about imports (like added, remove, changed, etc.) +` diff --git a/packages/sample/genaisrc/python/genaiscript.d.ts b/packages/sample/genaisrc/python/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- a/packages/sample/genaisrc/python/genaiscript.d.ts +++ b/packages/sample/genaisrc/python/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. 
Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. 
+ * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/sample/genaisrc/style/genaiscript.d.ts b/packages/sample/genaisrc/style/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- a/packages/sample/genaisrc/style/genaiscript.d.ts +++ b/packages/sample/genaisrc/style/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: 
number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. 
+ * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/sample/src/aici/genaiscript.d.ts b/packages/sample/src/aici/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- a/packages/sample/src/aici/genaiscript.d.ts +++ b/packages/sample/src/aici/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface 
ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. 
+ * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/sample/src/cli.test.ts b/packages/sample/src/cli.test.ts index 0f78880796..033d331cf3 100644 --- a/packages/sample/src/cli.test.ts +++ b/packages/sample/src/cli.test.ts @@ -118,25 +118,3 @@ describe("retrieval", () => { }) }) }) - -describe("workspace", () => { - const cmd = "workspace" - describe("grep", () => { - const action = "grep" - test("markdown", async () => { - console.log(`grep markdown`) - const res = - await $`node ${cli} ${cmd} ${action} markdown "src/rag/*"`.nothrow() - console.log(`grep done`) - assert(res.stdout.includes("markdown.md")) - assert(!res.exitCode) - }) - test("mark[d](o)wn", async () => { - console.log(`grep mark[d](o)wn`) - const res = - await $`node ${cli} ${cmd} ${action} "mark[d](o)wn" "src/rag/*"`.nothrow() - assert(res.stdout.includes("markdown.md")) - assert(!res.exitCode) - }) - }) -}) diff --git a/packages/sample/src/errors/genaiscript.d.ts b/packages/sample/src/errors/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- a/packages/sample/src/errors/genaiscript.d.ts +++ b/packages/sample/src/errors/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | 
"system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. 
True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. 
* @param url diff --git a/packages/sample/src/genaiscript.d.ts b/packages/sample/src/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- a/packages/sample/src/genaiscript.d.ts +++ b/packages/sample/src/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. 
+ */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. 
* @param url diff --git a/packages/sample/src/makecode/genaiscript.d.ts b/packages/sample/src/makecode/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- a/packages/sample/src/makecode/genaiscript.d.ts +++ b/packages/sample/src/makecode/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. 
** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. 
* @param url diff --git a/packages/sample/src/tla/genaiscript.d.ts b/packages/sample/src/tla/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- a/packages/sample/src/tla/genaiscript.d.ts +++ b/packages/sample/src/tla/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. 
+ */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. 
* @param url diff --git a/packages/sample/src/vision/describe-card-schema.genai.js b/packages/sample/src/vision/describe-card-schema.genai.js index d1d19f68a0..b1f7e2ec24 100644 --- a/packages/sample/src/vision/describe-card-schema.genai.js +++ b/packages/sample/src/vision/describe-card-schema.genai.js @@ -2,7 +2,7 @@ script({ description: "Given an image of a receipt, extract a csv of the receipt data", group: "vision", - model: "gpt-4-turbo-v", + model: "gpt-4o", maxTokens: 4000, }) defImages(env.files) diff --git a/packages/sample/src/vision/describe-card.genai.js b/packages/sample/src/vision/describe-card.genai.js index e80c11aa3a..6fce0e5aa4 100644 --- a/packages/sample/src/vision/describe-card.genai.js +++ b/packages/sample/src/vision/describe-card.genai.js @@ -1,7 +1,7 @@ script({ description: "Given an image of business card, extract the details to a csv file", group: "vision", - model: "gpt-4-turbo-v", + model: "gpt-4o", maxTokens: 4000, }) defImages(env.files) diff --git a/packages/sample/src/vision/describe-image-receipt.genai.js b/packages/sample/src/vision/describe-image-receipt.genai.js index 85025a8c52..470df1ffaf 100644 --- a/packages/sample/src/vision/describe-image-receipt.genai.js +++ b/packages/sample/src/vision/describe-image-receipt.genai.js @@ -2,7 +2,7 @@ script({ description: "Given an image of a receipt, extract a csv of the receipt data", group: "vision", - model: "gpt-4-turbo-v", + model: "gpt-4o", maxTokens: 4000, }) defImages(env.files) diff --git a/packages/sample/src/vision/genaiscript.d.ts b/packages/sample/src/vision/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- a/packages/sample/src/vision/genaiscript.d.ts +++ b/packages/sample/src/vision/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | 
"system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. 
True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. + * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/vscode/genaisrc/gcm.genai.mts b/packages/vscode/genaisrc/gcm.genai.mts index d1f97174f6..1de861abe1 100644 --- a/packages/vscode/genaisrc/gcm.genai.mts +++ b/packages/vscode/genaisrc/gcm.genai.mts @@ -9,7 +9,6 @@ script({ // Check for staged changes and stage all changes if none are staged const diff = await git.diff({ staged: true, - excludedPaths: "**/genaiscript.d.ts", askStageOnEmpty: true, }) if (!diff) cancel("no staged changes") @@ -30,6 +29,7 @@ git diff --cached \`\`\` Please generate a concise, one-line commit message for these changes. 
- do NOT add quotes +- use emojis ` // TODO: add a better prompt }, { cache: false, temperature: 0.8 } diff --git a/packages/vscode/genaisrc/genaiscript.d.ts b/packages/vscode/genaisrc/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- a/packages/vscode/genaisrc/genaiscript.d.ts +++ b/packages/vscode/genaisrc/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. 
@@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. 
+ * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/packages/vscode/genaisrc/iat.genai.mts b/packages/vscode/genaisrc/iat.genai.mts index 345b70a12a..574ee9ffc5 100644 --- a/packages/vscode/genaisrc/iat.genai.mts +++ b/packages/vscode/genaisrc/iat.genai.mts @@ -37,9 +37,9 @@ const { docs, force, assets } = env.vars const resolveUrl = (filename: string, url: string) => /^\//.test(url) ? path.join(assets, url.slice(1)) - : /^\.\//.test(url) - ? path.join(path.dirname(filename), url.slice(2)) - : url + : /^http?s:\/\//i.test(url) + ? url + : path.join(path.dirname(filename), url) /** ------------------------------------------------ * Collect files @@ -78,13 +78,11 @@ for (const file of files) { const { text, error } = await runPrompt( (_) => { _.defImages(resolvedUrl) - /** - * TODO: customize the prompt to match your domain - */ _.$` You are an expert in assistive technology. - You will analyze each image and generate a description alt text for the image. + You will analyze the image + and generate a description alt text for the image. - Do not include alt text in the description. - Keep it short but description. @@ -95,16 +93,22 @@ for (const file of files) { maxTokens: 4000, temperature: 0.5, cache: "alt-text", + label: `altextify ${resolvedUrl}`, } ) if (error) throw error + else if (!text) console.log(`.. no description generated`) else imgs[url] = text.replace(/\[/g, "") // remove [ from alt text } + console.log(`.. 
${Object.keys(imgs).length} image alt text generated`) // apply replacements const newContent = content.replace( rx, (m, url) => `![${imgs[url] ?? ""}](${url})` ) // save updated content - if (newContent !== content) await workspace.writeText(filename, newContent) + if (newContent !== content) { + console.log(`.. updating ${filename}`) + await workspace.writeText(filename, newContent) + } } diff --git a/packages/vscode/genaisrc/prd.genai.mts b/packages/vscode/genaisrc/prd.genai.mts index 3b7f8cfacd..cc6d9c9eae 100644 --- a/packages/vscode/genaisrc/prd.genai.mts +++ b/packages/vscode/genaisrc/prd.genai.mts @@ -3,28 +3,22 @@ script({ description: "Generate a pull request description from the git diff", tools: ["fs"], temperature: 0.5, + model: "openai:gpt-4o", }) const defaultBranch = await git.defaultBranch() const changes = await git.diff({ base: defaultBranch, - excludedPaths: [ - ".vscode/*", - "**/yarn.lock", - "**/genaiscript.d.ts", - "*THIRD_PARTY_LICENSES.md", - ], }) console.log(changes) -def("GIT_DIFF", changes, { maxTokens: 20000 }) - // task $`You are an expert software developer and architect. ## Task -- Describe a high level summary of the changes in GIT_DIFF in a way that a software engineer will understand. +Describe a high level summary of the changes in GIT_DIFF in a way that a software engineer will understand. +This description will be used as the pull request description. ## Instructions @@ -36,4 +30,6 @@ $`You are an expert software developer and architect. - ignore comments about imports (like added, remove, changed, etc.) 
` +def("GIT_DIFF", changes, { maxTokens: 30000 }) + // running: make sure to add the -prd flag diff --git a/packages/vscode/genaisrc/prr.genai.mts b/packages/vscode/genaisrc/prr.genai.mts new file mode 100644 index 0000000000..c65515bc62 --- /dev/null +++ b/packages/vscode/genaisrc/prr.genai.mts @@ -0,0 +1,47 @@ +script({ + title: "Pull Request Reviewer", + description: "Review the current pull request", + system: ["system.annotations"], + tools: ["fs", "agent_git", "agent_github"], + cache: "prr", + parameters: { + errors: { + type: "boolean", + description: "Report errors only", + default: false, + }, + }, +}) + +/** ------------------------------------------------ + * Configuration + */ +const { errors } = env.vars + +const defaultBranch = await git.defaultBranch() +const changes = await git.diff({ + base: defaultBranch, +}) +console.log(changes) + +def("GIT_DIFF", changes, { maxTokens: 20000 }) + +$` +## Role + +You are an expert developer at all known programming languages. +You are very helpful at reviewing code and providing constructive feedback. + +## Task + +Report ${errors ? `errors` : `errors and warnings`} in GIT_DIFF using the annotation format. + +## Guidance + +- Use best practices of the programming language of each file. +- If available, provide a URL to the official documentation for the best practice. do NOT invent URLs. +- Analyze ALL the code. Do not be lazy. This is IMPORTANT. +- Use tools to read the entire file content to get more context +${errors ? 
`- Do not report warnings, only errors.` : ``} +` +// TODO: customize with more rules diff --git a/packages/vscode/genaisrc/rv.genai.mts b/packages/vscode/genaisrc/rv.genai.mts deleted file mode 100644 index 84d3efe549..0000000000 --- a/packages/vscode/genaisrc/rv.genai.mts +++ /dev/null @@ -1,60 +0,0 @@ -script({ - title: "Reviewer", - description: "Review the current files", - system: ["system.annotations"], - tools: ["fs"], - cache: "rv", - parameters: { - errors: { - type: "boolean", - description: "Report errors only", - default: false, - }, - }, -}) - -/** ------------------------------------------------ - * Configuration - */ -const { errors } = env.vars - -/** ------------------------------------------------ - * Context - */ -let content = "" -/** - * env.files contains the file selected by the user in VSCode or through the cli arguments. - */ -if (env.files.length) { - content = def("FILE", env.files, { - maxTokens: 5000, - glob: "**/*.{py,ts,cs,rs,c,cpp,h,hpp,js,mjs,mts,md,mdx}", // TODO: - }) -} else { - // No files selected, review the current changes - console.log("No files found. Using git diff.") - const diff = await git.diff({ unified: 6 }) - // customize git diff to filter some files - if (!diff) cancel("No changes found, did you forget to stage your changes?") - content = def("GIT_DIFF", diff, { language: "diff" }) -} - -$` -## Role - -You are an expert developer at all known programming languages. -You are very helpful at reviewing code and providing constructive feedback. - -## Task - -Report ${errors ? `errors` : `errors and warnings`} in ${content} using the annotation format. - -## Guidance - -- Use best practices of the programming language of each file. -- If available, provide a URL to the official documentation for the best practice. do NOT invent URLs. -- Analyze ALL the code. Do not be lazy. This is IMPORTANT. -- Use tools to read the entire file content to get more context -${errors ? 
`- Do not report warnings, only errors.` : ``} -` -// TODO: customize with more rules diff --git a/packages/vscode/genaisrc/st.genai.mts b/packages/vscode/genaisrc/st.genai.mts index 8a1e40b162..4336f8a583 100644 --- a/packages/vscode/genaisrc/st.genai.mts +++ b/packages/vscode/genaisrc/st.genai.mts @@ -37,7 +37,7 @@ if (!transform) ) if (!transform) cancel("transform is missing") -const { files } = await workspace.grep(patternRx, glob) +const { files } = await workspace.grep(patternRx, { glob }) // cached computed transformations const patches = {} for (const file of files) { diff --git a/slides/genaisrc/genaiscript.d.ts b/slides/genaisrc/genaiscript.d.ts index 8ddaef4e9d..74674e26e0 100644 --- a/slides/genaisrc/genaiscript.d.ts +++ b/slides/genaisrc/genaiscript.d.ts @@ -71,6 +71,7 @@ interface PromptLike extends PromptDefinition { type SystemPromptId = OptionsOrString< | "system" + | "system.agent_docs" | "system.agent_fs" | "system.agent_git" | "system.agent_github" @@ -87,12 +88,14 @@ type SystemPromptId = OptionsOrString< | "system.fs_find_files" | "system.fs_read_file" | "system.git" + | "system.git_info" | "system.github_actions" | "system.github_files" | "system.github_info" | "system.github_issues" | "system.github_pulls" | "system.math" + | "system.md_find_files" | "system.md_frontmatter" | "system.python" | "system.python_code_interpreter" @@ -109,6 +112,7 @@ type SystemPromptId = OptionsOrString< > type SystemToolId = OptionsOrString< + | "agent_docs" | "agent_fs" | "agent_git" | "agent_github" @@ -118,6 +122,7 @@ type SystemToolId = OptionsOrString< | "fs_find_files" | "fs_read_file" | "git_branch_current" + | "git_branch_default" | "git_branch_list" | "git_diff" | "git_last_tag" @@ -136,6 +141,7 @@ type SystemToolId = OptionsOrString< | "github_pulls_list" | "github_pulls_review_comments_list" | "math_eval" + | "md_find_files" | "md_read_frontmatter" | "python_code_interpreter_copy_files" | "python_code_interpreter_run" @@ -489,7 +495,7 @@ interface 
WorkspaceFileWithScore extends WorkspaceFile { score?: number } -interface ToolDefinition extends DefToolOptions { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -610,6 +616,26 @@ interface WorkspaceFileCache { values(): Promise } +interface WorkspaceGrepOptions { + /** + * List of paths to + */ + path?: ElementOrArray + /** + * list of filename globs to search. !-prefixed globs are excluded. ** are not supported. + */ + glob?: ElementOrArray + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean +} + +interface WorkspaceGrepResult { + files: WorkspaceFile[] + matches: WorkspaceFile[] +} + interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. @@ -633,14 +659,13 @@ interface WorkspaceFileSystem { */ grep( query: string | RegExp, - globs: string | string[], - options?: { - /** - * Set to false to skip read text content. True by default - */ - readText?: boolean - } - ): Promise<{ files: WorkspaceFile[] }> + options?: WorkspaceGrepOptions + ): Promise + grep( + query: string | RegExp, + glob: string, + options?: Omit + ): Promise /** * Reads the content of a file as text @@ -701,6 +726,7 @@ interface ToolCallContext { interface ToolCallback { spec: ToolDefinition + options?: DefToolOptions impl: ( args: { context: ToolCallContext } & Record ) => Awaitable @@ -1083,6 +1109,28 @@ interface CSVParseOptions { headers?: string[] } +interface Tokenizers { + /** + * Estimates the number of tokens in the content. May not be accurate + * @param model + * @param text + */ + count(text: string, options?: { model: string }): Promise + + /** + * Truncates the text to a given number of tokens, approximation. 
+ * @param model + * @param text + * @param maxTokens + * @param options + */ + truncate( + text: string, + maxTokens: number, + options?: { model?: string; last?: boolean } + ): Promise +} + interface Parsers { /** * Parses text as a JSON5 payload @@ -3152,6 +3200,11 @@ declare var github: GitHub */ declare var git: Git +/** + * Computation around tokens + */ +declare var tokenizers: Tokenizers + /** * Fetches a given URL and returns the response. * @param url diff --git a/slides/pages/alt-text-deploy.md b/slides/pages/alt-text-deploy.md index 6e10cafef5..28307ce5cc 100644 --- a/slides/pages/alt-text-deploy.md +++ b/slides/pages/alt-text-deploy.md @@ -12,7 +12,7 @@ layout: two-cols-header ```js script({ title: "Image Alt Text generator", - model: "gpt-4-turbo-v", + model: "gpt-4o", }) const file = env.files[0] diff --git a/slides/pages/example-doc-review.md b/slides/pages/example-doc-review.md index a538f04800..ee3bd1c322 100644 --- a/slides/pages/example-doc-review.md +++ b/slides/pages/example-doc-review.md @@ -6,22 +6,22 @@ image: /plug-in.png backgroundSize: contain --- -# Example: +# Example: ```js script({ title: "explain-diagram", description: "Given an image of a diagram, explain what it contains", - model: "gpt-4-turbo-v", + model: "gpt-4o", }) defImages(env.files) -$`You are a helpful assistant. Your goal -is to look at the image provided and write -a description of what it contains. You -should infer the context of the diagram, -and write a thorough description of what +$`You are a helpful assistant. Your goal +is to look at the image provided and write +a description of what it contains. 
You +should infer the context of the diagram, +and write a thorough description of what the diagram is illustrating.` -``` \ No newline at end of file +``` diff --git a/slides/pages/example-gpt-v.md b/slides/pages/example-gpt-v.md index a870520795..2670036ace 100644 --- a/slides/pages/example-gpt-v.md +++ b/slides/pages/example-gpt-v.md @@ -1,6 +1,7 @@ --- layout: two-cols-header --- + # GenAIScript Example: Translate Any Diagram to Text ::left:: @@ -16,16 +17,16 @@ script({ title: "explain-diagram", description: "Given an image of a diagram, explain what it contains", - model: "gpt-4-turbo-v", + model: "gpt-4o", }) defImages(env.files) -$`You are a helpful assistant. Your goal -is to look at the image provided and write -a description of what it contains. You -should infer the context of the diagram, -and write a thorough description of what +$`You are a helpful assistant. Your goal +is to look at the image provided and write +a description of what it contains. You +should infer the context of the diagram, +and write a thorough description of what the diagram is illustrating.` ```