Add agent_interpreter tool and enhance Python code interpreter functionality (#749)
microsoft · Oct 2, 2024 · 4c951a7 · 4c951a7
1 parent 78ef570
commit 4c951a7
Show file tree

Hide file tree

Showing 23 changed files with 302 additions and 78 deletions.
diff --git a/docs/genaisrc/genaiscript.d.ts b/docs/genaisrc/genaiscript.d.ts
diff --git a/docs/src/components/BuiltinTools.mdx b/docs/src/components/BuiltinTools.mdx
@@ -9,6 +9,7 @@ import { LinkCard } from '@astrojs/starlight/components';
 <LinkCard title="agent_fs" description="Agent that can query files to accomplish tasks" href="/genaiscript/reference/scripts/system#systemagent_fs" />
 <LinkCard title="agent_git" description="Agent that can query Git to accomplish tasks" href="/genaiscript/reference/scripts/system#systemagent_git" />
 <LinkCard title="agent_github" description="Agent that can query GitHub  to accomplish tasks" href="/genaiscript/reference/scripts/system#systemagent_github" />
+<LinkCard title="agent_interpreter" description="Run code interpreters for Python, Math. Use this agent to ground computation questions." href="/genaiscript/reference/scripts/system#systemagent_interpreter" />
 <LinkCard title="fs_find_files" description="Finds file matching a glob pattern. Use pattern to specify a regular expression to search for in the file content." href="/genaiscript/reference/scripts/system#systemfs_find_files" />
 <LinkCard title="fs_read_file" description="Reads a file as text from the file system. Returns undefined if the file does not exist." href="/genaiscript/reference/scripts/system#systemfs_read_file" />
 <LinkCard title="git_branch_current" description="Gets the current branch." href="/genaiscript/reference/scripts/system#systemgit" />
@@ -32,7 +33,8 @@ import { LinkCard } from '@astrojs/starlight/components';
 <LinkCard title="github_pulls_review_comments_list" description="Get review comments for a pull request." href="/genaiscript/reference/scripts/system#systemgithub_pulls" />
 <LinkCard title="math_eval" description="Evaluates a math expression" href="/genaiscript/reference/scripts/system#systemmath" />
 <LinkCard title="md_read_frontmatter" description="Reads the frontmatter of a markdown or MDX file." href="/genaiscript/reference/scripts/system#systemmd_frontmatter" />
-<LinkCard title="python_code_interpreter" description="Executes python 3.12 code for Data Analysis tasks in a docker container. The process output is returned. Do not generate visualizations. The only packages available are numpy, pandas, scipy. There is NO network connectivity. Do not attempt to install other packages or make web requests." href="/genaiscript/reference/scripts/system#systempython_code_interpreter" />
+<LinkCard title="python_code_interpreter_run" description="Executes python 3.12 code for Data Analysis tasks in a docker container. The process output is returned. Do not generate visualizations. The only packages available are numpy, pandas, scipy. There is NO network connectivity. Do not attempt to install other packages or make web requests." href="/genaiscript/reference/scripts/system#systempython_code_interpreter" />
+<LinkCard title="python_code_interpreter_copy_files" description="Copy files from the host file system to the container file system" href="/genaiscript/reference/scripts/system#systempython_code_interpreter" />
 <LinkCard title="retrieval_fuzz_search" description="Search for keywords using the full text of files and a fuzzy distance." href="/genaiscript/reference/scripts/system#systemretrieval_fuzz_search" />
 <LinkCard title="retrieval_vector_search" description="Search files using embeddings and similarity distance." href="/genaiscript/reference/scripts/system#systemretrieval_vector_search" />
 <LinkCard title="retrieval_web_search" description="Search the web for a user query using Bing Search." href="/genaiscript/reference/scripts/system#systemretrieval_web_search" />

diff --git a/docs/src/content/docs/reference/scripts/system.mdx b/docs/src/content/docs/reference/scripts/system.mdx
@@ -280,6 +280,65 @@ defTool(
 `````
 
 
+### `system.agent_interpreter`
+
+Agent that can run code interpreters for Python, Math.
+
+
+
+-  tool `agent_interpreter`: Run code interpreters for Python, Math. Use this agent to ground computation questions.
+
+`````js wrap title="system.agent_interpreter"
+system({
+    title: "Agent that can run code interpreters for Python, Math.",
+})
+
+const model = env.vars.agentInterpreterModel
+defTool(
+    "agent_interpreter",
+    "Run code interpreters for Python, Math. Use this agent to ground computation questions.",
+    {
+        query: {
+            type: "string",
+            description: "Query to answer",
+        },
+        required: ["query"],
+    },
+    async (args) => {
+        const { context, query } = args
+        context.log(`agent interpreter: ${query}`)
+        const res = await runPrompt(
+            (_) => {
+                _.def("QUERY", query)
+                _.$`You are an agent that can run code interpreters for Python, Math. 
+                
+                Analyze and answer QUERY. Use the best tool to ground computation questions.
+                
+                - Assume that your answer will be analyzed by an AI, not a human.
+                - Prefer math_eval for math expressions as it is much more efficient.
+                - To use file data in python, prefer copying data files using python_code_interpreter_copy_files rather than inline data in code.
+                - If you cannot answer the query, return an empty string.
+                `
+            },
+            {
+                model,
+                system: [
+                    "system",
+                    "system.tools",
+                    "system.explanations",
+                    "system.math",
+                    "system.python_code_interpreter",
+                ],
+                label: "agent interpreter",
+            }
+        )
+        return res
+    }
+)
+
+`````
+
+
 ### `system.annotations`
 
 Emits annotations compatible with GitHub Actions
@@ -1277,7 +1336,8 @@ Python Dockerized code execution for data analysis
 
 
 
--  tool `python_code_interpreter`: Executes python 3.12 code for Data Analysis tasks in a docker container. The process output is returned. Do not generate visualizations. The only packages available are numpy, pandas, scipy. There is NO network connectivity. Do not attempt to install other packages or make web requests.
+-  tool `python_code_interpreter_run`: Executes python 3.12 code for Data Analysis tasks in a docker container. The process output is returned. Do not generate visualizations. The only packages available are numpy, pandas, scipy. There is NO network connectivity. Do not attempt to install other packages or make web requests.
+-  tool `python_code_interpreter_copy_files`: Copy files from the host file system to the container file system
 
 `````js wrap title="system.python_code_interpreter"
 system({
@@ -1288,46 +1348,75 @@ const image = env.vars.pythonImage ?? "python:3.12"
 const packages = ["numpy", "pandas", "scipy"]
 
 const queue = host.promiseQueue(1)
-let container = null
+
+/** @type {ContainerHost} */
+let _container = null
+
+/** @type {Promise<ContainerHost>} */
+const getContainer = queue.add(async () => {
+    if (!_container) {
+        console.log(`python: preparing container...`)
+        _container = await host.container({
+            image,
+            networkEnabled: true,
+        })
+        const res = await _container.exec("pip", [
+            "install",
+            "--root-user-action",
+            "ignore",
+            ...packages,
+        ])
+        if (res.failed) throw new Error(`Failed to install requirements`)
+        await _container.disconnect()
+    }
+    return _container
+})
 
 defTool(
-    "python_code_interpreter",
+    "python_code_interpreter_run",
     "Executes python 3.12 code for Data Analysis tasks in a docker container. The process output is returned. Do not generate visualizations. The only packages available are numpy, pandas, scipy. There is NO network connectivity. Do not attempt to install other packages or make web requests.",
     {
-        type: "object",
-        properties: {
-            main: {
-                type: "string",
-                description: "python 3.12 source code to execute",
-            },
+        main: {
+            type: "string",
+            description: "python 3.12 source code to execute",
         },
         required: ["main"],
     },
-    async (args) =>
-        // serialize work
-        await queue.add(async () => {
-            const { main = "" } = args
-            console.log(`python code interpreter: ` + main)
-            if (!container) {
-                console.log(`python: preparing container...`)
-                container = await host.container({
-                    image,
-                    networkEnabled: true,
-                })
-                const res = await container.exec("pip", [
-                    "install",
-                    "--root-user-action",
-                    "ignore",
-                    ...packages,
-                ])
-                if (res.failed)
-                    throw new Error(`Failed to install requirements`)
-                await container.disconnect()
-            }
+    async (args) => {
+        const { main = "" } = args
+        console.log(`python code interpreter: run`)
+        const container = await getContainer
+        return await queue.add(async () => {
             await container.writeText("main.py", main)
             const res = await container.exec("python", ["main.py"])
             return res
         })
+    }
+)
+
+defTool(
+    "python_code_interpreter_copy_files",
+    "Copy files from the host file system to the container file system",
+    {
+        from: {
+            type: "string",
+            description: "Host file path",
+        },
+        to: {
+            type: "string",
+            description: "Container file path",
+        },
+        required: ["from"],
+    },
+    async (args) => {
+        const { from, to = "" } = args
+        console.log(`python code interpreter: cp ${from} ${to}`)
+        const container = await getContainer
+        return await queue.add(async () => {
+            await container.copyTo(from, to)
+            return "OK"
+        })
+    }
 )
 
 `````

diff --git a/genaisrc/genaiscript.d.ts b/genaisrc/genaiscript.d.ts
diff --git a/packages/auto/genaiscript.d.ts b/packages/auto/genaiscript.d.ts
diff --git a/packages/cli/src/docker.ts b/packages/cli/src/docker.ts
@@ -304,7 +304,9 @@ export class DockerManager {
                 const files = await host.findFiles(from)
                 for (const file of files) {
                     const source = host.path.resolve(file)
-                    const target = host.path.resolve(hostPath, to, file)
+                    const target = to
+                        ? host.path.resolve(hostPath, to, file)
+                        : host.path.resolve(hostPath, file)
                     await ensureDir(host.path.dirname(target))
                     await copyFile(source, target)
                 }

diff --git a/packages/core/src/genaisrc/genaiscript.d.ts b/packages/core/src/genaisrc/genaiscript.d.ts
diff --git a/packages/core/src/genaisrc/system.agent_interpreter.genai.mjs b/packages/core/src/genaisrc/system.agent_interpreter.genai.mjs
@@ -0,0 +1,46 @@
+system({
+    title: "Agent that can run code interpreters for Python, Math.",
+})
+// Registers the "agent_interpreter" tool: it forwards a query to a nested prompt that runs with the math and Python code interpreter system scripts enabled.
+const model = env.vars.agentInterpreterModel // model handed to the nested runPrompt call below, read from the agentInterpreterModel variable
+defTool(
+    "agent_interpreter", // tool name
+    "Run code interpreters for Python, Math. Use this agent to ground computation questions.",
+    {
+        query: {
+            type: "string",
+            description: "Query to answer",
+        },
+        required: ["query"],
+    }, // tool parameters: a single required "query" string
+    async (args) => {
+        const { context, query } = args // context is used for logging on the next line; query is the text to answer
+        context.log(`agent interpreter: ${query}`)
+        const res = await runPrompt(
+            (_) => {
+                _.def("QUERY", query) // defines the query under the name QUERY, which the instructions below refer to
+                _.$`You are an agent that can run code interpreters for Python, Math. 
+                
+                Analyze and answer QUERY. Use the best tool to ground computation questions.
+                
+                - Assume that your answer will be analyzed by an AI, not a human.
+                - Prefer math_eval for math expressions as it is much more efficient.
+                - To use file data in python, prefer copying data files using python_code_interpreter_copy_files rather than inline data in code.
+                - If you cannot answer the query, return an empty string.
+                `
+            },
+            {
+                model,
+                system: [ // system scripts enabled for the nested prompt, including the math and Python code interpreter scripts
+                    "system",
+                    "system.tools",
+                    "system.explanations",
+                    "system.math",
+                    "system.python_code_interpreter",
+                ],
+                label: "agent interpreter",
+            }
+        )
+        return res // the runPrompt result is returned as the tool's output
+    }
+)