Ollama tool calling (#911)

* add kv flash * use ollama:start script * log default model names * refactor: ♻️ use toolTrace in runToolCall function * add type annotation * 🔧 Fix math tool documentation and LLM invocation * save a few calls * feat: add tracing to tool call options 🛠️ * updated git log parsing
microsoft · Dec 5, 2024 · 55813f2 · 55813f2
1 parent 12f99be
commit 55813f2
Show file tree

Hide file tree

Showing 24 changed files with 85 additions and 49 deletions.
diff --git a/.github/workflows/build-genai-commit.yml_ b/.github/workflows/build-genai-commit.yml_
@@ -31,7 +31,7 @@ jobs:
             - name: compile
               run: yarn compile
             - name: download ollama docker
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+              run: yarn ollama:start
             - name: generate dummy result
               working-directory: packages/sample
               run: mkdir -p temp && touch temp/commit-tests.txt

diff --git a/.github/workflows/build-genai.yml b/.github/workflows/build-genai.yml
@@ -25,6 +25,6 @@ jobs:
             - name: compile
               run: yarn compile
             - name: download ollama docker
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+              run: yarn ollama:start
             - name: run test within scripts
               run: yarn test:scripts --out-summary $GITHUB_STEP_SUMMARY --test-delay 10
diff --git a/.github/workflows/genai-commander.yml b/.github/workflows/genai-commander.yml
@@ -71,8 +71,7 @@ jobs:
             # Start Ollama in a docker container
             #
             - name: start ollama
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
-            #
+              run: yarn ollama:start
             # Execute the /genai <command> found in the comment
             #
             - name: genaiscript pr-describe

diff --git a/.github/workflows/genai-issue-review.yml b/.github/workflows/genai-issue-review.yml
@@ -30,7 +30,7 @@ jobs:
             - name: compile
               run: yarn compile
             - name: start ollama
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+              run: yarn ollama:start
             - name: genaiscript issue-review
               run: node packages/cli/built/genaiscript.cjs run issue-reviewer -prc --out-trace $GITHUB_STEP_SUMMARY
               env:

diff --git a/.github/workflows/genai-pr-commit-review.yml b/.github/workflows/genai-pr-commit-review.yml
@@ -33,7 +33,7 @@ jobs:
             - name: compile
               run: yarn compile
             - name: start ollama
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+              run: yarn ollama:start
             - name: git stuff
               run: git fetch origin && git pull origin main:main
             - name: genaiscript pr-review-commit

diff --git a/.github/workflows/genai-pr-docs-commit-review.yml b/.github/workflows/genai-pr-docs-commit-review.yml
@@ -28,7 +28,7 @@ jobs:
             - name: compile
               run: yarn compile
             - name: start ollama
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+              run: yarn ollama:start
             - name: git stuff
               run: git fetch origin && git pull origin main:main
             - name: genaiscript pr-review-commit

diff --git a/.github/workflows/genai-pr-review.yml b/.github/workflows/genai-pr-review.yml
@@ -35,7 +35,7 @@ jobs:
             - name: compile
               run: yarn compile
             - name: start ollama
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+              run: yarn ollama:start
             - name: git stuff
               run: git fetch origin && git pull origin main:main
             - name: genaiscript pr-describe

diff --git a/.github/workflows/ollama.yml b/.github/workflows/ollama.yml
@@ -29,7 +29,7 @@ jobs:
             - name: compile
               run: yarn compile
             - name: start ollama
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+              run: yarn ollama:start
             - name: run summarize-ollama-phi3
               run: yarn test:summarize --model ollama:phi3.5 --out ./temp/summarize-ollama-phi3
               env:

diff --git a/.github/workflows/playwright.yml b/.github/workflows/playwright.yml
@@ -36,6 +36,6 @@ jobs:
             - name: compile
               run: yarn compile
             - name: download ollama docker
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+              run: yarn ollama:start
             - name: run browse-text
               run: yarn run:script browse-text --out ./temp/browse-text --model ollama:phi3.5
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
@@ -11,7 +11,6 @@
         "github.vscode-pull-request-github",
         "ms-toolsai.prompty",
         "unifiedjs.vscode-mdx",
-        "johnpapa.vscode-peacock",
         "usernamehw.errorlens",
         "goessner.mdmath"
     ]

diff --git a/docs/src/components/BuiltinTools.mdx b/docs/src/components/BuiltinTools.mdx
@@ -30,7 +30,7 @@ import { LinkCard } from '@astrojs/starlight/components';
 <LinkCard title="github_pulls_list" description="List all pull requests in a repository." href="/genaiscript/reference/scripts/system#systemgithub_pulls" />
 <LinkCard title="github_pulls_get" description="Get a single pull request by number." href="/genaiscript/reference/scripts/system#systemgithub_pulls" />
 <LinkCard title="github_pulls_review_comments_list" description="Get review comments for a pull request." href="/genaiscript/reference/scripts/system#systemgithub_pulls" />
-<LinkCard title="math_eval" description="Evaluates a math expression" href="/genaiscript/reference/scripts/system#systemmath" />
+<LinkCard title="math_eval" description="Evaluates a math expression. Do NOT try to compute arithmetic operations yourself, use this tool." href="/genaiscript/reference/scripts/system#systemmath" />
 <LinkCard title="md_find_files" description="Get the file structure of the documentation markdown/MDX files. Retursn filename, title, description for each match. Use pattern to specify a regular expression to search for in the file content." href="/genaiscript/reference/scripts/system#systemmd_find_files" />
 <LinkCard title="md_read_frontmatter" description="Reads the frontmatter of a markdown or MDX file." href="/genaiscript/reference/scripts/system#systemmd_frontmatter" />
 <LinkCard title="meta_prompt" description="Tool that applies OpenAI's meta prompt guidelines to a user prompt. Modified from https://platform.openai.com/docs/guides/prompt-generation?context=text-out." href="/genaiscript/reference/scripts/system#systemmeta_prompt" />

diff --git a/docs/src/content/docs/reference/scripts/system.mdx b/docs/src/content/docs/reference/scripts/system.mdx
@@ -1193,6 +1193,7 @@ defTool(
     },
     async (args) => {
         const {
+            context,
             base,
             head,
             paths,
@@ -1212,9 +1213,11 @@ defTool(
             excludedPaths,
             count,
         })
-        return commits
+        const res = commits
             .map(({ sha, date, message }) => `${sha} ${date} ${message}`)
             .join("\n")
+        context.debug(res)
+        return res
     }
 )
 
@@ -1916,7 +1919,7 @@ Math expression evaluator
 
 Register a function that evaluates math expressions
 
--  tool `math_eval`: Evaluates a math expression
+-  tool `math_eval`: Evaluates a math expression. Do NOT try to compute arithmetic operations yourself, use this tool.
 
 `````js wrap title="system.math"
 system({
@@ -1926,7 +1929,7 @@ system({
 
 defTool(
     "math_eval",
-    "Evaluates a math expression",
+    "Evaluates a math expression. Do NOT try to compute arithmetic operations yourself, use this tool.",
     {
         type: "object",
         properties: {

diff --git a/package.json b/package.json
@@ -75,7 +75,7 @@
         "upgrade:deps": "zx scripts/upgrade-deps.mjs",
         "cli": "node packages/cli/built/genaiscript.cjs",
         "ollama": "yarn ollama:stop && yarn ollama:start",
-        "ollama:start": "docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama",
+        "ollama:start": "docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama -e OLLAMA_FLASH_ATTENTION=1 -e OLLAMA_KV_CACHE_TYPE=q8_0 ollama/ollama",
         "ollama:stop": "docker stop ollama && docker rm ollama"
     },
     "release-it": {

diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts
@@ -220,6 +220,9 @@ export async function runScript(
     }
 
     logInfo(`genaiscript: ${scriptId}`)
+    logVerbose(` large : ${host.defaultModelOptions.model}`)
+    logVerbose(` small : ${host.defaultModelOptions.smallModel}`)
+    logVerbose(` vision: ${host.defaultModelOptions.visionModel}`)
 
     if (out) {
         if (removeOut) await emptyDir(out)

diff --git a/packages/core/src/agent.ts b/packages/core/src/agent.ts
@@ -12,6 +12,9 @@ export async function agentQueryMemory(
 ) {
     if (!query) return undefined
 
+    const memories = await loadMemories()
+    if (!memories?.length) return undefined
+
     let memoryAnswer: string | undefined
     // always pre-query memory with cheap model
     const res = await ctx.runPrompt(
@@ -20,7 +23,9 @@ export async function agentQueryMemory(
             - Use MEMORY as the only source of information.
             - If you cannot find relevant information to answer QUERY, return ${TOKEN_NO_ANSWER}. DO NOT INVENT INFORMATION.
             - Be concise. Keep it short. The output is used by another LLM.
-            - Provide important details like identifiers and names.`
+            - Provide important details like identifiers and names.`.role(
+                "system"
+            )
             _.def("QUERY", query)
             await defMemory(_)
         },
@@ -64,7 +69,7 @@ export async function agentAddMemory(
     )
 }
 
-export async function traceAgentMemory(trace: MarkdownTrace) {
+async function loadMemories() {
     const cache = MemoryCache.byName<
         { agent: string; query: string },
         {
@@ -73,8 +78,13 @@ export async function traceAgentMemory(trace: MarkdownTrace) {
             answer: string
         }
     >(AGENT_MEMORY_CACHE_NAME, { lookupOnly: true })
-    if (cache) {
-        const memories = await cache.values()
+    const memories = await cache?.values()
+    return memories
+}
+
+export async function traceAgentMemory(trace: MarkdownTrace) {
+    const memories = await loadMemories()
+    if (memories) {
         try {
             trace.startDetails("🧠 agent memory")
             memories
@@ -92,7 +102,7 @@ export async function traceAgentMemory(trace: MarkdownTrace) {
     }
 }
 
-export async function defMemory(ctx: ChatTurnGenerationContext) {
+async function defMemory(ctx: ChatTurnGenerationContext) {
     const cache = MemoryCache.byName<
         { agent: string; query: string },
         {

diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts
@@ -54,6 +54,7 @@ import {
     ChatCompletionSystemMessageParam,
     ChatCompletionTool,
     ChatCompletionToolCall,
+    ChatCompletionToolMessageParam,
     ChatCompletionUserMessageParam,
     CreateChatCompletionRequest,
 } from "./chattypes"
@@ -168,14 +169,14 @@ async function runToolCalls(
         const toolTrace = trace.startTraceDetails(`📠 tool call ${call.name}`)
         try {
             await runToolCall(
-                trace,
+                toolTrace,
                 call,
                 tools,
                 edits,
                 projFolder,
                 encoder,
                 messages,
-                options
+                { ...options, trace: toolTrace }
             )
         } catch (e) {
             logError(e)
@@ -352,7 +353,7 @@ ${toolResult.join("\n\n")}
             role: "tool",
             content: toolResult.join("\n\n"),
             tool_call_id: call.id,
-        })
+        } satisfies ChatCompletionToolMessageParam)
 }
 
 async function applyRepairs(

diff --git a/packages/core/src/genaisrc/system.git.genai.mjs b/packages/core/src/genaisrc/system.git.genai.mjs
@@ -129,6 +129,7 @@ defTool(
     },
     async (args) => {
         const {
+            context,
             base,
             head,
             paths,
@@ -148,9 +149,11 @@ defTool(
             excludedPaths,
             count,
         })
-        return commits
+        const res = commits
             .map(({ sha, date, message }) => `${sha} ${date} ${message}`)
             .join("\n")
+        context.debug(res)
+        return res
     }
 )
 

diff --git a/packages/core/src/genaisrc/system.math.genai.js b/packages/core/src/genaisrc/system.math.genai.js
@@ -5,7 +5,7 @@ system({
 
 defTool(
     "math_eval",
-    "Evaluates a math expression",
+    "Evaluates a math expression. Do NOT try to compute arithmetic operations yourself, use this tool.",
     {
         type: "object",
         properties: {

diff --git a/packages/core/src/git.ts b/packages/core/src/git.ts
@@ -242,7 +242,7 @@ export class GitClient implements Git {
             .split("\n")
             .map(
                 (line) =>
-                    /^(?<sha>[a-z0-9]{9,40})\s+(?<date>\d{4,4}-\d{2,2}-\d{2,2})\s+(?<message>.*)$/.exec(
+                    /^(?<sha>[a-z0-9]{6,40})\s+(?<date>\d{4,4}-\d{2,2}-\d{2,2})\s+(?<message>.*)$/.exec(
                         line
                     )?.groups
             )

diff --git a/packages/core/src/openai.ts b/packages/core/src/openai.ts
@@ -284,7 +284,7 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async (
         if ((choice as ChatCompletionChunkChoice).delta) {
             const { delta, logprobs } = choice as ChatCompletionChunkChoice
             if (logprobs?.content) lbs.push(...logprobs.content)
-            if (typeof delta?.content === "string") {
+            if (typeof delta?.content === "string" && delta.content !== "") {
                 numTokens += estimateTokens(delta.content, encoder)
                 chatResp += delta.content
                 tokens.push(
@@ -293,7 +293,8 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async (
                     )
                 )
                 trace.appendToken(delta.content)
-            } else if (Array.isArray(delta.tool_calls)) {
+            }
+            if (Array.isArray(delta?.tool_calls)) {
                 const { tool_calls } = delta
                 for (const call of tool_calls) {
                     const tc =
@@ -385,6 +386,7 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async (
             }
             if (cancellationToken?.isCancellationRequested)
                 finishReason = "cancel"
+            else if (toolCalls?.length) finishReason = "tool_calls"
             finishReason = finishReason || "stop" // some provider do not implement this final mesage
         } catch (e) {
             finishReason = "fail"

diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts
@@ -449,9 +449,11 @@ export function createChatGenerationContext(
                         - If you are missing information, reply "${TOKEN_MISSING_INFO}: <what is missing>".
                         - If you cannot answer the query, return "${TOKEN_NO_ANSWER}: <reason>".
                         - Be concise. Minimize output to the most relevant information to save context tokens.
-                        `
+                        `.role("system")
                         if (memoryAnswer)
-                            _.$`- The QUERY applied to the agent memory is in MEMORY.`
+                            _.$`- The QUERY applied to the agent memory is in MEMORY.`.role(
+                                "system"
+                            )
                         _.def("QUERY", query)
                         if (Object.keys(argsNoQuery).length)
                             _.defData("QUERY_CONTEXT", argsNoQuery, {

diff --git a/packages/sample/genaisrc/chunk.genai.mjs b/packages/sample/genaisrc/chunk.genai.mjs
@@ -13,11 +13,19 @@ let summary = ""
 for (const chunk of chunks) {
     const { text } = await runPrompt(
         (ctx) => {
-            ctx.def("CHUNK", chunk)
-            ctx.def("SUMMARY_SO_FAR", summary, { ignoreEmpty: true })
-            ctx.$`Summarize CHUNK. Use SUMMARY_SO_FAR as a starting point (but do not repeat it).`
+            ctx.$`Summarize the content in CHUNK. Use the content in SUMMARY_SO_FAR as a starting point (but do not repeat it). Answer in plain text.`.role(
+                "system"
+            )
+            ctx.def("CHUNK", chunk, { lineNumbers: false })
+            ctx.def("SUMMARY_SO_FAR", summary, {
+                ignoreEmpty: true,
+                lineNumbers: false,
+            })
         },
-        { model: "small", system: ["system"] }
+        {
+            model: "small",
+            label: chunk.content.slice(0, 42) + "...",
+        }
     )
     summary = text
 }