From 55813f2b3b9e638527816adc80411344fca7eca5 Mon Sep 17 00:00:00 2001
From: Peli de Halleux
Date: Wed, 4 Dec 2024 22:56:46 -0800
Subject: [PATCH] Ollama tool calling (#911)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add kv flags
* use ollama:start script
* log default model names
* refactor: ♻️ use toolTrace in runToolCall function
* add type annotation
* 🔧 Fix math tool documentation and LLM invocation
* save a few calls
* feat: add tracing to tool call options 🛠️
* updated git log parsing
---
 .github/workflows/build-genai-commit.yml_         |  2 +-
 .github/workflows/build-genai.yml                 |  2 +-
 .github/workflows/genai-commander.yml             |  3 +--
 .github/workflows/genai-issue-review.yml          |  2 +-
 .github/workflows/genai-pr-commit-review.yml      |  2 +-
 .../workflows/genai-pr-docs-commit-review.yml     |  2 +-
 .github/workflows/genai-pr-review.yml             |  2 +-
 .github/workflows/ollama.yml                      |  2 +-
 .github/workflows/playwright.yml                  |  2 +-
 .vscode/extensions.json                           |  1 -
 docs/src/components/BuiltinTools.mdx              |  2 +-
 .../content/docs/reference/scripts/system.mdx     |  9 ++++---
 package.json                                      |  2 +-
 packages/cli/src/run.ts                           |  3 +++
 packages/core/src/agent.ts                        | 20 ++++++++++----
 packages/core/src/chat.ts                         |  7 ++---
 .../core/src/genaisrc/system.git.genai.mjs        |  5 +++-
 .../core/src/genaisrc/system.math.genai.js        |  2 +-
 packages/core/src/git.ts                          |  2 +-
 packages/core/src/openai.ts                       |  6 +++--
 packages/core/src/runpromptcontext.ts             |  6 +++--
 packages/sample/genaisrc/chunk.genai.mjs          | 16 +++++++++---
 .../sample/genaisrc/llm-as-expert.genai.mts       | 26 +++++++++++--------
 .../genaisrc/math-agent-system.genai.js           |  8 +++---
 24 files changed, 85 insertions(+), 49 deletions(-)

diff --git a/.github/workflows/build-genai-commit.yml_ b/.github/workflows/build-genai-commit.yml_
index 6e56b6934e..078201f5bf 100644
--- a/.github/workflows/build-genai-commit.yml_
+++ b/.github/workflows/build-genai-commit.yml_
@@ -31,7 +31,7 @@ jobs:
             - name: compile
               run: yarn compile
             - name: download ollama docker
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+              run: yarn ollama:start
             - name: generate dummy result
               working-directory: packages/sample
               run: mkdir -p temp && touch temp/commit-tests.txt
diff --git a/.github/workflows/build-genai.yml b/.github/workflows/build-genai.yml
index 7b0b85a25a..246903f6b8 100644
--- a/.github/workflows/build-genai.yml
+++ b/.github/workflows/build-genai.yml
@@ -25,6 +25,6 @@ jobs:
             - name: compile
               run: yarn compile
             - name: download ollama docker
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+              run: yarn ollama:start
             - name: run test within scripts
               run: yarn test:scripts --out-summary $GITHUB_STEP_SUMMARY --test-delay 10
diff --git a/.github/workflows/genai-commander.yml b/.github/workflows/genai-commander.yml
index ef10ff2562..db61091ab9 100644
--- a/.github/workflows/genai-commander.yml
+++ b/.github/workflows/genai-commander.yml
@@ -71,8 +71,7 @@ jobs:
             # Start Ollama in a docker container
             #
             - name: start ollama
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
-            #
+              run: yarn ollama:start
             # Execute the /genai found in the comment
             #
             - name: genaiscript pr-describe
diff --git a/.github/workflows/genai-issue-review.yml b/.github/workflows/genai-issue-review.yml
index b6ee015d7e..a506c8db22 100644
--- a/.github/workflows/genai-issue-review.yml
+++ b/.github/workflows/genai-issue-review.yml
@@ -30,7 +30,7 @@ jobs:
             - name: compile
               run: yarn compile
             - name: start ollama
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+              run: yarn ollama:start
             - name: genaiscript issue-review
              run: node packages/cli/built/genaiscript.cjs run issue-reviewer -prc --out-trace $GITHUB_STEP_SUMMARY
               env:
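All of the workflow diffs above converge on the shared `yarn ollama:start` script instead of repeating the raw `docker run` invocation. As orientation, here is a minimal smoke-test sketch (a hypothetical script, not part of this patch) of how any script exercises the container once it is listening on port 11434, using the same `ollama:phi3.5` model the ollama.yml workflow tests:

```js
// hypothetical smoke test — assumes `yarn ollama:start` has the container up
// and the phi3.5 model pulled; mirrors the --model overrides in the workflows
script({ model: "ollama:phi3.5" })
$`Reply with the single word "ok".`
```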
diff --git a/.github/workflows/genai-pr-commit-review.yml b/.github/workflows/genai-pr-commit-review.yml
index daaf097471..676a50fd1d 100644
--- a/.github/workflows/genai-pr-commit-review.yml
+++ b/.github/workflows/genai-pr-commit-review.yml
@@ -33,7 +33,7 @@ jobs:
             - name: compile
               run: yarn compile
             - name: start ollama
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+              run: yarn ollama:start
             - name: git stuff
               run: git fetch origin && git pull origin main:main
             - name: genaiscript pr-review-commit
diff --git a/.github/workflows/genai-pr-docs-commit-review.yml b/.github/workflows/genai-pr-docs-commit-review.yml
index 7269fbca66..e4b56b6d9c 100644
--- a/.github/workflows/genai-pr-docs-commit-review.yml
+++ b/.github/workflows/genai-pr-docs-commit-review.yml
@@ -28,7 +28,7 @@ jobs:
             - name: compile
               run: yarn compile
             - name: start ollama
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+              run: yarn ollama:start
             - name: git stuff
               run: git fetch origin && git pull origin main:main
             - name: genaiscript pr-review-commit
diff --git a/.github/workflows/genai-pr-review.yml b/.github/workflows/genai-pr-review.yml
index 0d7dad11ee..eaf7a1fd5f 100644
--- a/.github/workflows/genai-pr-review.yml
+++ b/.github/workflows/genai-pr-review.yml
@@ -35,7 +35,7 @@ jobs:
             - name: compile
               run: yarn compile
             - name: start ollama
-              run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+              run: yarn ollama:start
             - name: git stuff
               run: git fetch origin && git pull origin main:main
             - name: genaiscript pr-describe
diff --git a/.github/workflows/ollama.yml b/.github/workflows/ollama.yml
index 8a68f427be..39f50fe27e 100644
--- a/.github/workflows/ollama.yml
+++ b/.github/workflows/ollama.yml
@@ -29,7 +29,7 @@ jobs:
            - name: compile
              run: yarn compile
            - name: start ollama
-             run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+             run: yarn ollama:start
            - name: run summarize-ollama-phi3
              run: yarn test:summarize --model ollama:phi3.5 --out ./temp/summarize-ollama-phi3
              env:
diff --git a/.github/workflows/playwright.yml b/.github/workflows/playwright.yml
index e605b4a266..240a106f12 100644
--- a/.github/workflows/playwright.yml
+++ b/.github/workflows/playwright.yml
@@ -36,6 +36,6 @@ jobs:
            - name: compile
              run: yarn compile
            - name: download ollama docker
-             run: docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
+             run: yarn ollama:start
            - name: run browse-text
              run: yarn run:script browse-text --out ./temp/browse-text --model ollama:phi3.5
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
index 137035d640..d32ab14003 100644
--- a/.vscode/extensions.json
+++ b/.vscode/extensions.json
@@ -11,7 +11,6 @@
         "github.vscode-pull-request-github",
         "ms-toolsai.prompty",
         "unifiedjs.vscode-mdx",
-        "johnpapa.vscode-peacock",
         "usernamehw.errorlens",
         "goessner.mdmath"
     ]
diff --git a/docs/src/components/BuiltinTools.mdx b/docs/src/components/BuiltinTools.mdx
index e1eb2590c1..7edf0b1683 100644
--- a/docs/src/components/BuiltinTools.mdx
+++ b/docs/src/components/BuiltinTools.mdx
@@ -30,7 +30,7 @@ import { LinkCard } from '@astrojs/starlight/components';
-<LinkCard title="math_eval" description="Evaluates a math expression" href="/genaiscript/reference/scripts/system#systemmath" />
+<LinkCard title="math_eval" description="Evaluates a math expression. Do NOT try to compute arithmetic operations yourself, use this tool." href="/genaiscript/reference/scripts/system#systemmath" />
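The `math_eval` description change above (mirrored in `system.mdx` and `system.math.genai.js` below) is meant to stop tool-capable models from doing arithmetic in-token. A hypothetical usage sketch — the system list mirrors `math-agent-system.genai.js` later in this patch, and the question is illustrative:

```js
// illustrative only — opts into the documented math tool so the model
// routes arithmetic through math_eval instead of guessing digits itself
script({ system: ["system", "system.math", "system.tools"] })
$`What is 17.3 * 9.11? Use the math_eval tool.`
```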
diff --git a/docs/src/content/docs/reference/scripts/system.mdx b/docs/src/content/docs/reference/scripts/system.mdx
index a397d9db5f..62946f2775 100644
--- a/docs/src/content/docs/reference/scripts/system.mdx
+++ b/docs/src/content/docs/reference/scripts/system.mdx
@@ -1193,6 +1193,7 @@ defTool(
     },
     async (args) => {
         const {
+            context,
             base,
             head,
             paths,
@@ -1212,9 +1213,11 @@ defTool(
             excludedPaths,
             count,
         })
-        return commits
+        const res = commits
             .map(({ sha, date, message }) => `${sha} ${date} ${message}`)
             .join("\n")
+        context.debug(res)
+        return res
     }
 )

@@ -1916,7 +1919,7 @@ Math expression evaluator

 Register a function that evaluates math expressions

-- tool `math_eval`: Evaluates a math expression
+- tool `math_eval`: Evaluates a math expression. Do NOT try to compute arithmetic operations yourself, use this tool.

 `````js wrap title="system.math"
 system({
@@ -1926,7 +1929,7 @@ system({

 defTool(
     "math_eval",
-    "Evaluates a math expression",
+    "Evaluates a math expression. Do NOT try to compute arithmetic operations yourself, use this tool.",
     {
         type: "object",
         properties: {
diff --git a/package.json b/package.json
index 8fa0aa3f0d..dd7ba486db 100644
--- a/package.json
+++ b/package.json
@@ -75,7 +75,7 @@
         "upgrade:deps": "zx scripts/upgrade-deps.mjs",
         "cli": "node packages/cli/built/genaiscript.cjs",
         "ollama": "yarn ollama:stop && yarn ollama:start",
-        "ollama:start": "docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama",
+        "ollama:start": "docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama -e OLLAMA_FLASH_ATTENTION=1 -e OLLAMA_KV_CACHE_TYPE=q8_0 ollama/ollama",
         "ollama:stop": "docker stop ollama && docker rm ollama"
     },
     "release-it": {
diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts
index a91333d857..ca59392edb 100644
--- a/packages/cli/src/run.ts
+++ b/packages/cli/src/run.ts
@@ -220,6 +220,9 @@ export async function runScript(
     }

     logInfo(`genaiscript: ${scriptId}`)
+    logVerbose(`  large : ${host.defaultModelOptions.model}`)
+    logVerbose(`  small : ${host.defaultModelOptions.smallModel}`)
+    logVerbose(`  vision: ${host.defaultModelOptions.visionModel}`)
     if (out) {
         if (removeOut) await emptyDir(out)
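The three new `logVerbose` lines echo the host's default model aliases at startup. A sketch (prompt text is illustrative) of where those aliases surface in user scripts:

```js
script({ model: "large" }) // resolves to host.defaultModelOptions.model
// the "small" alias resolves to host.defaultModelOptions.smallModel
const { text } = await runPrompt((_) => _.$`Write a haiku about git.`, {
    model: "small",
})
```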
diff --git a/packages/core/src/agent.ts b/packages/core/src/agent.ts
index f7a7b61879..52dcb1db2b 100644
--- a/packages/core/src/agent.ts
+++ b/packages/core/src/agent.ts
@@ -12,6 +12,9 @@
 ) {
     if (!query) return undefined

+    const memories = await loadMemories()
+    if (!memories?.length) return undefined
+
     let memoryAnswer: string | undefined
     // always pre-query memory with cheap model
     const res = await ctx.runPrompt(
@@ -20,7 +23,9 @@
             - Use MEMORY as the only source of information.
             - If you cannot find relevant information to answer QUERY, return ${TOKEN_NO_ANSWER}. DO NOT INVENT INFORMATION.
             - Be concise. Keep it short. The output is used by another LLM.
-            - Provide important details like identifiers and names.`
+            - Provide important details like identifiers and names.`.role(
+                "system"
+            )
             _.def("QUERY", query)
             await defMemory(_)
         },
@@ -64,7 +69,7 @@ export async function agentAddMemory(
     )
 }

-export async function traceAgentMemory(trace: MarkdownTrace) {
+async function loadMemories() {
     const cache = MemoryCache.byName<
         { agent: string; query: string },
         {
@@ -73,8 +78,13 @@ export async function traceAgentMemory(trace: MarkdownTrace) {
             answer: string
         }
     >(AGENT_MEMORY_CACHE_NAME, { lookupOnly: true })
-    if (cache) {
-        const memories = await cache.values()
+    const memories = await cache?.values()
+    return memories
+}
+
+export async function traceAgentMemory(trace: MarkdownTrace) {
+    const memories = await loadMemories()
+    if (memories) {
         try {
             trace.startDetails("🧠 agent memory")
             memories
@@ -92,7 +102,7 @@ export async function traceAgentMemory(trace: MarkdownTrace) {
     }
 }

-export async function defMemory(ctx: ChatTurnGenerationContext) {
+async function defMemory(ctx: ChatTurnGenerationContext) {
     const cache = MemoryCache.byName<
         { agent: string; query: string },
         {
diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts
index 06953d10ae..c842278d4e 100644
--- a/packages/core/src/chat.ts
+++ b/packages/core/src/chat.ts
@@ -54,6 +54,7 @@ import {
     ChatCompletionSystemMessageParam,
     ChatCompletionTool,
     ChatCompletionToolCall,
+    ChatCompletionToolMessageParam,
     ChatCompletionUserMessageParam,
     CreateChatCompletionRequest,
 } from "./chattypes"
@@ -168,14 +169,14 @@ async function runToolCalls(
         const toolTrace = trace.startTraceDetails(`📠 tool call ${call.name}`)
         try {
             await runToolCall(
-                trace,
+                toolTrace,
                 call,
                 tools,
                 edits,
                 projFolder,
                 encoder,
                 messages,
-                options
+                { ...options, trace: toolTrace }
             )
         } catch (e) {
             logError(e)
@@ -352,7 +353,7 @@ ${toolResult.join("\n\n")}
         role: "tool",
         content: toolResult.join("\n\n"),
         tool_call_id: call.id,
-    })
+    } satisfies ChatCompletionToolMessageParam)
 }

 async function applyRepairs(
diff --git a/packages/core/src/genaisrc/system.git.genai.mjs b/packages/core/src/genaisrc/system.git.genai.mjs
index 4484543f8b..b753cd36e4 100644
--- a/packages/core/src/genaisrc/system.git.genai.mjs
+++ b/packages/core/src/genaisrc/system.git.genai.mjs
@@ -129,6 +129,7 @@ defTool(
     },
     async (args) => {
         const {
+            context,
             base,
             head,
             paths,
@@ -148,9 +149,11 @@ defTool(
             excludedPaths,
             count,
         })
-        return commits
+        const res = commits
             .map(({ sha, date, message }) => `${sha} ${date} ${message}`)
             .join("\n")
+        context.debug(res)
+        return res
     }
 )
diff --git a/packages/core/src/genaisrc/system.math.genai.js b/packages/core/src/genaisrc/system.math.genai.js
index 1ce90785ca..0606bbaf03 100644
--- a/packages/core/src/genaisrc/system.math.genai.js
+++ b/packages/core/src/genaisrc/system.math.genai.js
@@ -5,7 +5,7 @@ system({

 defTool(
     "math_eval",
-    "Evaluates a math expression",
+    "Evaluates a math expression. Do NOT try to compute arithmetic operations yourself, use this tool.",
     {
         type: "object",
         properties: {
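Tool callbacks now receive a `context` whose `debug` output lands in the per-call `📠 tool call` node that `runToolCalls` opens in `chat.ts`. A sketch of the same pattern in a made-up tool (name, schema, and body are hypothetical; only the `context.debug` plumbing comes from this patch):

```js
defTool(
    "word_count", // hypothetical tool, not part of this patch
    "Counts the words in a text",
    {
        type: "object",
        properties: {
            text: { type: "string", description: "the text to count" },
        },
        required: ["text"],
    },
    async (args) => {
        const { context, text } = args
        const res = String(text.split(/\s+/).filter(Boolean).length)
        context.debug(res) // surfaces under the per-call trace node
        return res
    }
)
```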
diff --git a/packages/core/src/git.ts b/packages/core/src/git.ts
index 95dbd371c6..389bf35b31 100644
--- a/packages/core/src/git.ts
+++ b/packages/core/src/git.ts
@@ -242,7 +242,7 @@ export class GitClient implements Git {
             .split("\n")
             .map(
                 (line) =>
-                    /^(?<sha>[a-z0-9]{9,40})\s+(?<date>\d{4,4}-\d{2,2}-\d{2,2})\s+(?<message>.*)$/.exec(
+                    /^(?<sha>[a-z0-9]{6,40})\s+(?<date>\d{4,4}-\d{2,2}-\d{2,2})\s+(?<message>.*)$/.exec(
                         line
                     )?.groups
             )
diff --git a/packages/core/src/openai.ts b/packages/core/src/openai.ts
index 157d8cfe89..3e1019247c 100644
--- a/packages/core/src/openai.ts
+++ b/packages/core/src/openai.ts
@@ -284,7 +284,7 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async (
                 if ((choice as ChatCompletionChunkChoice).delta) {
                     const { delta, logprobs } = choice as ChatCompletionChunkChoice
                     if (logprobs?.content) lbs.push(...logprobs.content)
-                    if (typeof delta?.content === "string") {
+                    if (typeof delta?.content === "string" && delta.content !== "") {
                         numTokens += estimateTokens(delta.content, encoder)
                         chatResp += delta.content
                         tokens.push(
@@ -293,7 +293,8 @@ export const OpenAIChatCompletion: ChatCompletionHandler = async (
                             )
                         )
                         trace.appendToken(delta.content)
-                    } else if (Array.isArray(delta.tool_calls)) {
+                    }
+                    if (Array.isArray(delta?.tool_calls)) {
                         const { tool_calls } = delta
                         for (const call of tool_calls) {
                             const tc =
@@ -385,6 +386,7 @@
         }
         if (cancellationToken?.isCancellationRequested) finishReason = "cancel"
+        else if (toolCalls?.length) finishReason = "tool_calls"
         finishReason = finishReason || "stop" // some providers do not implement this final message
     } catch (e) {
         finishReason = "fail"
diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts
index a2e55a2154..23e09d6902 100644
--- a/packages/core/src/runpromptcontext.ts
+++ b/packages/core/src/runpromptcontext.ts
@@ -449,9 +449,11 @@
                 - If you are missing information, reply "${TOKEN_MISSING_INFO}: <what is missing>".
                 - If you cannot answer the query, return "${TOKEN_NO_ANSWER}: <reason>".
                 - Be concise. Minimize output to the most relevant information to save context tokens.
-                `
+                `.role("system")
                 if (memoryAnswer)
-                    _.$`- The QUERY applied to the agent memory is in MEMORY.`
+                    _.$`- The QUERY applied to the agent memory is in MEMORY.`.role(
+                        "system"
+                    )
                 _.def("QUERY", query)
                 if (Object.keys(argsNoQuery).length)
                     _.defData("QUERY_CONTEXT", argsNoQuery, {
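The `{9,40}` → `{6,40}` quantifier change accepts the shorter abbreviated SHAs that `git log` emits for smaller repositories. A standalone check of the relaxed pattern (the sample line is illustrative):

```js
const re =
    /^(?<sha>[a-z0-9]{6,40})\s+(?<date>\d{4,4}-\d{2,2}-\d{2,2})\s+(?<message>.*)$/
console.log(re.exec("fca7ec 2024-12-04 Ollama tool calling (#911)")?.groups)
// → { sha: "fca7ec", date: "2024-12-04", message: "Ollama tool calling (#911)" }
```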
diff --git a/packages/sample/genaisrc/chunk.genai.mjs b/packages/sample/genaisrc/chunk.genai.mjs
index f9ed57c27b..a18e24bb9e 100644
--- a/packages/sample/genaisrc/chunk.genai.mjs
+++ b/packages/sample/genaisrc/chunk.genai.mjs
@@ -13,11 +13,19 @@ let summary = ""
 for (const chunk of chunks) {
     const { text } = await runPrompt(
         (ctx) => {
-            ctx.def("CHUNK", chunk)
-            ctx.def("SUMMARY_SO_FAR", summary, { ignoreEmpty: true })
-            ctx.$`Summarize CHUNK. Use SUMMARY_SO_FAR as a starting point (but do not repeat it).`
+            ctx.$`Summarize the content in CHUNK. Use the content in SUMMARY_SO_FAR as a starting point (but do not repeat it). Answer in plain text.`.role(
+                "system"
+            )
+            ctx.def("CHUNK", chunk, { lineNumbers: false })
+            ctx.def("SUMMARY_SO_FAR", summary, {
+                ignoreEmpty: true,
+                lineNumbers: false,
+            })
         },
-        { model: "small", system: ["system"] }
+        {
+            model: "small",
+            label: chunk.content.slice(0, 42) + "...",
+        }
     )
     summary = text
 }
diff --git a/packages/sample/genaisrc/llm-as-expert.genai.mts b/packages/sample/genaisrc/llm-as-expert.genai.mts
index 083b39eed8..aac24e1695 100644
--- a/packages/sample/genaisrc/llm-as-expert.genai.mts
+++ b/packages/sample/genaisrc/llm-as-expert.genai.mts
@@ -1,5 +1,6 @@
 script({
-    model: "large",
+    model: "small",
+    system: ["system", "system.assistant", "system.tools"],
     tests: {
         keywords: [
             "Permanent Waves",
@@ -35,42 +36,45 @@ const rushAlbums = [
 defData("RUSH_ALBUMS", rushAlbums)

 defTool(
-    "llm-gpt35",
-    "Invokes gpt-3.5-turbo to execute a LLM request",
+    "llm-small",
+    "Invokes a small LLM (like gpt-4o-mini) to execute an LLM request",
     {
         prompt: {
             type: "string",
             description: "the prompt to be executed by the LLM",
+            required: true,
         },
     },
     async ({ prompt }) => {
         const res = await env.generator.runPrompt(prompt, {
             model: "small",
-            label: "llm-gpt35",
+            label: "llm-small",
         })
         return res.text
     }
 )

 defTool(
-    "llm-4o",
-    "Invokes gpt-4o to execute a LLM request",
+    "llm-large",
+    "Invokes a large LLM (like gpt-4o) to execute an LLM request",
     {
         prompt: {
             type: "string",
             description: "the prompt to be executed by the LLM",
+            required: true,
         },
     },
     async ({ prompt }) => {
         const res = await env.generator.runPrompt(prompt, {
-            model: "openai:gpt-4o",
-            label: "llm-4o",
+            model: "large",
+            label: "llm-large",
         })
         return res.text
     }
 )

-$`
+$`You are a small LLM.
+
 Filter the list to only include rush albums released in the 1980's.

 Sort the result from the previous task chronologically from oldest to newest.
@@ -80,6 +84,6 @@ Validate results. Report as YAML list.

 Let's solve this step by step.

-Use gpt-3.5 for filter and sort options.
-Use gpt-4o for validation.
+Use a small LLM for filter and sort operations.
+Use a large LLM for validation.
 `
diff --git a/packages/sample/genaisrc/math-agent-system.genai.js b/packages/sample/genaisrc/math-agent-system.genai.js
index 1f7fbb6d03..003f696c70 100644
--- a/packages/sample/genaisrc/math-agent-system.genai.js
+++ b/packages/sample/genaisrc/math-agent-system.genai.js
@@ -2,11 +2,11 @@ script({
     title: "math-agent-system",
     model: "small",
     description: "A port of https://ts.llamaindex.ai/examples/agent",
-    system: ["system.math"],
+    system: ["system", "system.math", "system.tools"],
     parameters: {
         "question": {
             type: "string",
-            default: "How much is 11 + 4? then divide by 3?"
+            default: "How much is (11 + 4 / 9.11)? Then divide by 3.13."
         }
     },
     tests: {
@@ -15,8 +15,10 @@ script({
     }
 })

-$`Answer the following arithmetic question:
+$`Answer this math question:

 ${env.vars.question}

+- do not generate python code
+- print the final result in text format
 `
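Taken together, the pieces above enable the headline feature: tool calling against a locally served Ollama model. An end-to-end sketch — the model name is an assumption, any tool-calling-capable model served by Ollama should work:

```js
// assumes `yarn ollama:start` is running and the model has been pulled
script({
    model: "ollama:llama3.2", // assumed tool-capable model, not named in this patch
    system: ["system", "system.math", "system.tools"],
})
$`Answer this math question: (11 + 4 / 9.11), then divide by 3.13.
- do not generate python code
- print the final result in text format`
```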