From aa986510480ad254bf5097dde1b7b4e6540b4a32 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Wed, 21 Aug 2024 15:43:32 +0000 Subject: [PATCH 1/5] group filtering in tests --- docs/src/content/docs/reference/cli/commands.md | 4 +++- packages/cli/src/cli.ts | 6 +++++- packages/cli/src/test.ts | 3 +++ packages/core/src/server/messages.ts | 1 + packages/core/src/util.ts | 15 +++++++++++++-- .../genaisrc/describe-image-run-prompt.genai.js | 1 + packages/sample/genaisrc/describe-image.genai.js | 1 + packages/sample/package.json | 2 +- .../src/vision/describe-card-schema.genai.js | 2 +- packages/sample/src/vision/describe-card.genai.js | 2 +- .../sample/src/vision/describe-image.genai.js | 2 +- 11 files changed, 31 insertions(+), 8 deletions(-) diff --git a/docs/src/content/docs/reference/cli/commands.md b/docs/src/content/docs/reference/cli/commands.md index 7b42a9e82b..6ec0db4304 100644 --- a/docs/src/content/docs/reference/cli/commands.md +++ b/docs/src/content/docs/reference/cli/commands.md @@ -45,7 +45,7 @@ Options: -em, --embeddings-model embeddings model for the run --no-cache disable LLM result cache -cn, --cache-name custom cache file name - --cs, --csv-separator csv separator (default: "\t") + -cs, --csv-separator csv separator (default: "\t") -ae, --apply-edits apply file edits --vars variables, as name=value, stored in env.vars -h, --help display help for command @@ -89,6 +89,8 @@ Options: -v, --verbose verbose output -pv, --promptfoo-version [version] promptfoo version, default is 0.78.0 -os, --out-summary append output summary in file + --groups groups to include or exclude. Use :! 
+ prefix to exclude -h, --help display help for command ``` diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 61f3ad700a..78be130c98 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -142,7 +142,7 @@ export async function cli() { ) .option("--no-cache", "disable LLM result cache") .option("-cn, --cache-name ", "custom cache file name") - .option("--cs, --csv-separator ", "csv separator", "\t") + .option("-cs, --csv-separator ", "csv separator", "\t") .option("-ae, --apply-edits", "apply file edits") .option( "--vars ", @@ -174,6 +174,10 @@ export async function cli() { `promptfoo version, default is ${PROMPTFOO_VERSION}` ) .option("-os, --out-summary ", "append output summary in file") + .option( + "--groups ", + "groups to include or exclude. Use :! prefix to exclude" + ) .action(scriptsTest) test.command("view") diff --git a/packages/cli/src/test.ts b/packages/cli/src/test.ts index 79b66dc707..ff1b7a08c7 100644 --- a/packages/cli/src/test.ts +++ b/packages/cli/src/test.ts @@ -27,6 +27,7 @@ import { logInfo, logVerbose, delay, + tagFilter, } from "../../core/src/util" import { YAMLStringify } from "../../core/src/yaml" import { @@ -83,6 +84,7 @@ export async function runPromptScriptTests( const scripts = prj.templates .filter((t) => arrayify(t.tests)?.length) .filter((t) => !ids?.length || ids.includes(t.id)) + .filter((t) => tagFilter(options?.groups, t.group)) if (!scripts.length) return { ok: false, @@ -217,6 +219,7 @@ export async function scriptsTest( promptfooVersion?: string outSummary?: string testDelay?: string + groups?: string[] } ) { const { status, value = [] } = await runPromptScriptTests(ids, options) diff --git a/packages/core/src/server/messages.ts b/packages/core/src/server/messages.ts index 22402bf9c4..a11b4df9a0 100644 --- a/packages/core/src/server/messages.ts +++ b/packages/core/src/server/messages.ts @@ -24,6 +24,7 @@ export interface ServerEnv extends RequestMessage { export interface 
PromptScriptTestRunOptions { testProvider?: string models?: string[] + groups?: string[] } export interface PromptScriptTestRun extends RequestMessage { diff --git a/packages/core/src/util.ts b/packages/core/src/util.ts index ef81bf25dd..a3088965fe 100644 --- a/packages/core/src/util.ts +++ b/packages/core/src/util.ts @@ -180,9 +180,9 @@ export function logWarn(msg: string) { export function logError(msg: string | Error | SerializedError) { const { message, ...e } = serializeError(msg) if (message) host.log(LogLevel.Error, message) - console.debug(msg) + console.debug(msg) const se = YAMLStringify(e) - if (!/^\s*\{\}\s*$/) host.log(LogLevel.Info, se) + if (!/^\s*\{\s*\}\s*$/) host.log(LogLevel.Info, se) } export function concatArrays(...arrays: T[][]): T[] { if (arrays.length == 0) return [] @@ -285,3 +285,14 @@ export function renderWithPrecision( } export const HTMLEscape = HTMLEscape_ + +export function tagFilter(tags: string[], tag: string) { + if (!tags?.length || !tag) return true + const ltag = tag.toLocaleLowerCase() + for (const t of tags) { + const lt = t.toLocaleLowerCase() + if (lt.startsWith(":!") && ltag.startsWith(lt.slice(2))) return false + else if (ltag.startsWith(t)) return true + } + return false +} diff --git a/packages/sample/genaisrc/describe-image-run-prompt.genai.js b/packages/sample/genaisrc/describe-image-run-prompt.genai.js index 099544eb61..837d60e7a3 100644 --- a/packages/sample/genaisrc/describe-image-run-prompt.genai.js +++ b/packages/sample/genaisrc/describe-image-run-prompt.genai.js @@ -1,6 +1,7 @@ script({ title: "Describe objects in each image", model: "gpt-3.5-turbo", + group: "vision", maxTokens: 4000, system: [], tests: { diff --git a/packages/sample/genaisrc/describe-image.genai.js b/packages/sample/genaisrc/describe-image.genai.js index b3be0982e9..363bac77e8 100644 --- a/packages/sample/genaisrc/describe-image.genai.js +++ b/packages/sample/genaisrc/describe-image.genai.js @@ -1,6 +1,7 @@ script({ title: "Describe objects 
in image", model: "gpt-4-turbo-v", + group: "vision", maxTokens: 4000, system: [], tests: { diff --git a/packages/sample/package.json b/packages/sample/package.json index 65560da649..83bc410fa6 100644 --- a/packages/sample/package.json +++ b/packages/sample/package.json @@ -11,7 +11,7 @@ "test:watch": "node --import tsx --watch --test-name-pattern=run --test src/**.test.ts", "cache:clear": "node ../cli/built/genaiscript.cjs cache clear", "run:script": "node ../cli/built/genaiscript.cjs run", - "test:scripts": "node ../cli/built/genaiscript.cjs test -rmo -tp tnrllmproxy.azurewebsites.net", + "test:scripts": "node ../cli/built/genaiscript.cjs test -rmo --groups :!vision", "test:scripts:view": "node ../cli/built/genaiscript.cjs test view" }, "devDependencies": { diff --git a/packages/sample/src/vision/describe-card-schema.genai.js b/packages/sample/src/vision/describe-card-schema.genai.js index 83c785f1e8..d1d19f68a0 100644 --- a/packages/sample/src/vision/describe-card-schema.genai.js +++ b/packages/sample/src/vision/describe-card-schema.genai.js @@ -1,7 +1,7 @@ script({ description: "Given an image of a receipt, extract a csv of the receipt data", - group: "image tools", + group: "vision", model: "gpt-4-turbo-v", maxTokens: 4000, }) diff --git a/packages/sample/src/vision/describe-card.genai.js b/packages/sample/src/vision/describe-card.genai.js index 97f553bb1c..e80c11aa3a 100644 --- a/packages/sample/src/vision/describe-card.genai.js +++ b/packages/sample/src/vision/describe-card.genai.js @@ -1,6 +1,6 @@ script({ description: "Given an image of business card, extract the details to a csv file", - group: "image tools", + group: "vision", model: "gpt-4-turbo-v", maxTokens: 4000, }) diff --git a/packages/sample/src/vision/describe-image.genai.js b/packages/sample/src/vision/describe-image.genai.js index 44ffd4a12f..85025a8c52 100644 --- a/packages/sample/src/vision/describe-image.genai.js +++ b/packages/sample/src/vision/describe-image.genai.js @@ -1,7 +1,7 @@ 
script({ description: "Given an image of a receipt, extract a csv of the receipt data", - group: "image tools", + group: "vision", model: "gpt-4-turbo-v", maxTokens: 4000, }) From 207418365cec9c6594dfe930a38df3f005e74bec Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Wed, 21 Aug 2024 15:52:49 +0000 Subject: [PATCH 2/5] adding run retry argument --- docs/src/content/docs/reference/cli/commands.md | 1 + packages/cli/src/cli.ts | 1 + packages/cli/src/run.ts | 9 ++++++++- packages/core/src/genaiscript-api-provider.mjs | 1 + packages/core/src/server/messages.ts | 1 + packages/sample/genaisrc/summarize-max-tokens.genai.js | 4 ++-- packages/sample/package.json | 2 +- 7 files changed, 15 insertions(+), 4 deletions(-) diff --git a/docs/src/content/docs/reference/cli/commands.md b/docs/src/content/docs/reference/cli/commands.md index 6ec0db4304..90c97eb812 100644 --- a/docs/src/content/docs/reference/cli/commands.md +++ b/docs/src/content/docs/reference/cli/commands.md @@ -48,6 +48,7 @@ Options: -cs, --csv-separator csv separator (default: "\t") -ae, --apply-edits apply file edits --vars variables, as name=value, stored in env.vars + -rr, --run-retry number of retries for the entire run -h, --help display help for command ``` diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 78be130c98..02768ea805 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -148,6 +148,7 @@ export async function cli() { "--vars ", "variables, as name=value, stored in env.vars" ) + .option("-rr, --run-retry ", "number of retries for the entire run") .action(runScriptWithExitCode) const test = program.command("test") diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts index ffd4fdaa53..a01041a704 100644 --- a/packages/cli/src/run.ts +++ b/packages/cli/src/run.ts @@ -79,7 +79,14 @@ export async function runScriptWithExitCode( TraceOptions & CancellationOptions ) { - const { exitCode } = await runScript(scriptId, files, options) + const runRetry = 
Math.max(1, normalizeInt(options.runRetry) || 1) + let exitCode = -1 + for (let r = 0; r < runRetry; ++r) { + const res = await runScript(scriptId, files, options) + exitCode = res.exitCode + if (exitCode === 0) break + console.error(`run failed, retrying ${r + 1}/${runRetry}`) + } process.exit(exitCode) } diff --git a/packages/core/src/genaiscript-api-provider.mjs b/packages/core/src/genaiscript-api-provider.mjs index ce5a155219..805489b17d 100644 --- a/packages/core/src/genaiscript-api-provider.mjs +++ b/packages/core/src/genaiscript-api-provider.mjs @@ -41,6 +41,7 @@ class GenAIScriptApiProvider { args.push("run", prompt) if (files) args.push(...files) + args.push("--run-retry", 2) if (testVars && typeof testVars === "object") { args.push("--vars") for (const [key, value] of Object.entries(testVars)) { diff --git a/packages/core/src/server/messages.ts b/packages/core/src/server/messages.ts index a11b4df9a0..7fbf3265b8 100644 --- a/packages/core/src/server/messages.ts +++ b/packages/core/src/server/messages.ts @@ -45,6 +45,7 @@ export interface PromptScriptTestRunResponse extends ResponseStatus { export interface PromptScriptRunOptions { excludedFiles: string[] excludeGitIgnore: boolean + runRetry: string out: string retry: string retryDelay: string diff --git a/packages/sample/genaisrc/summarize-max-tokens.genai.js b/packages/sample/genaisrc/summarize-max-tokens.genai.js index cedfed205a..63b7b4cb88 100644 --- a/packages/sample/genaisrc/summarize-max-tokens.genai.js +++ b/packages/sample/genaisrc/summarize-max-tokens.genai.js @@ -8,6 +8,6 @@ script({ }, }) -def("FILE", env.files, { maxTokens: 40 }) +def("FILE", env.files, { maxTokens: 80 }) -$`Extract keywords for the contents of FILE.` +$`Extract 5 keywords for the contents of FILE.` diff --git a/packages/sample/package.json b/packages/sample/package.json index 83bc410fa6..35205cde5b 100644 --- a/packages/sample/package.json +++ b/packages/sample/package.json @@ -11,7 +11,7 @@ "test:watch": "node --import tsx 
--watch --test-name-pattern=run --test src/**.test.ts", "cache:clear": "node ../cli/built/genaiscript.cjs cache clear", "run:script": "node ../cli/built/genaiscript.cjs run", - "test:scripts": "node ../cli/built/genaiscript.cjs test -rmo --groups :!vision", + "test:scripts": "node ../cli/built/genaiscript.cjs test --groups :!vision -rmo", "test:scripts:view": "node ../cli/built/genaiscript.cjs test view" }, "devDependencies": { From 72a97310b215b4f9a260a32c8e7330b1a976eb4f Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Wed, 21 Aug 2024 16:00:27 +0000 Subject: [PATCH 3/5] filter by error code --- packages/cli/src/run.ts | 9 +++++++-- packages/core/src/constants.ts | 9 +++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts index a01041a704..6f7cfeeb1d 100644 --- a/packages/cli/src/run.ts +++ b/packages/cli/src/run.ts @@ -25,6 +25,7 @@ import { ANNOTATION_ERROR_CODE, GENAI_ANY_REGEX, TRACE_CHUNK, + UNRECOVERABLE_ERROR_CODES, } from "../../core/src/constants" import { isCancelError, errorMessage } from "../../core/src/error" import { Fragment, GenerationResult } from "../../core/src/generation" @@ -47,6 +48,7 @@ import { normalizeInt, logVerbose, logError, + delay, } from "../../core/src/util" import { YAMLStringify } from "../../core/src/yaml" import { PromptScriptRunOptions } from "../../core/src/server/messages" @@ -84,8 +86,11 @@ export async function runScriptWithExitCode( for (let r = 0; r < runRetry; ++r) { const res = await runScript(scriptId, files, options) exitCode = res.exitCode - if (exitCode === 0) break - console.error(`run failed, retrying ${r + 1}/${runRetry}`) + if (UNRECOVERABLE_ERROR_CODES.includes(exitCode)) break + + const delayMs = 2000 * Math.pow(2, r) + console.error(`run failed, retry #${r + 1}/${runRetry} in ${delayMs}ms`) + await delay(delayMs) } process.exit(exitCode) } diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index 
18e2aad1c2..b8b1d65b07 100644 --- a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -76,6 +76,7 @@ export const FETCH_RETRY_MAX_DELAY_DEFAULT = 120000 export const EXEC_MAX_BUFFER = 64 export const DOT_ENV_FILENAME = ".env" +export const SUCCESS_ERROR_CODE = 0 export const UNHANDLED_ERROR_CODE = -1 export const ANNOTATION_ERROR_CODE = -2 export const FILES_NOT_FOUND_ERROR_CODE = -3 @@ -85,6 +86,14 @@ export const CONNECTION_CONFIGURATION_ERROR_CODE = -6 export const USER_CANCELLED_ERROR_CODE = -7 export const CONFIGURATION_ERROR_CODE = -8 +export const UNRECOVERABLE_ERROR_CODES = Object.freeze([ + 0, + CONNECTION_CONFIGURATION_ERROR_CODE, + USER_CANCELLED_ERROR_CODE, + FILES_NOT_FOUND_ERROR_CODE, + ANNOTATION_ERROR_CODE, +]) + export const DOT_ENV_REGEX = /\.env$/i export const PROMPT_FENCE = "```" export const MARKDOWN_PROMPT_FENCE = "`````" From 8f49aa38a18447ee7a595074b5d08a2e8e5deadf Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Wed, 21 Aug 2024 16:13:43 +0000 Subject: [PATCH 4/5] fix tag filtering --- packages/cli/src/run.ts | 4 +++- packages/core/src/util.ts | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts index 6f7cfeeb1d..3d6015af11 100644 --- a/packages/cli/src/run.ts +++ b/packages/cli/src/run.ts @@ -89,7 +89,9 @@ export async function runScriptWithExitCode( if (UNRECOVERABLE_ERROR_CODES.includes(exitCode)) break const delayMs = 2000 * Math.pow(2, r) - console.error(`run failed, retry #${r + 1}/${runRetry} in ${delayMs}ms`) + console.error( + `error: run failed with ${exitCode}, retry #${r + 1}/${runRetry} in ${delayMs}ms` + ) await delay(delayMs) } process.exit(exitCode) diff --git a/packages/core/src/util.ts b/packages/core/src/util.ts index a3088965fe..6ad3dbef15 100644 --- a/packages/core/src/util.ts +++ b/packages/core/src/util.ts @@ -289,10 +289,14 @@ export const HTMLEscape = HTMLEscape_ export function tagFilter(tags: string[], tag: string) { 
if (!tags?.length || !tag) return true const ltag = tag.toLocaleLowerCase() + let inclusive = false for (const t of tags) { const lt = t.toLocaleLowerCase() - if (lt.startsWith(":!") && ltag.startsWith(lt.slice(2))) return false + const exclude = lt.startsWith(":!") + if (!exclude) inclusive = true + + if (exclude && ltag.startsWith(lt.slice(2))) return false else if (ltag.startsWith(t)) return true } - return false + return !inclusive } From 3ba00620d12c914f1e5cc79e18e970e6fa9906ee Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Wed, 21 Aug 2024 16:16:03 +0000 Subject: [PATCH 5/5] pr feedback --- packages/cli/src/run.ts | 7 ++++++- packages/core/src/constants.ts | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts index 3d6015af11..b7affd2639 100644 --- a/packages/cli/src/run.ts +++ b/packages/cli/src/run.ts @@ -26,6 +26,7 @@ import { GENAI_ANY_REGEX, TRACE_CHUNK, UNRECOVERABLE_ERROR_CODES, + SUCCESS_ERROR_CODE, } from "../../core/src/constants" import { isCancelError, errorMessage } from "../../core/src/error" import { Fragment, GenerationResult } from "../../core/src/generation" @@ -86,7 +87,11 @@ export async function runScriptWithExitCode( for (let r = 0; r < runRetry; ++r) { const res = await runScript(scriptId, files, options) exitCode = res.exitCode - if (UNRECOVERABLE_ERROR_CODES.includes(exitCode)) break + if ( + exitCode === SUCCESS_ERROR_CODE || + UNRECOVERABLE_ERROR_CODES.includes(exitCode) + ) + break const delayMs = 2000 * Math.pow(2, r) console.error( diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index b8b1d65b07..5f4c4005e2 100644 --- a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -87,7 +87,6 @@ export const USER_CANCELLED_ERROR_CODE = -7 export const CONFIGURATION_ERROR_CODE = -8 export const UNRECOVERABLE_ERROR_CODES = Object.freeze([ - 0, CONNECTION_CONFIGURATION_ERROR_CODE, USER_CANCELLED_ERROR_CODE, 
FILES_NOT_FOUND_ERROR_CODE,