diff --git a/docs/src/content/docs/reference/cli/commands.md b/docs/src/content/docs/reference/cli/commands.md index 7b42a9e82b..90c97eb812 100644 --- a/docs/src/content/docs/reference/cli/commands.md +++ b/docs/src/content/docs/reference/cli/commands.md @@ -45,9 +45,10 @@ Options: -em, --embeddings-model embeddings model for the run --no-cache disable LLM result cache -cn, --cache-name custom cache file name - --cs, --csv-separator csv separator (default: "\t") + -cs, --csv-separator csv separator (default: "\t") -ae, --apply-edits apply file edits --vars variables, as name=value, stored in env.vars + -rr, --run-retry number of retries for the entire run -h, --help display help for command ``` @@ -89,6 +90,8 @@ Options: -v, --verbose verbose output -pv, --promptfoo-version [version] promptfoo version, default is 0.78.0 -os, --out-summary append output summary in file + --groups groups to include or exclude. Use :! + prefix to exclude -h, --help display help for command ``` diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 61f3ad700a..02768ea805 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -142,12 +142,13 @@ export async function cli() { ) .option("--no-cache", "disable LLM result cache") .option("-cn, --cache-name ", "custom cache file name") - .option("--cs, --csv-separator ", "csv separator", "\t") + .option("-cs, --csv-separator ", "csv separator", "\t") .option("-ae, --apply-edits", "apply file edits") .option( "--vars ", "variables, as name=value, stored in env.vars" ) + .option("-rr, --run-retry ", "number of retries for the entire run") .action(runScriptWithExitCode) const test = program.command("test") @@ -174,6 +175,10 @@ export async function cli() { `promptfoo version, default is ${PROMPTFOO_VERSION}` ) .option("-os, --out-summary ", "append output summary in file") + .option( + "--groups ", + "groups to include or exclude. Use :! prefix to exclude" + ) .action(scriptsTest) test.command("view") diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts index ffd4fdaa53..b7affd2639 100644 --- a/packages/cli/src/run.ts +++ b/packages/cli/src/run.ts @@ -25,6 +25,8 @@ import { ANNOTATION_ERROR_CODE, GENAI_ANY_REGEX, TRACE_CHUNK, + UNRECOVERABLE_ERROR_CODES, + SUCCESS_ERROR_CODE, } from "../../core/src/constants" import { isCancelError, errorMessage } from "../../core/src/error" import { Fragment, GenerationResult } from "../../core/src/generation" @@ -47,6 +49,7 @@ import { normalizeInt, logVerbose, logError, + delay, } from "../../core/src/util" import { YAMLStringify } from "../../core/src/yaml" import { PromptScriptRunOptions } from "../../core/src/server/messages" @@ -79,7 +82,23 @@ export async function runScriptWithExitCode( TraceOptions & CancellationOptions ) { - const { exitCode } = await runScript(scriptId, files, options) + const runRetry = Math.max(1, normalizeInt(options.runRetry) || 1) + let exitCode = -1 + for (let r = 0; r < runRetry; ++r) { + const res = await runScript(scriptId, files, options) + exitCode = res.exitCode + if ( + exitCode === SUCCESS_ERROR_CODE || + UNRECOVERABLE_ERROR_CODES.includes(exitCode) + ) + break + + const delayMs = 2000 * Math.pow(2, r) + console.error( + `error: run failed with ${exitCode}, retry #${r + 1}/${runRetry} in ${delayMs}ms` + ) + await delay(delayMs) + } process.exit(exitCode) } diff --git a/packages/cli/src/test.ts b/packages/cli/src/test.ts index 79b66dc707..ff1b7a08c7 100644 --- a/packages/cli/src/test.ts +++ b/packages/cli/src/test.ts @@ -27,6 +27,7 @@ import { logInfo, logVerbose, delay, + tagFilter, } from "../../core/src/util" import { YAMLStringify } from "../../core/src/yaml" import { @@ -83,6 +84,7 @@ export async function runPromptScriptTests( const scripts = prj.templates .filter((t) => arrayify(t.tests)?.length) .filter((t) => !ids?.length || ids.includes(t.id)) + .filter((t) => tagFilter(options?.groups, t.group)) if (!scripts.length) return { ok: false, @@ -217,6 +219,7 @@ export async function scriptsTest( promptfooVersion?: string outSummary?: string testDelay?: string + groups?: string[] } ) { const { status, value = [] } = await runPromptScriptTests(ids, options) diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index 18e2aad1c2..5f4c4005e2 100644 --- a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -76,6 +76,7 @@ export const FETCH_RETRY_MAX_DELAY_DEFAULT = 120000 export const EXEC_MAX_BUFFER = 64 export const DOT_ENV_FILENAME = ".env" +export const SUCCESS_ERROR_CODE = 0 export const UNHANDLED_ERROR_CODE = -1 export const ANNOTATION_ERROR_CODE = -2 export const FILES_NOT_FOUND_ERROR_CODE = -3 @@ -85,6 +86,13 @@ export const CONNECTION_CONFIGURATION_ERROR_CODE = -6 export const USER_CANCELLED_ERROR_CODE = -7 export const CONFIGURATION_ERROR_CODE = -8 +export const UNRECOVERABLE_ERROR_CODES = Object.freeze([ + CONNECTION_CONFIGURATION_ERROR_CODE, + USER_CANCELLED_ERROR_CODE, + FILES_NOT_FOUND_ERROR_CODE, + ANNOTATION_ERROR_CODE, +]) + export const DOT_ENV_REGEX = /\.env$/i export const PROMPT_FENCE = "```" export const MARKDOWN_PROMPT_FENCE = "`````" diff --git a/packages/core/src/genaiscript-api-provider.mjs b/packages/core/src/genaiscript-api-provider.mjs index ce5a155219..805489b17d 100644 --- a/packages/core/src/genaiscript-api-provider.mjs +++ b/packages/core/src/genaiscript-api-provider.mjs @@ -41,6 +41,7 @@ class GenAIScriptApiProvider { args.push("run", prompt) if (files) args.push(...files) + args.push("--run-retry", 2) if (testVars && typeof testVars === "object") { args.push("--vars") for (const [key, value] of Object.entries(testVars)) { diff --git a/packages/core/src/server/messages.ts b/packages/core/src/server/messages.ts index 22402bf9c4..7fbf3265b8 100644 --- a/packages/core/src/server/messages.ts +++ b/packages/core/src/server/messages.ts @@ -24,6 +24,7 @@ export interface ServerEnv extends RequestMessage { export interface PromptScriptTestRunOptions { testProvider?: string models?: string[] + groups?: string[] } export interface PromptScriptTestRun extends RequestMessage { @@ -44,6 +45,7 @@ export interface PromptScriptTestRunResponse extends ResponseStatus { export interface PromptScriptRunOptions { excludedFiles: string[] excludeGitIgnore: boolean + runRetry: string out: string retry: string retryDelay: string diff --git a/packages/core/src/util.ts b/packages/core/src/util.ts index ef81bf25dd..6ad3dbef15 100644 --- a/packages/core/src/util.ts +++ b/packages/core/src/util.ts @@ -180,9 +180,9 @@ export function logWarn(msg: string) { export function logError(msg: string | Error | SerializedError) { const { message, ...e } = serializeError(msg) if (message) host.log(LogLevel.Error, message) - console.debug(msg) + console.debug(msg) const se = YAMLStringify(e) - if (!/^\s*\{\}\s*$/) host.log(LogLevel.Info, se) + if (!/^\s*\{\s*\}\s*$/) host.log(LogLevel.Info, se) } export function concatArrays(...arrays: T[][]): T[] { if (arrays.length == 0) return [] @@ -285,3 +285,18 @@ export function renderWithPrecision( } export const HTMLEscape = HTMLEscape_ + +export function tagFilter(tags: string[], tag: string) { + if (!tags?.length || !tag) return true + const ltag = tag.toLocaleLowerCase() + let inclusive = false + for (const t of tags) { + const lt = t.toLocaleLowerCase() + const exclude = lt.startsWith(":!") + if (!exclude) inclusive = true + + if (exclude && ltag.startsWith(lt.slice(2))) return false + else if (ltag.startsWith(t)) return true + } + return !inclusive +} diff --git a/packages/sample/genaisrc/describe-image-run-prompt.genai.js b/packages/sample/genaisrc/describe-image-run-prompt.genai.js index 099544eb61..837d60e7a3 100644 --- a/packages/sample/genaisrc/describe-image-run-prompt.genai.js +++ b/packages/sample/genaisrc/describe-image-run-prompt.genai.js @@ -1,6 +1,7 @@ script({ title: "Describe objects in each image", model: "gpt-3.5-turbo", + group: "vision", maxTokens: 4000, system: [], tests: { diff --git a/packages/sample/genaisrc/describe-image.genai.js b/packages/sample/genaisrc/describe-image.genai.js index b3be0982e9..363bac77e8 100644 --- a/packages/sample/genaisrc/describe-image.genai.js +++ b/packages/sample/genaisrc/describe-image.genai.js @@ -1,6 +1,7 @@ script({ title: "Describe objects in image", model: "gpt-4-turbo-v", + group: "vision", maxTokens: 4000, system: [], tests: { diff --git a/packages/sample/genaisrc/summarize-max-tokens.genai.js b/packages/sample/genaisrc/summarize-max-tokens.genai.js index cedfed205a..63b7b4cb88 100644 --- a/packages/sample/genaisrc/summarize-max-tokens.genai.js +++ b/packages/sample/genaisrc/summarize-max-tokens.genai.js @@ -8,6 +8,6 @@ script({ }, }) -def("FILE", env.files, { maxTokens: 40 }) +def("FILE", env.files, { maxTokens: 80 }) -$`Extract keywords for the contents of FILE.` +$`Extract 5 keywords for the contents of FILE.` diff --git a/packages/sample/package.json b/packages/sample/package.json index 65560da649..35205cde5b 100644 --- a/packages/sample/package.json +++ b/packages/sample/package.json @@ -11,7 +11,7 @@ "test:watch": "node --import tsx --watch --test-name-pattern=run --test src/**.test.ts", "cache:clear": "node ../cli/built/genaiscript.cjs cache clear", "run:script": "node ../cli/built/genaiscript.cjs run", - "test:scripts": "node ../cli/built/genaiscript.cjs test -rmo -tp tnrllmproxy.azurewebsites.net", + "test:scripts": "node ../cli/built/genaiscript.cjs test --groups :!vision -rmo", "test:scripts:view": "node ../cli/built/genaiscript.cjs test view" }, "devDependencies": { diff --git a/packages/sample/src/vision/describe-card-schema.genai.js b/packages/sample/src/vision/describe-card-schema.genai.js index 83c785f1e8..d1d19f68a0 100644 --- a/packages/sample/src/vision/describe-card-schema.genai.js +++ b/packages/sample/src/vision/describe-card-schema.genai.js @@ -1,7 +1,7 @@ script({ description: "Given an image of a receipt, extract a csv of the receipt data", - group: "image tools", + group: "vision", model: "gpt-4-turbo-v", maxTokens: 4000, }) diff --git a/packages/sample/src/vision/describe-card.genai.js b/packages/sample/src/vision/describe-card.genai.js index 97f553bb1c..e80c11aa3a 100644 --- a/packages/sample/src/vision/describe-card.genai.js +++ b/packages/sample/src/vision/describe-card.genai.js @@ -1,6 +1,6 @@ script({ description: "Given an image of business card, extract the details to a csv file", - group: "image tools", + group: "vision", model: "gpt-4-turbo-v", maxTokens: 4000, }) diff --git a/packages/sample/src/vision/describe-image.genai.js b/packages/sample/src/vision/describe-image.genai.js index 44ffd4a12f..85025a8c52 100644 --- a/packages/sample/src/vision/describe-image.genai.js +++ b/packages/sample/src/vision/describe-image.genai.js @@ -1,7 +1,7 @@ script({ description: "Given an image of a receipt, extract a csv of the receipt data", - group: "image tools", + group: "vision", model: "gpt-4-turbo-v", maxTokens: 4000, })