Skip to content

Commit

Permalink
Support for inline validation tests (#387)
Browse files Browse the repository at this point in the history
* define test structure

* helper

* emit structured output in promptfoo

* show in trace

* check for tests

* pass more options around

* logging

* towards running promptfoo

* bundling and testing

* building test cli

* still working through path and cwd issues

* more tuning of execution

* test description

* support for custom test provider

* updated testgen

* updated generated help

* add test all options

* support multiple models

* better logging

* simpler log

* added help on view

* typos

* parse, cleanup

* removing progress message

* added view option
  • Loading branch information
pelikhan authored Apr 26, 2024
1 parent 65284a0 commit e6d4b58
Show file tree
Hide file tree
Showing 31 changed files with 1,587 additions and 347 deletions.
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"type": "node",
"cwd": "${workspaceFolder}",
"preLaunchTask": "npm: compile-cli",
"args": ["run", "packages/sample/genaisrc/summarize-import.genai.mjs", "packages/sample/src/questions.md"]
"args": ["run", "summarize", "packages/sample/src/rag/markdown.md", "--json"]
},
{
"name": "Run - sample",
Expand Down
51 changes: 51 additions & 0 deletions docs/src/content/docs/getting-started/testing-scripts.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
---
title: Testing scripts
sidebar:
    order: 4.6
---
import providerSrc from "../../../../../packages/core/src/genaiscript-api-provider.mjs?raw"
import { Code } from '@astrojs/starlight/components';

It is possible to declare tests and assertions in the `script` function
to validate the output of the script.

The tests are executed by [promptfoo](https://promptfoo.dev/).
[promptfoo](https://promptfoo.dev/) is a tool for evaluating LLM output quality.

## Declaring tests

The tests are added as an array of objects in the `tests` key of the `script` function.

```js title="proofreader.genai.js"
script({
    ...,
    tests: [{
        files: "src/rag/testcode.ts",
        rubrics: "is a report with a list of issues",
        facts: ["The report says that the input string should be validated before use."]
    }]
})
```

## Running tests

You can use the cli to run the tests for one or more scripts.

```sh
npx genaiscript test proofreader
```

If `script` is not provided, all scripts with tests will be tested.

## Viewing results

The results of the tests can be explored through the [promptfoo web ui](https://promptfoo.dev/docs/usage/web-ui).

```sh
npx promptfoo view
```

## Known limitations

Currently, promptfoo treats the script source as the prompt text. Therefore, one cannot use assertions
that also rely on the input text, such as `answer_relevance`.
71 changes: 0 additions & 71 deletions docs/src/content/docs/guides/promptfoo.mdx

This file was deleted.

23 changes: 23 additions & 0 deletions docs/src/content/docs/reference/cli/commands.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,29 @@ Options:
-h, --help display help for command
```

## `test`

```
Usage: genaiscript test [options] [script]
Runs the tests for scripts
Arguments:
script Script id. If not provided, all scripts are
tested
Options:
--models [models...] models to test where mode is the key value
pair list of m (model), t (temperature), p
(top-p)
-o, --out <folder> output folder
-rmo, --remove-out remove output folder if it exists
--cli <string> override path to the cli
-tp, --test-provider <string> test provider
--view open test viewer once tests are executed
-h, --help display help for command
```

## `scripts`

```
Expand Down
9 changes: 5 additions & 4 deletions packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
"description": "A CLI for GenAIScript, a generative AI scripting framework.",
"license": "MIT",
"dependencies": {
"tree-sitter-wasms": "^0.1.11",
"web-tree-sitter": "^0.22.2",
"pdfjs-dist": "4.0.379",
"typescript": "5.4.5"
"tree-sitter-wasms": "^0.1.11",
"typescript": "5.4.5",
"web-tree-sitter": "^0.22.2"
},
"optionalDependencies": {
"llamaindex": "0.1.21"
Expand All @@ -35,6 +35,7 @@
"node": ">=20.0.0"
},
"devDependencies": {
"promptfoo": "^0.55.0",
"@llamaindex/env": "^0.0.5",
"@types/fs-extra": "^11.0.4",
"@types/node": "^20.12.7",
Expand Down Expand Up @@ -62,7 +63,7 @@
"zx": "^8.0.2"
},
"scripts": {
"compile": "esbuild src/main.ts --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:pdfjs-dist --external:llamaindex --external:web-tree-sitter --external:tree-sitter-wasms && node ../../scripts/patch-cli.mjs",
"compile": "esbuild src/main.ts --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:pdfjs-dist --external:llamaindex --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo && node ../../scripts/patch-cli.mjs",
"postcompile": "node built/genaiscript.cjs info help > ../../docs/src/content/docs/reference/cli/commands.md",
"go": "yarn compile && node built/genaiscript.cjs",
"test": "node --import tsx --test src/**.test.ts",
Expand Down
45 changes: 34 additions & 11 deletions packages/cli/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import {
import { compileScript, createScript, listScripts } from "./scripts"
import { codeQuery } from "./codequery"
import { modelInfo, systemInfo } from "./info"
import { scriptsTest } from "./test"

export async function cli() {
process.on("uncaughtException", (err) => {
Expand Down Expand Up @@ -157,28 +158,51 @@ export async function cli() {
)
.action(batchScript)

program
.command("test")
.description("Runs the tests for scripts")
.argument(
"[script]",
"Script id. If not provided, all scripts are tested"
)
.action(scriptsTest)
.option(
"--models [models...]",
"models to test where mode is the key value pair list of m (model), t (temperature), p (top-p)"
)
.option("-o, --out <folder>", "output folder")
.option("-rmo, --remove-out", "remove output folder if it exists")
.option("--cli <string>", "override path to the cli")
.option("-tp, --test-provider <string>", "test provider")
.option("--view", "open test viewer once tests are executed")

const scripts = program
.command("scripts").alias("script")
.command("scripts")
.alias("script")
.description("Utility tasks for scripts")
scripts.command("list", { isDefault: true })
scripts
.command("list", { isDefault: true })
.description("List all available scripts in workspace")
.action(listScripts)
scripts.command("create")
scripts
.command("create")
.description("Create a new script")
.argument("<name>", "Name of the script")
.action(createScript)
scripts.command("compile")
scripts
.command("compile")
.description("Compile all script in workspace")
.action(compileScript)
scripts.command("model")
scripts
.command("model")
.description("Show model connection information for scripts")
.argument("[script]", "Script id or file")
.option("-t, --token", "show token")
.action(modelInfo)


const retrieval = program
.command("retrieval").alias("retreival")
.command("retrieval")
.alias("retreival")
.description("RAG support")
retrieval
.command("index")
Expand Down Expand Up @@ -221,7 +245,8 @@ export async function cli() {
.action(startServer)

const parser = program
.command("parse").alias("parsers")
.command("parse")
.alias("parsers")
.description("Parse various outputs")
parser
.command("fence <language>")
Expand Down Expand Up @@ -259,9 +284,7 @@ export async function cli() {
.argument("<file...>", "input JSONL files")
.action(jsonl2json)

const info = program
.command("info")
.description("Utility tasks")
const info = program.command("info").description("Utility tasks")
info.command("help")
.description("Show help for all commands")
.action(helpAll)
Expand Down
66 changes: 0 additions & 66 deletions packages/cli/src/genaiscript-api-provider.mjs

This file was deleted.

Loading

0 comments on commit e6d4b58

Please sign in to comment.