From b4a276d423676a019773e4ce287181282d9bb694 Mon Sep 17 00:00:00 2001 From: pelikhan Date: Mon, 29 Apr 2024 13:32:35 -0700 Subject: [PATCH] more docs --- .../content/docs/reference/scripts/tests.mdx | 122 +++++++++++++++++- packages/core/src/types/prompt_template.d.ts | 2 - 2 files changed, 115 insertions(+), 9 deletions(-) diff --git a/docs/src/content/docs/reference/scripts/tests.mdx b/docs/src/content/docs/reference/scripts/tests.mdx index 962d7d91d6..94884c8a0c 100644 --- a/docs/src/content/docs/reference/scripts/tests.mdx +++ b/docs/src/content/docs/reference/scripts/tests.mdx @@ -7,16 +7,17 @@ keywords: promptfoo, LLM testing, output quality, language model evaluation, scr --- It is possible to define tests for the LLM scripts, to evaluate the output quality of the LLM -over time and ModuleResolutionKind. +over time and model types. The tests are executed by [promptfoo](https://promptfoo.dev/), a tool for evaluating LLM output quality. ## Defining tests -The tests are declared in the `script` function in your test. You may define one or many tests (array). +The tests are declared in the `script` function in your test. +You may define one or many tests (array). -```js title="proofreader.genai.js" wrap +```js title="proofreader.genai.js" wrap "tests" scripts({ ..., tests: [{ @@ -28,11 +29,118 @@ scripts({ }) ``` -### rubrics +### `files` -### facts +`files` takes a list of file paths (relative to the workspace) and populates the `env.files` +variable while running the test. You can provide multiple files by passing an array of strings. -### Assertions and metrics +```js title="proofreader.genai.js" wrap "files" +scripts({ + tests: { + files: "src/rag/testcode.ts", + ... + } +}) +``` + +### `rubrics` + +`rubrics` checks if the LLM output matches given requirements, +using a language model to grade the output based on the rubric (see [llm-rubric](https://promptfoo.dev/docs/configuration/expected-outputs/model-graded/#examples-output-based)). 
+You can specify multiple rubrics by passing an array of strings. + +```js title="proofreader.genai.js" wrap "rubrics" +scripts({ + tests: { + rubrics: "is a report with a list of issues", + ..., + } +}) +``` + +### `facts` + +`facts` checks factual consistency (see [factuality](https://promptfoo.dev/docs/guides/factuality-eval/)). +You can specify multiple facts by passing an array of strings. + +> given a completion A and reference answer B evaluates +> whether A is a subset of B, A is a superset of B, A and B are equivalent, +> A and B disagree, or A and B differ, +> but the differences don't matter from the perspective of factuality. + +```js title="proofreader.genai.js" wrap "facts" +scripts({ + tests: { + facts: `The report says that the input string should be validated before use.`, + ..., + } +}) +``` -The assertions of tests are based on +### `asserts` + +Other assertions are based on [promptfoo assertions and metrics](https://promptfoo.dev/docs/configuration/expected-outputs/). + +- `icontains` (`not-icontains`) +- `equals` (`not-equals`) +- `starts-with` (`not-starts-with`) + +```js title="proofreader.genai.js" wrap "asserts" +scripts({ + tests: { + facts: `The report says that the input string should be validated before use.`, + asserts: [ + { + type: "icontains", + value: "issue", + }, + ], + }, +}) +``` + +- `contains-all` (`not-contains-all`) +- `contains-any` (`not-contains-any`) +- `icontains-all` (`not-icontains-all`) + +```js title="proofreader.genai.js" wrap "asserts" +scripts({ + tests: { + facts: `The report says that the input string should be validated before use.`, + asserts: [ + { + type: "icontains-all", + value: ["issue", "fix"], + }, + ], + }, +}) +``` + +## Running tests + +You can run tests from Visual Studio Code or using the [command line](/genaiscript/reference/cli). +In both cases, genaiscript generates a [promptfoo configuration file](https://promptfoo.dev/docs/configuration/guide) +and executes promptfoo on it. 
+ +### Visual Studio Code + +- Open the script to test +- Right click in the editor and select **Run GenAIScript Tests** in the context menu +- The [promptfoo web view](https://promptfoo.dev/docs/usage/web-ui/) will automatically + open and refresh with the test results. + +### Command line + +Run the `test` command with the script file as argument. + +```sh "test" +npx genaiscript test +``` + +You can specify additional models to test against by passing the `--models` option. + +```sh '--models "ollama:phi3"' +npx genaiscript test --models "ollama:phi3" +``` diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index 53c9e8bd56..e515cac676 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -207,8 +207,6 @@ type PromptAssertion = { | "not-equals" | "starts-with" | "not-starts-with" - | "llm-rubric" - | "factuality" // The expected value value: string }