diff --git a/docs/src/content/docs/reference/cli/commands.md b/docs/src/content/docs/reference/cli/commands.md index 6fef3fb4e5..91293843f4 100644 --- a/docs/src/content/docs/reference/cli/commands.md +++ b/docs/src/content/docs/reference/cli/commands.md @@ -2,10 +2,11 @@ title: Commands description: List of all CLI commands sidebar: - order: 100 + order: 100 --- + A full list of the CLI command and its respective help text. ## `run` @@ -309,6 +310,7 @@ Commands: query tokens [options] Count tokens in a set of files jsonl2json Converts JSONL files to a JSON file + prompty Converts .prompty files to genaiscript ``` ### `parse fence` @@ -389,6 +391,20 @@ Options: -h, --help display help for command ``` +### `parse prompty` + +``` +Usage: genaiscript parse prompty [options] + +Converts .prompty files to genaiscript + +Arguments: + file input .prompty files + +Options: + -h, --help display help for command +``` + ## `workspace` ``` diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 43b21ff4c2..b0e9fc0f74 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -13,6 +13,7 @@ import { parseHTMLToText, parsePDF, parseTokens, + prompty2genaiscript, } from "./parse" import { compileScript, createScript, fixScripts, listScripts } from "./scripts" import { codeQuery } from "./codequery" @@ -299,6 +300,11 @@ export async function cli() { .command("jsonl2json", "Converts JSONL files to a JSON file") .argument("", "input JSONL files") .action(jsonl2json) + parser + .command("prompty") + .description("Converts .prompty files to genaiscript") + .argument("", "input .prompty files") + .action(prompty2genaiscript) const workspace = program .command("workspace") diff --git a/packages/cli/src/parse.ts b/packages/cli/src/parse.ts index 77b8749839..1e7da72c5b 100644 --- a/packages/cli/src/parse.ts +++ b/packages/cli/src/parse.ts @@ -10,6 +10,7 @@ import { estimateTokens } from "../../core/src/tokens" import { YAMLStringify } from "../../core/src/yaml" import {
/**
 * CLI action for `parse prompty`: converts every matched `.prompty` file into
 * a GenAIScript source file written next to it with a `.genai.mts` extension.
 *
 * Files are handled one after the other and each source path is logged to the
 * console before it is converted.
 *
 * @param files paths handed over by the CLI; presumably glob patterns are
 * resolved by `expandFiles` — confirm against its implementation.
 */
export async function prompty2genaiscript(files: string[]) {
    const promptyFiles = await expandFiles(files)
    for (const promptyFile of promptyFiles) {
        console.log(promptyFile)
        // the target path is computed before the source is read, like the
        // statement order this function has always used
        const scriptFile = replaceExt(promptyFile, ".genai.mts")
        const source = await readText(promptyFile)
        await writeText(scriptFile, promptyToGenAIScript(promptyParse(source)))
    }
}
/**
 * Escapes literal prompt text so it can be embedded in a JavaScript template
 * literal without closing it or opening a substitution.
 */
function escapeTemplateLiteralText(text: string): string {
    return text
        .replace(/\\/g, "\\\\") // backslashes first, so later escapes are not doubled
        .replace(/`/g, "\\`")
        .replace(/\$\{/g, "\\${")
}

/**
 * Turns the name captured from a prompty `{{name}}` placeholder into a
 * template-literal substitution that reads from `env.vars`.
 * Dotted identifiers use property access; anything else falls back to a
 * quoted index so the generated script stays syntactically valid.
 */
function promptyVariableSubstitution(rawName: string): string {
    const name = rawName.trim()
    const isDottedIdentifier =
        /^[A-Za-z_$][\w$]*(?:\.[A-Za-z_$][\w$]*)*$/.test(name)
    return isDottedIdentifier
        ? "${env.vars." + name + "}"
        : "${env.vars[" + JSON.stringify(name) + "]}"
}

/**
 * Renders one message body as the inside of a `$`...`` call: literal text is
 * escaped, `{{name}}` placeholders become `${env.vars.name}`.
 */
function promptyContentToTemplate(content: unknown): string {
    // split() with a capturing group yields literal text at even indices and
    // the captured placeholder names at odd indices
    return String(content)
        .split(/\{\{([^}]+)\}\}/)
        .map((segment, index) =>
            index % 2
                ? promptyVariableSubstitution(segment)
                : escapeTemplateLiteralText(segment)
        )
        .join("")
}

/**
 * Generates GenAIScript source code from a parsed prompty document.
 *
 * The front matter becomes a `script({...})` header (omitted when it carries
 * no usable metadata): `name` maps to `title`, `inputs` to `parameters` with
 * `sample` values applied as defaults, and `outputs` to a `json_object`
 * response schema. Every message becomes one `$`...`` call; message roles are
 * not carried over to the generated script.
 *
 * @param doc document produced by `promptyParse`
 * @returns the generated script source, or an empty string when the document
 * has neither metadata nor messages
 */
export function promptyToGenAIScript(doc: PromptyDocument): string {
    const { frontmatter, messages } = doc
    const { name, description, tags, sample, inputs, outputs } = frontmatter

    // clone so that applying the sample defaults never mutates the parsed document
    const parameters = inputs ? structuredClone(inputs) : undefined
    if (parameters && sample)
        for (const [key, value] of Object.entries(sample)) {
            // a sample may mention variables that have no declared input; skip them
            const parameter = parameters[key]
            if (parameter && value !== undefined) parameter.default = value
        }

    const meta = deleteUndefinedValues({
        title: name,
        description,
        tags,
        parameters,
        responseType: outputs ? "json_object" : undefined,
        responseSchema: outputs,
    })

    let src = ``
    if (Object.keys(meta).length) {
        src += `script(${JSON5Stringify(meta, null, 2)})\n\n`
    }
    src += messages
        .map((message) => "$`" + promptyContentToTemplate(message.content) + "`")
        .join("\n")

    return src
}
describe("prompty", () => {
    // `prompty` is registered as a sub-command of `parse`
    // (`genaiscript parse prompty`), so it must be invoked through `parse`.
    // `parse` is written literally because the `$` helper would quote an
    // interpolated "parse prompty" as a single argument.
    const cmd = "prompty"
    test("src", async () => {
        const res =
            await $`node ${cli} parse ${cmd} "src/*.prompty"`.nothrow()
        assert(!res.exitCode)
    })
})
@@ -43,28 +67,42 @@ interface PromptLike extends PromptDefinition { text?: string } -type SystemPromptId = +type SystemPromptId = OptionsOrString< + | "system" | "system.annotations" + | "system.changelog" + | "system.diagrams" + | "system.diff" | "system.explanations" - | "system.typescript" + | "system.files" + | "system.files_schema" | "system.fs_find_files" | "system.fs_read_file" - | "system.fs_read_summary" - | "system.files" - | "system.changelog" - | "system.diff" - | "system.tasks" - | "system.schema" - | "system.json" - | "system" | "system.math" - | "system.technical" - | "system.web_search" - | "system.files_schema" + | "system.md_frontmatter" | "system.python" - | "system.summary" + | "system.python_code_interpreter" + | "system.retrieval_fuzz_search" + | "system.retrieval_vector_search" + | "system.retrieval_web_search" + | "system.schema" + | "system.tasks" + | "system.technical" + | "system.tools" + | "system.typescript" | "system.zero_shot_cot" - | "system.functions" +> + +type SystemToolId = OptionsOrString< + | "fs_find_files" + | "fs_read_file" + | "math_eval" + | "md_read_frontmatter" + | "python_code_interpreter" + | "retrieval_fuzz_search" + | "retrieval_vector_search" + | "retrieval_web_search" +> type FileMergeHandler = ( filename: string, @@ -90,45 +128,30 @@ interface PromptOutputProcessorResult { } type PromptOutputProcessorHandler = ( - output: PromptGenerationOutput + output: GenerationOutput ) => | PromptOutputProcessorResult | Promise | undefined | Promise + | void + | Promise -interface UrlAdapter { - contentType?: "text/plain" | "application/json" - - /** - * Given a friendly URL, return a URL that can be used to fetch the content. - * @param url - * @returns - */ - matcher: (url: string) => string - - /** - * Convers the body of the response to a string. 
- * @param body - * @returns - */ - adapter?: (body: string | any) => string | undefined -} - -type PromptTemplateResponseType = "json_object" | undefined +type PromptTemplateResponseType = "json_object" | "json_schema" | undefined interface ModelConnectionOptions { /** * Which LLM model to use. * * @default gpt-4 - * @example gpt-4 gpt-4-32k gpt-3.5-turbo + * @example gpt-4 */ model?: - | "gpt-4-turbo" - | "gpt-4" - | "gpt-4-32k" - | "gpt-3.5-turbo" + | "openai:gpt-4" + | "openai:gpt-4-turbo" + | "openai:gpt-4o" + | "openai:gpt-4o-mini" + | "openai:gpt-3.5-turbo" | "ollama:phi3" | "ollama:llama3" | "ollama:mixtral" @@ -144,6 +167,19 @@ interface ModelOptions extends ModelConnectionOptions { */ temperature?: number + /** + * Specifies the type of output. Default is plain text. + * - `json_object` enables JSON mode + * - `json_schema` enables structured outputs + * Use `responseSchema` to specify an output schema. + */ + responseType?: PromptTemplateResponseType + + /** + * JSON object schema for the output. Enables the `JSON` output mode by default. + */ + responseSchema?: PromptParametersSchema | JSONSchemaObject + /** * “Top_p” or nucleus sampling is a setting that decides how many possible words to consider. * A high “top_p” value means the model looks at more possible words, even the less likely ones, @@ -157,38 +193,85 @@ interface ModelOptions extends ModelConnectionOptions { */ maxTokens?: number + /** + * Maximum number of tool calls to make. + */ + maxToolCalls?: number + + /** + * Maximum number of data repairs to attempt. + */ + maxDataRepairs?: number + /** * A deterministic integer seed to use for the model. */ seed?: number /** - * If true, the prompt will be cached. If false, the LLM chat is never cached. - * Leave empty to use the default behavior. + * By default, LLM queries are not cached. If true, the LLM request will be cached. 
Use a string to override the default cache name */ - cache?: boolean + cache?: boolean | string /** * Custom cache name. If not set, the default cache is used. + * @deprecated Use `cache` instead with a string */ cacheName?: string } -interface ScriptRuntimeOptions { +interface EmbeddingsModelConnectionOptions { /** - * Template identifiers for the system prompts (concatenated). + * LLM model to use for embeddings. */ - system?: SystemPromptId[] + embeddingsModel?: OptionsOrString< + "openai:text-embedding-3-small", + "openai:text-embedding-3-large", + "openai:text-embedding-ada-002", + "github:text-embedding-3-small", + "github:text-embedding-3-large", + "ollama:nomic-embed-text" + > +} +interface EmbeddingsModelOptions extends EmbeddingsModelConnectionOptions {} + +interface ScriptRuntimeOptions { /** - * Specifies the type of output. Default is `markdown`. - */ - responseType?: PromptTemplateResponseType + * List of system script ids used by the prompt. + */ + /** + * System prompt identifiers ([reference](https://microsoft.github.io/genaiscript/reference/scripts/system/)) + * - `system`: Base system prompt + * - `system.annotations`: Emits annotations compatible with GitHub Actions + * - `system.changelog`: Generate changelog formatter edits + * - `system.diagrams`: Generate diagrams + * - `system.diff`: Generates concise file diffs. + * - `system.explanations`: Explain your answers + * - `system.files`: File generation + * - `system.files_schema`: Apply JSON schemas to generated data. + * - `system.fs_find_files`: File find files + * - `system.fs_read_file`: File Read File + * - `system.math`: Math expression evaluator + * - `system.md_frontmatter`: Frontmatter reader + * - `system.python`: Expert at generating and understanding Python code. 
+ * - `system.python_code_interpreter`: Python Dockerized code execution for data analysis + * - `system.retrieval_fuzz_search`: Full Text Fuzzy Search + * - `system.retrieval_vector_search`: Embeddings Vector Search + * - `system.retrieval_web_search`: Web Search + * - `system.schema`: JSON Schema support + * - `system.tasks`: Generates tasks + * - `system.technical`: Technical Writer + * - `system.tools`: Tools support + * - `system.typescript`: Export TypeScript Developer + * - `system.zero_shot_cot`: Zero-shot Chain Of Though + **/ + system?: SystemPromptId[] /** - * Given a user friendly URL, return a URL that can be used to fetch the content. Returns undefined if unknown. + * List of tools used by the prompt. */ - urlAdapters?: UrlAdapter[] + tools?: SystemToolId | SystemToolId[] /** * Secrets required by the prompt @@ -205,11 +288,15 @@ type PromptParameterType = | string | number | boolean + | object | JSONSchemaNumber | JSONSchemaString | JSONSchemaBoolean -type PromptParametersSchema = Record -type PromptParameters = Record +type PromptParametersSchema = Record< + string, + PromptParameterType | PromptParameterType[] +> +type PromptParameters = Record type PromptAssertion = { // How heavily to weigh the assertion. Defaults to 1.0 @@ -246,7 +333,9 @@ type PromptAssertion = { | { // type of assertion type: "levenshtein" | "not-levenshtein" - // The threshold value, applicable only to certain types + // The expected value + value: string + // The threshold value threshold?: number } | { @@ -255,11 +344,18 @@ type PromptAssertion = { * JavaScript expression to evaluate. */ value: string + /** + * Optional threshold if the javascript expression returns a number + */ threshold?: number } ) interface PromptTest { + /** + * Short name of the test + */ + name?: string /** * Description of the test. */ @@ -267,7 +363,11 @@ interface PromptTest { /** * List of files to apply the test to. 
*/ - files: string | string[] + files?: string | string[] + /** + * Extra set of variables for this scenario + */ + vars?: Record /** * LLM output matches a given rubric, using a Language Model to grade output. */ @@ -280,13 +380,21 @@ interface PromptTest { * List of keywords that should be contained in the LLM output. */ keywords?: string | string[] + /** + * List of keywords that should not be contained in the LLM output. + */ + forbidden?: string | string[] /** * Additional deterministic assertions. */ asserts?: PromptAssertion | PromptAssertion[] } -interface PromptScript extends PromptLike, ModelOptions, ScriptRuntimeOptions { +interface PromptScript + extends PromptLike, + ModelOptions, + EmbeddingsModelOptions, + ScriptRuntimeOptions { /** * Groups template in UI */ @@ -297,6 +405,17 @@ interface PromptScript extends PromptLike, ModelOptions, ScriptRuntimeOptions { */ parameters?: PromptParametersSchema + /** + * A file path or list of file paths or globs. + * The content of these files will be by the files selected in the UI by the user or the cli arguments. + */ + files?: string | string[] + + /** + * Extra variable values that can be used to configure system prompts. + */ + vars?: Record + /** * Tests to validate this script. */ @@ -323,17 +442,19 @@ interface WorkspaceFile { filename: string /** - * @deprecated Unused + * Content of the file. */ - label?: string + content?: string +} +interface WorkspaceFileWithScore extends WorkspaceFile { /** - * Content of the file. + * Score allocated by search algorithm */ - content: string + score?: number } -interface ChatFunctionDefinition { +interface ToolDefinition { /** * The name of the function to be called. Must be a-z, A-Z, 0-9, or contain * underscores and dashes, with a maximum length of 64. @@ -355,21 +476,10 @@ interface ChatFunctionDefinition { * * Omitting `parameters` defines a function with an empty parameter list. 
*/ - parameters?: ChatFunctionParameters + parameters?: JSONSchema } -/** - * The parameters the functions accepts, described as a JSON Schema object. See the - * [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) - * for examples, and the - * [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for - * documentation about the format. - * - * Omitting `parameters` defines a function with an empty parameter list. - */ -type ChatFunctionParameters = JSONSchema - -interface ChatFunctionCallTrace { +interface ToolCallTrace { log(message: string): void item(message: string): void tip(message: string): void @@ -395,6 +505,7 @@ interface FileEdit { type: string filename: string label?: string + validated?: boolean } interface ReplaceEdit extends FileEdit { @@ -423,60 +534,143 @@ interface CreateFileEdit extends FileEdit { type Edits = InsertEdit | ReplaceEdit | DeleteEdit | CreateFileEdit -interface ChatFunctionCallContent { +interface ToolCallContent { type?: "content" content: string edits?: Edits[] } -interface ChatFunctionCallShell { - type: "shell" - command: string - stdin?: string - files?: Record - outputFile?: string - cwd?: string - args?: string[] - timeout?: number - ignoreExitCode?: boolean -} - -type ChatFunctionCallOutput = +type ToolCallOutput = | string - | ChatFunctionCallContent - | ChatFunctionCallShell + | number + | boolean + | ToolCallContent + | ShellOutput + | WorkspaceFile + +interface WorkspaceFileCache { + /** + * Gets the value associated with the key, or undefined if there is none. + * @param key + */ + get(key: K): Promise + /** + * Sets the value associated with the key. + * @param key + * @param value + */ + set(key: K, value: V): Promise + + /** + * List of keys + */ + keys(): Promise + + /** + * List the values in the cache. + */ + values(): Promise +} interface WorkspaceFileSystem { /** * Searches for files using the glob pattern and returns a list of files. 
- * If the file is text, also return the content. + * Ignore `.env` files and apply `.gitignore` if present. * @param glob */ findFiles( glob: string, options?: { /** - * Set to false to read text content by default + * Set to false to skip read text content. True by default */ readText?: boolean } ): Promise + + /** + * Performs a grep search over the files in the workspace + * @param query + * @param globs + */ + grep( + query: string | RegExp, + globs: string | string[], + options?: { + /** + * Set to false to skip read text content. True by default + */ + readText?: boolean + } + ): Promise<{ files: WorkspaceFile[] }> + /** * Reads the content of a file as text * @param path */ - readText(path: string | WorkspaceFile): Promise + readText(path: string | Awaitable): Promise + + /** + * Reads the content of a file and parses to JSON, using the JSON5 parser. + * @param path + */ + readJSON(path: string | Awaitable): Promise + + /** + * Reads the content of a file and parses to XML, using the XML parser. + */ + readXML(path: string | Awaitable): Promise + + /** + * Writes a file as text to the file system + * @param path + * @param content + */ + writeText(path: string, content: string): Promise + + /** + * Opens a key-value cache for the given cache name. + * The cache is persisted accross runs of the script. Entries are dropped when the cache grows too large. 
+ * @param cacheName + */ + cache( + cacheName: string + ): Promise> +} + +interface ToolCallContext { + trace: ToolCallTrace +} + +interface ToolCallback { + spec: ToolDefinition + impl: ( + args: { context: ToolCallContext } & Record + ) => Awaitable +} + +type AgenticToolCallback = Omit & { + spec: Omit & { + parameters: Record + } } -interface ChatFunctionCallContext { - trace: ChatFunctionCallTrace +interface AgenticToolProviderCallback { + functions: Iterable +} + +type ChatParticipantHandler = ( + context: ChatTurnGenerationContext, + messages: ChatCompletionMessageParam[] +) => Awaitable + +interface ChatParticipantOptions { + label?: string } -interface ChatFunctionCallback { - definition: ChatFunctionDefinition - fn: ( - args: { context: ChatFunctionCallContext } & Record - ) => ChatFunctionCallOutput | Promise +interface ChatParticipant { + generator: ChatParticipantHandler + options: ChatParticipantOptions } /** @@ -484,9 +678,9 @@ interface ChatFunctionCallback { */ interface ExpansionVariables { /** - * Description of the context as markdown; typically the content of a .gpspec.md file. + * Directory where the prompt is executed */ - spec: WorkspaceFile + dir: string /** * List of linked files parsed in context @@ -501,21 +695,36 @@ interface ExpansionVariables { /** * User defined variables */ - vars: PromptParameters + vars?: Record /** - * List of secrets used by the prompt, must be registred in `genaiscript`. + * List of secrets used by the prompt, must be registered in `genaiscript`. 
*/ secrets?: Record + + /** + * Root prompt generation context + */ + generator: ChatGenerationContext } type MakeOptional = Partial> & Omit -type PromptArgs = Omit +type PromptArgs = Omit type PromptSystemArgs = Omit< PromptArgs, - "model" | "temperature" | "topP" | "maxTokens" | "seed" | "tests" + | "model" + | "embeddingsModel" + | "temperature" + | "topP" + | "maxTokens" + | "seed" + | "tests" + | "responseLanguage" + | "responseType" + | "responseSchema" + | "files" > type StringLike = string | WorkspaceFile | WorkspaceFile[] @@ -554,7 +763,7 @@ interface ContextExpansionOptions { maxTokens?: number } -interface DefOptions extends FenceOptions, ContextExpansionOptions { +interface DefOptions extends FenceOptions, ContextExpansionOptions, DataFilter { /** * Filename filter based on file suffix. Case insensitive. */ @@ -609,6 +818,10 @@ interface JSONSchemaNumber { type: "number" | "integer" description?: string default?: number + minimum?: number + exclusiveMinimum?: number + maximum?: number + exclusiveMaximum?: number } interface JSONSchemaBoolean { @@ -618,6 +831,7 @@ interface JSONSchemaBoolean { } interface JSONSchemaObject { + $schema?: string type: "object" description?: string properties?: { @@ -628,6 +842,7 @@ interface JSONSchemaObject { } interface JSONSchemaArray { + $schema?: string type: "array" description?: string items?: JSONSchemaType @@ -649,13 +864,20 @@ interface DataFrame { interface RunPromptResult { text: string - finishReason?: + annotations?: Diagnostic[] + fences?: Fenced[] + frames?: DataFrame[] + json?: any + error?: SerializedError + genVars?: Record + schemas?: Record + finishReason: | "stop" | "length" | "tool_calls" | "content_filter" | "cancel" - | "error" + | "fail" } /** @@ -700,6 +922,12 @@ interface Path { * @param pathSegments */ resolve(...pathSegments: string[]): string + + /** + * Determines whether the path is an absolute path. 
+ * @param path + */ + isAbsolute(path: string): boolean } interface Fenced { @@ -734,6 +962,24 @@ interface HTMLToTextOptions { wordwrap?: number | false | null | undefined } +interface ParseXLSXOptions { + // specific worksheet name + sheet?: string + // Use specified range (A1-style bounded range string) + range?: string +} + +interface WorkbookSheet { + name: string + rows: object[] +} + +interface ParseZipOptions { + glob?: string +} + +type TokenEncoder = (text: string) => number[] + interface Parsers { /** * Parses text as a JSON5 payload @@ -742,6 +988,13 @@ interface Parsers { content: string | WorkspaceFile, options?: { defaultValue?: any } ): any | undefined + + /** + * Parses text or file as a JSONL payload. Empty lines are ignore, and JSON5 is used for parsing. + * @param content + */ + JSONL(content: string | WorkspaceFile): any[] | undefined + /** * Parses text as a YAML paylaod */ @@ -795,6 +1048,15 @@ interface Parsers { options?: { delimiter?: string; headers?: string[] } ): object[] | undefined + /** + * Parses a XLSX file and a given worksheet + * @param content + */ + XLSX( + content: WorkspaceFile, + options?: ParseXLSXOptions + ): Promise + /** * Parses a .env file * @param content @@ -829,6 +1091,23 @@ interface Parsers { options?: HTMLToTextOptions ): string + /** + * Convert HTML to markdown + * @param content html string or file + * @param options + */ + HTMLToMarkdown(content: string | WorkspaceFile): string + + /** + * Extracts the contents of a zip archive file + * @param file + * @param options + */ + unzip( + file: WorkspaceFile, + options?: ParseZipOptions + ): Promise + /** * Estimates the number of tokens in the content. 
* @param content content to tokenize @@ -857,7 +1136,21 @@ interface Parsers { * Parses and evaluates a math expression * @param expression math expression compatible with mathjs */ - math(expression: string): string | number | undefined + math(expression: string): Promise + + /** + * Using the JSON schema, validates the content + * @param schema JSON schema instance + * @param content object to validate + */ + validateJSON(schema: JSONSchema, content: any): JSONSchemaValidation + + /** + * Renders a mustache template + * @param text template text + * @param data data to render + */ + mustache(text: string | WorkspaceFile, data: Record): string } interface AICIGenOptions { @@ -931,7 +1224,84 @@ interface XML { * Parses an XML payload to an object * @param text */ - parse(text: string): any + parse(text: string, options?: XMLParseOptions): any +} + +interface HTMLTableToJSONOptions { + useFirstRowForHeadings?: boolean + headers?: HeaderRows + stripHtmlFromHeadings?: boolean + stripHtmlFromCells?: boolean + stripHtml?: boolean | null + forceIndexAsNumber?: boolean + countDuplicateHeadings?: boolean + ignoreColumns?: number[] | null + onlyColumns?: number[] | null + ignoreHiddenRows?: boolean + id?: string[] | null + headings?: string[] | null + containsClasses?: string[] | null + limitrows?: number | null +} + +interface HTML { + /** + * Converts all HTML tables to JSON. 
+ * @param html + * @param options + */ + convertTablesToJSON( + html: string, + options?: HTMLTableToJSONOptions + ): object[][] + /** + * Converts HTML markup to plain text + * @param html + */ + convertToText(html: string): string + /** + * Converts HTML markup to markdown + * @param html + */ + convertToMarkdown(html: string): string +} + +interface MD { + /** + * Parses front matter from markdown + * @param text + */ + frontmatter(text: string, format?: "yaml" | "json" | "toml" | "text"): any + + /** + * Removes the front matter from the markdown text + */ + content(text: string): string + + /** + * Merges frontmatter with the existing text + * @param text + * @param frontmatter + * @param format + */ + updateFrontmatter( + text: string, + frontmatter: any, + format?: "yaml" | "json" + ): string +} + +interface JSONL { + /** + * Parses a JSONL string to an array of objects + * @param text + */ + parse(text: string): any[] + /** + * Converts objects to JSONL format + * @param objs + */ + stringify(objs: any[]): string } interface INI { @@ -967,60 +1337,145 @@ interface CSV { * @param csv * @param options */ - mardownify(csv: object[], options?: { headers?: string[] }): string + markdownify(csv: object[], options?: { headers?: string[] }): string } interface HighlightOptions { maxLength?: number } -interface SearchResult { +interface WebSearchResult { webPages: WorkspaceFile[] } +interface VectorSearchOptions extends EmbeddingsModelOptions { + /** + * Maximum number of embeddings to use + */ + topK?: number + /** + * Minimum similarity score + */ + minScore?: number +} + +interface FuzzSearchOptions { + /** + * Controls whether to perform prefix search. It can be a simple boolean, or a + * function. + * + * If a boolean is passed, prefix search is performed if true. + * + * If a function is passed, it is called upon search with a search term, the + * positional index of that search term in the tokenized search query, and the + * tokenized search query. 
+ */ + prefix?: boolean + /** + * Controls whether to perform fuzzy search. It can be a simple boolean, or a + * number, or a function. + * + * If a boolean is given, fuzzy search with a default fuzziness parameter is + * performed if true. + * + * If a number higher or equal to 1 is given, fuzzy search is performed, with + * a maximum edit distance (Levenshtein) equal to the number. + * + * If a number between 0 and 1 is given, fuzzy search is performed within a + * maximum edit distance corresponding to that fraction of the term length, + * approximated to the nearest integer. For example, 0.2 would mean an edit + * distance of 20% of the term length, so 1 character in a 5-characters term. + * The calculated fuzziness value is limited by the `maxFuzzy` option, to + * prevent slowdown for very long queries. + */ + fuzzy?: boolean | number + /** + * Controls the maximum fuzziness when using a fractional fuzzy value. This is + * set to 6 by default. Very high edit distances usually don't produce + * meaningful results, but can excessively impact search performance. + */ + maxFuzzy?: number + /** + * Maximum number of results to return + */ + topK?: number +} + interface Retrieval { /** * Executers a Bing web search. Requires to configure the BING_SEARCH_API_KEY secret. 
* @param query */ - webSearch(query: string): Promise + webSearch(query: string): Promise /** - * Search for embeddings + * Search using similarity distance on embeddings */ - search( + vectorSearch( query: string, - files: (string | WorkspaceFile)[], - options?: { - /** - * Maximum number of embeddings to use - */ - topK?: number - /** - * Minimum similarity score - */ - minScore?: number - } - ): Promise<{ - files: WorkspaceFile[] - fragments: WorkspaceFile[] - }> + files: (string | WorkspaceFile) | (string | WorkspaceFile)[], + options?: VectorSearchOptions + ): Promise + + /** + * Performs a fuzzy search over the files + * @param query keywords to search + * @param files list of files + * @param options fuzzing configuration + */ + fuzzSearch( + query: string, + files: WorkspaceFile | WorkspaceFile[], + options?: FuzzSearchOptions + ): Promise } type FetchTextOptions = Omit -interface DefDataOptions extends ContextExpansionOptions { - format?: "json" | "yaml" | "csv" +interface DataFilter { + /** + * The keys to select from the object. + * If a key is prefixed with -, it will be removed from the object. + */ headers?: string[] + /** + * Selects the first N elements from the data + */ + sliceHead?: number + /** + * Selects the last N elements from the data + */ + sliceTail?: number + /** + * Selects the a random sample of N items in the collection. + */ + sliceSample?: number + + /** + * Removes items with duplicate values for the specified keys. + */ + distinct?: string[] +} + +interface DefDataOptions + extends Omit, + DataFilter { + /** + * Output format in the prompt. Defaults to markdownified CSV + */ + format?: "json" | "yaml" | "csv" } interface DefSchemaOptions { + /** + * Output format in the prompt. 
+ */ format?: "typescript" | "json" | "yaml" } type ChatFunctionHandler = ( - args: { context: ChatFunctionCallContext } & Record -) => ChatFunctionCallOutput | Promise + args: { context: ToolCallContext } & Record +) => Awaitable interface WriteTextOptions extends ContextExpansionOptions { /** @@ -1029,21 +1484,96 @@ interface WriteTextOptions extends ContextExpansionOptions { assistant?: boolean } -type RunPromptGenerator = (ctx: RunPromptContext) => Awaitable +type PromptGenerator = (ctx: ChatGenerationContext) => Awaitable -// keep in sync with prompt_type.d.ts -interface RunPromptContext { +interface PromptGeneratorOptions extends ModelOptions { + /** + * Label for trace + */ + label?: string + + /** + * List of system prompts if any + */ + system?: SystemPromptId[] +} + +interface FileOutputOptions { + /** + * Schema identifier to validate the generated file + */ + schema?: string +} + +interface FileOutput { + pattern: string + description?: string + options?: FileOutputOptions +} + +interface ImportTemplateOptions {} + +interface ChatTurnGenerationContext { + importTemplate( + files: string | string[], + arguments?: Record< + string | number | boolean | (() => string | number | boolean) + >, + options?: ImportTemplateOptions + ): void writeText(body: Awaitable, options?: WriteTextOptions): void $(strings: TemplateStringsArray, ...args: any[]): void fence(body: StringLike, options?: FenceOptions): void - def(name: string, body: StringLike, options?: DefOptions): string - runPrompt( - generator: string | RunPromptGenerator, - options?: ModelOptions - ): Promise + def( + name: string, + body: string | WorkspaceFile | WorkspaceFile[] | ShellOutput | Fenced, + options?: DefOptions + ): string + defData( + name: string, + data: object[] | object, + options?: DefDataOptions + ): string + console: PromptGenerationConsole +} + +interface FileUpdate { + before: string + after: string + validation?: JSONSchemaValidation +} + +interface ChatGenerationContext extends 
ChatTurnGenerationContext { + defSchema( + name: string, + schema: JSONSchema, + options?: DefSchemaOptions + ): string + defImages( + files: StringLike | Buffer | Blob, + options?: DefImagesOptions + ): void + defTool( + tool: ToolCallback | AgenticToolCallback | AgenticToolProviderCallback + ): void + defTool( + name: string, + description: string, + parameters: PromptParametersSchema | JSONSchema, + fn: ChatFunctionHandler + ): void + defChatParticipant( + participant: ChatParticipantHandler, + options?: ChatParticipantOptions + ): void + defFileOutput( + pattern: string, + description?: string, + options?: FileOutputOptions + ): void } -interface PromptGenerationOutput { +interface GenerationOutput { /** * LLM output. */ @@ -1062,7 +1592,7 @@ interface PromptGenerationOutput { /** * A map of file updates */ - fileEdits: Record + fileEdits: Record /** * Generated variables, typically from AICI.gen @@ -1073,6 +1603,16 @@ interface PromptGenerationOutput { * Generated annotations */ annotations: Diagnostic[] + + /** + * Schema definition used in the generation + */ + schemas: Record + + /** + * Output as JSON if parsable + */ + json?: any } type Point = { @@ -1184,28 +1724,547 @@ interface QueryCapture { node: SyntaxNode } -interface PromptContext extends RunPromptContext { +interface ShellOptions { + cwd?: string + stdin?: string + /** + * Process timeout in milliseconds, default is 60s + */ + timeout?: number + /** + * trace label + */ + label?: string +} + +interface ShellOutput { + stdout?: string + stderr?: string + exitCode: number + failed: boolean +} + +interface BrowserOptions { + /** + * Browser engine for this page. 
Defaults to chromium + * + */ + browser?: "chromium" | "firefox" | "webkit" +} + +interface BrowseSessionOptions extends BrowserOptions, TimeoutOptions { + /** + * Creates a new context for the browser session + */ + incognito?: boolean + + /** + * Base url to use for relative urls + * @link https://playwright.dev/docs/api/class-browser#browser-new-context-option-base-url + */ + baseUrl?: string + + /** + * Toggles bypassing page's Content-Security-Policy. Defaults to false. + * @link https://playwright.dev/docs/api/class-browser#browser-new-context-option-bypass-csp + */ + bypassCSP?: boolean + + /** + * Whether to ignore HTTPS errors when sending network requests. Defaults to false. + * @link https://playwright.dev/docs/api/class-browser#browser-new-context-option-ignore-https-errors + */ + ignoreHTTPSErrors?: boolean + + /** + * Whether or not to enable JavaScript in the context. Defaults to true. + * @link https://playwright.dev/docs/api/class-browser#browser-new-context-option-java-script-enabled + */ + javaScriptEnabled?: boolean +} + +interface TimeoutOptions { + /** + * Maximum time in milliseconds. Default to no timeout + */ + timeout?: number +} + +interface ScreenshotOptions extends TimeoutOptions { + quality?: number + scale?: "css" | "device" + type?: "png" | "jpeg" + style?: string +} + +interface PageScreenshotOptions extends ScreenshotOptions { + fullPage?: boolean + omitBackground?: boolean + clip?: { + x: number + y: number + width: number + height: number + } +} + +interface BrowserLocatorSelector { + /** + * Allows locating elements by their ARIA role, ARIA attributes and accessible name. 
+ * @param role + * @param options + */ + getByRole( + role: + | "alert" + | "alertdialog" + | "application" + | "article" + | "banner" + | "blockquote" + | "button" + | "caption" + | "cell" + | "checkbox" + | "code" + | "columnheader" + | "combobox" + | "complementary" + | "contentinfo" + | "definition" + | "deletion" + | "dialog" + | "directory" + | "document" + | "emphasis" + | "feed" + | "figure" + | "form" + | "generic" + | "grid" + | "gridcell" + | "group" + | "heading" + | "img" + | "insertion" + | "link" + | "list" + | "listbox" + | "listitem" + | "log" + | "main" + | "marquee" + | "math" + | "meter" + | "menu" + | "menubar" + | "menuitem" + | "menuitemcheckbox" + | "menuitemradio" + | "navigation" + | "none" + | "note" + | "option" + | "paragraph" + | "presentation" + | "progressbar" + | "radio" + | "radiogroup" + | "region" + | "row" + | "rowgroup" + | "rowheader" + | "scrollbar" + | "search" + | "searchbox" + | "separator" + | "slider" + | "spinbutton" + | "status" + | "strong" + | "subscript" + | "superscript" + | "switch" + | "tab" + | "table" + | "tablist" + | "tabpanel" + | "term" + | "textbox" + | "time" + | "timer" + | "toolbar" + | "tooltip" + | "tree" + | "treegrid" + | "treeitem", + options?: { + checked?: boolean + disabled?: boolean + exact?: boolean + expanded?: boolean + name?: string + selected?: boolean + } & TimeoutOptions + ): Locator + + /** + * Allows locating input elements by the text of the associated