From 822c8bef6988fdbbd6adaed689407fa420b9d331 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Sat, 21 Dec 2024 10:49:53 +0000 Subject: [PATCH 01/10] built-in zod support --- packages/cli/package.json | 5 ++-- packages/cli/src/runtime.ts | 23 +++++++++++++++++-- .../sample/genaisrc/cityinfo-zod.genai.mts | 6 ++--- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/packages/cli/package.json b/packages/cli/package.json index 5fe00442cc..0b2f61c279 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -67,7 +67,8 @@ "turndown-plugin-gfm": "^1.0.2", "typescript": "5.7.2", "vectra": "^0.9.0", - "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz" + "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz", + "zod-to-json-schema": "^3.24.1" }, "optionalDependencies": { "@huggingface/transformers": "^3.2.1", @@ -112,7 +113,7 @@ "zx": "^8.2.4" }, "scripts": { - "compile:runtime": "tsc src/runtime.ts --skipLibCheck --outDir built --declaration --target es2020 --moduleResolution node && mv built/runtime.js built/runtime.mjs", + "compile:runtime": "tsc src/runtime.ts --skipLibCheck --outDir built --declaration --target es2020 --moduleResolution node --module esnext && mv built/runtime.js built/runtime.mjs", "compile:api": "esbuild src/api.ts --outfile=built/api.mjs", "compile:cli": "esbuild src/main.ts --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:turndown-plugin-gfm --external:vectra --external:tabletojson --external:html-to-text --external:@octokit/rest --external:@octokit/plugin-throttling --external:@octokit/plugin-retry --external:@octokit/plugin-paginate-rest --external:skia-canvas --external:@huggingface/transformers --external:@modelcontextprotocol/sdk --external:@anthropic-ai/sdk --external:@anthropic-ai/bedrock-sdk --external:es-toolkit && node ../../scripts/patch-cli.mjs", "compile": "yarn compile:api && yarn compile:runtime && yarn compile:cli", diff --git a/packages/cli/src/runtime.ts b/packages/cli/src/runtime.ts index 6d7ad17e3d..68684dc149 100644 --- a/packages/cli/src/runtime.ts +++ b/packages/cli/src/runtime.ts @@ -1,9 +1,28 @@ /** * GenAIScript supporting runtime */ -import { delay as esDelay } from "es-toolkit" +import { delay as _delay } from "es-toolkit" +import { zodToJsonSchema as _zodToJsonSchema } from "zod-to-json-schema" /** * A helper function to delay the execution of the script */ -export const delay: (ms: number) => Promise = esDelay +export const delay: (ms: number) => Promise = _delay + +/** + * Converts a Zod schema to a JSON schema + * @param z + * @param options + * @returns + */ +export function zodToJsonSchema(z: any, options?: object): any { + const definitions = _zodToJsonSchema(z, { + name: "schema", + target: "openAi", + ...(options || {}), + }).definitions + console.log(JSON.stringify(definitions, null, 2)) + const keys = Object.keys(definitions) + const schema = definitions[keys[0]] + return schema +} diff --git a/packages/sample/genaisrc/cityinfo-zod.genai.mts b/packages/sample/genaisrc/cityinfo-zod.genai.mts index 603d5d4540..ddfb9b04af 100644 --- a/packages/sample/genaisrc/cityinfo-zod.genai.mts +++ b/packages/sample/genaisrc/cityinfo-zod.genai.mts @@ -6,7 +6,7 @@ script({ def("CITIES", env.files) import { z } from "zod" -import { zodToJsonSchema } from "zod-to-json-schema" +import { zodToJsonSchema } from "genaiscript/runtime" // create schema using zod const CitySchema = z.array( z.object({ @@ -16,9 +16,7 @@ const CitySchema = z.array( }) ) // JSON schema to constrain the output of the tool. -const schema = defSchema("CITY_SCHEMA", zodToJsonSchema(CitySchema, "citySchema").definitions[ - "citySchema" -] as JSONSchemaArray) +const schema = defSchema("CITY_SCHEMA", zodToJsonSchema(CitySchema)) // the task` $`Answer with the information of the cities in the CITIES data set, compliant with ${schema}.` From 5da37e077da550ee8385ee7b228b8f9cb20d4c58 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Sat, 21 Dec 2024 10:54:00 +0000 Subject: [PATCH 02/10] towards builtin zod support --- packages/cli/package.json | 1 + packages/cli/src/runtime.ts | 8 +++++++- packages/sample/genaisrc/cityinfo-zod.genai.mts | 3 +-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/packages/cli/package.json b/packages/cli/package.json index 0b2f61c279..e33e81fc1a 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -68,6 +68,7 @@ "typescript": "5.7.2", "vectra": "^0.9.0", "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz", + "zod": "^3.24.1", "zod-to-json-schema": "^3.24.1" }, "optionalDependencies": { diff --git a/packages/cli/src/runtime.ts b/packages/cli/src/runtime.ts index 68684dc149..b59dcf1536 100644 --- a/packages/cli/src/runtime.ts +++ b/packages/cli/src/runtime.ts @@ -3,19 +3,25 @@ */ import { delay as _delay } from "es-toolkit" import { zodToJsonSchema as _zodToJsonSchema } from "zod-to-json-schema" +import { z as zod } from "zod" /** * A helper function to delay the execution of the script */ export const delay: (ms: number) => Promise = _delay +/** + * Zod schema generator + */ +export const z = zod + /** * Converts a Zod schema to a JSON schema * @param z * @param options * @returns */ -export function zodToJsonSchema(z: any, options?: object): any { +export function zodToJsonSchema(z: zod.ZodType, options?: object): any { const definitions = _zodToJsonSchema(z, { name: "schema", target: "openAi", diff --git a/packages/sample/genaisrc/cityinfo-zod.genai.mts b/packages/sample/genaisrc/cityinfo-zod.genai.mts index ddfb9b04af..d8abe8b4ea 100644 --- a/packages/sample/genaisrc/cityinfo-zod.genai.mts +++ b/packages/sample/genaisrc/cityinfo-zod.genai.mts @@ -5,8 +5,7 @@ script({ // the data to analyze def("CITIES", env.files) -import { z } from "zod" -import { zodToJsonSchema } from "genaiscript/runtime" +import { z, zodToJsonSchema } from "genaiscript/runtime" // create schema using zod const CitySchema = z.array( z.object({ From a07d0da1986e333c4f632a51eaf624c597462f88 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Sat, 21 Dec 2024 11:05:59 +0000 Subject: [PATCH 03/10] =?UTF-8?q?feat:=20=E2=9C=A8=20add=20Zod=20to=20JSON?= =?UTF-8?q?=20schema=20conversion=20helpers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/cli/package.json | 4 +-- packages/cli/src/runtime.ts | 19 -------------- packages/core/src/promptdom.ts | 3 +++ packages/core/src/types/prompt_template.d.ts | 2 +- packages/core/src/types/prompt_type.d.ts | 2 +- packages/core/src/zod.ts | 26 +++++++++++++++++++ .../sample/genaisrc/cityinfo-zod.genai.mts | 4 +-- 7 files changed, 35 insertions(+), 25 deletions(-) create mode 100644 packages/core/src/zod.ts diff --git a/packages/cli/package.json b/packages/cli/package.json index e33e81fc1a..ba9e1fee1a 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -116,9 +116,9 @@ "scripts": { "compile:runtime": "tsc src/runtime.ts --skipLibCheck --outDir built --declaration --target es2020 --moduleResolution node --module esnext && mv built/runtime.js built/runtime.mjs", "compile:api": "esbuild src/api.ts --outfile=built/api.mjs", - "compile:cli": "esbuild src/main.ts --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:turndown-plugin-gfm --external:vectra --external:tabletojson --external:html-to-text --external:@octokit/rest --external:@octokit/plugin-throttling --external:@octokit/plugin-retry --external:@octokit/plugin-paginate-rest --external:skia-canvas --external:@huggingface/transformers --external:@modelcontextprotocol/sdk --external:@anthropic-ai/sdk --external:@anthropic-ai/bedrock-sdk --external:es-toolkit && node ../../scripts/patch-cli.mjs", + "compile:cli": "esbuild src/main.ts --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:turndown-plugin-gfm --external:vectra --external:tabletojson --external:html-to-text --external:@octokit/rest --external:@octokit/plugin-throttling --external:@octokit/plugin-retry --external:@octokit/plugin-paginate-rest --external:skia-canvas --external:@huggingface/transformers --external:@modelcontextprotocol/sdk --external:@anthropic-ai/sdk --external:@anthropic-ai/bedrock-sdk --external:es-toolkit --external:zod --external:zod-to-json-schema && node ../../scripts/patch-cli.mjs", "compile": "yarn compile:api && yarn compile:runtime && yarn compile:cli", - "compile-debug": "esbuild src/main.ts --sourcemap --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:turndown-plugin-gfm --external:vectra --external:tabletojson --external:html-to-text --external:@octokit/rest --external:@octokit/plugin-throttling --external:@octokit/plugin-retry --external:@octokit/plugin-paginate-rest --external:skia-canvas --external:@huggingface/transformers --external:@modelcontextprotocol/sdk --external:@anthropic-ai/sdk --external:@anthropic-ai/bedrock-sdk --external:es-toolkit", + "compile-debug": "esbuild src/main.ts --sourcemap --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:turndown-plugin-gfm --external:vectra --external:tabletojson --external:html-to-text --external:@octokit/rest --external:@octokit/plugin-throttling --external:@octokit/plugin-retry --external:@octokit/plugin-paginate-rest --external:skia-canvas --external:@huggingface/transformers --external:@modelcontextprotocol/sdk --external:@anthropic-ai/sdk --external:@anthropic-ai/bedrock-sdk --external:es-toolkit --external:zod --external:zod-to-json-schema", "postcompile": "node built/genaiscript.cjs info help > ../../docs/src/content/docs/reference/cli/commands.md", "vis:treemap": "npx --yes esbuild-visualizer --metadata esbuild.meta.json --filename esbuild.treemap.html", "vis:network": "npx --yes esbuild-visualizer --metadata esbuild.meta.json --filename esbuild.network.html --template network", diff --git a/packages/cli/src/runtime.ts b/packages/cli/src/runtime.ts index b59dcf1536..6c4515e3ed 100644 --- a/packages/cli/src/runtime.ts +++ b/packages/cli/src/runtime.ts @@ -2,7 +2,6 @@ * GenAIScript supporting runtime */ import { delay as _delay } from "es-toolkit" -import { zodToJsonSchema as _zodToJsonSchema } from "zod-to-json-schema" import { z as zod } from "zod" /** @@ -14,21 +13,3 @@ export const delay: (ms: number) => Promise = _delay * Zod schema generator */ export const z = zod - -/** - * Converts a Zod schema to a JSON schema - * @param z - * @param options - * @returns - */ -export function zodToJsonSchema(z: zod.ZodType, options?: object): any { - const definitions = _zodToJsonSchema(z, { - name: "schema", - target: "openAi", - ...(options || {}), - }).definitions - console.log(JSON.stringify(definitions, null, 2)) - const keys = Object.keys(definitions) - const schema = definitions[keys[0]] - return schema -} diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts index 10eefbc849..0456148a85 100644 --- a/packages/core/src/promptdom.ts +++ b/packages/core/src/promptdom.ts @@ -41,6 +41,7 @@ import { jinjaRenderChatMessage } from "./jinja" import { runtimeHost } from "./host" import { hash } from "./crypto" import { startMcpServer } from "./mcp" +import { tryZodToJsonSchema } from "./zod" // Definition of the PromptNode interface which is an essential part of the code structure. export interface PromptNode extends ContextExpansionOptions { @@ -366,6 +367,8 @@ export function createSchemaNode( ): PromptSchemaNode { assert(!!name) assert(value !== undefined) + // auto zod conversion + value = tryZodToJsonSchema(value) ?? value return { type: "schema", name, value, options } } diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index c4dfbbb17a..6ff01de749 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -2607,7 +2607,7 @@ type McpServersConfig = Record> interface ChatGenerationContext extends ChatTurnGenerationContext { defSchema( name: string, - schema: JSONSchema, + schema: JSONSchema | object, options?: DefSchemaOptions ): string defImages( diff --git a/packages/core/src/types/prompt_type.d.ts b/packages/core/src/types/prompt_type.d.ts index 97b4ef7097..5297701493 100644 --- a/packages/core/src/types/prompt_type.d.ts +++ b/packages/core/src/types/prompt_type.d.ts @@ -239,7 +239,7 @@ declare function fetchText( */ declare function defSchema( name: string, - schema: JSONSchema, + schema: JSONSchema | object, options?: DefSchemaOptions ): string diff --git a/packages/core/src/zod.ts b/packages/core/src/zod.ts new file mode 100644 index 0000000000..49d7cec29a --- /dev/null +++ b/packages/core/src/zod.ts @@ -0,0 +1,26 @@ +import { zodToJsonSchema as _zodToJsonSchema } from "zod-to-json-schema" +import { ZodType } from "zod" + +/** + * Converts a Zod schema to a JSON schema + * @param z + * @param options + * @returns + */ +export function tryZodToJsonSchema(z: object, options?: object): JSONSchema { + if (!z || !(z instanceof ZodType)) { + return undefined + } + try { + const definitions = _zodToJsonSchema(z, { + name: "schema", + target: "openAi", + ...(options || {}), + }).definitions + const keys = Object.keys(definitions) + const schema = definitions[keys[0]] + return structuredClone(schema) as JSONSchema + } catch (e) { + return undefined + } +} diff --git a/packages/sample/genaisrc/cityinfo-zod.genai.mts b/packages/sample/genaisrc/cityinfo-zod.genai.mts index d8abe8b4ea..1a7575e7a8 100644 --- a/packages/sample/genaisrc/cityinfo-zod.genai.mts +++ b/packages/sample/genaisrc/cityinfo-zod.genai.mts @@ -5,7 +5,7 @@ script({ // the data to analyze def("CITIES", env.files) -import { z, zodToJsonSchema } from "genaiscript/runtime" +import { z } from "genaiscript/runtime" // create schema using zod const CitySchema = z.array( z.object({ @@ -15,7 +15,7 @@ const CitySchema = z.array( }) ) // JSON schema to constrain the output of the tool. -const schema = defSchema("CITY_SCHEMA", zodToJsonSchema(CitySchema)) +const schema = defSchema("CITY_SCHEMA", CitySchema) // the task` $`Answer with the information of the cities in the CITIES data set, compliant with ${schema}.` From e8cd8fa420c000be5ec0d6f967fb9e270f89c594 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Sat, 21 Dec 2024 11:16:57 +0000 Subject: [PATCH 04/10] =?UTF-8?q?fix:=20=F0=9F=90=9B=20update=20Zod=20type?= =?UTF-8?q?=20check=20to=20handle=20missing=20=5Fdef=20method?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/core/src/zod.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/core/src/zod.ts b/packages/core/src/zod.ts index 49d7cec29a..9bb7450cbf 100644 --- a/packages/core/src/zod.ts +++ b/packages/core/src/zod.ts @@ -8,11 +8,11 @@ import { ZodType } from "zod" * @returns */ export function tryZodToJsonSchema(z: object, options?: object): JSONSchema { - if (!z || !(z instanceof ZodType)) { - return undefined - } + if (!z) return undefined + // instanceof not working, test for some existing methoid + if (!(z as ZodType)._def) return undefined try { - const definitions = _zodToJsonSchema(z, { + const definitions = _zodToJsonSchema(z as ZodType, { name: "schema", target: "openAi", ...(options || {}), From 66c5fdcdb680808dc95abc5d28f1a1122af6b97d Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Sat, 21 Dec 2024 13:47:11 +0000 Subject: [PATCH 05/10] =?UTF-8?q?refactor:=20=E2=99=BB=EF=B8=8F=20simplify?= =?UTF-8?q?=20type=20usage=20in=20tryZodToJsonSchema?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/core/src/zod.ts | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/packages/core/src/zod.ts b/packages/core/src/zod.ts index 9bb7450cbf..68f2a94da9 100644 --- a/packages/core/src/zod.ts +++ b/packages/core/src/zod.ts @@ -1,5 +1,4 @@ import { zodToJsonSchema as _zodToJsonSchema } from "zod-to-json-schema" -import { ZodType } from "zod" /** * Converts a Zod schema to a JSON schema @@ -10,15 +9,12 @@ import { ZodType } from "zod" export function tryZodToJsonSchema(z: object, options?: object): JSONSchema { if (!z) return undefined // instanceof not working, test for some existing methoid - if (!(z as ZodType)._def) return undefined + if (!(z as any)._def) return undefined try { - const definitions = _zodToJsonSchema(z as ZodType, { - name: "schema", + const schema = _zodToJsonSchema(z as any, { target: "openAi", ...(options || {}), - }).definitions - const keys = Object.keys(definitions) - const schema = definitions[keys[0]] + }) return structuredClone(schema) as JSONSchema } catch (e) { return undefined From a358674d5dd5f263ab42cef072c3c717344966f9 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Sat, 21 Dec 2024 13:50:36 +0000 Subject: [PATCH 06/10] =?UTF-8?q?feat:=20=E2=9C=A8=20add=20native=20Zod=20?= =?UTF-8?q?schema=20support=20in=20documentation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../docs/reference/scripts/schemas.mdx | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/src/content/docs/reference/scripts/schemas.mdx b/docs/src/content/docs/reference/scripts/schemas.mdx index a341a98cac..36c65a4e90 100644 --- a/docs/src/content/docs/reference/scripts/schemas.mdx +++ b/docs/src/content/docs/reference/scripts/schemas.mdx @@ -100,7 +100,27 @@ File ./data.json: {/* genaiscript output end */} +### Native zod support +A [Zod](https://zod.dev/) type can be passed in `defSchema` and it will be automatically converted to JSON schema. +The GenAIScript also exports the `z` object from Zod for convenience. + +```js +// import from genaiscript +import { z } from "genaiscript/runtime" +// or directly from zod +// import { z } from "zod" +// create schema using zod +const CitySchema = z.array( + z.object({ + name: z.string(), + population: z.number(), + url: z.string(), + }) +) +// JSON schema to constrain the output of the tool. +const schema = defSchema("CITY_SCHEMA", CitySchema) +``` ### Prompt encoding From f5090787e9b71b0ba71b9de9665f516f5dfe9648 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Sat, 21 Dec 2024 13:50:58 +0000 Subject: [PATCH 07/10] =?UTF-8?q?style:=20=F0=9F=8E=A8=20format=20and=20im?= =?UTF-8?q?prove=20schema=20documentation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../docs/reference/scripts/schemas.mdx | 137 +++++++++--------- 1 file changed, 68 insertions(+), 69 deletions(-) diff --git a/docs/src/content/docs/reference/scripts/schemas.mdx b/docs/src/content/docs/reference/scripts/schemas.mdx index 36c65a4e90..473d6a9f7f 100644 --- a/docs/src/content/docs/reference/scripts/schemas.mdx +++ b/docs/src/content/docs/reference/scripts/schemas.mdx @@ -1,17 +1,16 @@ --- title: Data Schemas sidebar: - order: 6 + order: 6 description: Learn how to define and use data schemas for structured output in - JSON/YAML with LLM, including validation and repair techniques. + JSON/YAML with LLM, including validation and repair techniques. keywords: data schemas, JSON schema, YAML validation, LLM structured output, - schema repair + schema repair genaiscript: - model: openai:gpt-3.5-turbo - + model: openai:gpt-3.5-turbo --- -import { Card } from '@astrojs/starlight/components'; +import { Card } from "@astrojs/starlight/components" It is possible to force the LLM to generate data that conforms to a specific schema. This technique works reasonably well and GenAIScript also provides automatic validation "just in case". @@ -32,11 +31,17 @@ const schema = defSchema("CITY_SCHEMA", { description: "A city with population and elevation information.", properties: { name: { type: "string", description: "The name of the city." }, - population: { type: "number", description: "The population of the city." }, - url: { type: "string", description: "The URL of the city's Wikipedia page." } + population: { + type: "number", + description: "The population of the city.", + }, + url: { + type: "string", + description: "The URL of the city's Wikipedia page.", + }, }, - required: ["name", "population", "url"] - } + required: ["name", "population", "url"], + }, }) $`Generate data using JSON compliant with ${schema}.` @@ -47,9 +52,9 @@ $`Generate data using JSON compliant with ${schema}.`
👤 user - ````markdown wrap CITY_SCHEMA: + ```typescript-schema // A list of cities with population and elevation information. type CITY_SCHEMA = Array<{ @@ -61,41 +66,39 @@ type CITY_SCHEMA = Array<{ url: string, }> ``` + Generate data using JSON compliant with CITY_SCHEMA. ```` -
-
🤖 assistant - ````markdown wrap File ./data.json: + ```json schema=CITY_SCHEMA [ - { - "name": "New York", - "population": 8398748, - "url": "https://en.wikipedia.org/wiki/New_York_City" - }, - { - "name": "Los Angeles", - "population": 3990456, - "url": "https://en.wikipedia.org/wiki/Los_Angeles" - }, - { - "name": "Chicago", - "population": 2705994, - "url": "https://en.wikipedia.org/wiki/Chicago" - } + { + "name": "New York", + "population": 8398748, + "url": "https://en.wikipedia.org/wiki/New_York_City" + }, + { + "name": "Los Angeles", + "population": 3990456, + "url": "https://en.wikipedia.org/wiki/Los_Angeles" + }, + { + "name": "Chicago", + "population": 2705994, + "url": "https://en.wikipedia.org/wiki/Chicago" + } ] ``` ```` -
{/* genaiscript output end */} @@ -131,12 +134,12 @@ from TypeChat, the schema is converted TypeScript types before being injected in // A list of cities with population and elevation information. type CITY_SCHEMA = Array<{ // The name of the city. - name: string, + name: string // The population of the city. - population: number, + population: number // The URL of the city's Wikipedia page. - url: string, - }> + url: string +}> ``` You can change this behavior by using the `{ format: "json" }` option. @@ -154,50 +157,46 @@ in the output folder as well.
schema CITY_SCHEMA -- source: +- source: ```json { - "type": "array", - "description": "A list of cities with population and elevation information.", - "items": { - "type": "object", - "description": "A city with population and elevation information.", - "properties": { - "name": { - "type": "string", - "description": "The name of the city." - }, - "population": { - "type": "number", - "description": "The population of the city." - }, - "url": { - "type": "string", - "description": "The URL of the city's Wikipedia page." - } - }, - "required": [ - "name", - "population", - "url" - ] - } + "type": "array", + "description": "A list of cities with population and elevation information.", + "items": { + "type": "object", + "description": "A city with population and elevation information.", + "properties": { + "name": { + "type": "string", + "description": "The name of the city." + }, + "population": { + "type": "number", + "description": "The population of the city." + }, + "url": { + "type": "string", + "description": "The URL of the city's Wikipedia page." + } + }, + "required": ["name", "population", "url"] + } } ``` -- prompt (rendered as typescript): + +- prompt (rendered as typescript): ```ts // A list of cities with population and elevation information. type CITY_SCHEMA = Array<{ // The name of the city. - name: string, + name: string // The population of the city. - population: number, + population: number // The URL of the city's Wikipedia page. - url: string, - }> - + url: string +}> ```
@@ -219,7 +218,7 @@ GenAIScript automatically validates the payload against the schema. :::tip -Not all data formats are equal! Some data formats like JSON introduce ambiguity +Not all data formats are equal! Some data formats like JSON introduce ambiguity and can confuse the LLM. [Read more...](https://betterprogramming.pub/yaml-vs-json-which-is-more-efficient-for-language-models-5bc11dd0f6df). @@ -227,7 +226,7 @@ and can confuse the LLM. ## Repair -GenAIScript will automatically try to repair the data by issues additional messages +GenAIScript will automatically try to repair the data by issues additional messages back to the LLM with the parsing output. ## Runtime Validation @@ -236,4 +235,4 @@ Use `parsers.validateJSON` to validate JSON when running the script. ```js const validation = parsers.validateJSON(schema, json) -``` \ No newline at end of file +``` From bf5e8daa6bb4e951612ff08eccc06fda55809e75 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Sat, 21 Dec 2024 13:52:21 +0000 Subject: [PATCH 08/10] =?UTF-8?q?refactor:=20=E2=99=BB=EF=B8=8F=20rename?= =?UTF-8?q?=20and=20update=20script=20with=20test=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../sample/genaisrc/{cityinfo-zod.genai.mts => zod.genai.mts} | 3 +++ 1 file changed, 3 insertions(+) rename packages/sample/genaisrc/{cityinfo-zod.genai.mts => zod.genai.mts} (90%) diff --git a/packages/sample/genaisrc/cityinfo-zod.genai.mts b/packages/sample/genaisrc/zod.genai.mts similarity index 90% rename from packages/sample/genaisrc/cityinfo-zod.genai.mts rename to packages/sample/genaisrc/zod.genai.mts index 1a7575e7a8..a63e4d6fd2 100644 --- a/packages/sample/genaisrc/cityinfo-zod.genai.mts +++ b/packages/sample/genaisrc/zod.genai.mts @@ -1,5 +1,8 @@ script({ files: ["./src/cities.md"], + tests: { + files: ["./src/cities.md"], + }, }) // the data to analyze From 7317c04058d5e7fdaefa9a38cd0e9cb89d3d11c4 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Sat, 21 Dec 2024 14:59:49 +0000 Subject: [PATCH 09/10] =?UTF-8?q?docs:=20=E2=9C=8F=EF=B8=8F=20add=20note?= =?UTF-8?q?=20about=20builtin=20Zod=20support=20in=20schemas?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- packages/cli/README.md | 2 +- packages/vscode/README.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 1eedc0dcc9..35dd9df720 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ $`Analyze FILE and extract data to JSON using the ${schema} schema.` ### 📋 Data Schemas -Define, validate, and repair data using [schemas](https://microsoft.github.io/genaiscript/reference/scripts/schemas). +Define, validate, and repair data using [schemas](https://microsoft.github.io/genaiscript/reference/scripts/schemas). Zod support builtin. ```js const data = defSchema("MY_DATA", { type: "array", items: { ... } }) diff --git a/packages/cli/README.md b/packages/cli/README.md index 5dc1df8570..c249ab1d4e 100644 --- a/packages/cli/README.md +++ b/packages/cli/README.md @@ -94,7 +94,7 @@ $`Analyze FILE and extract data to JSON using the ${schema} schema.` ### 📋 Data Schemas -Define, validate, and repair data using [schemas](https://microsoft.github.io/genaiscript/reference/scripts/schemas). +Define, validate, and repair data using [schemas](https://microsoft.github.io/genaiscript/reference/scripts/schemas). Zod support builtin. ```js const data = defSchema("MY_DATA", { type: "array", items: { ... } }) diff --git a/packages/vscode/README.md b/packages/vscode/README.md index c649e8b11f..2e9fd84dde 100644 --- a/packages/vscode/README.md +++ b/packages/vscode/README.md @@ -24,7 +24,7 @@ $`Analyze FILE and - 📁 Scripts are [files](https://microsoft.github.io/genaiscript/reference/scripts/)! They can be versioned, shared, forked, ... -- 📊 Define, validate, repair data using [schemas](https://microsoft.github.io/genaiscript/reference/scripts/schemas). +- 📊 Define, validate, repair data using [schemas](https://microsoft.github.io/genaiscript/reference/scripts/schemas). Zod support builtin. ```js wrap const data = defSchema("MY_DATA", From 901527aa22cf24b667c13a005cf55da8dcd0db10 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Sat, 21 Dec 2024 15:17:49 +0000 Subject: [PATCH 10/10] =?UTF-8?q?feat:=20=E2=9C=A8=20add=20support=20for?= =?UTF-8?q?=20Zod=20types=20in=20schema=20utilities?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/core/src/promptdom.ts | 4 ++-- packages/core/src/types/prompt_template.d.ts | 4 +++- packages/core/src/types/prompt_type.d.ts | 2 +- packages/core/src/zod.ts | 23 +++++++++----------- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts index 0456148a85..71d9f5f63e 100644 --- a/packages/core/src/promptdom.ts +++ b/packages/core/src/promptdom.ts @@ -362,13 +362,13 @@ export function createImageNode( // Function to create a schema node. export function createSchemaNode( name: string, - value: JSONSchema, + value: JSONSchema | ZodTypeLike, options?: DefSchemaOptions ): PromptSchemaNode { assert(!!name) assert(value !== undefined) // auto zod conversion - value = tryZodToJsonSchema(value) ?? value + value = tryZodToJsonSchema(value as ZodTypeLike) ?? (value as JSONSchema) return { type: "schema", name, value, options } } diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index 6ff01de749..f6481a5778 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -2604,10 +2604,12 @@ interface McpServerConfig { type McpServersConfig = Record> +type ZodTypeLike = { _def: any, safeParse: any, refine: any } + interface ChatGenerationContext extends ChatTurnGenerationContext { defSchema( name: string, - schema: JSONSchema | object, + schema: JSONSchema | ZodTypeLike, options?: DefSchemaOptions ): string defImages( diff --git a/packages/core/src/types/prompt_type.d.ts b/packages/core/src/types/prompt_type.d.ts index 5297701493..56c7042f37 100644 --- a/packages/core/src/types/prompt_type.d.ts +++ b/packages/core/src/types/prompt_type.d.ts @@ -239,7 +239,7 @@ declare function fetchText( */ declare function defSchema( name: string, - schema: JSONSchema | object, + schema: JSONSchema | ZodTypeLike, options?: DefSchemaOptions ): string diff --git a/packages/core/src/zod.ts b/packages/core/src/zod.ts index 68f2a94da9..f3fe5a600d 100644 --- a/packages/core/src/zod.ts +++ b/packages/core/src/zod.ts @@ -6,17 +6,14 @@ import { zodToJsonSchema as _zodToJsonSchema } from "zod-to-json-schema" * @param options * @returns */ -export function tryZodToJsonSchema(z: object, options?: object): JSONSchema { - if (!z) return undefined - // instanceof not working, test for some existing methoid - if (!(z as any)._def) return undefined - try { - const schema = _zodToJsonSchema(z as any, { - target: "openAi", - ...(options || {}), - }) - return structuredClone(schema) as JSONSchema - } catch (e) { - return undefined - } +export function tryZodToJsonSchema( + z: ZodTypeLike, + options?: object +): JSONSchema { + if (!z || !z._def || !z.refine || !z.safeParse) return undefined + const schema = _zodToJsonSchema(z as any, { + target: "openAi", + ...(options || {}), + }) + return structuredClone(schema) as JSONSchema }