diff --git a/README.md b/README.md
index 1eedc0dcc9..35dd9df720 100644
--- a/README.md
+++ b/README.md
@@ -90,7 +90,7 @@ $`Analyze FILE and extract data to JSON using the ${schema} schema.`
### 📋 Data Schemas
-Define, validate, and repair data using [schemas](https://microsoft.github.io/genaiscript/reference/scripts/schemas).
+Define, validate, and repair data using [schemas](https://microsoft.github.io/genaiscript/reference/scripts/schemas). Zod support is built in.
```js
const data = defSchema("MY_DATA", { type: "array", items: { ... } })
diff --git a/docs/src/content/docs/reference/scripts/schemas.mdx b/docs/src/content/docs/reference/scripts/schemas.mdx
index a341a98cac..473d6a9f7f 100644
--- a/docs/src/content/docs/reference/scripts/schemas.mdx
+++ b/docs/src/content/docs/reference/scripts/schemas.mdx
@@ -1,17 +1,16 @@
---
title: Data Schemas
sidebar:
- order: 6
+ order: 6
description: Learn how to define and use data schemas for structured output in
- JSON/YAML with LLM, including validation and repair techniques.
+ JSON/YAML with LLM, including validation and repair techniques.
keywords: data schemas, JSON schema, YAML validation, LLM structured output,
- schema repair
+ schema repair
genaiscript:
- model: openai:gpt-3.5-turbo
-
+ model: openai:gpt-3.5-turbo
---
-import { Card } from '@astrojs/starlight/components';
+import { Card } from "@astrojs/starlight/components"
It is possible to force the LLM to generate data that conforms to a specific schema.
This technique works reasonably well and GenAIScript also provides automatic validation "just in case".
@@ -32,11 +31,17 @@ const schema = defSchema("CITY_SCHEMA", {
description: "A city with population and elevation information.",
properties: {
name: { type: "string", description: "The name of the city." },
- population: { type: "number", description: "The population of the city." },
- url: { type: "string", description: "The URL of the city's Wikipedia page." }
+ population: {
+ type: "number",
+ description: "The population of the city.",
+ },
+ url: {
+ type: "string",
+ description: "The URL of the city's Wikipedia page.",
+ },
},
- required: ["name", "population", "url"]
- }
+ required: ["name", "population", "url"],
+ },
})
$`Generate data using JSON compliant with ${schema}.`
@@ -47,9 +52,9 @@ $`Generate data using JSON compliant with ${schema}.`
👤 user
-
````markdown wrap
CITY_SCHEMA:
+
```typescript-schema
// A list of cities with population and elevation information.
type CITY_SCHEMA = Array<{
@@ -61,46 +66,64 @@ type CITY_SCHEMA = Array<{
url: string,
}>
```
+
Generate data using JSON compliant with CITY_SCHEMA.
````
-
-
🤖 assistant
-
````markdown wrap
File ./data.json:
+
```json schema=CITY_SCHEMA
[
- {
- "name": "New York",
- "population": 8398748,
- "url": "https://en.wikipedia.org/wiki/New_York_City"
- },
- {
- "name": "Los Angeles",
- "population": 3990456,
- "url": "https://en.wikipedia.org/wiki/Los_Angeles"
- },
- {
- "name": "Chicago",
- "population": 2705994,
- "url": "https://en.wikipedia.org/wiki/Chicago"
- }
+ {
+ "name": "New York",
+ "population": 8398748,
+ "url": "https://en.wikipedia.org/wiki/New_York_City"
+ },
+ {
+ "name": "Los Angeles",
+ "population": 3990456,
+ "url": "https://en.wikipedia.org/wiki/Los_Angeles"
+ },
+ {
+ "name": "Chicago",
+ "population": 2705994,
+ "url": "https://en.wikipedia.org/wiki/Chicago"
+ }
]
```
````
-
{/* genaiscript output end */}
+### Native Zod support
+
+A [Zod](https://zod.dev/) type can be passed to `defSchema` and it will be automatically converted to a JSON schema.
+GenAIScript also exports the `z` object from Zod for convenience.
+```js
+// import from genaiscript
+import { z } from "genaiscript/runtime"
+// or directly from zod
+// import { z } from "zod"
+// create schema using zod
+const CitySchema = z.array(
+ z.object({
+ name: z.string(),
+ population: z.number(),
+ url: z.string(),
+ })
+)
+// JSON schema to constrain the output of the tool.
+const schema = defSchema("CITY_SCHEMA", CitySchema)
+```
### Prompt encoding
@@ -111,12 +134,12 @@ from TypeChat, the schema is converted TypeScript types before being injected in
// A list of cities with population and elevation information.
type CITY_SCHEMA = Array<{
// The name of the city.
- name: string,
+ name: string
// The population of the city.
- population: number,
+ population: number
// The URL of the city's Wikipedia page.
- url: string,
- }>
+ url: string
+}>
```
You can change this behavior by using the `{ format: "json" }` option.
@@ -134,50 +157,46 @@ in the output folder as well.
schema CITY_SCHEMA
-- source:
+- source:
```json
{
- "type": "array",
- "description": "A list of cities with population and elevation information.",
- "items": {
- "type": "object",
- "description": "A city with population and elevation information.",
- "properties": {
- "name": {
- "type": "string",
- "description": "The name of the city."
- },
- "population": {
- "type": "number",
- "description": "The population of the city."
- },
- "url": {
- "type": "string",
- "description": "The URL of the city's Wikipedia page."
- }
- },
- "required": [
- "name",
- "population",
- "url"
- ]
- }
+ "type": "array",
+ "description": "A list of cities with population and elevation information.",
+ "items": {
+ "type": "object",
+ "description": "A city with population and elevation information.",
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "The name of the city."
+ },
+ "population": {
+ "type": "number",
+ "description": "The population of the city."
+ },
+ "url": {
+ "type": "string",
+ "description": "The URL of the city's Wikipedia page."
+ }
+ },
+ "required": ["name", "population", "url"]
+ }
}
```
-- prompt (rendered as typescript):
+
+- prompt (rendered as typescript):
```ts
// A list of cities with population and elevation information.
type CITY_SCHEMA = Array<{
// The name of the city.
- name: string,
+ name: string
// The population of the city.
- population: number,
+ population: number
// The URL of the city's Wikipedia page.
- url: string,
- }>
-
+ url: string
+}>
```
@@ -199,7 +218,7 @@ GenAIScript automatically validates the payload against the schema.
:::tip
-Not all data formats are equal! Some data formats like JSON introduce ambiguity
+Not all data formats are equal! Some data formats like JSON introduce ambiguity
and can confuse the LLM.
[Read more...](https://betterprogramming.pub/yaml-vs-json-which-is-more-efficient-for-language-models-5bc11dd0f6df).
@@ -207,7 +226,7 @@ and can confuse the LLM.
## Repair
-GenAIScript will automatically try to repair the data by issues additional messages
+GenAIScript will automatically try to repair the data by issuing additional messages
back to the LLM with the parsing output.
## Runtime Validation
@@ -216,4 +235,4 @@ Use `parsers.validateJSON` to validate JSON when running the script.
```js
const validation = parsers.validateJSON(schema, json)
-```
\ No newline at end of file
+```
diff --git a/packages/cli/README.md b/packages/cli/README.md
index 5dc1df8570..c249ab1d4e 100644
--- a/packages/cli/README.md
+++ b/packages/cli/README.md
@@ -94,7 +94,7 @@ $`Analyze FILE and extract data to JSON using the ${schema} schema.`
### 📋 Data Schemas
-Define, validate, and repair data using [schemas](https://microsoft.github.io/genaiscript/reference/scripts/schemas).
+Define, validate, and repair data using [schemas](https://microsoft.github.io/genaiscript/reference/scripts/schemas). Zod support is built in.
```js
const data = defSchema("MY_DATA", { type: "array", items: { ... } })
diff --git a/packages/cli/package.json b/packages/cli/package.json
index 5fe00442cc..ba9e1fee1a 100644
--- a/packages/cli/package.json
+++ b/packages/cli/package.json
@@ -67,7 +67,9 @@
"turndown-plugin-gfm": "^1.0.2",
"typescript": "5.7.2",
"vectra": "^0.9.0",
- "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz"
+ "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz",
+ "zod": "^3.24.1",
+ "zod-to-json-schema": "^3.24.1"
},
"optionalDependencies": {
"@huggingface/transformers": "^3.2.1",
@@ -112,11 +114,11 @@
"zx": "^8.2.4"
},
"scripts": {
- "compile:runtime": "tsc src/runtime.ts --skipLibCheck --outDir built --declaration --target es2020 --moduleResolution node && mv built/runtime.js built/runtime.mjs",
+ "compile:runtime": "tsc src/runtime.ts --skipLibCheck --outDir built --declaration --target es2020 --moduleResolution node --module esnext && mv built/runtime.js built/runtime.mjs",
"compile:api": "esbuild src/api.ts --outfile=built/api.mjs",
- "compile:cli": "esbuild src/main.ts --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:turndown-plugin-gfm --external:vectra --external:tabletojson --external:html-to-text --external:@octokit/rest --external:@octokit/plugin-throttling --external:@octokit/plugin-retry --external:@octokit/plugin-paginate-rest --external:skia-canvas --external:@huggingface/transformers --external:@modelcontextprotocol/sdk --external:@anthropic-ai/sdk --external:@anthropic-ai/bedrock-sdk --external:es-toolkit && node ../../scripts/patch-cli.mjs",
+ "compile:cli": "esbuild src/main.ts --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:turndown-plugin-gfm --external:vectra --external:tabletojson --external:html-to-text --external:@octokit/rest --external:@octokit/plugin-throttling --external:@octokit/plugin-retry --external:@octokit/plugin-paginate-rest --external:skia-canvas --external:@huggingface/transformers --external:@modelcontextprotocol/sdk --external:@anthropic-ai/sdk --external:@anthropic-ai/bedrock-sdk --external:es-toolkit --external:zod --external:zod-to-json-schema && node ../../scripts/patch-cli.mjs",
"compile": "yarn compile:api && yarn compile:runtime && yarn compile:cli",
- "compile-debug": "esbuild src/main.ts --sourcemap --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:turndown-plugin-gfm --external:vectra --external:tabletojson --external:html-to-text --external:@octokit/rest --external:@octokit/plugin-throttling --external:@octokit/plugin-retry --external:@octokit/plugin-paginate-rest --external:skia-canvas --external:@huggingface/transformers --external:@modelcontextprotocol/sdk --external:@anthropic-ai/sdk --external:@anthropic-ai/bedrock-sdk --external:es-toolkit",
+ "compile-debug": "esbuild src/main.ts --sourcemap --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:turndown-plugin-gfm --external:vectra --external:tabletojson --external:html-to-text --external:@octokit/rest --external:@octokit/plugin-throttling --external:@octokit/plugin-retry --external:@octokit/plugin-paginate-rest --external:skia-canvas --external:@huggingface/transformers --external:@modelcontextprotocol/sdk --external:@anthropic-ai/sdk --external:@anthropic-ai/bedrock-sdk --external:es-toolkit --external:zod --external:zod-to-json-schema",
"postcompile": "node built/genaiscript.cjs info help > ../../docs/src/content/docs/reference/cli/commands.md",
"vis:treemap": "npx --yes esbuild-visualizer --metadata esbuild.meta.json --filename esbuild.treemap.html",
"vis:network": "npx --yes esbuild-visualizer --metadata esbuild.meta.json --filename esbuild.network.html --template network",
diff --git a/packages/cli/src/runtime.ts b/packages/cli/src/runtime.ts
index 6d7ad17e3d..6c4515e3ed 100644
--- a/packages/cli/src/runtime.ts
+++ b/packages/cli/src/runtime.ts
@@ -1,9 +1,15 @@
/**
* GenAIScript supporting runtime
*/
-import { delay as esDelay } from "es-toolkit"
+import { delay as _delay } from "es-toolkit"
+import { z as zod } from "zod"
/**
* A helper function to delay the execution of the script
*/
-export const delay: (ms: number) => Promise<void> = esDelay
+export const delay: (ms: number) => Promise<void> = _delay
+
+/**
+ * Zod `z` schema builder, re-exported so scripts can import it from "genaiscript/runtime"
+ */
+export const z = zod
diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts
index 10eefbc849..71d9f5f63e 100644
--- a/packages/core/src/promptdom.ts
+++ b/packages/core/src/promptdom.ts
@@ -41,6 +41,7 @@ import { jinjaRenderChatMessage } from "./jinja"
import { runtimeHost } from "./host"
import { hash } from "./crypto"
import { startMcpServer } from "./mcp"
+import { tryZodToJsonSchema } from "./zod"
// Definition of the PromptNode interface which is an essential part of the code structure.
export interface PromptNode extends ContextExpansionOptions {
@@ -361,11 +362,13 @@ export function createImageNode(
// Function to create a schema node.
export function createSchemaNode(
name: string,
- value: JSONSchema,
+ value: JSONSchema | ZodTypeLike,
options?: DefSchemaOptions
): PromptSchemaNode {
assert(!!name)
assert(value !== undefined)
+ // Zod-like values are converted to JSON schema; anything else is kept as the JSON schema it already is
+ value = tryZodToJsonSchema(value as ZodTypeLike) ?? (value as JSONSchema)
return { type: "schema", name, value, options }
}
diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts
index c4dfbbb17a..f6481a5778 100644
--- a/packages/core/src/types/prompt_template.d.ts
+++ b/packages/core/src/types/prompt_template.d.ts
@@ -2604,10 +2604,12 @@ interface McpServerConfig {
type McpServersConfig = Record>
+type ZodTypeLike = { _def: any, safeParse: any, refine: any }
+
interface ChatGenerationContext extends ChatTurnGenerationContext {
defSchema(
name: string,
- schema: JSONSchema,
+ schema: JSONSchema | ZodTypeLike,
options?: DefSchemaOptions
): string
defImages(
diff --git a/packages/core/src/types/prompt_type.d.ts b/packages/core/src/types/prompt_type.d.ts
index 97b4ef7097..56c7042f37 100644
--- a/packages/core/src/types/prompt_type.d.ts
+++ b/packages/core/src/types/prompt_type.d.ts
@@ -239,7 +239,7 @@ declare function fetchText(
*/
declare function defSchema(
name: string,
- schema: JSONSchema,
+ schema: JSONSchema | ZodTypeLike,
options?: DefSchemaOptions
): string
diff --git a/packages/core/src/zod.ts b/packages/core/src/zod.ts
new file mode 100644
index 0000000000..f3fe5a600d
--- /dev/null
+++ b/packages/core/src/zod.ts
@@ -0,0 +1,19 @@
+import { zodToJsonSchema as _zodToJsonSchema } from "zod-to-json-schema"
+
+/**
+ * Converts a Zod schema to a JSON schema; returns undefined when `z` is not Zod-like.
+ * @param z candidate value, treated as a Zod type only if it has `_def`, `refine` and `safeParse`
+ * @param options extra zod-to-json-schema options, spread over the default `openAi` target
+ * @returns a structured clone of the generated JSON schema, or undefined
+ */
+export function tryZodToJsonSchema(
+ z: ZodTypeLike,
+ options?: object
+): JSONSchema | undefined {
+ if (!z || !z._def || !z.refine || !z.safeParse) return undefined
+ const schema = _zodToJsonSchema(z as any, {
+ target: "openAi",
+ ...(options || {}),
+ })
+ return structuredClone(schema) as JSONSchema
+}
diff --git a/packages/sample/genaisrc/cityinfo-zod.genai.mts b/packages/sample/genaisrc/zod.genai.mts
similarity index 66%
rename from packages/sample/genaisrc/cityinfo-zod.genai.mts
rename to packages/sample/genaisrc/zod.genai.mts
index 603d5d4540..a63e4d6fd2 100644
--- a/packages/sample/genaisrc/cityinfo-zod.genai.mts
+++ b/packages/sample/genaisrc/zod.genai.mts
@@ -1,12 +1,14 @@
script({
files: ["./src/cities.md"],
+ tests: {
+ files: ["./src/cities.md"],
+ },
})
// the data to analyze
def("CITIES", env.files)
-import { z } from "zod"
-import { zodToJsonSchema } from "zod-to-json-schema"
+import { z } from "genaiscript/runtime"
// create schema using zod
const CitySchema = z.array(
z.object({
@@ -16,9 +18,7 @@ const CitySchema = z.array(
})
)
// JSON schema to constrain the output of the tool.
-const schema = defSchema("CITY_SCHEMA", zodToJsonSchema(CitySchema, "citySchema").definitions[
- "citySchema"
-] as JSONSchemaArray)
+const schema = defSchema("CITY_SCHEMA", CitySchema)
// the task`
$`Answer with the information of the cities in the CITIES data set,
compliant with ${schema}.`
diff --git a/packages/vscode/README.md b/packages/vscode/README.md
index c649e8b11f..2e9fd84dde 100644
--- a/packages/vscode/README.md
+++ b/packages/vscode/README.md
@@ -24,7 +24,7 @@ $`Analyze FILE and
- 📁 Scripts are [files](https://microsoft.github.io/genaiscript/reference/scripts/)! They can be versioned, shared, forked, ...
-- 📊 Define, validate, repair data using [schemas](https://microsoft.github.io/genaiscript/reference/scripts/schemas).
+- 📊 Define, validate, repair data using [schemas](https://microsoft.github.io/genaiscript/reference/scripts/schemas). Zod support is built in.
```js wrap
const data = defSchema("MY_DATA",