Skip to content

Commit

Permalink
feat: 🤖 add JSON schema inference and utilities
Browse files Browse the repository at this point in the history
  • Loading branch information
pelikhan committed Dec 23, 2024
1 parent 7a868fe commit 43ed7f3
Show file tree
Hide file tree
Showing 9 changed files with 357 additions and 18 deletions.
2 changes: 1 addition & 1 deletion docs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"@astrojs/starlight": "^0.30.3",
"astro": "^5.1.1",
"rehype-mermaid": "^3.0.0",
"starlight-blog": "^0.16.0",
"starlight-blog": "^0.16.1",
"starlight-links-validator": "^0.14.0",
"starlight-package-managers": "^0.8.1",
"typescript": "5.7.2",
Expand Down
2 changes: 1 addition & 1 deletion docs/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4798,7 +4798,7 @@ sprintf-js@~1.0.2:
resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c"
integrity sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==

starlight-blog@^0.16.0:
starlight-blog@^0.16.1:
version "0.16.1"
resolved "https://registry.yarnpkg.com/starlight-blog/-/starlight-blog-0.16.1.tgz#fbf9da70c678c66e1629c8df1d02a478480cc17c"
integrity sha512-9WMpRZHhfgWjf2oQ1oUqGCJOUB3z+JohHrrQxVtzwm9GPAxNeu7/DAx6eQJf3moLbkdzCNP2VyrT2Molo62vRw==
Expand Down
4 changes: 3 additions & 1 deletion packages/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,15 @@
"@types/html-to-text": "^9.0.4",
"@types/inflection": "^1.13.2",
"@types/ini": "^4.1.1",
"@types/json-schema-generator": "^2.0.3",
"@types/mime-types": "^2.1.4",
"@types/mustache": "^4.2.5",
"@types/node": "^22.10.2",
"@types/object-inspect": "^1.13.0",
"@types/semver": "^7.5.8",
"@types/shell-quote": "^1.7.5",
"ajv": "^8.17.1",
"cross-fetch": "^4.0.0",
"cross-fetch": "^4.1.0",
"csv-parse": "^5.6.0",
"csv-stringify": "^6.5.2",
"diff": "^7.0.0",
Expand All @@ -62,6 +63,7 @@
"inflection": "^3.0.0",
"ini": "^5.0.0",
"jimp": "^1.6.0",
"json-schema-generator": "^2.0.6",
"json5": "^2.2.3",
"jsonrepair": "^3.11.2",
"magic-string": "^0.30.17",
Expand Down
7 changes: 6 additions & 1 deletion packages/core/src/globals.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import { estimateTokens, truncateTextToTokens } from "./tokens"
import { chunk, resolveTokenEncoder } from "./encoders"
import { runtimeHost } from "./host"
import { JSON5Stringify, JSON5TryParse } from "./json5"
import { JSONSchemaInfer } from "./schema"

/**
* This file defines global utilities and installs them into the global context.
Expand Down Expand Up @@ -89,7 +90,11 @@ export function installGlobals() {

glb.JSON5 = Object.freeze<JSON5>({
parse: JSON5TryParse,
stringify: JSON5Stringify
stringify: JSON5Stringify,
})

glb.JSONSchema = Object.freeze<JSONSchemaUtilities>({
infer: JSONSchemaInfer,
})

// Freeze AICI utilities with a generation function
Expand Down
6 changes: 6 additions & 0 deletions packages/core/src/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import Ajv from "ajv"
import { YAMLParse } from "./yaml"
import { errorMessage } from "./error"
import { promptParametersSchemaToJSONSchema } from "./parameters"
import jsonToSchema from "json-schema-generator"

/**
* Check if an object is a JSON Schema
Expand Down Expand Up @@ -344,3 +345,8 @@ export function toStrictJSONSchema(
}
return clone
}

/**
 * Infers a JSON schema describing the given value.
 *
 * Delegates to the `json-schema-generator` package (imported as
 * `jsonToSchema`) and exposes its result typed as `JSONSchema`.
 *
 * @param obj value to derive the schema from
 * @returns the generated schema, cast to `JSONSchema`
 */
export function JSONSchemaInfer(obj: any): JSONSchema {
    return jsonToSchema(obj) as JSONSchema
}
8 changes: 8 additions & 0 deletions packages/core/src/types/prompt_template.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1686,6 +1686,14 @@ interface XML {
parse(text: string | WorkspaceFile, options?: XMLParseOptions): any
}

/**
 * JSON Schema utilities.
 */
interface JSONSchemaUtilities {
/**
* Infers a JSON schema from an object.
* @param obj the object to infer a schema from
* @returns the inferred JSON schema
*/
infer(obj: any): JSONSchema
}

interface HTMLTableToJSONOptions {
useFirstRowForHeadings?: boolean
headers?: HeaderRows
Expand Down
5 changes: 5 additions & 0 deletions packages/core/src/types/prompt_type.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,11 @@ declare var JSONL: JSONL
*/
declare var JSON5: JSON5

/**
* JSON Schema utilities
*/
declare var JSONSchema: JSONSchemaUtilities

/**
* AICI operations
*/
Expand Down
9 changes: 6 additions & 3 deletions packages/sample/genaisrc/groq.genai.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -1206,25 +1206,28 @@ const json = `[
}
]`
const query = "filter to keep completed tasks and userid 2"
const schema = JSONSchema.infer(JSON.parse(json))

const res = await runPrompt(
(ctx) => {
ctx.$`## Task
Based on the example <DATASET> snippet and the desired <QUERY>, write a GROQ program that executes the query.
Based on the example <DATASET> snippet, and schema <DATASET_SCHEMA> and the desired <QUERY>, write a GROQ program that executes the query.
- Infer the JSON Schema of <DATASET> and use valid field names in the GROQ query
- The dataset does not specify types, do NOT use '_type' filters
- The query must be compatible with the JSON Schema of <DATASET> is in <DATASET_SCHEMA>
- The dataset is untyped, '_type' is not supported
- Explain the query step by step
- Emit the GROQ query in a groq code fence section.
`.role("system")
ctx.def("QUERY", query)
ctx.def("DATASET_SCHEMA", JSON.stringify(schema, null, 2), { language: "json" })
ctx.def("DATASET", json, { maxTokens: 500 })
},
{ system: ["system", "system.output_markdown", "system.assistant"] }
)

const GROQ = res.fences.find(f => f.language === "groq").content
const GROQ = res.fences.find((f) => f.language === "groq").content
console.log(GROQ)
const resjq = await parsers.GROQ(GROQ, JSON.parse(json))
console.log(resjq)
Loading

0 comments on commit 43ed7f3

Please sign in to comment.