Skip to content

Commit

Permalink
Jsonl (#531)
Browse files Browse the repository at this point in the history
* stringify

* added JSONL parser

* added JSONL object

* fix defninition files
  • Loading branch information
pelikhan authored Jun 11, 2024
1 parent cdf7aa4 commit e1b4527
Show file tree
Hide file tree
Showing 22 changed files with 511 additions and 24 deletions.
31 changes: 31 additions & 0 deletions docs/genaisrc/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/src/content/docs/reference/cli/commands.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ Options:
-td, --test-delay <string> delay between tests in seconds
--no-cache disable LLM result cache
-v, --verbose verbose output
-pv, --promptfoo-version [version] promptfoo version, default is ^0.62.1
-pv, --promptfoo-version [version] promptfoo version, default is ^0.63.0
-os, --out-summary <file> append output summary in file
-h, --help display help for command
```
Expand Down
15 changes: 15 additions & 0 deletions docs/src/content/docs/reference/scripts/parsers.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,21 @@ To parse, use `parsers.TOML`. It supports both a text content or a file as input
const res = parsers.TOML("...")
```

## JSONL

JSON**L** is a format that stores JSON objects in a line-by-line format. Each line is a valid JSON(5) object (we use the JSON5 parser to be more error resilient).

```jsonl title="data.jsonl"
{"name": "Alice"}
{"name": "Bob"}
```

You can use `parsers.JSONL` to parse JSONL files into an array of objects (`any[]`).

```js
const res = parsers.JSONL(file)
```

## XML

The `parsers.XML` function parses for the [XML format](https://en.wikipedia.org/wiki/XML).
Expand Down
31 changes: 31 additions & 0 deletions genaisrc/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions packages/core/src/genaisrc/genaiscript.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 27 additions & 7 deletions packages/core/src/jsonl.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { host } from "./host"
import { JSON5TryParse } from "./json5"
import { concatBuffers, logWarn, utf8Decode, utf8Encode } from "./util"

function tryReadFile(fn: string) {
Expand Down Expand Up @@ -37,15 +38,34 @@ export async function readJSONL(fn: string) {
return res
}

function serialize(objs: any[]) {
let accLen = 0
let acc = ""
for (const o of objs) {
const s = JSON.stringify(o)
accLen += s.length + 1
acc += s + "\n"
/**
 * Parses JSONL text (one JSON/JSON5 value per line) into an array.
 *
 * Both LF and CRLF line endings are accepted, and blank or whitespace-only
 * lines are skipped. Every remaining line is handed to `JSON5TryParse` and
 * whatever it returns is appended, so the result has one entry per
 * non-blank line, in input order.
 *
 * @param text JSONL content; a falsy value yields an empty array.
 * @param options forwarded unchanged as the second argument of `JSON5TryParse`.
 * @returns the parsed entries.
 */
export function JSONLTryParse(
    text: string,
    options?: {
        repair?: boolean
    }
): any[] {
    if (!text) return []
    const res: any[] = []
    // Split on LF or CRLF so Windows-style files do not leave stray "\r" lines.
    for (const line of text.split(/\r?\n/)) {
        // Skip blank lines, including ones that only contain whitespace.
        if (!line.trim()) continue
        // NOTE(review): other callers pass a default value as the second
        // argument of JSON5TryParse; confirm it accepts this options bag.
        const obj = JSON5TryParse(line, options)
        res.push(obj)
    }
    return res
}

/**
 * Renders a list of values as JSONL text.
 *
 * Each value is serialized with `JSON.stringify` and the results are joined
 * with a single "\n"; no trailing newline is added.
 *
 * @param objs values to serialize; a missing or empty list yields "".
 * @returns the JSONL text.
 */
export function JSONLStringify(objs: any[]) {
    if (!objs?.length) return ""
    return objs.map((entry) => JSON.stringify(entry)).join("\n")
}

/**
 * Encodes objects as UTF-8 JSONL bytes.
 *
 * Every record, including the last one, is terminated by "\n" so that
 * serialized chunks can be concatenated without two records ending up on
 * the same line. An empty input produces an empty buffer.
 *
 * @param objs values to serialize.
 * @returns the encoded bytes produced by the host UTF-8 encoder.
 */
function serialize(objs: any[]) {
    const text = JSONLStringify(objs)
    // JSONLStringify only separates records; terminate the final one as well.
    const payload = text ? text + "\n" : text
    const buf = host.createUTF8Encoder().encode(payload)
    return buf
}
Expand Down
6 changes: 6 additions & 0 deletions packages/core/src/parsers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ describe("parsers", () => {
assert.deepStrictEqual(result, { key: "value" })
})

// Two newline-separated JSON objects should come back as two entries, in input order.
test("JSONL", () => {
const result = parsers.JSONL('{"key": "value"}\n{"key2": "value2"}')
assert.deepStrictEqual(result[0], { key: "value" })
assert.deepStrictEqual(result[1], { key2: "value2" })
})

test("YAML", () => {
const result = parsers.YAML("key: value")
assert.deepStrictEqual(result, { key: "value" })
Expand Down
2 changes: 2 additions & 0 deletions packages/core/src/parsers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import { validateJSONWithSchema } from "./schema"
import { XLSXTryParse } from "./xlsx"
import { host } from "./host"
import { unzip } from "./zip"
import { JSONLTryParse } from "./jsonl"

export function createParsers(options: {
trace: MarkdownTrace
Expand All @@ -29,6 +30,7 @@ export function createParsers(options: {
return Object.freeze<Parsers>({
JSON5: (text, options) =>
JSON5TryParse(filenameOrFileToContent(text), options?.defaultValue),
JSONL: (text) => JSONLTryParse(filenameOrFileToContent(text)),
YAML: (text, options) =>
YAMLTryParse(filenameOrFileToContent(text), options?.defaultValue),
XML: (text, options) => {
Expand Down
6 changes: 6 additions & 0 deletions packages/core/src/promptcontext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import { fuzzSearch } from "./fuzzsearch"
import { parseModelIdentifier, resolveModelConnectionInfo } from "./models"
import { renderAICI } from "./aici"
import { MODEL_PROVIDER_AICI } from "./constants"
import { JSONLStringify, JSONLTryParse } from "./jsonl"

function stringLikeToFileName(f: string | WorkspaceFile) {
return typeof f === "string" ? f : f?.filename
Expand Down Expand Up @@ -74,6 +75,10 @@ export function createPromptContext(
const XML = Object.freeze<XML>({
parse: XMLParse,
})
// Frozen JSONL helper object; parse and stringify map directly to the functions imported from ./jsonl.
const JSONL = Object.freeze<JSONL>({
parse: JSONLTryParse,
stringify: JSONLStringify,
})
const AICI = Object.freeze<AICI>({
gen: (options: AICIGenOptions) => {
// validate options
Expand Down Expand Up @@ -211,6 +216,7 @@ export function createPromptContext(
INI,
AICI,
XML,
JSONL,
retrieval,
host: promptHost,
defOutputProcessor,
Expand Down
21 changes: 21 additions & 0 deletions packages/core/src/types/prompt_template.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,13 @@ interface Parsers {
content: string | WorkspaceFile,
options?: { defaultValue?: any }
): any | undefined

/**
* Parses text or file as a JSONL payload. Empty lines are ignored, and JSON5 is used for parsing.
* @param content
*/
JSONL(content: string | WorkspaceFile): any[] | undefined

/**
* Parses text as a YAML payload
*/
Expand Down Expand Up @@ -1014,6 +1021,19 @@ interface XML {
parse(text: string): any
}

/**
* JSONL (JSON Lines) helpers: text that holds one JSON value per line.
*/
interface JSONL {
/**
* Parses a JSONL string to an array of objects, one entry per non-empty line.
* @param text JSONL content to parse
* @returns the parsed values, in line order
*/
parse(text: string): any[]
/**
* Converts objects to JSONL format, one serialized object per line.
* @param objs objects to serialize
* @returns the JSONL text
*/
stringify(objs: any[]): string
}

interface INI {
/**
* Parses a .ini file
Expand Down Expand Up @@ -1531,6 +1551,7 @@ interface PromptContext extends ChatGenerationContext {
workspace: WorkspaceFileSystem
YAML: YAML
XML: XML
JSONL: JSONL
CSV: CSV
INI: INI
AICI: AICI
Expand Down
10 changes: 10 additions & 0 deletions packages/core/src/types/prompt_type.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,16 @@ declare var YAML: YAML
*/
declare var INI: INI

/**
* XML parsing and stringifying.
*/
declare var XML: XML

/**
* JSONL parsing and stringifying.
*/
declare var JSONL: JSONL

/**
* AICI operations
*/
Expand Down
Loading

0 comments on commit e1b4527

Please sign in to comment.