From 182b72a27c88538c1fe1e036c9a947f8cef68800 Mon Sep 17 00:00:00 2001 From: pelikhan Date: Wed, 4 Sep 2024 09:34:41 -0700 Subject: [PATCH] added prompty parser --- genaisrc/test-gen.genai.mjs | 10 ++--- packages/core/src/mustache.ts | 2 +- packages/core/src/prompty.test.ts | 75 +++++++++++++++++++++++++++++++ packages/core/src/prompty.ts | 52 +++++++++++++++++++++ 4 files changed, 133 insertions(+), 6 deletions(-) create mode 100644 packages/core/src/prompty.test.ts create mode 100644 packages/core/src/prompty.ts diff --git a/genaisrc/test-gen.genai.mjs b/genaisrc/test-gen.genai.mjs index fdef20e1ea..a73d43bd82 100644 --- a/genaisrc/test-gen.genai.mjs +++ b/genaisrc/test-gen.genai.mjs @@ -1,4 +1,5 @@ script({ + model: "openai:gpt-4", title: "unit test generator", system: ["system", "system.typescript", "system.files"], tools: ["fs"], @@ -6,13 +7,13 @@ script({ const code = def("CODE", env.files) - $`## Step 1 For each file in ${code}, generate a plan to test the source code in each file -- use input test files from packages/sample/src/rag/* +- generate self-contained tests as much as possible by inlining all necessary values +- if needed, use input test files from packages/sample/src/rag/* - only generate tests for files in ${code} - update the existing test files (.test.ts). keep old tests if possible. @@ -41,7 +42,7 @@ ${fence('import test, { beforeEach, describe } from "node:test"', { language: "j Validate and fix test sources. -Use 'run_test' tool to execute the generated test code and fix the test code to make tests pass. +Call the 'run_test' tool to execute the generated test code and fix the test code to make tests pass. - this is important. 
` @@ -55,8 +56,7 @@ defTool( }, async (args) => { const { filename, source } = args - if (source) - await workspace.writeText(filename, source) + if (source) await workspace.writeText(filename, source) console.debug(`running test code ${filename}`) return host.exec(`node`, ["--import", "tsx", "--test", filename]) } diff --git a/packages/core/src/mustache.ts b/packages/core/src/mustache.ts index 26f99d0135..10174df115 100644 --- a/packages/core/src/mustache.ts +++ b/packages/core/src/mustache.ts @@ -18,7 +18,7 @@ export async function interpolateVariables( // remove prompty roles // https://github.com/microsoft/prompty/blob/main/runtime/prompty/prompty/parsers.py#L113C21-L113C77 - content = content.replace(/^\s*(system|user):\s*$/gim, "\n") + content = content.replace(/^\s*(system|user|assistant)\s*:\s*$/gim, "\n") // remove xml tags // https://humanloop.com/docs/prompt-file-format diff --git a/packages/core/src/prompty.test.ts b/packages/core/src/prompty.test.ts new file mode 100644 index 0000000000..10bf8557c4 --- /dev/null +++ b/packages/core/src/prompty.test.ts @@ -0,0 +1,75 @@ +import { promptyParse } from "./prompty" +import { describe, test, beforeEach } from "node:test" +import assert from "node:assert/strict" + +describe("promptyParse", () => { + test("correctly parses an empty markdown string", () => { + const result = promptyParse("") + assert.deepStrictEqual(result, { + frontmatter: {}, + content: "", + messages: [], + }) + }) + + test("correctly parses a markdown string without frontmatter", () => { + const content = "This is a sample content without frontmatter." 
+ const result = promptyParse(content) + assert.deepStrictEqual(result, { + frontmatter: {}, + content: content, + messages: [{ role: "system", content: content }], + }) + }) + + test("correctly parses a markdown string with valid frontmatter", () => { + const markdownString = `--- +name: Test +description: A test description +version: 1.0.0 +authors: + - Author1 + - Author2 +tags: + - tag1 + - tag2 +sample: + key: value +--- +# Heading +Content below heading.` + const result = promptyParse(markdownString) + assert.deepStrictEqual(result.frontmatter, { + name: "Test", + description: "A test description", + version: "1.0.0", + authors: ["Author1", "Author2"], + tags: ["tag1", "tag2"], + sample: { key: "value" }, + }) + assert.strictEqual(result.content, "# Heading\nContent below heading.") + }) + + test("correctly parses a markdown string with content split into roles", () => { + const markdownContent = `user: +User's message +assistant: +Assistant's reply +user: +Another message from the user` + const result = promptyParse(markdownContent) + assert.deepStrictEqual(result.messages, [ + { role: "user", content: "User's message" }, + { role: "assistant", content: "Assistant's reply" }, + { role: "user", content: "Another message from the user" }, + ]) + }) + + test("correctly handles a markdown string with content but without roles", () => { + const markdownContent = `Just some content without specifying roles.` + const result = promptyParse(markdownContent) + assert.deepStrictEqual(result.messages, [ + { role: "system", content: markdownContent }, + ]) + }) +}) diff --git a/packages/core/src/prompty.ts b/packages/core/src/prompty.ts new file mode 100644 index 0000000000..074e8b199f --- /dev/null +++ b/packages/core/src/prompty.ts @@ -0,0 +1,52 @@ +import { ChatCompletionMessageParam } from "./chattypes" +import { splitMarkdown } from "./frontmatter" +import { YAMLTryParse } from "./yaml" + +export interface PromptyFrontmatter { + name?: string + description?: string 
+    version?: string
+    authors?: string[]
+    tags?: string[]
+    // free-form sample values; nothing in this file validates their shape
+    sample?: Record<string, unknown>
+}
+
+/**
+ * Parses a prompty document: optional YAML frontmatter followed by a body
+ * that role marker lines (`system:`, `user:`, `assistant:`) split into
+ * chat messages.
+ *
+ * Text that precedes the first marker is attributed to the `system` role.
+ * A section is dropped when every one of its lines is empty.
+ *
+ * @param text raw prompty source
+ * @returns the frontmatter (an empty object when the YAML does not yield a
+ * mapping), the body as returned by `splitMarkdown`, and the chat messages
+ */
+export function promptyParse(text: string): {
+    frontmatter: PromptyFrontmatter
+    content: string
+    messages: ChatCompletionMessageParam[]
+} {
+    const { frontmatter = "", content = "" } = splitMarkdown(text)
+    const parsed = YAMLTryParse(frontmatter)
+    // todo: validate frontmatter?
+    // Only a YAML mapping is usable as frontmatter; scalars and sequences
+    // fall back to an empty object.
+    const fm: PromptyFrontmatter =
+        parsed && typeof parsed === "object" && !Array.isArray(parsed)
+            ? (parsed as PromptyFrontmatter)
+            : {}
+    const messages: ChatCompletionMessageParam[] = []
+
+    // No `g` flag on purpose: `exec` on a global regex resumes from
+    // `lastIndex`, so a marker line directly following another marker line
+    // was not detected and leaked into the message content.
+    const rx = /^\s*(system|user|assistant)\s*:\s*$/i
+    let role: "system" | "user" | "assistant" = "system"
+    let chunk: string[] = []
+
+    const pushMessage = () => {
+        if (chunk.some((l) => !!l))
+            messages.push({ role, content: chunk.join("\n") })
+    }
+
+    for (const line of content.split(/\r?\n/g)) {
+        const m = rx.exec(line)
+        if (m) {
+            // next role starts; markers match case-insensitively, so
+            // normalize the captured name before using it as a role
+            pushMessage()
+            role = m[1].toLowerCase() as "system" | "user" | "assistant"
+            chunk = []
+        } else {
+            chunk.push(line)
+        }
+    }
+    pushMessage()
+    return { frontmatter: fm, content, messages }
+}