added prompty parser

microsoft · Sep 4, 2024 · 182b72a · 182b72a
1 parent a8c114a
commit 182b72a
Show file tree

Hide file tree

Showing 4 changed files with 133 additions and 6 deletions.
diff --git a/genaisrc/test-gen.genai.mjs b/genaisrc/test-gen.genai.mjs
@@ -1,18 +1,19 @@
 script({
+    model: "openai:gpt-4",
     title: "unit test generator",
     system: ["system", "system.typescript", "system.files"],
     tools: ["fs"],
 })
 
 const code = def("CODE", env.files)
 
-
 $`## Step 1
 
 For each file in ${code}, 
 generate a plan to test the source code in each file
 
-- use input test files from packages/sample/src/rag/*
+- generate self-contained tests as much as possible by inlining all necessary values
+- if needed, use input test files from packages/sample/src/rag/*
 - only generate tests for files in ${code}
 - update the existing test files (<code filename>.test.ts). keep old tests if possible.
 
@@ -41,7 +42,7 @@ ${fence('import test, { beforeEach, describe } from "node:test"', { language: "j
 
 Validate and fix test sources.
 
-Use 'run_test' tool to execute the generated test code and fix the test code to make tests pass.
+Call the 'run_test' tool to execute the generated test code and fix the test code to make tests pass.
 
 - this is important.
 `
@@ -55,8 +56,7 @@ defTool(
     },
     async (args) => {
         const { filename, source } = args
-        if (source)
-            await workspace.writeText(filename, source)
+        if (source) await workspace.writeText(filename, source)
         console.debug(`running test code ${filename}`)
         return host.exec(`node`, ["--import", "tsx", "--test", filename])
     }

diff --git a/packages/core/src/mustache.ts b/packages/core/src/mustache.ts
@@ -18,7 +18,7 @@ export async function interpolateVariables(
 
     // remove prompty roles
     // https://github.com/microsoft/prompty/blob/main/runtime/prompty/prompty/parsers.py#L113C21-L113C77
-    content = content.replace(/^\s*(system|user):\s*$/gim, "\n")
+    content = content.replace(/^\s*(system|user|assistant)\s*:\s*$/gim, "\n")
 
     // remove xml tags
     // https://humanloop.com/docs/prompt-file-format

diff --git a/packages/core/src/prompty.test.ts b/packages/core/src/prompty.test.ts
@@ -0,0 +1,75 @@
+import { promptyParse } from "./prompty"
+import { describe, test, beforeEach } from "node:test"
+import assert from "node:assert/strict"
+
+// Unit tests for promptyParse: frontmatter extraction and role-based splitting.
+describe("promptyParse", () => {
+    // An empty document yields empty frontmatter, empty content and no messages.
+    test("correctly parses an empty markdown string", () => {
+        assert.deepStrictEqual(promptyParse(""), {
+            frontmatter: {},
+            content: "",
+            messages: [],
+        })
+    })
+
+    // Without frontmatter the whole text is returned as content and as a
+    // single system message.
+    test("correctly parses a markdown string without frontmatter", () => {
+        const text = "This is a sample content without frontmatter."
+        const expected = {
+            frontmatter: {},
+            content: text,
+            messages: [{ role: "system", content: text }],
+        }
+        assert.deepStrictEqual(promptyParse(text), expected)
+    })
+
+    // The YAML header is parsed into an object and removed from the content.
+    test("correctly parses a markdown string with valid frontmatter", () => {
+        const yamlLines = [
+            "name: Test",
+            "description: A test description",
+            "version: 1.0.0",
+            "authors:",
+            "  - Author1",
+            "  - Author2",
+            "tags:",
+            "  - tag1",
+            "  - tag2",
+            "sample:",
+            "  key: value",
+        ]
+        const bodyLines = ["# Heading", "Content below heading."]
+        const text = ["---", ...yamlLines, "---", ...bodyLines].join("\n")
+
+        const { frontmatter, content } = promptyParse(text)
+
+        assert.deepStrictEqual(frontmatter, {
+            name: "Test",
+            description: "A test description",
+            version: "1.0.0",
+            authors: ["Author1", "Author2"],
+            tags: ["tag1", "tag2"],
+            sample: { key: "value" },
+        })
+        assert.strictEqual(content, bodyLines.join("\n"))
+    })
+
+    // Each "<role>:" line opens a new message holding the lines that follow it.
+    test("correctly parses a markdown string with content split into roles", () => {
+        const turns = [
+            { role: "user", content: "User's message" },
+            { role: "assistant", content: "Assistant's reply" },
+            { role: "user", content: "Another message from the user" },
+        ]
+        const text = turns
+            .map(({ role, content }) => `${role}:\n${content}`)
+            .join("\n")
+
+        const { messages } = promptyParse(text)
+
+        assert.deepStrictEqual(messages, turns)
+    })
+
+    // Text without any role marker is attributed to the system role.
+    test("correctly handles a markdown string with content but without roles", () => {
+        const text = "Just some content without specifying roles."
+        assert.deepStrictEqual(promptyParse(text).messages, [
+            { role: "system", content: text },
+        ])
+    })
+})
diff --git a/packages/core/src/prompty.ts b/packages/core/src/prompty.ts
@@ -0,0 +1,52 @@
+import { ChatCompletionMessageParam } from "./chattypes"
+import { splitMarkdown } from "./frontmatter"
+import { YAMLTryParse } from "./yaml"
+
+/**
+ * Shape of the YAML frontmatter of a prompty document, as returned by
+ * `promptyParse`. Every field is optional; the parser does not currently
+ * validate the parsed YAML against this shape.
+ */
+export interface PromptyFrontmatter {
+    name?: string
+    description?: string
+    version?: string
+    authors?: string[]
+    tags?: string[]
+    // NOTE(review): presumably sample input values for the prompt — confirm against the prompty format
+    sample?: Record<string, any>
+}
+
+/**
+ * Parses a prompty document: an optional YAML frontmatter followed by a body
+ * whose sections are introduced by `system:`, `user:` or `assistant:` lines.
+ *
+ * @param text - raw document text
+ * @returns `frontmatter`: the parsed YAML header (`{}` when `YAMLTryParse`
+ * yields nothing); `content`: the body as returned by `splitMarkdown`;
+ * `messages`: one chat message per non-empty role section, in document order.
+ * Text that precedes the first role marker is attributed to `system`.
+ */
+export function promptyParse(text: string): {
+    frontmatter: PromptyFrontmatter
+    content: string
+    messages: ChatCompletionMessageParam[]
+} {
+    type Role = "system" | "user" | "assistant"
+
+    const { frontmatter = "", content = "" } = splitMarkdown(text)
+    const fm = YAMLTryParse(frontmatter) ?? {}
+    // todo: validate frontmatter?
+    const messages: ChatCompletionMessageParam[] = []
+
+    // A role marker is a line holding only a role name followed by a colon.
+    // The regex is applied to one line at a time, so it must NOT carry the
+    // `g` flag: a global regex keeps `lastIndex` between `exec` calls and
+    // would miss a marker that directly follows another marker.
+    const rx = /^\s*(system|user|assistant)\s*:\s*$/i
+    const lines = content.split(/\r?\n/g)
+    let role: Role = "system"
+    let chunk: string[] = []
+
+    // Emits the pending section, unless it is empty or made only of empty lines.
+    const pushMessage = () => {
+        if (chunk.some((l) => !!l)) {
+            messages.push({
+                role,
+                content: chunk.join("\n"),
+            })
+        }
+    }
+
+    for (const line of lines) {
+        const m = rx.exec(line)
+        if (m) {
+            // next role starts
+            pushMessage()
+            // the match is case-insensitive: normalize "User" / "SYSTEM" to
+            // the lowercase role names expected in chat messages
+            role = m[1].toLowerCase() as Role
+            chunk = []
+        } else {
+            chunk.push(line)
+        }
+    }
+    pushMessage()
+    return { frontmatter: fm, content, messages }
+}