From 182b72a27c88538c1fe1e036c9a947f8cef68800 Mon Sep 17 00:00:00 2001
From: pelikhan <jhalleux@microsoft.com>
Date: Wed, 4 Sep 2024 09:34:41 -0700
Subject: [PATCH] added prompty parser

---
 genaisrc/test-gen.genai.mjs       | 10 ++---
 packages/core/src/mustache.ts     |  2 +-
 packages/core/src/prompty.test.ts | 75 +++++++++++++++++++++++++++++++
 packages/core/src/prompty.ts      | 52 +++++++++++++++++++++
 4 files changed, 133 insertions(+), 6 deletions(-)
 create mode 100644 packages/core/src/prompty.test.ts
 create mode 100644 packages/core/src/prompty.ts
diff --git a/genaisrc/test-gen.genai.mjs b/genaisrc/test-gen.genai.mjs
index fdef20e1ea..a73d43bd82 100644
--- a/genaisrc/test-gen.genai.mjs
+++ b/genaisrc/test-gen.genai.mjs
@@ -1,4 +1,5 @@
 script({
+    model: "openai:gpt-4",
     title: "unit test generator",
     system: ["system", "system.typescript", "system.files"],
     tools: ["fs"],
@@ -6,13 +7,13 @@ script({
 
 const code = def("CODE", env.files)
 
-
 $`## Step 1
 
 For each file in ${code}, 
 generate a plan to test the source code in each file
 
-- use input test files from packages/sample/src/rag/*
+- generate self-contained tests as much as possible by inlining all necessary values
+- if needed, use input test files from packages/sample/src/rag/*
 - only generate tests for files in ${code}
 - update the existing test files (<code filename>.test.ts). keep old tests if possible.
 
@@ -41,7 +42,7 @@ ${fence('import test, { beforeEach, describe } from "node:test"', { language: "j
 
 Validate and fix test sources.
 
-Use 'run_test' tool to execute the generated test code and fix the test code to make tests pass.
+Call the 'run_test' tool to execute the generated test code and fix the test code to make tests pass.
 
 - this is important.
 `
@@ -55,8 +56,7 @@ defTool(
     },
     async (args) => {
         const { filename, source } = args
-        if (source)
-            await workspace.writeText(filename, source)
+        if (source) await workspace.writeText(filename, source)
         console.debug(`running test code ${filename}`)
         return host.exec(`node`, ["--import", "tsx", "--test", filename])
     }
diff --git a/packages/core/src/mustache.ts b/packages/core/src/mustache.ts
index 26f99d0135..10174df115 100644
--- a/packages/core/src/mustache.ts
+++ b/packages/core/src/mustache.ts
@@ -18,7 +18,7 @@ export async function interpolateVariables(
 
     // remove prompty roles
     // https://github.com/microsoft/prompty/blob/main/runtime/prompty/prompty/parsers.py#L113C21-L113C77
-    content = content.replace(/^\s*(system|user):\s*$/gim, "\n")
+    content = content.replace(/^\s*(system|user|assistant)\s*:\s*$/gim, "\n")
 
     // remove xml tags
     // https://humanloop.com/docs/prompt-file-format
diff --git a/packages/core/src/prompty.test.ts b/packages/core/src/prompty.test.ts
new file mode 100644
index 0000000000..10bf8557c4
--- /dev/null
+++ b/packages/core/src/prompty.test.ts
@@ -0,0 +1,75 @@
+import { promptyParse } from "./prompty"
+import { describe, test, beforeEach } from "node:test"
+import assert from "node:assert/strict"
+
+describe("promptyParse", () => { // unit tests for promptyParse from ./prompty
+    test("correctly parses an empty markdown string", () => {
+        const result = promptyParse("")
+        assert.deepStrictEqual(result, {
+            frontmatter: {},
+            content: "",
+            messages: [], // no text, so no message is emitted
+        })
+    })
+
+    test("correctly parses a markdown string without frontmatter", () => {
+        const content = "This is a sample content without frontmatter."
+        const result = promptyParse(content)
+        assert.deepStrictEqual(result, {
+            frontmatter: {},
+            content: content,
+            messages: [{ role: "system", content: content }], // text with no role marker is a system message
+        })
+    })
+
+    test("correctly parses a markdown string with valid frontmatter", () => {
+        const markdownString = `---
+name: Test
+description: A test description
+version: 1.0.0
+authors:
+  - Author1
+  - Author2
+tags:
+  - tag1
+  - tag2
+sample:
+  key: value
+---
+# Heading
+Content below heading.`
+        const result = promptyParse(markdownString)
+        assert.deepStrictEqual(result.frontmatter, {
+            name: "Test",
+            description: "A test description",
+            version: "1.0.0", // expected as a string, not a number
+            authors: ["Author1", "Author2"],
+            tags: ["tag1", "tag2"],
+            sample: { key: "value" },
+        })
+        assert.strictEqual(result.content, "# Heading\nContent below heading.") // content excludes the frontmatter block
+    })
+
+    test("correctly parses a markdown string with content split into roles", () => {
+        const markdownContent = `user:
+User's message
+assistant:
+Assistant's reply
+user:
+Another message from the user`
+        const result = promptyParse(markdownContent)
+        assert.deepStrictEqual(result.messages, [ // marker lines select the role and are not part of the content
+            { role: "user", content: "User's message" },
+            { role: "assistant", content: "Assistant's reply" },
+            { role: "user", content: "Another message from the user" },
+        ])
+    })
+
+    test("correctly handles a markdown string with content but without roles", () => {
+        const markdownContent = `Just some content without specifying roles.`
+        const result = promptyParse(markdownContent)
+        assert.deepStrictEqual(result.messages, [ // same default-role rule, checked on messages only
+            { role: "system", content: markdownContent },
+        ])
+    })
+})
diff --git a/packages/core/src/prompty.ts b/packages/core/src/prompty.ts
new file mode 100644
index 0000000000..074e8b199f
--- /dev/null
+++ b/packages/core/src/prompty.ts
@@ -0,0 +1,52 @@
+import { ChatCompletionMessageParam } from "./chattypes"
+import { splitMarkdown } from "./frontmatter"
+import { YAMLTryParse } from "./yaml"
+
+export interface PromptyFrontmatter { // declared type of the `frontmatter` value returned by promptyParse
+    name?: string // every field is optional
+    description?: string
+    version?: string
+    authors?: string[]
+    tags?: string[]
+    sample?: Record<string, any> // free-form object; NOTE(review): presumably sample inputs for the prompt, confirm against the prompty spec
+}
+
+/**
+ * Parses a prompty document: the text is split by `splitMarkdown` into YAML
+ * frontmatter and a body; in the body, a line holding only `system:`, `user:`
+ * or `assistant:` (any case) starts a new chat message with that role.
+ * @param text raw document text
+ * @returns the parsed frontmatter (`{}` if none), the body as `content`, and
+ * `messages`; text before the first marker is a system message, empty chunks are dropped
+ */
+export function promptyParse(text: string): {
+    frontmatter: PromptyFrontmatter
+    content: string
+    messages: ChatCompletionMessageParam[]
+} {
+    const { frontmatter = "", content = "" } = splitMarkdown(text)
+    // todo: validate frontmatter?
+    const fm = YAMLTryParse(frontmatter) ?? {}
+    const messages: ChatCompletionMessageParam[] = []
+    // no `g` flag: exec() on a global regex resumes at lastIndex, so a marker
+    // on the line right after another marker would not be recognized
+    const rx = /^\s*(system|user|assistant)\s*:\s*$/i
+    let role: "system" | "user" | "assistant" = "system"
+    let chunk: string[] = []
+    const pushMessage = () => {
+        // skip chunks that contain no text at all
+        if (chunk.some((l) => !!l))
+            messages.push({ role, content: chunk.join("\n") })
+    }
+    for (const line of content.split(/\r?\n/g)) {
+        const m = rx.exec(line)
+        if (m) {
+            // next role starts; normalize case since matching is case-insensitive
+            pushMessage()
+            role = m[1].toLowerCase() as "system" | "user" | "assistant"
+            chunk = []
+        } else chunk.push(line)
+    }
+    pushMessage()
+    return { frontmatter: fm, content, messages }
+}