From eb1cfea4a1b89575c5ff494e8d971273a9631960 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Sat, 19 Oct 2024 13:43:42 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20=F0=9F=8E=A8=20add=20Jinja2=20template?= =?UTF-8?q?=20parsing=20feature?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../content/docs/reference/cli/commands.md | 22 ++++++++++++++++--- packages/cli/src/cli.ts | 10 +++++++++ packages/cli/src/parse.ts | 20 +++++++++++++++++ packages/cli/src/run.ts | 20 ++--------------- packages/cli/src/vars.ts | 19 ++++++++++++++++ packages/core/src/constants.ts | 1 + 6 files changed, 71 insertions(+), 21 deletions(-) create mode 100644 packages/cli/src/vars.ts diff --git a/docs/src/content/docs/reference/cli/commands.md b/docs/src/content/docs/reference/cli/commands.md index ebceec6939..70b4b006c9 100644 --- a/docs/src/content/docs/reference/cli/commands.md +++ b/docs/src/content/docs/reference/cli/commands.md @@ -303,8 +303,8 @@ Options: -h, --help display help for command Commands: - data [options] Convert CSV, YAML, TOML, INI, XLSX or JSON files - into various formats + data [options] Convert CSV, YAML, TOML, INI, XLSX or JSON data + files into various formats fence Extracts a code fenced regions of the given type pdf Parse a PDF into text docx Parse a DOCX into texts @@ -314,6 +314,7 @@ Commands: tokens [options] Count tokens in a set of files jsonl2json Converts JSONL files to a JSON file prompty [options] Converts .prompty files to genaiscript + jinja2 [options] Renders Jinja2 or prompty template ``` ### `parse data` @@ -321,7 +322,7 @@ Commands: ``` Usage: genaiscript parse data [options] -Convert CSV, YAML, TOML, INI, XLSX or JSON files into various formats +Convert CSV, YAML, TOML, INI, XLSX or JSON data files into various formats Options: -f, --format output format (choices: "json", "json5", "yaml", @@ -422,6 +423,21 @@ Options: -h, --help display help for command ``` +### `parse jinja2` + +``` +Usage: 
genaiscript parse jinja2 [options] + +Renders Jinja2 or prompty template + +Arguments: + file input Jinja2 or prompty template file + +Options: + --vars variables, as name=value passed to the template + -h, --help display help for command +``` + ## `info` ``` diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 62872b3396..ea41716b26 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -17,6 +17,7 @@ import { parseDOCX, parseFence, parseHTMLToText, + parseJinja2, parsePDF, parseTokens, prompty2genaiscript, @@ -347,6 +348,15 @@ export async function cli() { .argument("", "input JSONL files") .option("-o, --out ", "output folder") .action(prompty2genaiscript) // Action to convert prompty files + parser + .command("jinja2") + .description("Renders Jinja2 or prompty template") + .argument("", "input Jinja2 or prompty template file") + .option( + "--vars ", + "variables, as name=value passed to the template" + ) + .action(parseJinja2) // Define 'info' command group for utility information tasks const info = program.command("info").description("Utility tasks") diff --git a/packages/cli/src/parse.ts b/packages/cli/src/parse.ts index 62066b0a9f..f68bb64b9c 100644 --- a/packages/cli/src/parse.ts +++ b/packages/cli/src/parse.ts @@ -19,6 +19,8 @@ import { DEFAULT_MODEL, INI_REGEX, JSON5_REGEX, + MD_REGEX, + PROMPTY_REGEX, TOML_REGEX, XLSX_REGEX, YAML_REGEX, @@ -30,6 +32,9 @@ import { INIParse, INIStringify } from "../../core/src/ini" import { TOMLParse } from "../../core/src/toml" import { JSON5parse, JSON5Stringify } from "../../core/src/json5" import { XLSXParse } from "../../core/src/xlsx" +import { jinjaRender } from "../../core/src/jinja" +import { splitMarkdown } from "../../core/src/frontmatter" +import { parseOptionsVars } from "./vars" /** * This module provides various parsing utilities for different file types such @@ -83,6 +88,21 @@ export async function parseHTMLToText(file: string) { console.log(text) } +export async function 
parseJinja2( + file: string, + options: { + vars: string[] + } +) { + let src = await readFile(file, { encoding: "utf-8" }) + if (PROMPTY_REGEX.test(file)) src = promptyParse(src).content + else if (MD_REGEX.test(file)) src = splitMarkdown(src).content + + const vars = parseOptionsVars(options.vars, process.env) + const res = jinjaRender(src, vars) + console.log(res) +} + export async function parseAnyToJSON( file: string, options: { format: string } diff --git a/packages/cli/src/run.ts b/packages/cli/src/run.ts index 68743286aa..7b607d7206 100644 --- a/packages/cli/src/run.ts +++ b/packages/cli/src/run.ts @@ -80,23 +80,7 @@ import { delay } from "es-toolkit" import { GenerationStats } from "../../core/src/usage" import { traceAgentMemory } from "../../core/src/agent" import { appendFile } from "node:fs/promises" - -function parseVars( - vars: string[], - env: Record -): Record { - const vals = - vars?.reduce((acc, v) => ({ ...acc, ...parseKeyValuePair(v) }), {}) ?? - {} - const envVals = Object.keys(env) - .filter((k) => CLI_ENV_VAR_RX.test(k)) - .map((k) => ({ - [k.replace(CLI_ENV_VAR_RX, "").toLocaleLowerCase()]: env[k], - })) - .reduce((acc, v) => ({ ...acc, ...v }), {}) - - return { ...vals, ...envVals } -} +import { parseOptionsVars } from "./vars" async function setupTraceWriting(trace: MarkdownTrace, filename: string) { logVerbose(`trace: ${filename}`) @@ -271,7 +255,7 @@ export async function runScript( const fragment: Fragment = { files: Array.from(resolvedFiles), } - const vars = parseVars(options.vars, process.env) + const vars = parseOptionsVars(options.vars, process.env) const stats = new GenerationStats("") try { if (options.label) trace.heading(2, options.label) diff --git a/packages/cli/src/vars.ts b/packages/cli/src/vars.ts new file mode 100644 index 0000000000..00ddd12eca --- /dev/null +++ b/packages/cli/src/vars.ts @@ -0,0 +1,19 @@ +import { CLI_ENV_VAR_RX } from "../../core/src/constants" +import { parseKeyValuePair } from 
"../../core/src/fence" + +export function parseOptionsVars( + vars: string[], + env: Record +): Record { + const vals = + vars?.reduce((acc, v) => ({ ...acc, ...parseKeyValuePair(v) }), {}) ?? + {} + const envVals = Object.keys(env) + .filter((k) => CLI_ENV_VAR_RX.test(k)) + .map((k) => ({ + [k.replace(CLI_ENV_VAR_RX, "").toLocaleLowerCase()]: env[k], + })) + .reduce((acc, v) => ({ ...acc, ...v }), {}) + + return { ...vals, ...envVals } +} diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index 7543d53898..b63a9492a2 100644 --- a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -34,6 +34,7 @@ export const TOML_REGEX = /\.toml$/i export const XLSX_REGEX = /\.xlsx$/i export const DOCX_REGEX = /\.docx$/i export const PDF_REGEX = /\.pdf$/i +export const MD_REGEX = /\.md$/i export const MDX_REGEX = /\.mdx$/i export const MJS_REGEX = /\.mjs$/i export const JS_REGEX = /\.js$/i