From d11f989dd4e05ddb040c3c5a7ff086cd787dcdb9 Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 24 Sep 2024 16:43:53 +0000 Subject: [PATCH 1/5] Add detailed comments and documentation across multiple files for clarity and understanding --- docs/src/content/docs/samples/cmt.mdx | 60 +++++++- packages/core/src/aici.ts | 3 +- packages/core/src/annotations.ts | 12 +- packages/core/src/chatrender.ts | 9 +- packages/core/src/chattypes.ts | 97 +++++++++---- packages/core/src/copy.ts | 55 ++++++-- packages/core/src/crypto.ts | 14 ++ packages/core/src/csv.ts | 56 ++++++-- packages/core/src/dotenv.ts | 32 ++++- packages/core/src/edits.ts | 30 +++- packages/core/src/encoders.ts | 17 ++- packages/core/src/fence.ts | 113 +++++++++------ packages/core/src/file.ts | 80 +++++++++-- packages/core/src/fuzzsearch.ts | 38 +++-- packages/core/src/generation.ts | 59 ++++---- packages/core/src/gitignore.ts | 14 ++ packages/core/src/glob.ts | 15 +- packages/core/src/globals.ts | 44 +++++- packages/core/src/html.ts | 53 +++++-- packages/core/src/image.ts | 37 ++++- packages/core/src/index.ts | 2 +- packages/core/src/ini.ts | 39 +++++- packages/core/src/jinja.ts | 17 +++ packages/core/src/json5.ts | 52 +++++++ packages/core/src/liner.ts | 37 +++-- packages/core/src/markdown.ts | 80 +++++++++-- packages/core/src/math.ts | 24 ++++ packages/core/src/mime.ts | 34 ++++- packages/core/src/ollama.ts | 32 ++++- packages/core/src/parser.ts | 77 ++++++++--- packages/core/src/pdf.ts | 74 +++++++--- packages/core/src/promptdom.ts | 184 +++++++++++++++---------- packages/core/src/promptrunner.ts | 59 +++++++- packages/core/src/schema.ts | 67 +++++++-- packages/core/src/template.ts | 152 +++++++++++++++++++- packages/core/src/test.ts | 51 +++++-- packages/core/src/testhost.ts | 63 +++++++++ packages/core/src/tidy.ts | 23 ++++ packages/core/src/tokens.ts | 14 ++ packages/core/src/url.ts | 30 +++- packages/core/src/urlAdapters.ts | 31 ++++- packages/core/src/vectorsearch.ts | 64 ++++++++- 
packages/core/src/xlsx.ts | 24 ++++ packages/core/src/xml.ts | 40 +++++- packages/core/src/yaml.ts | 38 +++++ packages/vscode/genaisrc/cmt.genai.mts | 29 +++- 46 files changed, 1786 insertions(+), 389 deletions(-) diff --git a/docs/src/content/docs/samples/cmt.mdx b/docs/src/content/docs/samples/cmt.mdx index 653183ac23..1447c1b6a6 100644 --- a/docs/src/content/docs/samples/cmt.mdx +++ b/docs/src/content/docs/samples/cmt.mdx @@ -47,6 +47,9 @@ We can use [inline prompts](/genaiscript/reference/scripts/inline-prompts) to ma for (const file of files) { console.log(`processing ${file.filename}`) ... add comments + ... format generated code (optional) -- keep things consistent + ... build generated -- let's make sure it's still valid code + ... check that only comments were changed -- LLM as judge ... save changes } ``` @@ -66,14 +69,69 @@ const res = await runPrompt( We provide a detailed set of instructions to the AI for how to analyze and comment on the code. +## Judge results with LLM + +We issue one more prompt to judge the modified code and make sure the code is not modified. + +```ts +async function checkModifications(filename: string): Promise { + const diff = await host.exec(`git diff ${filename}`) + if (!diff.stdout) return false + const res = await runPrompt( + (ctx) => { + ctx.def("DIFF", diff.stdout) + ctx.$`You are an expert developer at all programming languages. + + Your task is to analyze the changes in DIFF and make sure that only comments are modified. + Report all changes that are not comments and print "MODIFIED". + ` + }, + { + cache: "cmt-check", + } + ) + + const modified = res.text?.includes("MODIFIED") + console.log(`code modified, reverting...`) + return modified +} +``` + ## How to Run the Script To run this script, you'll first need to install the GenAIScript CLI. [Follow the installation guide here](https://microsoft.github.io/genaiscript/getting-started/installation). 
-```shell +```sh genaiscript run cmt ``` +## Format and build + +One important aspect is to normalize and valid the AI generated code. The user can provide a `format` command to run a formatter +and a `build` command to check if the code is still valid. + +```ts + +script({..., + parameters: { + format: { + type: "string", + description: "Format source code command", + }, + build: { + type: "string", + description: "Build command", + }, + }, +}) + +const { format, build } = env.vars.build +``` + +```sh +genaiscript run cmt --vars "build=npm run build" "format=npm run format" +``` + ## Full source ([GitHub](https://github.com/microsoft/genaiscript/blob/main/packages/vscode/genaisrc/cmt.genai.mts)) diff --git a/packages/core/src/aici.ts b/packages/core/src/aici.ts index a36678f294..03af250bc6 100644 --- a/packages/core/src/aici.ts +++ b/packages/core/src/aici.ts @@ -34,7 +34,7 @@ function renderAICINode(node: AICINode) { if (regex) args.push(`regex: ${regex.toString()}`) return `await gen({${args.join(`,\n`)}})` default: - return "undefined" // Fallback case for unknown node types + return "undefined" // Fallback for unknown node types } } @@ -64,6 +64,7 @@ export async function renderAICI(functionName: string, root: PromptNode) { let indent: string = "" // Current indentation level const push = (text: string) => program.push(indent + text) // Add text with current indentation const pushString = (text: string) => { + // Pushes a string to the program if it's not empty if (text !== undefined && text !== null && text !== "") push("await $`" + escapeJavascriptString(text) + "`") } diff --git a/packages/core/src/annotations.ts b/packages/core/src/annotations.ts index 477a16509f..81156c08d0 100644 --- a/packages/core/src/annotations.ts +++ b/packages/core/src/annotations.ts @@ -4,17 +4,17 @@ * of annotations into different formats for integration with CI/CD tools. 
*/ -// Regular expression for matching GitHub Actions annotations +// Regular expression for matching GitHub Actions annotations. // Example: ::error file=foo.js,line=10,endLine=11::Something went wrong. const GITHUB_ANNOTATIONS_RX = /^\s*::(?notice|warning|error)\s*file=(?[^,]+),\s*line=(?\d+),\s*endLine=(?\d+)\s*(,\s*code=(?[^,:]+)?\s*)?::(?.*)$/gim -// Regular expression for matching Azure DevOps annotations +// Regular expression for matching Azure DevOps annotations. // Example: ##vso[task.logissue type=warning;sourcepath=foo.cs;linenumber=1;]Found something. const AZURE_DEVOPS_ANNOTATIONS_RX = /^\s*##vso\[task.logissue\s+type=(?error|warning);sourcepath=(?);linenumber=(?\d+)(;code=(?\d+);)?[^\]]*\](?.*)$/gim -// Regular expression for matching TypeScript build annotations +// Regular expression for matching TypeScript build annotations. // Example: foo.ts:10:error TS1005: ';' expected. const TYPESCRIPT_ANNOTATIONS_RX = /^(?[^:\s].*?):(?\d+)(?::(?\d+))?(?::\d+)?\s+-\s+(?error|warning)\s+(?[^:]+)\s*:\s*(?.*)$/gim @@ -71,12 +71,14 @@ export function parseAnnotations(text: string): Diagnostic[] { * @returns A formatted GitHub Action command string. */ export function convertDiagnosticToGitHubActionCommand(d: Diagnostic) { + // Maps DiagnosticSeverity to GitHub Action severity strings. const sevMap: Record = { ["info"]: "notice", // Maps 'info' to 'notice' ["warning"]: "warning", ["error"]: "error", } + // Construct GitHub Action command string with necessary details. return `::${sevMap[d.severity] || d.severity} file=${d.filename}, line=${d.range[0][0]}, endLine=${d.range[1][0]}::${d.message}` } @@ -87,8 +89,10 @@ export function convertDiagnosticToGitHubActionCommand(d: Diagnostic) { * @returns A formatted Azure DevOps command string. */ export function convertDiagnosticToAzureDevOpsCommand(d: Diagnostic) { + // Handle 'info' severity separately with a debug message. 
if (d.severity === "info") return `##[debug]${d.message} at ${d.filename}` else + // Construct Azure DevOps command string with necessary details. return `##vso[task.logissue type=${d.severity};sourcepath=${d.filename};linenumber=${d.range[0][0]}]${d.message}` } @@ -99,11 +103,13 @@ export function convertDiagnosticToAzureDevOpsCommand(d: Diagnostic) { * @returns A string of formatted Markdown annotations. */ export function convertAnnotationsToMarkdown(text: string): string { + // Maps severity levels to Markdown admonition types. const severities: Record = { error: "CAUTION", warning: "WARNING", notice: "NOTE", } + // Replace GitHub and Azure DevOps annotations with Markdown format. return text ?.replace( GITHUB_ANNOTATIONS_RX, diff --git a/packages/core/src/chatrender.ts b/packages/core/src/chatrender.ts index 7a72ece68d..8df2f34ad1 100644 --- a/packages/core/src/chatrender.ts +++ b/packages/core/src/chatrender.ts @@ -1,3 +1,4 @@ +// Import statements for various message parameters used in chat rendering. import { ChatCompletionAssistantMessageParam, ChatCompletionMessageParam, @@ -5,6 +6,8 @@ import { ChatCompletionToolMessageParam, ChatCompletionUserMessageParam, } from "./chattypes" + +// Import utility functions for JSON5 parsing, markdown formatting, and YAML stringification. import { JSON5TryParse } from "./json5" import { details, fenceMD } from "./markdown" import { YAMLStringify } from "./yaml" @@ -83,9 +86,9 @@ export function renderMessagesToMarkdown( ) { // Set default options for filtering message roles. const { - system = undefined, - user = undefined, - assistant = true, + system = undefined, // Include system messages unless explicitly set to false. + user = undefined, // Include user messages unless explicitly set to false. + assistant = true, // Include assistant messages by default. 
} = options || {} const res: string[] = [] diff --git a/packages/core/src/chattypes.ts b/packages/core/src/chattypes.ts index 1f68dc8aa3..b3938296bb 100644 --- a/packages/core/src/chattypes.ts +++ b/packages/core/src/chattypes.ts @@ -1,69 +1,103 @@ +/** + * This module defines TypeScript types and interfaces for chat completions using the OpenAI API. + * These types represent structured data for various chat-related functionalities. + * + * Tags: TypeScript, OpenAI, Chat, Types, Interfaces + */ + import OpenAI from "openai" +/** + * Interface representing a custom AI Chat Interface request. + */ export interface AICIRequest { - role: "aici" - content?: string - error?: unknown - functionName: string + role: "aici" // The role for this type of request + content?: string // Optional content of the request + error?: unknown // Optional error information + functionName: string // Name of the function being requested } +// Aliases for OpenAI chat completion types + +// Text content part of a chat completion export type ChatCompletionContentPartText = OpenAI.Chat.Completions.ChatCompletionContentPartText +// General content part of a chat completion export type ChatCompletionContentPart = OpenAI.Chat.Completions.ChatCompletionContentPart +// Tool used in a chat completion export type ChatCompletionTool = OpenAI.Chat.Completions.ChatCompletionTool +// Chunk of a chat completion response export type ChatCompletionChunk = OpenAI.Chat.Completions.ChatCompletionChunk +// Parameters for a system message in a chat completion export type ChatCompletionSystemMessageParam = OpenAI.Chat.Completions.ChatCompletionSystemMessageParam +// Parameters for a tool message in a chat completion export type ChatCompletionToolMessageParam = OpenAI.Chat.Completions.ChatCompletionToolMessageParam +/** + * Type representing parameters for chat completion messages, including custom AICIRequest. 
+ */ export type ChatCompletionMessageParam = | OpenAI.Chat.Completions.ChatCompletionMessageParam | AICIRequest +/** + * Type representing a request to create a chat completion, extending from OpenAI's + * streaming parameters minus the 'messages' property. + */ export type CreateChatCompletionRequest = Omit< OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming, "messages" > & { /** * A list of messages comprising the conversation so far. - * [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). */ - // messages: Array; messages: ChatCompletionMessageParam[] } +// Parameters for an assistant message in a chat completion export type ChatCompletionAssistantMessageParam = OpenAI.Chat.Completions.ChatCompletionAssistantMessageParam +// Parameters for a user message in a chat completion export type ChatCompletionUserMessageParam = OpenAI.Chat.Completions.ChatCompletionUserMessageParam +// Image content part of a chat completion export type ChatCompletionContentPartImage = OpenAI.Chat.Completions.ChatCompletionContentPartImage +// Parameters for creating embeddings export type EmbeddingCreateParams = OpenAI.Embeddings.EmbeddingCreateParams +// Response type for creating embeddings export type EmbeddingCreateResponse = OpenAI.Embeddings.CreateEmbeddingResponse +/** + * Interface representing a call to a chat completion tool. + */ export interface ChatCompletionToolCall { - id: string - name: string - arguments?: string + id: string // Unique identifier for the tool call + name: string // Tool name being called + arguments?: string // Optional arguments for the tool } +/** + * Interface representing a response from chat completion. 
+ */ export interface ChatCompletionResponse { - text?: string - cached?: boolean - variables?: Record - toolCalls?: ChatCompletionToolCall[] - finishReason?: + text?: string // Optional text response + cached?: boolean // Indicates if the response was cached + variables?: Record // Optional variables associated with the response + toolCalls?: ChatCompletionToolCall[] // List of tool calls made during the response + finishReason?: // Reason why the chat completion finished | "stop" | "length" | "tool_calls" @@ -72,24 +106,31 @@ export interface ChatCompletionResponse { | "fail" } +// Alias for OpenAI's API error type export const ModelError = OpenAI.APIError +/** + * Interface representing a progress report for chat completions. + */ export interface ChatCompletionsProgressReport { - tokensSoFar: number - responseSoFar: string - responseChunk: string - inner: boolean + tokensSoFar: number // Number of tokens processed so far + responseSoFar: string // Partial response generated so far + responseChunk: string // Current chunk of response being processed + inner: boolean // Indicates if this is an inner report } +/** + * Interface representing options for chat completions. 
+ */ export interface ChatCompletionsOptions { - partialCb?: (progress: ChatCompletionsProgressReport) => void - requestOptions?: Partial> - maxCachedTemperature?: number - maxCachedTopP?: number - cache?: boolean | string - cacheName?: string - retry?: number - retryDelay?: number - maxDelay?: number - inner: boolean + partialCb?: (progress: ChatCompletionsProgressReport) => void // Callback for partial responses + requestOptions?: Partial> // Custom request options + maxCachedTemperature?: number // Max temperature for caching responses + maxCachedTopP?: number // Max top-p for caching responses + cache?: boolean | string // Cache setting or cache name + cacheName?: string // Name of the cache to use + retry?: number // Number of retries for failed requests + retryDelay?: number // Delay between retries + maxDelay?: number // Maximum delay for retry attempts + inner: boolean // Indicates if the option is for inner processing } diff --git a/packages/core/src/copy.ts b/packages/core/src/copy.ts index 09debd8c92..ae1289d646 100644 --- a/packages/core/src/copy.ts +++ b/packages/core/src/copy.ts @@ -1,37 +1,66 @@ -import { PromptScript } from "./ast" -import { GENAI_MJS_EXT, GENAI_SRC } from "./constants" -import { host } from "./host" -import { fileExists, writeText } from "./fs" +// This file defines functions related to copying and managing prompt scripts, +// including constructing file paths and handling copy operations, +// with optional forking functionality. +import { PromptScript } from "./ast" // Import PromptScript type +import { GENAI_MJS_EXT, GENAI_SRC } from "./constants" // Import constants for file extensions and source directory +import { host } from "./host" // Import host module for file operations +import { fileExists, writeText } from "./fs" // Import file system utilities + +/** + * Constructs the path to a prompt file. + * If `id` is null, returns the base prompt directory path. 
+ * Otherwise, appends the `id` with a specific file extension to the path. + * + * @param id - Identifier for the prompt script + * @returns The file path as a string + */ function promptPath(id: string) { - const prompts = host.resolvePath(host.projectFolder(), GENAI_SRC) - if (id === null) return prompts - return host.resolvePath(prompts, id + GENAI_MJS_EXT) + const prompts = host.resolvePath(host.projectFolder(), GENAI_SRC) // Resolve base prompt directory + if (id === null) return prompts // Return base path if id is not provided + return host.resolvePath(prompts, id + GENAI_MJS_EXT) // Construct full path if id is provided } + +/** + * Copies a prompt script to a new location. + * Can optionally fork the script if needed, ensuring that the new filename is unique. + * + * @param t - The prompt script object + * @param options - Configuration options for the copy + * @param options.fork - Indicates if the script should be forked + * @param options.name - Optional new name for the copied script + * @returns The file path of the copied script + * @throws If the file already exists in the target location + */ export async function copyPrompt( t: PromptScript, options: { fork: boolean; name?: string } ) { + // Ensure the prompt directory exists await host.createDirectory(promptPath(null)) - const n = options?.name || t.id + // Determine the name for the new prompt file + const n = options?.name || t.id // Use provided name or default to script id let fn = promptPath(n) + // Handle forking logic by appending a suffix if needed if (options.fork) { let suff = 2 for (;;) { - fn = promptPath(n + "_" + suff) - if (await fileExists(fn)) { + fn = promptPath(n + "_" + suff) // Construct new name with suffix + if (await fileExists(fn)) { // Check if file already exists suff++ - continue + continue // Increment suffix and retry if file exists } - break + break // Exit loop if file does not exist } } + // Check if the file already exists, throw error if it does if (await 
fileExists(fn)) throw new Error(`file ${fn} already exists`) + // Write the prompt script to the determined path await writeText(fn, t.jsSource) - return fn + return fn // Return the path of the copied script } diff --git a/packages/core/src/crypto.ts b/packages/core/src/crypto.ts index 25621bf1f8..a09e8dc150 100644 --- a/packages/core/src/crypto.ts +++ b/packages/core/src/crypto.ts @@ -1,7 +1,21 @@ +// crypto.ts - Provides cryptographic functions for secure operations + +// Importing the toHex function from the util module to convert byte arrays to hexadecimal strings import { toHex } from "./util" +/** + * Generates a random hexadecimal string of a specified size. + * + * @param size - The number of bytes to generate, which will be converted to a hexadecimal string. + * @returns A string representing the randomly generated bytes in hexadecimal format. + */ export function randomHex(size: number) { + // Create a new Uint8Array with the specified size to hold random bytes const bytes = new Uint8Array(size) + + // Fill the array with cryptographically secure random values using the Web Crypto API crypto.getRandomValues(bytes) + + // Convert the random byte array to a hexadecimal string using the toHex function and return it return toHex(bytes) } diff --git a/packages/core/src/csv.ts b/packages/core/src/csv.ts index d47a5c13ce..9fbf3475dc 100644 --- a/packages/core/src/csv.ts +++ b/packages/core/src/csv.ts @@ -1,6 +1,17 @@ +// Import the CSV parsing function from the csv-parse library import { parse } from "csv-parse/sync" +// Import the TraceOptions interface for logging trace information import { TraceOptions } from "./trace" +/** + * Parses a CSV string into an array of objects. + * + * @param text - The CSV string to parse. + * @param options - Optional parsing configuration. + * @param options.delimiter - Delimiter used in the CSV, defaults to comma. + * @param options.headers - Array of headers for the CSV columns. 
+ * @returns An array of objects representing the CSV data. + */ export function CSVParse( text: string, options?: { @@ -8,18 +19,30 @@ export function CSVParse( headers?: string[] } ): object[] { + // Destructure options or provide defaults const { delimiter, headers } = options || {} + // Parse the CSV string based on the provided options return parse(text, { - autoParse: true, - castDate: false, - comment: "#", - columns: headers || true, - skipEmptyLines: true, - skipRecordsWithError: true, - delimiter, + autoParse: true, // Automatically parse values to appropriate types + castDate: false, // Do not cast strings to dates + comment: "#", // Ignore comments starting with '#' + columns: headers || true, // Use provided headers or infer from the first line + skipEmptyLines: true, // Skip empty lines in the CSV + skipRecordsWithError: true, // Skip records that cause errors + delimiter, // Use the provided delimiter }) } +/** + * Attempts to parse a CSV string into an array of objects, with error handling. + * + * @param text - The CSV string to parse. + * @param options - Optional parsing configuration and tracing options. + * @param options.delimiter - Delimiter used in the CSV, defaults to comma. + * @param options.headers - Array of headers for the CSV columns. + * @param options.trace - Trace function for logging errors. + * @returns An array of objects representing the CSV data, or undefined if parsing fails. + */ export function CSVTryParse( text: string, options?: { @@ -29,26 +52,37 @@ export function CSVTryParse( ): object[] | undefined { const { trace } = options || {} try { + // Attempt to parse the CSV return CSVParse(text, options) } catch (e) { + // Log error using trace function if provided trace?.error("reading csv", e) return undefined } } +/** + * Converts an array of objects into a Markdown table format. + * + * @param csv - The array of objects representing CSV data. + * @param options - Options for formatting the table. 
+ * @param options.headers - Array of headers for the table columns. + * @returns A string representing the CSV data in Markdown table format. + */ export function CSVToMarkdown(csv: object[], options?: { headers?: string[] }) { if (!csv?.length) return "" const { headers = Object.keys(csv[0]) } = options || {} const res: string[] = [ - `|${headers.join("|")}|`, - `|${headers.map(() => "-").join("|")}|`, + `|${headers.join("|")}|`, // Create Markdown header row + `|${headers.map(() => "-").join("|")}|`, // Create Markdown separator row ...csv.map( (row) => `|${headers .map((key) => { const v = (row as any)[key] const s = v === undefined || v === null ? "" : String(v) + // Escape special Markdown characters and format cell content return s .replace(/\s+$/, "") .replace(/[\\`*_{}[\]()#+\-.!]/g, (m) => "\\" + m) @@ -56,8 +90,8 @@ export function CSVToMarkdown(csv: object[], options?: { headers?: string[] }) { .replace(/>/g, "gt;") .replace(/\r?\n/g, "
") }) - .join("|")}|` + .join("|")}|` // Join columns with '|' ), ] - return res.join("\n") + return res.join("\n") // Join rows with newline } diff --git a/packages/core/src/dotenv.ts b/packages/core/src/dotenv.ts index 7fdb44f3c4..6c5b4ba389 100644 --- a/packages/core/src/dotenv.ts +++ b/packages/core/src/dotenv.ts @@ -1,27 +1,57 @@ +// This module provides utilities for parsing and stringifying dotenv-style files. +// It includes functions to handle parsing errors gracefully and formatting key-value pairs properly. +// Tags: dotenv, parsing, error handling + +// Import the 'parse' function from the 'dotenv' library to parse dotenv files import { parse } from "dotenv" + +// Import a local utility function 'logError' for logging errors import { logError } from "./util" +/** + * Safely parses a dotenv-style string into a key-value object. + * If parsing fails, logs the error and returns an empty object. + * + * @param text - The dotenv file content as a string + * @returns A record with key-value pairs from the dotenv file + */ export function dotEnvTryParse(text: string): Record { try { + // Try parsing the text using the 'parse' function return parse(text) } catch (e) { + // Log any parsing error encountered logError(e) + // Return an empty object to indicate parsing failure return {} } } +// Export the 'parse' function directly so it can be used externally export const dotEnvParse = parse +/** + * Converts a key-value record into a dotenv-style string. + * If values contain newlines or quotes, they are enclosed in double quotes and escaped. 
+ * + * @param record - An object representing key-value pairs + * @returns A dotenv-formatted string + */ export function dotEnvStringify(record: Record): string { return Object.entries(record || {}) .map(([key, value]) => { + // Ensure null or undefined values are treated as empty strings if (value === undefined || value === null) value = "" - // If the value contains newlines or quotes, enclose it in double quotes and escape existing quotes + + // Enclose in quotes if the value contains newlines or quotes, and escape quotes if (value.includes("\n") || value.includes('"')) { value = value.replace(/"/g, '\\"') // Escape existing quotes return `${key}="${value}"` } + + // Default key-value format without quotes return `${key}=${value}` }) + // Join all key-value pairs with newline characters for dotenv format .join("\n") } diff --git a/packages/core/src/edits.ts b/packages/core/src/edits.ts index dfe83c79f0..d4dcf3071f 100644 --- a/packages/core/src/edits.ts +++ b/packages/core/src/edits.ts @@ -1,19 +1,37 @@ -import { GenerationResult } from "./generation" -import { writeText } from "./fs" -import { logVerbose } from "./util" +// This module provides functionality to write file edits to disk, +// supporting conditional application of edits based on validation results. +import { GenerationResult } from "./generation" // Import type for generation results +import { writeText } from "./fs" // Import function to write text to files +import { logVerbose } from "./util" // Import function for verbose logging + +/** + * Asynchronously writes file edits to disk. + * + * @param res - The result of a generation process containing file edits. + * @param applyEdits - A flag indicating whether edits should be applied even if validation fails. 
+ */ export async function writeFileEdits( - res: GenerationResult, - applyEdits: boolean + res: GenerationResult, // Contains the edits to be applied to files + applyEdits: boolean // Determines whether to apply edits unconditionally ) { + // Iterate over each file edit entry for (const fileEdit of Object.entries(res.fileEdits)) { + // Destructure the filename, before content, after content, and validation from the entry const [fn, { before, after, validation }] = fileEdit + + // Skip writing if the edit is invalid and applyEdits is false if (!validation?.valid && !applyEdits) continue + + // Check if there's a change between before and after content if (after !== before) { + // Log whether the file is being updated or created logVerbose( `${before !== undefined ? `updating` : `creating`} ${fn}` ) - await writeText(fn, after ?? before) + + // Write the new content to the file + await writeText(fn, after ?? before) // Write 'after' content if available, otherwise 'before' } } } diff --git a/packages/core/src/encoders.ts b/packages/core/src/encoders.ts index 72e7d3145b..820e19ac9b 100644 --- a/packages/core/src/encoders.ts +++ b/packages/core/src/encoders.ts @@ -1,16 +1,25 @@ +// Import the function to parse model identifiers import { parseModelIdentifier } from "./models" +/** + * Resolves the appropriate token encoder based on the given model ID. + * @param modelId - The identifier for the model to resolve the encoder for. + * @returns A Promise that resolves to a TokenEncoder function. 
+ */ export async function resolveTokenEncoder( modelId: string ): Promise { + // Parse the model identifier to extract the model information const { model } = parseModelIdentifier(modelId) - const module = model + const module = model // Assign model to module for dynamic import path + try { + // Attempt to dynamically import the encoder module for the specified model const mod = await import(`gpt-tokenizer/model/${module}`) - return mod.encode + return mod.encode // Return the encoder function } catch (e) { - // default gpt-4 + // If the specific model encoder is not found, default to gpt-4 encoder const { encode } = await import("gpt-tokenizer") - return encode + return encode // Return the default encoder function } } diff --git a/packages/core/src/fence.ts b/packages/core/src/fence.ts index 58a58a1bdc..8bf28bb3b6 100644 --- a/packages/core/src/fence.ts +++ b/packages/core/src/fence.ts @@ -1,10 +1,17 @@ +// Import necessary constants and functions from other modules import { EMOJI_FAIL, EMOJI_SUCCESS, EMOJI_UNDEFINED } from "./constants" import { JSON5TryParse } from "./json5" import { arrayify } from "./util" import { YAMLTryParse } from "./yaml" -const promptFenceStartRx = - /^(?`{3,})(?[^=:]+)?(\s+(?.*))?$/m +// Regular expression for detecting the start of a code fence +const promptFenceStartRx = /^(?`{3,})(?[^=:]+)?(\s+(?.*))?$/m + +/** + * Start parsing a fence from a given text line. + * @param text - The text line to parse. + * @returns An object containing the fence, language, and arguments. + */ function startFence(text: string) { const m = promptFenceStartRx.exec(text) const groups: Record = m?.groups || {} @@ -15,12 +22,22 @@ function startFence(text: string) { } } +/** + * Remove quotes from a string if they exist. + * @param s - The string to unquote. + * @returns The unquoted string. 
+ */ export function unquote(s: string) { for (const sep of "\"'`") if (s && s[0] === sep && s[s.length - 1] === sep) return s.slice(1, -1) return s } +/** + * Parse a single key-value pair from a string. + * @param text - The text containing the key-value pair. + * @returns An object with the parsed key-value pair. + */ export function parseKeyValuePair(text: string): Record { const m = /[=:]/.exec(text) return m @@ -28,6 +45,11 @@ export function parseKeyValuePair(text: string): Record { : {} } +/** + * Parse multiple key-value pairs from a string or array of strings. + * @param text - The text or array containing key-value pairs. + * @returns An object with all parsed key-value pairs. + */ export function parseKeyValuePairs(text: string | string[]) { const res: Record = {} const chunks = arrayify(text) @@ -42,45 +64,26 @@ export function parseKeyValuePairs(text: string | string[]) { } /** - * Parse output of LLM similar to output of genaiscript def() function. - * - * Expect text to look something like this: - * - * Foo bar: - * ```js - * var x = 1 - * ... - * ``` - * - * Baz qux: - * ````` - * Also supported. - * ... - * ````` - * - * Returns a map, like this: - * - * { - * "Foo bar": "var x = 1\n...", - * "Baz qux": "Also supported.\n..." - * } - * - * Note that outside we can treat keys like "File some/thing.js" specially. + * Parse text to extract fenced code blocks and their labels. + * @param text - The text to parse. + * @returns An array of objects representing fenced code blocks. 
*/ export function extractFenced(text: string): Fenced[] { if (!text) return [] - let currLbl = "" - let currText = "" - let currLanguage = "" - let currArgs: Record = {} - let currFence = "" - const vars: Fenced[] = [] - const lines = text.split(/\r?\n/) + let currLbl = "" // Current label for the fenced block + let currText = "" // Content of the current fenced block + let currLanguage = "" // Programming language of the fenced block + let currArgs: Record = {} // Arguments parsed from the fence + let currFence = "" // Current fence delimiter + const vars: Fenced[] = [] // Array to store the fenced blocks + const lines = text.split(/\r?\n/) // Split text into lines + for (let i = 0; i < lines.length; ++i) { const line = lines[i] if (currFence) { + // Handling the end of a fenced block if (line.trimEnd() === currFence) { currFence = "" vars.push({ @@ -96,17 +99,19 @@ export function extractFenced(text: string): Fenced[] { } else { const fence = startFence(line) if (fence.fence && fence.args["file"]) { + // Labeled fence with file currLbl = "FILE " + fence.args["file"] currFence = fence.fence currLanguage = fence.language || "" currArgs = fence.args } else if (fence.fence) { - // unlabelled fence + // Unlabeled fence currLbl = "" currFence = fence.fence currLanguage = fence.language || "" currArgs = fence.args } else { + // Handling special case for labeled fences const start = startFence(lines[i + 1]) const m = /(\w+):\s+([^\s]+)/.exec(line) if (start.fence && line.endsWith(":")) { @@ -133,6 +138,7 @@ export function extractFenced(text: string): Fenced[] { } } + // Push the last collected text block if any if (currText != "") { vars.push({ label: currLbl, @@ -144,14 +150,20 @@ export function extractFenced(text: string): Fenced[] { return vars + /** + * Normalize content by removing unnecessary code fences. + * @param label - The label of the content. + * @param text - The content text. + * @returns The normalized text. 
+ */ function normalize(label: string, text: string) { - /** handles situtions like this: + /** handles situations like this: -````` file=problem1.py -```python -import re -... - */ + ````` file=problem1.py + ```python + import re + ... + */ if (/file=\w+\.\w+/.test(label)) { const m = /^\s*\`{3,}\w*\r?\n((.|\s)*)\r?\n\`{3,}\s*$/.exec(text) if (m) return m[1] @@ -161,6 +173,11 @@ import re } } +/** + * Find the first data fence with YAML or JSON content. + * @param fences - An array of fenced objects. + * @returns Parsed content or undefined if not found. + */ export function findFirstDataFence(fences: Fenced[]): any { const { content, language } = fences?.find( @@ -174,6 +191,11 @@ export function findFirstDataFence(fences: Fenced[]): any { return undefined } +/** + * Parse strings into key-value pairs and return them as an object. + * @param vars - Array of strings with key-value pairs. + * @returns An object with parsed key-value pairs or undefined if empty. + */ export function parseVars(vars: string[]) { if (!vars?.length) return undefined const res: Record = {} @@ -181,6 +203,11 @@ export function parseVars(vars: string[]) { return Object.freeze(res) } +/** + * Render fenced code blocks as formatted strings. + * @param vars - An array of fenced objects. + * @returns A string representing the formatted fenced blocks. + */ export function renderFencedVariables(vars: Fenced[]) { return vars .map( @@ -215,6 +242,12 @@ ${validation.error.split("\n").join("\n> ")}` .join("\n") } +/** + * Remove code fences from a fenced block for the specified language. + * @param text - The text containing the fenced block. + * @param language - The language used in the fence. + * @returns The text without fences. 
+ */ export function unfence(text: string, language: string) { if (!text) return text diff --git a/packages/core/src/file.ts b/packages/core/src/file.ts index 8374e92a42..fc02382628 100644 --- a/packages/core/src/file.ts +++ b/packages/core/src/file.ts @@ -1,3 +1,9 @@ +/** + * This module provides functions to handle file content resolution, rendering, + * and data URI conversion. It includes support for various file formats like + * PDF, DOCX, XLSX, and CSV. + */ + import { DOCXTryParse } from "./docx" import { readText } from "./fs" import { lookupMime } from "./mime" @@ -20,17 +26,29 @@ import { import { UrlAdapter, defaultUrlAdapters } from "./urlAdapters" import { tidyData } from "./tidy" +/** + * Resolves the content of a given file, attempting to fetch or parse it based on its type. + * @param file - The WorkspaceFile containing the file information. + * @param options - Optional TraceOptions for logging and tracing. + * @returns The updated WorkspaceFile with the resolved content. + */ export async function resolveFileContent( file: WorkspaceFile, options?: TraceOptions ) { const { trace } = options || {} const { filename } = file + + // If file content is already available or filename is missing, return the file as is. if (file.content) return file if (!filename) return file + + // Handle URL files if (HTTPS_REGEX.test(filename)) { let url = filename let adapter: UrlAdapter = undefined + + // Use URL adapters to modify the URL if needed for (const a of defaultUrlAdapters) { const newUrl = a.matcher(url) if (newUrl) { @@ -39,6 +57,7 @@ export async function resolveFileContent( break } } + trace?.item(`fetch ${url}`) const fetch = await createFetch() const resp = await fetch(url, { @@ -47,21 +66,31 @@ export async function resolveFileContent( }, }) trace?.itemValue(`status`, `${resp.status}, ${resp.statusText}`) + + // Set file content based on response and adapter type if (resp.ok) file.content = adapter?.contentType === "application/json" ? 
adapter.adapter(await resp.json()) : await resp.text() - } else if (PDF_REGEX.test(filename)) { + } + // Handle PDF files + else if (PDF_REGEX.test(filename)) { const { content } = await parsePdf(filename, options) file.content = content - } else if (DOCX_REGEX.test(filename)) { + } + // Handle DOCX files + else if (DOCX_REGEX.test(filename)) { file.content = await DOCXTryParse(filename, options) - } else if (XLSX_REGEX.test(filename)) { + } + // Handle XLSX files + else if (XLSX_REGEX.test(filename)) { const bytes = await host.readFile(filename) const sheets = await XLSXParse(bytes) file.content = JSON.stringify(sheets, null, 2) - } else { + } + // Handle other file types + else { const mime = lookupMime(filename) const isBinary = isBinaryMimeType(mime) if (!isBinary) file.content = await readText(filename) @@ -70,60 +99,91 @@ export async function resolveFileContent( return file } +/** + * Converts a string or WorkspaceFile into a consistent WorkspaceFile structure. + * @param fileOrFilename - The input which could be a filename string or a WorkspaceFile object. + * @returns A WorkspaceFile object. + */ export function toWorkspaceFile(fileOrFilename: string | WorkspaceFile) { return typeof fileOrFilename === "string" ? { filename: fileOrFilename } : fileOrFilename } +/** + * Resolves the contents of multiple files asynchronously. + * @param files - An array of WorkspaceFiles to process. + */ export async function resolveFileContents(files: WorkspaceFile[]) { for (const file of files) { await resolveFileContent(file) } } +/** + * Renders the content of a file into a markdown format if applicable (e.g., CSV or XLSX). + * @param file - The WorkspaceFile containing the file data. + * @param options - Options for tracing and data filtering. + * @returns An object with the filename and rendered content. 
+ */ export async function renderFileContent( file: WorkspaceFile, options: TraceOptions & DataFilter ) { const { filename, content } = file + + // Render CSV content if (content && CSV_REGEX.test(filename)) { let csv = CSVTryParse(content, options) if (csv) { csv = tidyData(csv, options) return { filename, content: CSVToMarkdown(csv, options) } } - } else if (content && XLSX_REGEX.test(filename)) { + } + // Render XLSX content + else if (content && XLSX_REGEX.test(filename)) { const sheets = JSON.parse(content) as WorkbookSheet[] const trimmed = sheets.length ? sheets - .map( - ({ name, rows }) => `## ${name} + .map( + ({ name, rows }) => `## ${name} ${CSVToMarkdown(tidyData(rows, options))} ` - ) - .join("\n") + ) + .join("\n") : CSVToMarkdown(tidyData(sheets[0].rows, options)) return { filename, content: trimmed } } return { ...file } } +/** + * Converts a file into a Data URI format. + * @param filename - The file name or URL to convert. + * @param options - Optional TraceOptions for fetching. + * @returns The Data URI string or undefined if the MIME type cannot be determined. 
+ */ export async function resolveFileDataUri( filename: string, options?: TraceOptions ) { let bytes: Uint8Array + + // Fetch file from URL if (/^https?:\/\//i.test(filename)) { const fetch = await createFetch(options) const resp = await fetch(filename) const buffer = await resp.arrayBuffer() bytes = new Uint8Array(buffer) - } else { + } + // Read file from local storage + else { bytes = new Uint8Array(await host.readFile(filename)) } + const mime = (await fileTypeFromBuffer(bytes))?.mime if (!mime) return undefined + const b64 = toBase64(bytes) return `data:${mime};base64,${b64}` } diff --git a/packages/core/src/fuzzsearch.ts b/packages/core/src/fuzzsearch.ts index aeed262e72..4d5f5763f0 100644 --- a/packages/core/src/fuzzsearch.ts +++ b/packages/core/src/fuzzsearch.ts @@ -2,34 +2,50 @@ import MiniSearch from "minisearch" import { resolveFileContent } from "./file" import { TraceOptions } from "./trace" +/** + * Performs a fuzzy search on a set of workspace files using a query. + * + * @param query - The search query string. + * @param files - An array of WorkspaceFile objects to search through. + * @param options - Optional FuzzSearch and Trace options, including a limit on top results. + * @returns A promise that resolves to an array of WorkspaceFileWithScore, containing + * the filename, content, and search score. 
+ */ export async function fuzzSearch( query: string, files: WorkspaceFile[], options?: FuzzSearchOptions & TraceOptions ): Promise { + // Destructure options to extract trace and topK, with defaulting to an empty object const { trace, topK, ...otherOptions } = options || {} - // load all files + + // Load the content for all provided files asynchronously for (const file of files) await resolveFileContent(file) - // create database + // Initialize the MiniSearch instance with specified fields and options const miniSearch = new MiniSearch({ - idField: "filename", - fields: ["content"], - storeFields: ["content"], - searchOptions: otherOptions, + idField: "filename", // Unique identifier for documents + fields: ["content"], // Fields to index for searching + storeFields: ["content"], // Fields to store in results + searchOptions: otherOptions, // Additional search options }) - // Add documents to the index + + // Add all files with content to the MiniSearch index await miniSearch.addAllAsync(files.filter((f) => !!f.content)) - // Search for documents: + // Perform search using the provided query let results = miniSearch.search(query) + + // Limit results to top K if specified if (topK > 0) results = results.slice(0, topK) + + // Map search results to WorkspaceFileWithScore structure return results.map( (r) => { - filename: r.id, - content: r.content, - score: r.score, + filename: r.id, // Map ID to filename + content: r.content, // Map content from search result + score: r.score, // Include the relevance score } ) } diff --git a/packages/core/src/generation.ts b/packages/core/src/generation.ts index ecb4cd665c..a8f47b292a 100644 --- a/packages/core/src/generation.ts +++ b/packages/core/src/generation.ts @@ -1,93 +1,100 @@ +// Import necessary modules and interfaces import { CancellationToken } from "./cancellation" import { LanguageModel } from "./chat" import { ChatCompletionMessageParam, ChatCompletionsOptions } from "./chattypes" import { MarkdownTrace } from 
"./trace" +// Represents a code fragment with associated files export interface Fragment { - files: string[] + files: string[] // Array of file paths or names } +// Interface for the result of a generation process export interface GenerationResult extends GenerationOutput { /** - * The env variables sent to the prompt + * The environment variables passed to the prompt */ vars: Partial /** - * Expanded prompt text + * Expanded prompt text composed of multiple messages */ messages: ChatCompletionMessageParam[] /** - * Zero or more edits to apply. + * Edits to apply, if any */ edits: Edits[] /** - * Parsed source annotations + * Source annotations parsed as diagnostics */ annotations: Diagnostic[] /** - * ChangeLog sections + * Sections of the ChangeLog */ changelogs: string[] /** - * Error message if any + * Error message or object, if any error occurred */ error?: unknown /** - * Run status + * Status of the generation process (success, error, or cancelled) */ status: GenerationStatus /** - * Status message if any + * Additional status information or message */ statusText?: string /** - * LLM completion status + * Completion status from the language model */ finishReason?: string /** - * Run label if provided + * Optional label for the run */ label?: string /** - * GenAIScript version + * Version of the GenAIScript used */ version: string } +// Interface to hold statistics related to the generation process export interface GenerationStats { - toolCalls: number - repairs: number - turns: number + toolCalls: number // Number of tool invocations + repairs: number // Number of repairs made + turns: number // Number of turns in the interaction } +// Type representing possible statuses of generation export type GenerationStatus = "success" | "error" | "cancelled" | undefined +// Options for configuring the generation process, extending multiple other options export interface GenerationOptions extends ChatCompletionsOptions, ModelOptions, EmbeddingsModelOptions, 
ScriptRuntimeOptions { - inner: boolean - cancellationToken?: CancellationToken - infoCb?: (partialResponse: { text: string }) => void - trace: MarkdownTrace - maxCachedTemperature?: number - maxCachedTopP?: number - skipLLM?: boolean - label?: string + + inner: boolean // Indicates if the process is an inner operation + cancellationToken?: CancellationToken // Token to cancel the operation + infoCb?: (partialResponse: { text: string }) => void // Callback for providing partial responses + trace: MarkdownTrace // Trace information for debugging or logging + maxCachedTemperature?: number // Maximum temperature for caching purposes + maxCachedTopP?: number // Maximum top-p value for caching + skipLLM?: boolean // Flag to skip LLM processing + label?: string // Optional label for the operation cliInfo?: { - files: string[] + files: string[] // Information about files in the CLI context } - vars?: PromptParameters - stats: GenerationStats + vars?: PromptParameters // Variables for prompt customization + stats: GenerationStats // Statistics of the generation } diff --git a/packages/core/src/gitignore.ts b/packages/core/src/gitignore.ts index 52f63a254a..0022762ee1 100644 --- a/packages/core/src/gitignore.ts +++ b/packages/core/src/gitignore.ts @@ -1,9 +1,23 @@ +// Import the 'ignore' library to handle .gitignore file parsing and filtering import ignorer from "ignore" +/** + * Filters a list of files based on the patterns specified in a .gitignore string. + * Utilizes the 'ignore' library to determine which files should be excluded. + * + * @param gitignore - The content of a .gitignore file as a string. + * If this is empty or null, no filtering occurs. + * @param files - An array of file paths to be filtered. + * @returns An array of files that are not ignored according to the .gitignore patterns. 
+ */ export async function filterGitIgnore(gitignore: string, files: string[]) { + // Check if the .gitignore content is provided if (gitignore) { + // Create an ignorer instance and add the .gitignore patterns to it const ig = ignorer().add(gitignore) + // Filter the files array to include only those not ignored files = ig.filter(files) } + // Return the filtered list of files return files } diff --git a/packages/core/src/glob.ts b/packages/core/src/glob.ts index 763c374169..ae4f5dac3b 100644 --- a/packages/core/src/glob.ts +++ b/packages/core/src/glob.ts @@ -1,11 +1,24 @@ +// Import the 'minimatch' library for matching file paths against glob patterns import { minimatch } from "minimatch" + +// Import the 'arrayify' utility function from the local 'util' module import { arrayify } from "./util" +/** + * Checks if a given filename matches any of the provided glob patterns. + * + * @param filename - The name of the file to test against the patterns. + * @param patterns - A single glob pattern or an array of glob patterns to match against. + * @returns A boolean indicating if the filename matches any of the patterns. 
+ */ export function isGlobMatch(filename: string, patterns: string | string[]) { + // Convert patterns to an array and check if any pattern matches the filename return arrayify(patterns).some((pattern) => { + // Perform the match using minimatch with specific options const match = minimatch(filename, pattern, { + // Option to handle Windows paths correctly by preventing escape character issues windowsPathsNoEscape: true, }) - return match + return match // Return true if a match is found }) } diff --git a/packages/core/src/globals.ts b/packages/core/src/globals.ts index c423cd1652..372b8917d7 100644 --- a/packages/core/src/globals.ts +++ b/packages/core/src/globals.ts @@ -1,3 +1,4 @@ +// Import various parsing and stringifying utilities import { YAMLParse, YAMLStringify } from "./yaml" import { CSVParse, CSVToMarkdown } from "./csv" import { INIParse, INIStringify } from "./ini" @@ -14,32 +15,51 @@ import { createFetch } from "./fetch" import { readText } from "./fs" import { logVerbose } from "./util" +/** + * Resolves the global context depending on the environment. + * @returns {any} The global object depending on the current environment. + * @throws Will throw an error if the global context cannot be determined. + */ export function resolveGlobal(): any { if (typeof window !== "undefined") return window // Browser environment - else if (typeof self !== "undefined") return self + else if (typeof self !== "undefined") return self // Web worker environment else if (typeof global !== "undefined") return global // Node.js environment throw new Error("Could not find global") } +/** + * Installs global utilities for various data formats and operations. + * This function sets up global objects with frozen utilities for parsing + * and stringifying different data formats, as well as other functionalities. 
+ */ export function installGlobals() { const glb = resolveGlobal() + // Freeze YAML utilities to prevent modification glb.YAML = Object.freeze({ stringify: YAMLStringify, parse: YAMLParse, }) + + // Freeze CSV utilities glb.CSV = Object.freeze({ parse: CSVParse, markdownify: CSVToMarkdown, }) + + // Freeze INI utilities glb.INI = Object.freeze({ parse: INIParse, stringify: INIStringify, }) + + // Freeze XML utilities glb.XML = Object.freeze({ parse: XMLParse, }) + + // Freeze Markdown utilities with frontmatter operations glb.MD = Object.freeze({ frontmatter: (text, format) => frontmatterTryParse(text, { format })?.value ?? {}, @@ -47,13 +67,17 @@ export function installGlobals() { updateFrontmatter: (text, frontmatter, format): string => updateFrontmatter(text, frontmatter, { format }), }) + + // Freeze JSONL utilities glb.JSONL = Object.freeze({ parse: JSONLTryParse, stringify: JSONLStringify, }) + + // Freeze AICI utilities with a generation function glb.AICI = Object.freeze({ gen: (options: AICIGenOptions) => { - // validate options + // Validate options return { type: "aici", name: "gen", @@ -61,14 +85,30 @@ export function installGlobals() { } }, }) + + // Freeze HTML utilities glb.HTML = Object.freeze({ convertTablesToJSON: HTMLTablesToJSON, convertToMarkdown: HTMLToMarkdown, convertToText: HTMLToText, }) + + /** + * Function to trigger cancellation with an error. + * Throws a CancelError with a specified reason or a default message. + * @param {string} [reason] - Optional reason for cancellation. + */ glb.cancel = (reason?: string) => { throw new CancelError(reason || "user cancelled") } + + /** + * Asynchronous function to fetch text from a URL or file. + * Handles both HTTP(S) URLs and local workspace files. + * @param {string | WorkspaceFile} urlOrFile - URL or file descriptor. + * @param {FetchTextOptions} [fetchOptions] - Options for fetching. + * @returns {Promise<{ ok: boolean, status: number, text: string, file: WorkspaceFile }>} Fetch result. 
+ */ glb.fetchText = async ( urlOrFile: string | WorkspaceFile, fetchOptions?: FetchTextOptions diff --git a/packages/core/src/html.ts b/packages/core/src/html.ts index 43a74d43d0..ef32e6f0c2 100644 --- a/packages/core/src/html.ts +++ b/packages/core/src/html.ts @@ -1,39 +1,66 @@ -import { convert as convertToText } from "html-to-text" -import { TraceOptions } from "./trace" -import Turndown from "turndown" -import { tabletojson } from "tabletojson" +// This module provides functions to convert HTML content into different formats such as JSON, plain text, and Markdown. +// It imports necessary libraries for HTML conversion and logging purposes. +import { convert as convertToText } from "html-to-text" // Import the convert function from html-to-text library + +import { TraceOptions } from "./trace" // Import TraceOptions for optional logging features + +import Turndown from "turndown" // Import Turndown library for HTML to Markdown conversion + +import { tabletojson } from "tabletojson" // Import tabletojson for converting HTML tables to JSON + +/** + * Converts HTML tables to JSON objects. + * + * @param html - The HTML content containing tables. + * @param options - Optional parameters for conversion. + * @returns A 2D array of objects representing the table data. + */ export function HTMLTablesToJSON(html: string, options?: {}): object[][] { - const res = tabletojson.convert(html, options) + const res = tabletojson.convert(html, options) // Convert HTML tables to JSON using tabletojson library return res } +/** + * Converts HTML content to plain text. + * + * @param html - The HTML content to convert. + * @param options - Optional parameters including tracing options. + * @returns The plain text representation of the HTML. 
+ */ export function HTMLToText( html: string, options?: HTMLToTextOptions & TraceOptions ): string { - if (!html) return "" + if (!html) return "" // Return empty string if no HTML content is provided - const { trace } = options || {} + const { trace } = options || {} // Extract trace for logging if available try { - const text = convertToText(html, options) + const text = convertToText(html, options) // Perform conversion to plain text return text } catch (e) { - trace?.error("HTML conversion failed", e) + trace?.error("HTML conversion failed", e) // Log error if conversion fails return undefined } } +/** + * Converts HTML content to Markdown format. + * + * @param html - The HTML content to convert. + * @param options - Optional tracing parameters. + * @returns The Markdown representation of the HTML. + */ export function HTMLToMarkdown(html: string, options?: TraceOptions): string { - if (!html) return html - const { trace } = options || {} + if (!html) return html // Return original content if no HTML is provided + const { trace } = options || {} // Extract trace for logging if available try { - const res = new Turndown().turndown(html) + const res = new Turndown().turndown(html) // Use Turndown library to convert HTML to Markdown return res } catch (e) { - trace?.error("HTML conversion failed", e) + trace?.error("HTML conversion failed", e) // Log error if conversion fails return undefined } } diff --git a/packages/core/src/image.ts b/packages/core/src/image.ts index 2281431adc..f6a9e50c52 100644 --- a/packages/core/src/image.ts +++ b/packages/core/src/image.ts @@ -1,14 +1,26 @@ +// Import necessary functions and types from other modules import { resolveFileDataUri } from "./file" import { TraceOptions } from "./trace" +/** + * Asynchronously encodes an image for use in LLMs (Large Language Models). + * + * @param url - The source of the image, which can be a URL string, Buffer, or Blob.
+ * @param options - Configuration options that include image definitions and trace options. + * @returns A promise that resolves to an image encoded as a data URI. + */ export async function imageEncodeForLLM( url: string | Buffer | Blob, options: DefImagesOptions & TraceOptions ) { + // Dynamically import the Jimp library and its alignment enums const { Jimp, HorizontalAlign, VerticalAlign } = await import("jimp") const { autoCrop, maxHeight, maxWidth } = options + + // If the URL is a string, resolve it to a data URI if (typeof url === "string") url = await resolveFileDataUri(url) - // If the image is already a string and we don't need to do any processing, return it + + // Return the URL if no image processing is required if ( typeof url === "string" && !autoCrop && @@ -17,19 +29,36 @@ export async function imageEncodeForLLM( ) return url + // Convert Blob to Buffer if necessary if (url instanceof Blob) url = Buffer.from(await url.arrayBuffer()) + + // Read the image using Jimp const img = await Jimp.read(url) + + // Auto-crop the image if required by options if (autoCrop) await img.autocrop() + + // Contain the image within specified max dimensions if provided if (options.maxWidth ?? options.maxHeight) { await img.contain({ - w: img.width > maxWidth ? maxWidth : img.width, - h: img.height > maxHeight ? maxHeight : img.height, - align: HorizontalAlign.CENTER | VerticalAlign.MIDDLE, + w: img.width > maxWidth ? maxWidth : img.width, // Determine target width + h: img.height > maxHeight ? maxHeight : img.height, // Determine target height + align: HorizontalAlign.CENTER | VerticalAlign.MIDDLE, // Center alignment }) } + + // Determine the output MIME type, defaulting to image/jpeg const outputMime = img.mime ?? 
("image/jpeg" as any) + + // Convert the processed image to a Buffer const buf = await img.getBuffer(outputMime) + + // Convert the Buffer to a Base64 string const b64 = await buf.toString("base64") + + // Construct the data URI from the Base64 string const imageDataUri = `data:${outputMime};base64,${b64}` + + // Return the encoded image data URI return imageDataUri } diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 854bacedce..e99b08a305 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -1 +1 @@ -// imported by file +// imported by file \ No newline at end of file diff --git a/packages/core/src/ini.ts b/packages/core/src/ini.ts index 757aaafe25..536704113b 100644 --- a/packages/core/src/ini.ts +++ b/packages/core/src/ini.ts @@ -1,21 +1,48 @@ +// This module provides functions to parse and stringify INI formatted strings, +// with error handling and utility support for cleaning up the input content. + +// Import the parse and stringify functions from the "ini" library import { parse, stringify } from "ini" + +// Import a utility function to log errors import { logError } from "./util" + +// Import a custom function to clean up INI content by removing any fencing import { unfence } from "./fence" +/** + * Parses an INI formatted string after cleaning it up. + * + * @param text - The INI formatted string to parse + * @returns The parsed object + */ export function INIParse(text: string) { - const cleaned = unfence(text, "ini") - return parse(cleaned) + const cleaned = unfence(text, "ini") // Remove any fencing from the text + return parse(cleaned) // Parse the cleaned text into an object } +/** + * Tries to parse an INI formatted string, logging any errors and returning a default value if parsing fails.
+ * + * @param text - The INI formatted string to parse + * @param defaultValue - The value to return if parsing fails + * @returns The parsed object or the default value + */ export function INITryParse(text: string, defaultValue?: any) { try { - return INIParse(text) + return INIParse(text) // Attempt to parse the text } catch (e) { - logError(e) - return defaultValue + logError(e) // Log any parsing errors + return defaultValue // Return the default value if parsing fails } } +/** + * Converts an object into an INI formatted string. + * + * @param o - The object to stringify + * @returns The INI formatted string + */ export function INIStringify(o: any) { - return stringify(o) + return stringify(o) // Convert the object to an INI formatted string } diff --git a/packages/core/src/jinja.ts b/packages/core/src/jinja.ts index 2c47c7c2bf..8eceb4850b 100644 --- a/packages/core/src/jinja.ts +++ b/packages/core/src/jinja.ts @@ -1,10 +1,27 @@ +// Import the Template class from the @huggingface/jinja package import { Template } from "@huggingface/jinja" +/** + * Renders a string template using the Jinja templating engine. + * + * This function takes a Jinja template string and an object containing + * key-value pairs. It uses the Jinja engine to replace placeholders + * in the template with the corresponding values from the object. + * + * @param template - The string containing Jinja template syntax. + * @param values - An object with key-value pairs to replace in the template. + * @returns The rendered string with values substituted. 
+ */ export function jinjaRender( template: string, values: Record ): string { + // Create a new Template instance with the provided template string const t = new Template(template) + + // Render the template using the provided values const res = t.render(values) + + // Return the rendered string return res } diff --git a/packages/core/src/json5.ts b/packages/core/src/json5.ts index 58dfcde752..1bc8e54b63 100644 --- a/packages/core/src/json5.ts +++ b/packages/core/src/json5.ts @@ -1,17 +1,48 @@ /* eslint-disable curly */ + +/** + * This module provides functions for parsing and repairing JSON5 strings, + * as well as utilities for handling fenced code blocks. + * + * Tags: JSON5, Parsing, Repair, Fenced Blocks + */ + +// Importing parse and stringify functions from the json5 library. import { parse, stringify } from "json5" + +// Importing jsonrepair function for fixing broken JSON strings. import { jsonrepair } from "jsonrepair" + +// Importing unfence function to handle fenced code blocks. import { unfence } from "./fence" +/** + * Checks if a given text is a JSON object or array. + * @param text - The string to check. + * @returns True if the text starts with '{' or '[', indicating a JSON object or array. + */ export function isJSONObjectOrArray(text: string) { + // Tests if the input string starts with '{' or '[' after removing any leading whitespace. return /^\s*[\{\[]/.test(text) } +/** + * Repairs a potentially broken JSON string using jsonrepair. + * @param text - The JSON string to repair. + * @returns The repaired JSON string. + */ export function JSONrepair(text: string) { + // Uses jsonrepair to fix any issues in the JSON string. const repaired = jsonrepair(text) return repaired } +/** + * Parses a JSON5 string with optional error handling and repair. + * @param text - The JSON5 string to parse. + * @param options - Parsing options including default value, error handling, and repair. 
+ * @returns The parsed object, default value, or undefined/null based on options. + */ export function JSON5parse( text: string, options?: { @@ -21,26 +52,37 @@ export function JSON5parse( } ): T | undefined | null { try { + // Remove fencing if present. text = unfence(text, "json") if (options?.repair) { try { + // Attempt parsing without repairing first. const res = parse(text) return res as T } catch { + // Repair and parse if initial parsing fails. const repaired = JSONrepair(text) const res = parse(repaired) return (res as T) ?? options?.defaultValue } } else { + // Parse without repair if repair option is false. const res = parse(text) return res as T } } catch (e) { + // Return default value if error occurs and errorAsDefaultValue is true. if (options?.errorAsDefaultValue) return options?.defaultValue throw e } } +/** + * Tries to parse a JSON5 string and returns a default value if parsing fails. + * @param text - The JSON5 string to parse. + * @param defaultValue - The value to return if parsing fails. + * @returns The parsed object or the default value. + */ // eslint-disable-next-line @typescript-eslint/naming-convention export function JSON5TryParse( text: string | undefined | null, @@ -48,6 +90,7 @@ export function JSON5TryParse( ): T | undefined | null { if (text === undefined) return undefined if (text === null) return null + // Uses JSON5parse with repair option and errorAsDefaultValue set to true. return JSON5parse(text, { defaultValue, errorAsDefaultValue: true, @@ -55,12 +98,21 @@ export function JSON5TryParse( }) } +// Regular expressions to match the start and end of a fenced JSON block. const startRx = /^\s*\`\`\`json\s/ const endRx = /\`\`\`\s*$/ + +/** + * Attempts to parse a JSON-like string, removing any fencing, and returns the parsed object. + * @param s - The string to parse. + * @returns The parsed value, or undefined when the text cannot be parsed even after repair; a null or undefined input is returned as is.
+ */ export function JSONLLMTryParse(s: string): any { if (s === undefined || s === null) return s + // Removes any fencing and then tries to parse the string. const cleaned = unfence(s, "json") return JSON5TryParse(cleaned) } +// Export the JSON5 stringify function directly for convenience. export const JSON5Stringify = stringify diff --git a/packages/core/src/liner.ts b/packages/core/src/liner.ts index 8e81ff6cb1..fe8eb7cb48 100644 --- a/packages/core/src/liner.ts +++ b/packages/core/src/liner.ts @@ -1,26 +1,41 @@ +// This module provides functions to add and remove line numbers from text. +// It includes special handling for "diff" formatted text. + import { llmifyDiff } from "./diff" /** - * @param text Adds 1-based line numbers - * @returns + * Adds 1-based line numbers to each line of a given text. + * If the language is "diff", it processes the text using llmifyDiff. + * + * @param text - The text to which line numbers will be added. + * @param language - Optional parameter to specify the text format. Special handling for "diff". + * @returns The text with line numbers added, or processed diff text if applicable. */ export function addLineNumbers(text: string, language?: string) { if (language === "diff") { - const diffed = llmifyDiff(text) - if (diffed !== undefined) return diffed + const diffed = llmifyDiff(text) // Process the text with a special function for diffs + if (diffed !== undefined) return diffed // Return processed text if diff handling was successful } return text - .split("\n") - .map((line, i) => `[${i + 1}] ${line}`) - .join("\n") + .split("\n") // Split text into lines + .map((line, i) => `[${i + 1}] ${line}`) // Add line numbers in the format "[line_number] " + .join("\n") // Join lines back into a single string } +/** + * Removes line numbers from each line of a given text. + * Assumes line numbers are in the format "[number] ". + * + * @param text - The text from which line numbers will be removed. 
+ * @returns The text without line numbers, or the original text if no line numbers are found. + */ export function removeLineNumbers(text: string) { - const rx = /^\[\d+\] / - const lines = text.split("\n") + const rx = /^\[\d+\] / // Regular expression to match line numbers in the format "[number] " + const lines = text.split("\n") // Split text into lines - if (!lines.slice(0, 10).every((line) => rx.test(line))) return text + // Check the first 10 lines for the presence of line numbers + if (!lines.slice(0, 10).every((line) => rx.test(line))) return text // Return original text if not all lines have numbers - return lines.map((line) => line.replace(rx, "")).join("\n") + return lines.map((line) => line.replace(rx, "")).join("\n") // Remove line numbers and join lines back } diff --git a/packages/core/src/markdown.ts b/packages/core/src/markdown.ts index a7597fe143..08de8eef5c 100644 --- a/packages/core/src/markdown.ts +++ b/packages/core/src/markdown.ts @@ -1,31 +1,65 @@ +// This module provides utilities for handling markdown, including prettifying, cleaning, +// generating markdown structures, and parsing trace trees. It supports operations like +// converting annotations to markdown, wrapping text in fences, creating links and details blocks, +// and working with trace trees. + import { convertAnnotationsToMarkdown } from "./annotations" import { randomHex } from "./crypto" import { extractFenced } from "./fence" import { trimNewlines } from "./util" +/** + * Prettifies markdown content by converting annotations and cleaning excessive newlines. + * @param md - The markdown string to be prettified. + * @returns The prettified markdown string. 
+ */ export function prettifyMarkdown(md: string) { let res = md - res = convertAnnotationsToMarkdown(res) - res = cleanMarkdown(res) + res = convertAnnotationsToMarkdown(res) // Convert annotations to markdown format + res = cleanMarkdown(res) // Clean up excessive newlines return res } +/** + * Cleans markdown by reducing multiple consecutive newlines to two. + * @param res - The string to be cleaned. + * @returns The cleaned string. + */ export function cleanMarkdown(res: string): string { return res?.replace(/(\r?\n){3,}/g, "\n\n") } +/** + * Wraps text in a markdown code fence, handling nested fences by extending the fence. + * @param t - The text to be wrapped in a code fence. + * @param contentType - The type of content, defaulting to "markdown". + * @returns The text wrapped in a markdown code fence. + */ export function fenceMD(t: string, contentType?: string) { if (t === undefined) return undefined if (!contentType) contentType = "markdown" let f = "```" - while (t.includes(f) && f.length < 8) f += "`" + while (t.includes(f) && f.length < 8) f += "`" // Extend fence if necessary return `\n${f}${contentType} wrap\n${trimNewlines(t)}\n${f}\n` } +/** + * Creates a markdown link if href is provided, otherwise returns plain text. + * @param text - The link text. + * @param href - The URL, if any. + * @returns A markdown link or plain text. + */ export function link(text: string, href: string) { return href ? `[${text}](${href})` : text } +/** + * Generates a markdown details block with an optional open state. + * @param summary - The summary text for the details block. + * @param body - The content inside the details block. + * @param open - Whether the details block should be open by default. + * @returns A string representing a markdown details block. 
+ */ export function details(summary: string, body: string, open?: boolean) { return `\n ${summary} @@ -35,40 +69,51 @@ ${body} \n` } +// Interface representing an item node in a trace tree export interface ItemNode { - id?: string + id?: string // Optional unique identifier type: "item" - label: string - value: string + label: string // Label for the item + value: string // Value of the item } +// Interface representing a details node containing trace nodes export interface DetailsNode { - id?: string + id?: string // Optional unique identifier type: "details" - label: string - content: TraceNode[] + label: string // Label for the details node + content: TraceNode[] // Array of trace nodes contained } +// Type representing possible trace nodes which can be strings, details nodes, or item nodes export type TraceNode = string | DetailsNode | ItemNode +// Interface representing a trace tree structure export interface TraceTree { - root: DetailsNode - nodes: Record + root: DetailsNode // Root node of the trace tree + nodes: Record // Dictionary of nodes by their IDs } +/** + * Parses a string into a TraceTree structure, creating nodes for details and items. + * @param text - The text representing the trace tree. + * @returns The parsed TraceTree structure. 
+ */ export function parseTraceTree(text: string): TraceTree { const nodes: Record = {} const stack: DetailsNode[] = [ - { type: "details", label: "root", content: [] }, + { type: "details", label: "root", content: [] }, // Initialize root node ] let hash = 0 const lines = (text || "").split("\n") for (let i = 0; i < lines.length; ++i) { const line = lines[i] + // Calculate hash for line for (let j = 0; j < line.length; j++) { hash = (hash << 5) - hash + line.charCodeAt(j) hash |= 0 } + // Detect start of a details block const startDetails = /^\s*]*>\s*$/m.exec(line) if (startDetails) { const parent = stack.at(-1) @@ -83,17 +128,20 @@ export function parseTraceTree(text: string): TraceTree { nodes[current.id] = current continue } + // Detect end of a details block const endDetails = /^\s*<\/details>\s*$/m.exec(lines[i]) if (endDetails) { stack.pop() continue } + // Detect summary tag and set current label const summary = /^\s*(.*)<\/summary>\s*$/m.exec(lines[i]) if (summary) { const current = stack.at(-1) current.label = summary[1] continue } + // Handle multi-line summaries const startSummary = /^\s*\s*$/m.exec(lines[i]) if (startSummary) { let j = ++i @@ -107,6 +155,7 @@ export function parseTraceTree(text: string): TraceTree { i = j continue } + // Detect item node const item = /^\s*-\s+([^:]+): (.+)$/m.exec(lines[i]) if (item) { const current = stack.at(-1) @@ -123,6 +172,7 @@ export function parseTraceTree(text: string): TraceTree { const contents = stack.at(-1).content const content = lines[i] const lastContent = contents.at(-1) + // Append to last content if it's a string, otherwise add new content if (typeof lastContent === "string") contents[contents.length - 1] = lastContent + "\n" + content else contents.push(content) @@ -131,9 +181,15 @@ export function parseTraceTree(text: string): TraceTree { return { root: stack[0], nodes } } +/** + * Renders a TraceNode into a markdown string. + * @param node - The trace node to render. 
+ * @returns A string representing the markdown of the node. + */ export function renderTraceTree(node: TraceNode): string { if (!node) return "" if (typeof node === "string") { + // Extract fenced markdown content if (/^\s*\`\`\`markdown/.test(node) && /\`\`\`\s*$/.test(node)) { const fences = extractFenced(node) if (fences?.length === 1) return fences[0].content diff --git a/packages/core/src/math.ts b/packages/core/src/math.ts index 7232952905..84e30aa556 100644 --- a/packages/core/src/math.ts +++ b/packages/core/src/math.ts @@ -1,17 +1,41 @@ +// Importing TraceOptions from the local "trace" module import { TraceOptions } from "./trace" +/** + * Asynchronously evaluates a mathematical expression. + * + * @param expr - The string expression to evaluate + * @param options - Optional parameters including: + * - defaultValue: A fallback number if evaluation fails or expression is empty + * - trace: A tracing object for logging errors + * + * @returns A Promise that resolves to the evaluation result which can be: + * - a number if evaluation is successful + * - the default value if specified and the expression is empty + * - undefined if evaluation fails + */ export async function MathTryEvaluate( expr: string, options?: { defaultValue?: number } & TraceOptions ): Promise { + // Destructuring options with defaults const { trace, defaultValue } = options || {} + try { + // Return defaultValue if expression is empty if (!expr) return defaultValue + + // Dynamically import the 'evaluate' function from 'mathjs' const { evaluate } = await import("mathjs") + + // Evaluate the expression and return the result const res = evaluate(expr) return res } catch (e) { + // Log an error if tracing is enabled trace?.error(e) + + // Return undefined if evaluation fails return undefined } } diff --git a/packages/core/src/mime.ts b/packages/core/src/mime.ts index 9939e94c1c..5f2ebc144d 100644 --- a/packages/core/src/mime.ts +++ b/packages/core/src/mime.ts @@ -1,14 +1,36 @@ +// Import 
the 'lookup' function from the 'mime-types' library and rename it to 'mimeTypesLookup' import { lookup as mimeTypesLookup } from "mime-types" +// Define constant MIME types for specific programming languages export const TYPESCRIPT_MIME_TYPE = "text/x-typescript" export const CSHARP_MIME_TYPE = "text/x-csharp" export const PYTHON_MIME_TYPE = "text/x-python" +// Define a function to look up the MIME type for a given filename +/** + * Looks up the MIME type for a given filename. + * + * @param filename - The name of the file whose MIME type is to be determined. + * @returns The corresponding MIME type string, or an empty string if not found. + * + * The function first checks for known file extensions for TypeScript, C#, Python, and Astro files. + * If none match, it uses 'mimeTypesLookup' from the 'mime-types' library to find the MIME type. + */ export function lookupMime(filename: string) { - if (!filename) return "" - if (/\.ts$/i.test(filename)) return TYPESCRIPT_MIME_TYPE - if (/\.cs$/i.test(filename)) return CSHARP_MIME_TYPE - if (/\.py$/i.test(filename)) return PYTHON_MIME_TYPE - if (/\.astro$/i.test(filename)) return "text/x-astro" - return mimeTypesLookup(filename) || "" + if (!filename) return "" // Return an empty string if the filename is falsy + + // Check for TypeScript file extension + if (/\.ts$/i.test(filename)) return TYPESCRIPT_MIME_TYPE + + // Check for C# file extension + if (/\.cs$/i.test(filename)) return CSHARP_MIME_TYPE + + // Check for Python file extension + if (/\.py$/i.test(filename)) return PYTHON_MIME_TYPE + + // Check for Astro file extension + if (/\.astro$/i.test(filename)) return "text/x-astro" + + // Default to lookup from 'mime-types' or return empty string + return mimeTypesLookup(filename) || "" } diff --git a/packages/core/src/ollama.ts b/packages/core/src/ollama.ts index c224d4114f..8b5c54274e 100644 --- a/packages/core/src/ollama.ts +++ b/packages/core/src/ollama.ts @@ -1,3 +1,4 @@ +// Import necessary modules and types for 
handling chat completions and model management import { ChatCompletionHandler, LanguageModel, LanguageModelInfo } from "./chat" import { MODEL_PROVIDER_OLLAMA } from "./constants" import { isRequestError } from "./error" @@ -6,6 +7,18 @@ import { parseModelIdentifier } from "./models" import { OpenAIChatCompletion } from "./openai" import { LanguageModelConfiguration, host } from "./host" +/** + * Handles chat completion requests using the Ollama model. + * Tries to complete the request using the OpenAIChatCompletion function. + * If the model is not found locally, it attempts to pull the model from a remote source. + * + * @param req - The request object containing model information. + * @param cfg - The configuration for the language model. + * @param options - Additional options for the request. + * @param trace - A trace object for logging purposes. + * @returns The result of the chat completion. + * @throws Will throw an error if the model cannot be pulled or any other request error occurs. 
+ */ export const OllamaCompletion: ChatCompletionHandler = async ( req, cfg, @@ -13,18 +26,20 @@ export const OllamaCompletion: ChatCompletionHandler = async ( trace ) => { try { + // Attempt to complete the request using OpenAIChatCompletion return await OpenAIChatCompletion(req, cfg, options, trace) } catch (e) { if (isRequestError(e)) { const { model } = parseModelIdentifier(req.model) + // If model is not found, try pulling it from the remote source if ( e.status === 404 && e.body?.type === "api_error" && e.body?.message?.includes(`model '${model}' not found`) ) { trace.log(`model ${model} not found, trying to pull it`) - // model not installed locally - // trim v1 + + // Model not installed locally, initiate fetch to pull model const fetch = await createFetch({ trace }) const res = await fetch(cfg.base.replace("/v1", "/api/pull"), { method: "POST", @@ -36,22 +51,34 @@ export const OllamaCompletion: ChatCompletionHandler = async ( ) } trace.log(`model pulled`) + // Retry the completion request after pulling the model return await OpenAIChatCompletion(req, cfg, options, trace) } } + // Rethrow any other errors encountered throw e } } +/** + * Lists available models for the Ollama language model configuration. + * Fetches model data from a remote endpoint and formats it into a LanguageModelInfo array. + * + * @param cfg - The configuration for the language model. + * @returns A promise that resolves to an array of LanguageModelInfo objects. 
+ */ async function listModels( cfg: LanguageModelConfiguration ): Promise { + // Create a fetch instance to make HTTP requests const fetch = await createFetch() + // Fetch the list of models from the remote API const res = await fetch(cfg.base.replace("/v1", "/api/tags"), { method: "GET", }) if (res.status !== 200) return [] + // Parse and format the response into LanguageModelInfo objects const { models } = (await res.json()) as { models: { name: string @@ -72,6 +99,7 @@ async function listModels( ) } +// Define the Ollama model with its completion handler and model listing function export const OllamaModel = Object.freeze({ completer: OllamaCompletion, id: MODEL_PROVIDER_OLLAMA, diff --git a/packages/core/src/parser.ts b/packages/core/src/parser.ts index 3b66d9793f..6e021b8c4c 100644 --- a/packages/core/src/parser.ts +++ b/packages/core/src/parser.ts @@ -1,27 +1,41 @@ -import { strcmp } from "./util" -import { Project, PromptScript } from "./ast" -import { defaultPrompts } from "./default_prompts" -import { parsePromptScript } from "./template" -import { readText } from "./fs" +// Importing utility functions and constants from other files +import { strcmp } from "./util" // String comparison function +import { Project, PromptScript } from "./ast" // Class imports +import { defaultPrompts } from "./default_prompts" // Default prompt data +import { parsePromptScript } from "./template" // Function to parse scripts +import { readText } from "./fs" // Function to read text from a file import { BUILTIN_PREFIX, DOCX_MIME_TYPE, PDF_MIME_TYPE, XLSX_MIME_TYPE, -} from "./constants" +} from "./constants" // Constants for MIME types and prefixes +/** + * Converts a string to a character position represented as [row, column]. + * Utilizes newline characters to determine row and column. + * @param str - The input string to convert. + * @returns CharPosition - The position as a tuple of row and column. 
+ */ export function stringToPos(str: string): CharPosition { - if (!str) return [0, 0] + if (!str) return [0, 0] // Return default position if string is empty return [str.replace(/[^\n]/g, "").length, str.replace(/[^]*\n/, "").length] } +/** + * Determines if a given MIME type is binary. + * Checks against common and additional specified binary types. + * @param mimeType - The MIME type to check. + * @returns boolean - True if the MIME type is binary, otherwise false. + */ export function isBinaryMimeType(mimeType: string) { return ( - /^(image|audio|video)\//.test(mimeType) || - BINARY_MIME_TYPES.includes(mimeType) + /^(image|audio|video)\//.test(mimeType) || // Common binary types + BINARY_MIME_TYPES.includes(mimeType) // Additional specified binary types ) } +// List of known binary MIME types const BINARY_MIME_TYPES = [ // Documents PDF_MIME_TYPE, @@ -54,35 +68,56 @@ const BINARY_MIME_TYPES = [ "application/vnd.apple.installer+xml", // Apple Installer Package (though XML, often handled as binary) ] +/** + * Parses a project based on provided script files. + * Initializes a project, reads scripts, and updates with parsed templates. + * @param options - An object containing an array of script files. + * @returns Project - The parsed project with templates. 
+ */ export async function parseProject(options: { scriptFiles: string[] }) { const { scriptFiles } = options - const prj = new Project() + const prj = new Project() // Initialize a new project instance + + // Helper function to run finalizers stored in the project const runFinalizers = () => { - const fins = prj._finalizers.slice() - prj._finalizers = [] - for (const fin of fins) fin() + const fins = prj._finalizers.slice() // Copy finalizers + prj._finalizers = [] // Clear the finalizers + for (const fin of fins) fin() // Execute each finalizer } - runFinalizers() + runFinalizers() // Run any initial finalizers + // Clone the default prompts const deflPr: Record = Object.assign({}, defaultPrompts) + + // Process each script file, parsing its content and updating the project for (const f of scriptFiles) { const tmpl = await parsePromptScript(f, await readText(f), prj) - if (!tmpl) continue - delete deflPr[tmpl.id] - prj.templates.push(tmpl) + if (!tmpl) continue // Skip if no template is parsed + delete deflPr[tmpl.id] // Remove the parsed template from defaults + prj.templates.push(tmpl) // Add to project templates } + + // Add remaining default prompts to the project for (const [id, v] of Object.entries(deflPr)) { prj.templates.push(await parsePromptScript(BUILTIN_PREFIX + id, v, prj)) } - runFinalizers() + runFinalizers() // Run finalizers after processing all scripts + + /** + * Generates a sorting key for a PromptScript + * Determines priority based on whether a script is unlisted or has a filename. + * @param t - The PromptScript to generate the key for. + * @returns string - The sorting key. + */ function templKey(t: PromptScript) { - const pref = t.unlisted ? "Z" : t.filename ? "A" : "B" - return pref + t.title + t.id + const pref = t.unlisted ? "Z" : t.filename ? 
"A" : "B" // Determine prefix for sorting + return pref + t.title + t.id // Concatenate for final sorting key } + // Sort templates by the generated key prj.templates.sort((a, b) => strcmp(templKey(a), templKey(b))) - return prj + return prj // Return the fully parsed project } diff --git a/packages/core/src/pdf.ts b/packages/core/src/pdf.ts index b707ee136a..a131807ef7 100644 --- a/packages/core/src/pdf.ts +++ b/packages/core/src/pdf.ts @@ -1,25 +1,38 @@ +// Import necessary types and modules import type { TextItem } from "pdfjs-dist/types/src/display/api" import { host } from "./host" import { TraceOptions } from "./trace" import os from "os" import { serializeError } from "./error" -// please some typescript warnings +// Declare a global type for SVGGraphics as any declare global { export type SVGGraphics = any } +/** + * Attempts to import pdfjs and configure worker source + * based on the operating system. + * @param options - Optional tracing options + * @returns A promise resolving to the pdfjs module + */ async function tryImportPdfjs(options?: TraceOptions) { const { trace } = options || {} - installPromiseWithResolversShim() + installPromiseWithResolversShim() // Ensure Promise.withResolvers is available const pdfjs = await import("pdfjs-dist") let workerSrc = require.resolve("pdfjs-dist/build/pdf.worker.min.mjs") + + // Adjust worker source path for Windows platform if (os.platform() === "win32") workerSrc = "file://" + workerSrc.replace(/\\/g, "/") + pdfjs.GlobalWorkerOptions.workerSrc = workerSrc return pdfjs } +/** + * Installs a shim for Promise.withResolvers if not available. + */ function installPromiseWithResolversShim() { ;(Promise as any).withResolvers || ((Promise as any).withResolvers = function () { @@ -38,10 +51,11 @@ function installPromiseWithResolversShim() { } /** - * parses pdfs, require pdfjs-dist to be installed - * @param fileOrUrl - * @param content - * @returns + * Parses PDF files using pdfjs-dist. 
+ * @param fileOrUrl - The file path or URL of the PDF + * @param content - Optional PDF content as a Uint8Array + * @param options - Options including disableCleanup and tracing + * @returns An object indicating success or failure and the parsed pages */ async function PDFTryParse( fileOrUrl: string, @@ -52,6 +66,7 @@ async function PDFTryParse( try { const pdfjs = await tryImportPdfjs(options) const { getDocument } = pdfjs + // Read data from file or use provided content const data = content || (await host.readFile(fileOrUrl)) const loader = await getDocument({ data, @@ -60,6 +75,8 @@ async function PDFTryParse( const doc = await loader.promise const numPages = doc.numPages const pages: string[] = [] + + // Iterate through each page and extract text content for (let i = 0; i < numPages; i++) { const page = await doc.getPage(1 + i) // 1-indexed const content = await page.getTextContent() @@ -67,45 +84,65 @@ async function PDFTryParse( (item): item is TextItem => "str" in item ) let { lines } = parsePageItems(items) + + // Optionally clean up trailing spaces if (!disableCleanup) lines = lines.map((line) => line.replace(/[\t ]+$/g, "")) - // collapse trailing spaces + + // Collapse trailing spaces pages.push(lines.join("\n")) } return { ok: true, pages } } catch (error) { - trace?.error(`reading pdf`, error) + trace?.error(`reading pdf`, error) // Log error if tracing is enabled return { ok: false, error: serializeError(error) } } } +/** + * Joins pages into a single string with page breaks. + * @param pages - Array of page content strings + * @returns A single string representing the entire document + */ function PDFPagesToString(pages: string[]) { return pages?.join("\n\n-------- Page Break --------\n\n") } +/** + * Parses a PDF file and applies optional filtering. 
+ * @param filename - The PDF file path + * @param options - Options including filtering and tracing + * @returns A promise resolving to the parsed pages and concatenated content + */ export async function parsePdf( filename: string, options?: ParsePDFOptions & TraceOptions ): Promise<{ pages: string[]; content: string }> { const { trace, filter } = options || {} let { pages } = await PDFTryParse(filename, undefined, options) + + // Apply filter if provided if (filter) pages = pages.filter((page, index) => filter(index, page)) const content = PDFPagesToString(pages) return { pages, content } } -// to avoid cjs loading issues of pdfjs-dist, move this function in house -// https://github.com/electrovir/pdf-text-reader +/** + * Parses text items from a PDF page into lines. + * @param pdfItems - Array of text items + * @returns An object containing parsed lines + */ function parsePageItems(pdfItems: TextItem[]) { const lineData: { [y: number]: TextItem[] } = {} + // Group text items by their vertical position (y-coordinate) for (let i = 0; i < pdfItems.length; i++) { const item = pdfItems[i] const y = item?.transform[5] if (!lineData.hasOwnProperty(y)) { lineData[y] = [] } - // how how to intentionally test this + // Ensure the item is valid before adding /* istanbul ignore next */ if (item) { lineData[y]?.push(item) @@ -114,9 +151,9 @@ function parsePageItems(pdfItems: TextItem[]) { const yCoords = Object.keys(lineData) .map((key) => Number(key)) - // b - a here because the bottom is y = 0 so we want that to be last + // Sort by descending y-coordinate .sort((a, b) => b - a) - // insert an empty line between any 2 lines where their distance is greater than the upper line's height + // Insert empty lines based on line height differences .reduce((accum: number[], currentY, index, array) => { const nextY = array[index + 1] if (nextY != undefined) { @@ -129,7 +166,7 @@ function parsePageItems(pdfItems: TextItem[]) { -1 ) - // currentY - nextY because currentY will be 
higher than nextY + // Check if a new line is needed based on height if (Math.floor((currentY - nextY) / currentLineHeight) > 1) { const newY = currentY - currentLineHeight lineData[newY] = [] @@ -142,25 +179,26 @@ function parsePageItems(pdfItems: TextItem[]) { const lines: string[] = [] for (let i = 0; i < yCoords.length; i++) { const y = yCoords[i] - // idk how to actually test this + // Ensure y-coordinate is defined /* istanbul ignore next */ if (y == undefined) { continue } - // sort by x position (position in line) + // Sort by x position within each line const lineItems = lineData[y]!.sort( (a, b) => a.transform[4] - b.transform[4] ).filter((item) => !!item.str) const firstLineItem = lineItems[0]! let line = lineItems.length ? firstLineItem.str : "" + + // Concatenate text items into a single line for (let j = 1; j < lineItems.length; j++) { const item = lineItems[j]! const lastItem = lineItems[j - 1]! const xDiff = item.transform[4] - (lastItem.transform[4] + lastItem.width) - // insert spaces for items that are far apart horizontally - // idk how to trigger this + // Insert spaces for horizontally distant items /* istanbul ignore next */ if ( item.height !== 0 && diff --git a/packages/core/src/promptdom.ts b/packages/core/src/promptdom.ts index 311b202ad6..5ff5d52fe2 100644 --- a/packages/core/src/promptdom.ts +++ b/packages/core/src/promptdom.ts @@ -1,3 +1,4 @@ +// Importing various utility functions and constants from different modules. import { CSVToMarkdown, CSVTryParse } from "./csv" import { renderFileContent, resolveFileContent } from "./file" import { addLineNumbers } from "./liner" @@ -28,7 +29,9 @@ import { resolveTokenEncoder } from "./encoders" import { expandFiles } from "./fs" import { interpolateVariables } from "./mustache" +// Definition of the PromptNode interface which is an essential part of the code structure. export interface PromptNode extends ContextExpansionOptions { + // Describes the type of the node. 
type?: | "text" | "image" @@ -43,106 +46,122 @@ export interface PromptNode extends ContextExpansionOptions { | "fileOutput" | "importTemplate" | undefined - children?: PromptNode[] - error?: unknown - tokens?: number + children?: PromptNode[] // Child nodes for hierarchical structure + error?: unknown // Error information if present + tokens?: number // Token count for the node + /** * Rendered markdown preview of the node */ preview?: string } +// Interface for a text node in the prompt tree. export interface PromptTextNode extends PromptNode { type: "text" - value: Awaitable - resolved?: string + value: Awaitable // The text content, potentially awaiting resolution + resolved?: string // Resolved text content } +// Interface for a definition node, which includes options. export interface PromptDefNode extends PromptNode, DefOptions { type: "def" - name: string - value: Awaitable - resolved?: WorkspaceFile + name: string // Name of the definition + value: Awaitable // File associated with the definition + resolved?: WorkspaceFile // Resolved file content } +// Interface for an assistant node. export interface PromptAssistantNode extends PromptNode { type: "assistant" - value: Awaitable - resolved?: string + value: Awaitable // Assistant-related content + resolved?: string // Resolved assistant content } +// Interface for a string template node. export interface PromptStringTemplateNode extends PromptNode { type: "stringTemplate" - strings: TemplateStringsArray - args: any[] - transforms: ((s: string) => Awaitable)[] - resolved?: string + strings: TemplateStringsArray // Template strings + args: any[] // Arguments for the template + transforms: ((s: string) => Awaitable)[] // Transform functions to apply to the template + resolved?: string // Resolved templated content } +// Interface for an import template node. 
export interface PromptImportTemplate extends PromptNode { type: "importTemplate" - files: string | string[] - args?: Record - options?: ImportTemplateOptions - resolved?: Record + files: string | string[] // Files to import + args?: Record // Arguments for the template + options?: ImportTemplateOptions // Additional options + resolved?: Record // Resolved content from files } +// Interface representing a prompt image. export interface PromptImage { - url: string - filename?: string - detail?: "low" | "high" + url: string // URL of the image + filename?: string // Optional filename + detail?: "low" | "high" // Image detail level } +// Interface for an image node. export interface PromptImageNode extends PromptNode { type: "image" - value: Awaitable - resolved?: PromptImage + value: Awaitable // Image information + resolved?: PromptImage // Resolved image information } +// Interface for a schema node. export interface PromptSchemaNode extends PromptNode { type: "schema" - name: string - value: JSONSchema - options?: DefSchemaOptions + name: string // Name of the schema + value: JSONSchema // Schema definition + options?: DefSchemaOptions // Additional options } +// Interface for a function node. export interface PromptFunctionNode extends PromptNode { type: "function" - name: string - description: string - parameters: JSONSchema - impl: ChatFunctionHandler + name: string // Function name + description: string // Description of the function + parameters: JSONSchema // Parameters for the function + impl: ChatFunctionHandler // Implementation of the function } +// Interface for a file merge node. export interface PromptFileMergeNode extends PromptNode { type: "fileMerge" - fn: FileMergeHandler + fn: FileMergeHandler // Handler for the file merge } +// Interface for an output processor node. 
export interface PromptOutputProcessorNode extends PromptNode { type: "outputProcessor" - fn: PromptOutputProcessorHandler + fn: PromptOutputProcessorHandler // Handler for the output processing } +// Interface for a chat participant node. export interface PromptChatParticipantNode extends PromptNode { type: "chatParticipant" - participant: ChatParticipant - options?: ChatParticipantOptions + participant: ChatParticipant // Chat participant information + options?: ChatParticipantOptions // Additional options } +// Interface for a file output node. export interface FileOutputNode extends PromptNode { type: "fileOutput" - output: FileOutput + output: FileOutput // File output information } +// Function to create a text node. export function createTextNode( value: Awaitable, options?: ContextExpansionOptions ): PromptTextNode { - assert(value !== undefined) + assert(value !== undefined) // Ensure value is defined return { type: "text", value, ...(options || {}) } } +// Function to create a definition node. export function createDef( name: string, file: WorkspaceFile, @@ -158,6 +177,7 @@ export function createDef( return { type: "def", name, value, ...(options || {}) } } +// Function to render a definition node to a string. function renderDefNode(def: PromptDefNode): string { const { name, resolved } = def const file = resolved @@ -204,6 +224,7 @@ function renderDefNode(def: PromptDefNode): string { return res } +// Function to create an assistant node. export function createAssistantNode( value: Awaitable, options?: ContextExpansionOptions @@ -212,6 +233,7 @@ export function createAssistantNode( return { type: "assistant", value, ...(options || {}) } } +// Function to create a string template node. export function createStringTemplateNode( strings: TemplateStringsArray, args: any[], @@ -227,6 +249,7 @@ export function createStringTemplateNode( } } +// Function to create an image node. 
export function createImageNode( value: Awaitable, options?: ContextExpansionOptions @@ -235,6 +258,7 @@ export function createImageNode( return { type: "image", value, ...(options || {}) } } +// Function to create a schema node. export function createSchemaNode( name: string, value: JSONSchema, @@ -245,6 +269,7 @@ export function createSchemaNode( return { type: "schema", name, value, options } } +// Function to create a function node. export function createFunctionNode( name: string, description: string, @@ -264,11 +289,13 @@ export function createFunctionNode( } } +// Function to create a file merge node. export function createFileMerge(fn: FileMergeHandler): PromptFileMergeNode { assert(fn !== undefined) return { type: "fileMerge", fn } } +// Function to create an output processor node. export function createOutputProcessor( fn: PromptOutputProcessorHandler ): PromptOutputProcessorNode { @@ -276,16 +303,19 @@ export function createOutputProcessor( return { type: "outputProcessor", fn } } +// Function to create a chat participant node. export function createChatParticipant( participant: ChatParticipant ): PromptChatParticipantNode { return { type: "chatParticipant", participant } } +// Function to create a file output node. export function createFileOutput(output: FileOutput): FileOutputNode { return { type: "fileOutput", output } } +// Function to create an import template node. export function createImportTemplate( files: string | string[], args?: Record, @@ -295,6 +325,7 @@ export function createImportTemplate( return { type: "importTemplate", files, args, options } } +// Function to check if data objects have the same keys and simple values. function haveSameKeysAndSimpleValues(data: object[]): boolean { if (data.length === 0) return true const headers = Object.entries(data[0]) @@ -313,6 +344,7 @@ function haveSameKeysAndSimpleValues(data: object[]): boolean { }) } +// Function to create a text node with data. 
export function createDefData( name: string, data: object | object[], @@ -356,6 +388,7 @@ ${trimNewlines(text)} return createTextNode(value, { priority }) } +// Function to append a child node to a parent node. export function appendChild(parent: PromptNode, child: PromptNode): void { if (!parent.children) { parent.children = [] @@ -363,24 +396,26 @@ export function appendChild(parent: PromptNode, child: PromptNode): void { parent.children.push(child) } +// Interface for visiting different types of prompt nodes. export interface PromptNodeVisitor { - node?: (node: PromptNode) => Awaitable - error?: (node: PromptNode) => Awaitable - afterNode?: (node: PromptNode) => Awaitable - text?: (node: PromptTextNode) => Awaitable - def?: (node: PromptDefNode) => Awaitable - image?: (node: PromptImageNode) => Awaitable - schema?: (node: PromptSchemaNode) => Awaitable - function?: (node: PromptFunctionNode) => Awaitable - fileMerge?: (node: PromptFileMergeNode) => Awaitable - stringTemplate?: (node: PromptStringTemplateNode) => Awaitable - outputProcessor?: (node: PromptOutputProcessorNode) => Awaitable - assistant?: (node: PromptAssistantNode) => Awaitable - chatParticipant?: (node: PromptChatParticipantNode) => Awaitable - fileOutput?: (node: FileOutputNode) => Awaitable - importTemplate?: (node: PromptImportTemplate) => Awaitable -} - + node?: (node: PromptNode) => Awaitable // General node visitor + error?: (node: PromptNode) => Awaitable // Error handling visitor + afterNode?: (node: PromptNode) => Awaitable // Post node visitor + text?: (node: PromptTextNode) => Awaitable // Text node visitor + def?: (node: PromptDefNode) => Awaitable // Definition node visitor + image?: (node: PromptImageNode) => Awaitable // Image node visitor + schema?: (node: PromptSchemaNode) => Awaitable // Schema node visitor + function?: (node: PromptFunctionNode) => Awaitable // Function node visitor + fileMerge?: (node: PromptFileMergeNode) => Awaitable // File merge node visitor + 
stringTemplate?: (node: PromptStringTemplateNode) => Awaitable // String template node visitor + outputProcessor?: (node: PromptOutputProcessorNode) => Awaitable // Output processor node visitor + assistant?: (node: PromptAssistantNode) => Awaitable // Assistant node visitor + chatParticipant?: (node: PromptChatParticipantNode) => Awaitable // Chat participant node visitor + fileOutput?: (node: FileOutputNode) => Awaitable // File output node visitor + importTemplate?: (node: PromptImportTemplate) => Awaitable // Import template node visitor +} + +// Function to visit nodes in the prompt tree. export async function visitNode(node: PromptNode, visitor: PromptNodeVisitor) { await visitor.node?.(node) switch (node.type) { @@ -430,20 +465,22 @@ export async function visitNode(node: PromptNode, visitor: PromptNodeVisitor) { await visitor.afterNode?.(node) } +// Interface for representing a rendered prompt node. export interface PromptNodeRender { - userPrompt: string - assistantPrompt: string - images: PromptImage[] - errors: unknown[] - schemas: Record - functions: ToolCallback[] - fileMerges: FileMergeHandler[] - outputProcessors: PromptOutputProcessorHandler[] - chatParticipants: ChatParticipant[] - messages: ChatCompletionMessageParam[] - fileOutputs: FileOutput[] -} - + userPrompt: string // User prompt content + assistantPrompt: string // Assistant prompt content + images: PromptImage[] // Images included in the prompt + errors: unknown[] // Errors encountered during rendering + schemas: Record // Schemas included in the prompt + functions: ToolCallback[] // Functions included in the prompt + fileMerges: FileMergeHandler[] // File merge handlers + outputProcessors: PromptOutputProcessorHandler[] // Output processor handlers + chatParticipants: ChatParticipant[] // Chat participants + messages: ChatCompletionMessageParam[] // Messages for chat completion + fileOutputs: FileOutput[] // File outputs +} + +// Function to resolve a prompt node. 
async function resolvePromptNode( model: string, root: PromptNode @@ -507,7 +544,7 @@ async function resolvePromptNode( if (typeof ra === "function") ra = ra() ra = await ra - // render files + // Render files if (typeof ra === "object") { if (ra.filename) { n.children = [ @@ -602,6 +639,7 @@ async function resolvePromptNode( return { errors: err } } +// Function to truncate text based on token limits. function truncateText( content: string, maxTokens: number, @@ -615,6 +653,7 @@ function truncateText( return content.slice(0, end) + MAX_TOKENS_ELLIPSE } +// Function to handle truncation of prompt nodes based on token limits. async function truncatePromptNode( model: string, node: PromptNode, @@ -676,6 +715,7 @@ async function truncatePromptNode( return truncated } +// Function to adjust token limits for nodes with flexibility. async function flexPromptNode( root: PromptNode, options?: { flexTokens: number } & TraceOptions @@ -684,7 +724,7 @@ async function flexPromptNode( const { trace, flexTokens } = options || {} - // collect all notes + // Collect all nodes const nodes: PromptNode[] = [] await visitNode(root, { node: (n) => { @@ -697,12 +737,12 @@ async function flexPromptNode( ) if (totalTokens < flexTokens) { - // no need to flex + // No need to flex return } - // inspired from priompt, prompt-tsx, gpt-4 - // sort by priority + // Inspired from priompt, prompt-tsx, gpt-4 + // Sort by priority nodes.sort( (a, b) => (a.priority ?? PRIORITY_DEFAULT) - (b.priority ?? PRIORITY_DEFAULT) @@ -723,6 +763,7 @@ async function flexPromptNode( } } +// Function to trace the prompt node structure for debugging. async function tracePromptNode( trace: MarkdownTrace, root: PromptNode, @@ -755,6 +796,7 @@ async function tracePromptNode( }) } +// Main function to render a prompt node. 
export async function renderPromptNode( modelId: string, node: PromptNode, diff --git a/packages/core/src/promptrunner.ts b/packages/core/src/promptrunner.ts index a966574cf2..f9108a8ea3 100644 --- a/packages/core/src/promptrunner.ts +++ b/packages/core/src/promptrunner.ts @@ -1,3 +1,4 @@ +// Import necessary modules and functions for handling chat sessions, templates, file management, etc. import { executeChatSession, tracePromptResult } from "./chat" import { Project, PromptScript } from "./ast" import { stringToPos } from "./parser" @@ -23,6 +24,16 @@ import { YAMLParse } from "./yaml" import { expandTemplate } from "./expander" import { resolveLanguageModel } from "./lm" +// Asynchronously resolve expansion variables needed for a template +/** + * Resolves variables required for the expansion of a template. + * @param project The project context. + * @param trace The markdown trace for logging. + * @param template The prompt script template. + * @param frag The fragment containing files and metadata. + * @param vars The user-provided variables. + * @returns An object containing resolved variables. 
+ */ async function resolveExpansionVars( project: Project, trace: MarkdownTrace, @@ -42,14 +53,18 @@ async function resolveExpansionVars( for (let filename of filenames) { filename = relativePath(root, filename) + // Skip if file already in the list if (files.find((lk) => lk.filename === filename)) continue const file: WorkspaceFile = { filename } await resolveFileContent(file) files.push(file) } + // Parse and obtain attributes from prompt parameters const attrs = parsePromptParameters(project, template, vars) const secrets: Record = {} + + // Read secrets defined in the template for (const secret of template.secrets || []) { const value = await runtimeHost.readSecret(secret) if (value) { @@ -57,6 +72,8 @@ async function resolveExpansionVars( secrets[secret] = value } else trace.error(`secret \`${secret}\` not found`) } + + // Create and return an object containing resolved variables const res: Partial = { dir: ".", files, @@ -71,6 +88,15 @@ async function resolveExpansionVars( return res } +// Main function to run a template with given options +/** + * Executes a prompt template with specified options. + * @param prj The project context. + * @param template The prompt script template. + * @param fragment The fragment containing additional context. + * @param options Options for generation, including model and trace. + * @returns A generation result with details of the execution. + */ export async function runTemplate( prj: Project, template: PromptScript, @@ -88,6 +114,7 @@ export async function runTemplate( trace.heading(3, `🧠 running ${template.id} with model ${model ?? 
""}`) if (cliInfo) traceCliArgs(trace, template, options) + // Resolve expansion variables for the template const vars = await resolveExpansionVars( prj, trace, @@ -119,8 +146,7 @@ export async function runTemplate( trace ) - // if the expansion failed, show the user the trace - // or no message generated + // Handle failed expansion scenario if (status !== "success" || !messages.length) { trace.renderErrors() return { @@ -140,7 +166,7 @@ export async function runTemplate( } } - // don't run LLM + // If LLM is skipped, return early if (skipLLM) { trace.renderErrors() return { @@ -173,6 +199,8 @@ export async function runTemplate( const changelogs: string[] = [] const edits: Edits[] = [] const projFolder = runtimeHost.projectFolder() + + // Helper function to get or create file edit object const getFileEdit = async (fn: string) => { fn = relativePath(projFolder, fn) let fileEdit = fileEdits[fn] @@ -186,6 +214,7 @@ export async function runTemplate( return fileEdit } + // Resolve model connection information const connection = await resolveModelConnectionInfo( { model }, { trace, token: true } @@ -201,6 +230,8 @@ export async function runTemplate( const { completer } = await resolveLanguageModel( connection.configuration.provider ) + + // Execute chat session with the resolved configuration const output = await executeChatSession( connection.configuration, cancellationToken, @@ -213,6 +244,7 @@ export async function runTemplate( genOptions ) tracePromptResult(trace, output) + const { json, fences, @@ -222,6 +254,8 @@ export async function runTemplate( finishReason, } = output let { text, annotations } = output + + // Handle fenced code regions within the output if (json === undefined) { for (const fence of fences.filter( ({ validation }) => validation?.valid !== false @@ -296,7 +330,7 @@ export async function runTemplate( } } - // apply user output processors + // Apply user-defined output processors if (outputProcessors?.length) { try { trace.startDetails("🖨️ output 
processors") @@ -340,10 +374,10 @@ export async function runTemplate( } } - // apply file outputs + // Validate and apply file outputs validateFileOutputs(fileOutputs, trace, fileEdits, schemas) - // convert file edits into edits + // Convert file edits into structured edits Object.entries(fileEdits) .filter(([, { before, after }]) => before !== after) // ignore unchanged files .forEach(([fn, { before, after, validation }]) => { @@ -368,7 +402,7 @@ export async function runTemplate( } }) - // reporting + // Reporting and tracing output if (fences?.length) trace.details("📩 code regions", renderFencedVariables(fences)) if (edits.length) @@ -425,16 +459,27 @@ export async function runTemplate( schemas, json, } + + // If there's an error, provide status text if (res.status === "error" && !res.statusText && res.finishReason) { res.statusText = `LLM finish reason: ${res.finishReason}` } return res } finally { + // Cleanup any resources like running containers or browsers await runtimeHost.removeContainers() await runtimeHost.removeBrowsers() } } +// Validate file outputs against specified schemas and patterns +/** + * Validates file outputs based on provided patterns and schemas. + * @param fileOutputs List of file outputs to validate. + * @param trace The markdown trace for logging. + * @param fileEdits Record of file updates. + * @param schemas The JSON schemas for validation. 
+ */ function validateFileOutputs( fileOutputs: FileOutput[], trace: MarkdownTrace, diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 07bd8bf72e..af777fa81d 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -1,3 +1,4 @@ +// Import necessary modules and functions import { JSON5parse } from "./json5" import { MarkdownTrace } from "./trace" import Ajv from "ajv" @@ -5,27 +6,39 @@ import { YAMLParse } from "./yaml" import { errorMessage } from "./error" import { promptParametersSchemaToJSONSchema } from "./parameters" +/** + * Check if an object is a JSON Schema + * @param obj - The object to check + * @returns true if the object is a JSON Schema + */ export function isJSONSchema(obj: any) { if (typeof obj === "object" && obj.type === "object") return true if (typeof obj === "object" && obj.type === "array") return true return false } +/** + * Converts a JSON Schema to a TypeScript type definition as a string + * @param schema - The JSON Schema + * @param options - Optional settings for type name and export + * @returns TypeScript type definition string + */ export function JSONSchemaStringifyToTypeScript( schema: JSONSchema, options?: { typeName?: string; export?: boolean } ) { const { typeName = "Response" } = options || {} - let lines: string[] = [] - let indent = 0 + let lines: string[] = [] // Array to accumulate lines of TypeScript code + let indent = 0 // Manage indentation level - appendJsDoc(schema.description) + appendJsDoc(schema.description) // Add JSDoc for schema description append( `${options?.export ? 
"export " : ""}type ${typeName.replace(/\s+/g, "_")} =` ) - stringifyNode(schema) - return lines.join("\n") + stringifyNode(schema) // Convert schema to TypeScript + return lines.join("\n") // Join lines into a single TypeScript definition + // Append a line to the TypeScript definition function append(line: string) { if (/=$/.test(lines[lines.length - 1])) lines[lines.length - 1] = lines[lines.length - 1] + " " + line @@ -34,6 +47,7 @@ export function JSONSchemaStringifyToTypeScript( else lines.push(" ".repeat(indent) + line) } + // Append JSDoc comments function appendJsDoc(text: string) { if (!text) return if (text.indexOf("\n") > -1) @@ -43,6 +57,7 @@ export function JSONSchemaStringifyToTypeScript( else append(`// ${text}`) } + // Convert a JSON Schema node to TypeScript function stringifyNode(node: JSONSchemaType): string { if (node === undefined) return "any" else if (node.type === "array") { @@ -58,6 +73,7 @@ export function JSONSchemaStringifyToTypeScript( else return "unknown" } + // Extract documentation for a node function stringifyNodeDoc(node: JSONSchemaType): string { const doc = [node.description] switch (node.type) { @@ -76,6 +92,7 @@ export function JSONSchemaStringifyToTypeScript( return doc.filter((d) => d).join("\n") } + // Convert a JSON Schema object to TypeScript function stringifyObject(object: JSONSchemaObject): void { const { required, additionalProperties } = object append(`{`) @@ -95,6 +112,7 @@ export function JSONSchemaStringifyToTypeScript( append(`}`) } + // Convert a JSON Schema array to TypeScript function stringifyArray(array: JSONSchemaArray): void { append(`Array<`) indent++ @@ -105,11 +123,23 @@ export function JSONSchemaStringifyToTypeScript( } } +/** + * Validate a JSON schema using Ajv + * @param schema - The JSON Schema to validate + * @returns Promise with validation result + */ export async function validateSchema(schema: JSONSchema) { const ajv = new Ajv() return await ajv.validateSchema(schema, false) } +/** + * 
Validate a JSON object against a given JSON schema + * @param object - The JSON object to validate + * @param schema - The JSON Schema + * @param options - Optional trace for debugging + * @returns Validation result with success status and error message if any + */ export function validateJSONWithSchema( object: any, schema: JSONSchema, @@ -143,13 +173,20 @@ export function validateJSONWithSchema( } } +/** + * Validate multiple JSON or YAML fences against given schemas + * @param fences - Array of fenced code blocks + * @param schemas - Map of schema names to JSON Schemas + * @param options - Optional trace for debugging + * @returns Array of data frames with validation results + */ export function validateFencesWithSchema( fences: Fenced[], schemas: Record, options?: { trace: MarkdownTrace } ): DataFrame[] { const frames: DataFrame[] = [] - // validate schemas in fences + // Validate schemas in fences for (const fence of fences?.filter( ({ language, args }) => args?.schema && (language === "json" || language === "yaml") @@ -157,7 +194,7 @@ export function validateFencesWithSchema( const { language, content: val, args } = fence const schema = args?.schema - // validate well formed json/yaml + // Validate well-formed JSON/YAML let data: any try { if (language === "json") data = JSON5parse(val, { repair: true }) @@ -169,7 +206,7 @@ export function validateFencesWithSchema( } } if (!fence.validation) { - // check if schema specified + // Check if schema specified const schemaObj = schemas[schema] if (!schemaObj) { fence.validation = { @@ -184,7 +221,7 @@ export function validateFencesWithSchema( ) } - // add to frames + // Add to frames frames.push({ schema, data, @@ -194,6 +231,11 @@ export function validateFencesWithSchema( return frames } +/** + * Converts a JSON Schema to a JSON string + * @param schema - The JSON Schema + * @returns JSON string representation of the schema + */ export function JSONSchemaStringify(schema: JSONSchema) { return JSON.stringify( { @@ 
-206,7 +248,11 @@ export function JSONSchemaStringify(schema: JSONSchema) { ) } -// https://platform.openai.com/docs/guides/structured-outputs/supported-schemas +/** + * Converts a schema to a strict JSON Schema + * @param schema - The schema to convert + * @returns A strict JSON Schema + */ export function toStrictJSONSchema( schema: PromptParametersSchema | JSONSchema ): any { @@ -218,6 +264,7 @@ export function toStrictJSONSchema( if (clone.type !== "object") throw new Error("top level schema must be object") + // Recursive function to make the schema strict function visit(node: JSONSchemaType): void { const { type } = node switch (type) { diff --git a/packages/core/src/template.ts b/packages/core/src/template.ts index 87f9da62a8..dfe461fe4d 100644 --- a/packages/core/src/template.ts +++ b/packages/core/src/template.ts @@ -1,3 +1,9 @@ +/** + * This module provides functions for parsing and validating prompt scripts + * within a project. It includes a Checker class for validation of various + * data types and formats. + */ + import { Project, PromptScript } from "./ast" import { BUILTIN_PREFIX, GENAI_ANY_REGEX, PROMPTY_REGEX } from "./constants" import { errorMessage } from "./error" @@ -6,6 +12,14 @@ import { JSON5TryParse } from "./json5" import { humanize } from "inflection" import { validateSchema } from "./schema" import { promptyParse, promptyToGenAIScript } from "./prompty" + +/** + * Extracts a template ID from the given filename by removing specific extensions + * and directories. + * + * @param filename - The filename to extract the template ID from. + * @returns The extracted template ID. + */ function templateIdFromFileName(filename: string) { return filename .replace(/\.(mjs|ts|js|mts|prompty)$/i, "") @@ -13,16 +27,29 @@ function templateIdFromFileName(filename: string) { .replace(/.*[\/\\]/, "") } +/** + * Type utility to extract keys of a type T that match type S. + */ type KeysOfType = { [K in keyof T]: T[K] extends S ? 
K : never }[keyof T & string] +/** + * Class to perform validation checks on a prompt script. + * + * @template T - Type of the prompt-like object to validate. + */ class Checker { - // validation state - keyFound: boolean - key: string - val: any - + keyFound: boolean // Tracks whether a key is found during validation. + key: string // Currently processed key. + val: any // Currently processed value. + + /** + * Converts a character index to a line and column position. + * + * @param n - Character index in the script. + * @returns A tuple [line, column] representing the position. + */ toPosition(n: number): CharPosition { const pref = this.js.slice(0, n) const line = pref.replace(/[^\n]/g, "").length @@ -30,12 +57,22 @@ class Checker { return [line, col] } + /** + * Reports an error for the current key. + * + * @param message - Error message to report. + */ verror(message: string) { this.error(this.key, message) } + /** + * Reports an error with a specific key. + * + * @param key - The key associated with the error. + * @param message - Error message to report. + */ error(key: string, message: string) { - // guesstimate the (first) position of key const idx = new RegExp("\\b" + key + "[\\s\"']*:").exec(this.js) const range = idx ? [idx.index, idx.index + key.length] : [0, 5] this.diagnostics.push({ @@ -46,6 +83,15 @@ class Checker { }) } + /** + * Constructs a new Checker instance. + * + * @param template - The prompt-like object to validate. + * @param filename - The filename of the script. + * @param diagnostics - The diagnostics array to report errors to. + * @param js - The JavaScript source code of the script. + * @param jsobj - The parsed JSON object of the script. + */ constructor( public template: T, public filename: string, @@ -54,6 +100,12 @@ class Checker { public jsobj: any ) {} + /** + * Validates key-value pairs within the JSON object using a callback function. + * + * @param cb - Callback function to perform specific checks. 
+ * @returns A new object containing valid key-value pairs. + */ validateKV(cb: () => void) { const obj: any = {} @@ -78,12 +130,24 @@ class Checker { return obj } + /** + * Skips validation for the current key if it doesn't match k. + * + * @param k - The key to check against. + * @returns Whether the current key is skipped. + */ private skip(k: string) { if (k !== this.key) return true this.keyFound = true return false } + /** + * Checks if the current value is a string and optionally within a set of allowed keys. + * + * @param k - Key of the string to check. + * @param keys - Optional array of allowed string values. + */ checkString( k: K & KeysOfType, keys?: T[K][] @@ -100,6 +164,11 @@ class Checker { } } + /** + * Checks if the current value is an object or an array of objects. + * + * @param k - Key of the object or object array to check. + */ checkObjectOrObjectArray(k: any) { if (this.skip(k)) return if ( @@ -112,6 +181,11 @@ class Checker { } } + /** + * Checks if the given key is a valid JSON schema. + * + * @param k - Key of the schema to validate. + */ checkJSONSchema(k: any) { if (this.skip(k)) return if (k && !validateSchema(k)) @@ -119,6 +193,11 @@ class Checker { return } + /** + * Checks if the current value is an array of objects. + * + * @param k - Key of the object array to check. + */ checkObjectArray(k: any) { if (this.skip(k)) return if ( @@ -130,6 +209,11 @@ class Checker { } } + /** + * Checks if the current value is an object (record). + * + * @param k - Key of the record to check. + */ checkRecord(k: any) { if (this.skip(k)) return if (typeof this.val != "object") { @@ -138,6 +222,11 @@ class Checker { } } + /** + * Checks if the current value is a boolean. + * + * @param k - Key of the boolean to check. + */ checkBool(k: KeysOfType) { if (this.skip(k)) return if (typeof this.val != "boolean") { @@ -146,6 +235,11 @@ class Checker { } } + /** + * Checks if the current value is a string or a boolean. 
+ * + * @param k - Key of the string or boolean to check. + */ checkStringOrBool(k: KeysOfType) { if (this.skip(k)) return if (typeof this.val != "string" && typeof this.val != "boolean") { @@ -154,6 +248,11 @@ class Checker { } } + /** + * Checks if the current value is a natural number. + * + * @param k - Key of the number to check. + */ checkNat(k: KeysOfType) { if (this.skip(k)) return if ( @@ -166,6 +265,11 @@ class Checker { } } + /** + * Checks if the current value is a number. + * + * @param k - Key of the number to check. + */ checkNumber(k: KeysOfType) { if (this.skip(k)) return if (typeof this.val != "number") { @@ -174,11 +278,22 @@ class Checker { } } + /** + * Checks any value with a custom callback transformation. + * + * @param k - Key of the value to check. + * @param cb - Callback function to transform the value. + */ checkAny(k: K, cb: (val: any) => any) { if (this.skip(k)) return this.val = cb(this.val) } + /** + * Checks if the current value is a string or an array of strings. + * + * @param k - Key of the string or string array to check. + */ checkStringArray(k: KeysOfType) { this.checkAny(k, (v) => { if (typeof v == "string") v = [v] @@ -191,8 +306,13 @@ class Checker { } } +/** + * Parses script metadata from the given JavaScript source. + * + * @param jsSource - The JavaScript source code of the script. + * @returns A PromptArgs object containing the parsed metadata. + */ export function parsePromptScriptMeta(jsSource: string) { - // shortcut const m = /\b(?system|script)\(\s*(?\{.*?\})\s*\)/s.exec( jsSource ) @@ -205,6 +325,15 @@ export function parsePromptScriptMeta(jsSource: string) { return meta } +/** + * Core function to parse a prompt template and validate its contents. + * + * @param filename - The filename of the template. + * @param content - The content of the template. + * @param prj - The Project object containing diagnostics and other data. + * @param finalizer - Finalizer function to perform additional validation. 
+ * @returns The parsed PromptScript or undefined in case of errors. + */ async function parsePromptTemplateCore( filename: string, content: string, @@ -246,6 +375,14 @@ async function parsePromptTemplateCore( } } +/** + * Parses a prompt script file, validating its structure and content. + * + * @param filename - The filename of the script. + * @param content - The content of the script. + * @param prj - The Project instance containing diagnostics. + * @returns The parsed PromptScript or undefined in case of errors. + */ export async function parsePromptScript( filename: string, content: string, @@ -258,6 +395,7 @@ export async function parsePromptScript( return await parsePromptTemplateCore(filename, content, prj, (c) => { const obj = c.validateKV(() => { + // Validate various fields using the Checker methods c.checkString("title") c.checkString("description") c.checkString("model") diff --git a/packages/core/src/test.ts b/packages/core/src/test.ts index 39cdef123e..ec6a473625 100644 --- a/packages/core/src/test.ts +++ b/packages/core/src/test.ts @@ -1,21 +1,36 @@ +// Import necessary utilities and constants import { HTTPS_REGEX } from "./constants" import { arrayify } from "./util" import { host } from "./host" +/** + * Function to remove properties with undefined values from an object. + * + * @param obj - An object with string keys and any type of values. + * @returns A new object with undefined values removed, or undefined if the input is undefined. + */ function cleanUndefined(obj: Record) { + // Check if the object is defined return obj - ? Object.entries(obj) - .filter(([_, value]) => value !== undefined) + ? 
Object.entries(obj) // Convert object to entries + .filter(([_, value]) => value !== undefined) // Filter out undefined values .reduce( (newObj, [key, value]) => { - newObj[key] = value - return newObj + newObj[key] = value // Add key-value pair to new object + return newObj // Return accumulated object }, - {} as Record + {} as Record // Initialize as empty object ) - : undefined + : undefined // Return undefined if input is undefined } +/** + * Generates a configuration object for PromptFoo using a given script and options. + * + * @param script - A PromptScript containing the prompt details. + * @param options - Optional configuration settings such as provider, testProvider, outputs, etc. + * @returns A configuration object for PromptFoo. + */ export function generatePromptFooConfiguration( script: PromptScript, options?: { @@ -26,15 +41,23 @@ export function generatePromptFooConfiguration( models?: ModelOptions[] } ) { + // Destructure options with default values const { provider = "provider.mjs", testProvider } = options || {} const { description, title, tests = [], id } = script const models = options?.models || [] + + // Ensure at least one model exists if (!models.length) models.push(script) + const cli = options?.cli const transform = "output.text" + + // Create configuration object const res = { + // Description combining title and description description: [title, description].filter((s) => s).join("\n"), prompts: [id], + // Map model options to providers providers: models .map(({ model, temperature, topP }) => ({ model: model ?? host.defaultModelOptions.model, @@ -59,6 +82,7 @@ export function generatePromptFooConfiguration( cli, }, })), + // Default test configuration if testProvider is present defaultTest: testProvider ? 
{ options: { @@ -91,6 +115,7 @@ export function generatePromptFooConfiguration( }, } : undefined, + // Map tests to configuration format tests: arrayify(tests).map( ({ description, @@ -109,33 +134,33 @@ export function generatePromptFooConfiguration( }), assert: [ ...arrayify(keywords).map((kv) => ({ - type: "icontains", + type: "icontains", // Check if output contains keyword value: kv, transform, })), ...arrayify(forbidden).map((kv) => ({ - type: "not-icontains", + type: "not-icontains", // Check if output does not contain forbidden keyword value: kv, transform, })), ...arrayify(rubrics).map((value) => ({ - type: "llm-rubric", + type: "llm-rubric", // Use LLM rubric for evaluation value, transform, })), ...arrayify(facts).map((value) => ({ - type: "factuality", + type: "factuality", // Check factuality of output value, transform, })), ...arrayify(asserts).map((assert) => ({ ...assert, - transform: assert.transform || transform, + transform: assert.transform || transform, // Default transform })), - ].filter((a) => !!a), + ].filter((a) => !!a), // Filter out any undefined assertions }) ), } - return res + return res // Return the generated configuration } diff --git a/packages/core/src/testhost.ts b/packages/core/src/testhost.ts index a11afae805..98c34ce7e2 100644 --- a/packages/core/src/testhost.ts +++ b/packages/core/src/testhost.ts @@ -1,3 +1,8 @@ +// This module defines a TestHost class that implements the RuntimeHost interface. +// It provides various functionalities related to language models, file operations, and other utilities. 
+// Tags: RuntimeHost, TestHost, LanguageModel, FileSystem, Node.js + +// Import necessary modules and functions from various files import { readFile, writeFile } from "fs/promises" import { LogLevel, @@ -27,6 +32,8 @@ import { } from "node:path" import { LanguageModel } from "./chat" +// Function to create a frozen object representing Node.js path methods +// This object provides utility methods for path manipulations export function createNodePath(): Path { return Object.freeze({ dirname, @@ -40,73 +47,117 @@ export function createNodePath(): Path { }) } +// Class representing a test host for runtime, implementing the RuntimeHost interface export class TestHost implements RuntimeHost { + // Path to the dotenv file (if used) dotEnvPath: string = undefined + // State object to store user-specific data userState: any + // Service to manage language models models: ModelService + // Server management service server: ServerManager + // Instance of the path utility path: Path = createNodePath() + // File system for workspace workspace: WorkspaceFileSystem + + // Default options for language models readonly defaultModelOptions = { model: DEFAULT_MODEL, temperature: DEFAULT_TEMPERATURE, } + // Default options for embeddings models readonly defaultEmbeddingsModelOptions = { embeddingsModel: DEFAULT_EMBEDDINGS_MODEL, } + // Static method to set this class as the runtime host static install() { setRuntimeHost(new TestHost()) } + // Method to create a UTF-8 decoder createUTF8Decoder(): UTF8Decoder { return new TextDecoder("utf-8") } + + // Method to create a UTF-8 encoder createUTF8Encoder(): UTF8Encoder { return new TextEncoder() } + + // Method to get the current project folder path projectFolder(): string { return resolve(".") } + + // Placeholder for the method to get the installation folder path installFolder(): string { throw new Error("Method not implemented.") } + + // Placeholder for path resolution method resolvePath(...segments: string[]): string { throw new 
Error("Method not implemented.") } + + // Placeholder for reading a secret value readSecret(name: string): Promise { throw new Error("Method not implemented.") } + + // Placeholder for browsing a URL browse(url: string, options?: BrowseSessionOptions): Promise { throw new Error("Method not implemented.") } + + // Placeholder for getting language model configuration getLanguageModelConfiguration( modelId: string ): Promise { throw new Error("Method not implemented.") } + + // Optional client language model clientLanguageModel?: LanguageModel + + // Placeholder for logging functionality log(level: LogLevel, msg: string): void { throw new Error("Method not implemented.") } + + // Method to read a file and return its content as a Uint8Array async readFile(name: string): Promise { return new Uint8Array(await readFile(resolve(name))) } + + // Method to write content to a file async writeFile(name: string, content: Uint8Array): Promise { await writeFile(resolve(name), content) } + + // Placeholder for file deletion functionality deleteFile(name: string): Promise { throw new Error("Method not implemented.") } + + // Placeholder for finding files with a glob pattern findFiles(glob: string, options?: {}): Promise { throw new Error("Method not implemented.") } + + // Placeholder for creating a directory createDirectory(name: string): Promise { throw new Error("Method not implemented.") } + + // Placeholder for deleting a directory deleteDirectory(name: string): Promise { throw new Error("Method not implemented.") } + + // Placeholder for executing a shell command in a container exec( containerId: string, command: string, @@ -115,19 +166,31 @@ export class TestHost implements RuntimeHost { ): Promise { throw new Error("Method not implemented.") } + + // Placeholder for creating a container host container( options: ContainerOptions & TraceOptions ): Promise { throw new Error("Method not implemented.") } + + // Async method to remove containers async removeContainers(): Promise 
{} + + // Async method to remove browsers async removeBrowsers(): Promise {} + + // Placeholder for selecting an option from a list select(message: string, options: string[]): Promise { throw new Error("Method not implemented.") } + + // Placeholder for input functionality input(message: string): Promise { throw new Error("Method not implemented.") } + + // Placeholder for confirmation functionality confirm(message: string): Promise { throw new Error("Method not implemented.") } diff --git a/packages/core/src/tidy.ts b/packages/core/src/tidy.ts index 5c87969367..b22f2998f5 100644 --- a/packages/core/src/tidy.ts +++ b/packages/core/src/tidy.ts @@ -1,3 +1,4 @@ +// Import necessary functions from the "@tidyjs/tidy" library import { tidy, sliceTail, @@ -7,13 +8,35 @@ import { distinct, } from "@tidyjs/tidy" +// JSDoc comment for the tidyData function +/** + * Processes and filters data rows based on the provided options. + * + * This function applies various operations such as selecting distinct values, + * selecting specific headers, and slicing samples, head, or tail of the data. + * + * @param {object[]} rows - The data rows to be processed. + * @param {DataFilter} [options={}] - The options to filter and manipulate the data. + * @returns {object[]} - The processed and filtered data rows. 
+ */ export function tidyData(rows: object[], options: DataFilter = {}) { + // Check if distinct operation is specified in options and apply it if (options.distinct?.length) rows = tidy(rows, distinct(options.distinct as any)) + + // Check if specific headers need to be selected and apply the selection if (options.headers?.length) rows = tidy(rows, select(options.headers)) + + // Check if a random sample of rows is to be sliced and apply sampling if (options.sliceSample > 0) rows = tidy(rows, sliceSample(options.sliceSample)) + + // Check if the head of rows is to be sliced and apply slicing if (options.sliceHead > 0) rows = tidy(rows, sliceHead(options.sliceHead)) + + // Check if the tail of rows is to be sliced and apply slicing if (options.sliceTail > 0) rows = tidy(rows, sliceTail(options.sliceTail)) + + // Return the processed rows after applying all specified operations return rows } diff --git a/packages/core/src/tokens.ts b/packages/core/src/tokens.ts index 4f781aa2d0..39b584003c 100644 --- a/packages/core/src/tokens.ts +++ b/packages/core/src/tokens.ts @@ -1,12 +1,26 @@ +// Importing constants and utility functions import { ESTIMATE_TOKEN_OVERHEAD } from "./constants" import { logVerbose } from "./util" +/** + * Function to estimate the number of tokens for a given text. + * Utilizes a provided encoder function to achieve this. + * + * @param text - The input text whose tokens are to be estimated. + * @param encoder - A function that encodes the text into tokens. + * @returns The estimated number of tokens including an overhead. 
+ */ export function estimateTokens(text: string, encoder: TokenEncoder) { + // If the text is empty or undefined, return 0 if (!text?.length) return 0 try { + // Return the length of the encoded text plus a constant overhead return encoder(text).length + ESTIMATE_TOKEN_OVERHEAD } catch (e) { + // If encoding fails, log the error in verbose mode logVerbose(e) + // Fallback: Estimate token count as one-fourth of text length plus overhead + // This provides a rough estimate in case of encoding errors return (text.length >> 2) + ESTIMATE_TOKEN_OVERHEAD } } diff --git a/packages/core/src/url.ts b/packages/core/src/url.ts index e9d383ab88..b9b30706f2 100644 --- a/packages/core/src/url.ts +++ b/packages/core/src/url.ts @@ -1,11 +1,29 @@ +/** + * Utility functions for handling URL shortening. + * + * Provides functionality to shorten URLs by displaying only the protocol, + * hostname, and pathname. Adds ellipses for query parameters or fragments. + * + * Tags: URL, Shorten, Ellipse, Parsing + */ + +/** + * Shortens a given URL to display only the protocol, hostname, and pathname. + * Adds ellipses if query parameters or fragments are present. + * + * @param url - The complete URL to be shortened. + * @returns A shortened version of the URL or undefined if parsing fails. + */ export function ellipseUri(url: string) { try { - const uri = new URL(url) - let res = `${uri.protocol}//${uri.hostname}${uri.pathname}` - if (uri.search) res += `?...` - if (uri.hash) res += `#...` - return res + const uri = new URL(url); // Parse the URL string into a URL object. + let res = `${uri.protocol}//${uri.hostname}${uri.pathname}`; // Construct the base URL with protocol, hostname, and pathname. + + if (uri.search) res += `?...`; // Append ellipses if there are query parameters. + if (uri.hash) res += `#...`; // Append ellipses if there is a fragment identifier. + + return res; // Return the shortened URL. 
} catch { - return undefined + return undefined; // Return undefined if the URL is invalid. } } diff --git a/packages/core/src/urlAdapters.ts b/packages/core/src/urlAdapters.ts index f1cc3cdfbe..2a4649c93a 100644 --- a/packages/core/src/urlAdapters.ts +++ b/packages/core/src/urlAdapters.ts @@ -1,23 +1,42 @@ +/** + * Defines the structure for a URL adapter that can convert friendly URLs + * to fetchable URLs and adapt response bodies to strings. + */ export interface UrlAdapter { + /** + * Optional content type for the URL adapter. + * Can be either "text/plain" or "application/json". + */ contentType?: "text/plain" | "application/json" /** - * Given a friendly URL, return a URL that can be used to fetch the content. - * @param url - * @returns + * Converts a friendly URL into a URL that can be used to fetch the content. + * @param url - The friendly URL to be converted. + * @returns The fetchable URL if there's a match, otherwise undefined. */ matcher: (url: string) => string /** - * Convers the body of the response to a string. - * @param body - * @returns + * Optional adapter function to convert the body of the response to a string. + * @param body - The response body to be converted. + * @returns The converted string or undefined. */ adapter?: (body: string | any) => string | undefined } +/** + * Default implementations of URL adapters. + * Currently, it includes an adapter for GitHub blob URLs. + */ export const defaultUrlAdapters: UrlAdapter[] = [ { + /** + * Matches GitHub blob URLs and converts them to raw content URLs. + * Extracts user, repository, and file path from the blob URL. + * Constructs a raw URL using the extracted components. + * @param url - The GitHub blob URL. + * @returns The corresponding raw URL or undefined if no match is found. 
+ */ matcher: (url) => { const m = /^https:\/\/github.com\/(\w+)\/(\w+)\/blob\/(.+)#?/i.exec( url diff --git a/packages/core/src/vectorsearch.ts b/packages/core/src/vectorsearch.ts index 32a1eb54b7..6b449492ee 100644 --- a/packages/core/src/vectorsearch.ts +++ b/packages/core/src/vectorsearch.ts @@ -1,3 +1,8 @@ +/** + * This module provides functionality for creating embeddings using OpenAI's API + * and performing vector search on documents. + */ + import { encode, decode } from "gpt-tokenizer" import { resolveModelConnectionInfo } from "./models" import { runtimeHost, host } from "./host" @@ -15,19 +20,39 @@ import { getConfigHeaders } from "./openai" import { logVerbose, trimTrailingSlash } from "./util" import { TraceOptions } from "./trace" +/** + * Represents the cache key for embeddings. + * This is used to store and retrieve cached embeddings. + */ export interface EmbeddingsCacheKey { base: string provider: string model: string inputs: string | string[] } + +/** + * Type alias for the embeddings cache. + * Maps cache keys to embedding responses. + */ export type EmbeddingsCache = JSONLineCache< EmbeddingsCacheKey, EmbeddingsResponse > +/** + * Class for creating embeddings using the OpenAI API. + * Implements the EmbeddingsModel interface. + */ class OpenAIEmbeddings implements EmbeddingsModel { readonly cache: JSONLineCache + + /** + * Constructs an instance of OpenAIEmbeddings. + * @param info Connection options for the model. + * @param configuration Configuration for the language model. + * @param options Options for tracing. + */ public constructor( readonly info: ModelConnectionOptions, readonly configuration: LanguageModelConfiguration, @@ -39,11 +64,11 @@ class OpenAIEmbeddings implements EmbeddingsModel { >("embeddings") } + // Maximum number of tokens for embeddings maxTokens = 512 /** * Creates embeddings for the given inputs using the OpenAI API. - * @param model Name of the model to use (or deployment for Azure). 
* @param inputs Text inputs to create embeddings for. * @returns A `EmbeddingsResponse` with a status and the generated embeddings or a message when an error occurs. */ @@ -52,15 +77,25 @@ class OpenAIEmbeddings implements EmbeddingsModel { ): Promise { const { provider, base, model } = this.configuration + // Define the cache key for the current request const cacheKey: EmbeddingsCacheKey = { inputs, model, provider, base } + + // Check if the result is already cached const cached = await this.cache.get(cacheKey) if (cached) return cached + // Create embeddings if not cached const res = await this.uncachedCreateEmbeddings(inputs) if (res.status === "success") this.cache.set(cacheKey, res) return res } + + /** + * Creates embeddings without using the cache. + * @param input The input text or texts. + * @returns The response containing the embeddings or error information. + */ private async uncachedCreateEmbeddings( input: string | string[] ): Promise { @@ -72,6 +107,8 @@ class OpenAIEmbeddings implements EmbeddingsModel { this.configuration ) headers["Content-Type"] = "application/json" + + // Determine the URL based on provider type if (provider === MODEL_PROVIDER_AZURE || type === "azure") { url = `${trimTrailingSlash(base)}/${model.replace(/\./g, "")}/embeddings?api-version=${AZURE_OPENAI_API_VERSION}` delete body.model @@ -81,6 +118,8 @@ class OpenAIEmbeddings implements EmbeddingsModel { const fetch = await createFetch({ retryOn: [429] }) if (trace) traceFetchPost(trace, url, headers, body) logVerbose(`embedding ${model}`) + + // Send POST request to create embeddings const resp = await fetch(url, { method: "POST", headers, @@ -88,7 +127,7 @@ class OpenAIEmbeddings implements EmbeddingsModel { }) trace?.itemValue(`response`, `${resp.status} ${resp.statusText}`) - // Process response + // Process the response if (resp.status < 300) { const data = (await resp.json()) as EmbeddingCreateResponse return { @@ -111,6 +150,13 @@ class OpenAIEmbeddings implements 
EmbeddingsModel { } } +/** + * Performs a vector search on documents based on a query. + * @param query The search query. + * @param files The files to search within. + * @param options Options for vector search, including folder path and tracing. + * @returns The files with scores based on relevance to the query. + */ export async function vectorSearch( query: string, files: WorkspaceFile[], @@ -128,10 +174,14 @@ export async function vectorSearch( trace?.startDetails(`🔍 embeddings`) try { trace?.itemValue(`model`, embeddingsModel) + + // Import the local document index const { LocalDocumentIndex } = await import( "vectra/lib/LocalDocumentIndex" ) const tokenizer = { encode, decode } + + // Resolve connection info for the embeddings model const { info, configuration } = await resolveModelConnectionInfo( { model: embeddingsModel, @@ -147,8 +197,12 @@ export async function vectorSearch( if (info.error) throw new Error(info.error) if (!configuration) throw new Error("No configuration found for vector search") + + // Pull the model await runtimeHost.models.pullModel(info.model) const embeddings = new OpenAIEmbeddings(info, configuration, { trace }) + + // Create a local document index const index = new LocalDocumentIndex({ tokenizer, folderPath, @@ -160,12 +214,18 @@ export async function vectorSearch( }, }) await index.createIndex({ version: 1, deleteIfExists: true }) + + // Insert documents into the index for (const file of files) { const { filename, content } = file await index.upsertDocument(filename, content) } + + // Query documents based on the search query const res = await index.queryDocuments(query, { maxDocuments: topK }) const r: WorkspaceFileWithScore[] = [] + + // Filter and return results that meet the minScore for (const re of res.filter((re) => re.score >= minScore)) { r.push({ filename: re.uri, diff --git a/packages/core/src/xlsx.ts b/packages/core/src/xlsx.ts index 7913defc53..db33a1e0f1 100644 --- a/packages/core/src/xlsx.ts +++ 
b/packages/core/src/xlsx.ts @@ -1,29 +1,53 @@ +// Import the logInfo function for logging purposes import { logInfo } from "./util" +/** + * Parses XLSX data into an array of workbook sheets. + * + * @param data - The XLSX data as a Uint8Array. + * @param options - Optional parsing options including a specific sheet name. + * @returns A promise that resolves to an array of WorkbookSheet objects. + */ export async function XLSXParse( data: Uint8Array, options?: ParseXLSXOptions ): Promise { + // Destructure options to separate sheet-specific option const { sheet, ...rest } = options || {} + // Dynamically import 'xlsx' library's read and utils modules const { read, utils } = await import("xlsx") + // Read the workbook from the data with 'array' type const workbook = read(data, { type: "array" }) + // Filter and map the sheet names to WorkbookSheet objects return workbook.SheetNames.filter((n) => !sheet || n === sheet).map( (name) => { + // Convert the worksheet to JSON and cast to object array const worksheet = workbook.Sheets[name] const rows = utils.sheet_to_json(worksheet, rest) as object[] + // Return a WorkbookSheet object with sheet name and rows return { name, rows } } ) } +/** + * Attempts to parse XLSX data, returning an empty array on failure. + * + * @param data - The XLSX data as a Uint8Array. + * @param options - Optional parsing options including a specific sheet name. + * @returns A promise that resolves to an array of WorkbookSheet objects or an empty array if parsing fails. 
+ */ export async function XLSXTryParse( data: Uint8Array, options?: ParseXLSXOptions ): Promise { try { + // Attempt to parse the XLSX data return await XLSXParse(data, options) } catch (e) { + // Log any errors encountered during parsing logInfo(e) + // Return an empty array if parsing fails return [] } } diff --git a/packages/core/src/xml.ts b/packages/core/src/xml.ts index ef4ef4fb14..d7aa38ea82 100644 --- a/packages/core/src/xml.ts +++ b/packages/core/src/xml.ts @@ -1,29 +1,57 @@ +// Import XMLParser from the fast-xml-parser package import { XMLParser } from "fast-xml-parser" + +// Import a utility function for logging errors import { logError } from "./util" + +// Import a function to remove certain markers from XML strings import { unfence } from "./fence" +/** + * Attempts to parse an XML string, returning a default value on failure. + * + * @param text - The XML string to parse + * @param defaultValue - The value to return if parsing fails + * @param options - Optional configuration for the XML parser + * @returns The parsed XML object or defaultValue if an error occurs + */ export function XMLTryParse( text: string, defaultValue?: any, options?: XMLParseOptions ) { try { + // Try parsing the text and return the result or defaultValue return XMLParse(text, options) ?? defaultValue } catch (e) { + // Log any errors during parsing logError(e) + // Return the default value if parsing fails return defaultValue } } +/** + * Parses an XML string into an object. 
+ * + * @param text - The XML string to parse + * @param options - Optional configuration for the XML parser + * @returns The parsed XML object + */ export function XMLParse(text: string, options?: XMLParseOptions) { + // Remove specific markers from the XML string for cleaner processing const cleaned = unfence(text, "xml") + + // Create a new XMLParser instance with the specified options const parser = new XMLParser({ - ignoreAttributes: false, - attributeNamePrefix: "@_", - allowBooleanAttributes: true, - ignoreDeclaration: true, - parseAttributeValue: true, - ...(options || {}), + ignoreAttributes: false, // Do not ignore XML attributes + attributeNamePrefix: "@_", // Prefix for attribute names + allowBooleanAttributes: true, // Allow boolean attributes + ignoreDeclaration: true, // Ignore the XML declaration + parseAttributeValue: true, // Parse attribute values + ...(options || {}), // Merge user-provided options with defaults }) + + // Parse the cleaned XML string and return the result return parser.parse(cleaned) } diff --git a/packages/core/src/yaml.ts b/packages/core/src/yaml.ts index da2f69f167..6a0296fb99 100644 --- a/packages/core/src/yaml.ts +++ b/packages/core/src/yaml.ts @@ -1,5 +1,27 @@ +/** + * This module provides utility functions to parse and stringify YAML content. + * It includes functions to safely parse YAML strings with error handling, + * as well as direct parse and stringify functionalities. + */ + import { parse, stringify } from "yaml" +/** + * Safely attempts to parse a YAML string into a JavaScript object. + * Tries to parse the input YAML string, and returns a default value + * in case of failure or specific conditions. + * + * @template T - The expected type of the parsed result. + * @param text - The YAML string to parse. + * @param defaultValue - A default value to return if parsing fails or if + * `ignoreLiterals` is true and the result is a literal. + * @param options - Optional settings for parsing. 
+ * @param options.ignoreLiterals - If true, returns the defaultValue when the + * parsed result is a primitive type (number, + * boolean, string). + * @returns The parsed object, or the defaultValue if parsing fails or + * conditions are met. + */ export function YAMLTryParse( text: string, defaultValue?: T, @@ -8,6 +30,7 @@ export function YAMLTryParse( const { ignoreLiterals } = options || {} try { const res = parse(text) + // Check if parsed result is a primitive and ignoreLiterals is true if ( ignoreLiterals && ["number", "boolean", "string"].includes(typeof res) @@ -15,14 +38,29 @@ export function YAMLTryParse( return defaultValue return res ?? defaultValue } catch (e) { + // Return defaultValue in case of a parsing error return defaultValue } } +/** + * Parses a YAML string into a JavaScript object. + * This function assumes the input string is valid YAML. + * + * @param text - The YAML string to parse. + * @returns The parsed object. + */ export function YAMLParse(text: string): any { return parse(text) } +/** + * Converts a JavaScript object into a YAML string. + * This function provides a YAML representation of the input object. + * + * @param obj - The object to convert to YAML. + * @returns The YAML string representation of the object. + */ export function YAMLStringify(obj: any): string { return stringify(obj, undefined, 2) } diff --git a/packages/vscode/genaisrc/cmt.genai.mts b/packages/vscode/genaisrc/cmt.genai.mts index bfe5df49ea..651443aaf4 100644 --- a/packages/vscode/genaisrc/cmt.genai.mts +++ b/packages/vscode/genaisrc/cmt.genai.mts @@ -7,6 +7,10 @@ script({ Modified from https://x.com/mckaywrigley/status/1838321570969981308. 
`, parameters: { + format: { + type: "string", + description: "Format source code command", + }, build: { type: "string", description: "Build command", @@ -14,7 +18,7 @@ script({ }, }) -const build = env.vars.build +const { format, build } = env.vars.build const saveLimit = pLimit(1) // Get files from environment or modified files from Git if none provided @@ -58,15 +62,26 @@ async function processFile(file: WorkspaceFile) { console.log(`updating ${file.filename}`) await workspace.writeText(file.filename, newContent) let revert = false + // try formatting + if (format) { + const formatRes = await host.exec( + `${format} ${file.filename}` + ) + if (formatRes.exitCode !== 0) { + revert = true + } + } // try building - if (build) { - const buildRes = await host.exec(build) + if (!revert && build) { + const buildRes = await host.exec( + `${build} ${file.filename}` + ) if (buildRes.exitCode !== 0) { revert = true } } // last LLM as judge check - revert = revert || (await checkModifications(file.filename)) + if (!revert) revert = await checkModifications(file.filename) // revert if (revert) { @@ -81,7 +96,7 @@ async function processFile(file: WorkspaceFile) { } // Function to add comments to code -async function addComments(file: WorkspaceFile) { +async function addComments(file: WorkspaceFile): Promise { let { filename, content } = file if (parsers.tokens(file) > 20000) return undefined // too big @@ -129,6 +144,7 @@ When adding or updating comments, follow these guidelines: - For TypeScript functions, classes and fields, use JSDoc comments. do NOT add type annotations in comments. - For Python functions and classes, use docstrings. - do not modify comments with TODOs. +- do not modify comments with URLs or links as they are reference to external resources. Your output should be the original code with your added comments. Make sure to preserve the original code's formatting and structure. 
@@ -147,7 +163,7 @@ Your comments should provide insight into the code's purpose, logic, and any imp return content } -async function checkModifications(filename: string) { +async function checkModifications(filename: string): Promise { const diff = await host.exec(`git diff ${filename}`) if (!diff.stdout) return false const res = await runPrompt( @@ -166,4 +182,5 @@ async function checkModifications(filename: string) { const modified = res.text?.includes("MODIFIED") console.log(`code modified, reverting...`) + return modified } From ce8f277618d692a3719b5d8bce4974649a405c7a Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Tue, 24 Sep 2024 16:45:05 +0000 Subject: [PATCH 2/5] Replace angle brackets with HTML entities in CLI documentation. --- docs/src/content/docs/reference/cli/run.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/content/docs/reference/cli/run.mdx b/docs/src/content/docs/reference/cli/run.mdx index 897cab567f..1439326f5a 100644 --- a/docs/src/content/docs/reference/cli/run.mdx +++ b/docs/src/content/docs/reference/cli/run.mdx @@ -40,7 +40,7 @@ See [configuration](/genaiscript/getting-started/configuration). npx genaiscript run