From 6913415fd944c334db2e57e451753958d4886c1e Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Fri, 27 Sep 2024 15:20:15 +0000 Subject: [PATCH] Add Turndown library; refactor HTMLToMarkdown for dynamic import and clean logs --- packages/cli/package.json | 5 +++-- packages/core/src/github.ts | 7 ++++--- packages/core/src/html.ts | 8 +++++--- packages/core/src/shell.ts | 4 ++++ 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/packages/cli/package.json b/packages/cli/package.json index 624348d802..6e0e30b402 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -44,6 +44,7 @@ "playwright": "^1.47.2", "tree-sitter-wasms": "^0.1.11", "tsx": "^4.19.1", + "turndown": "^7.2.0", "typescript": "5.6.2", "vectra": "^0.9.0", "web-tree-sitter": "^0.22.2", @@ -82,8 +83,8 @@ "zx": "^8.1.8" }, "scripts": { - "compile": "esbuild src/main.ts --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp && node ../../scripts/patch-cli.mjs", - "compile-debug": "esbuild src/main.ts --sourcemap --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity 
--external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp", + "compile": "esbuild src/main.ts --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:vectra && node ../../scripts/patch-cli.mjs", + "compile-debug": "esbuild src/main.ts --sourcemap --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:vectra", "postcompile": "node built/genaiscript.cjs info help > ../../docs/src/content/docs/reference/cli/commands.md", "vis:treemap": "npx --yes esbuild-visualizer --metadata esbuild.meta.json --filename esbuild.treemap.html", "vis:network": "npx --yes esbuild-visualizer --metadata esbuild.meta.json --filename esbuild.network.html --template network", diff --git a/packages/core/src/github.ts b/packages/core/src/github.ts index 7a0a5dbed1..4b672eaffd 100644 --- a/packages/core/src/github.ts +++ b/packages/core/src/github.ts @@ -9,6 +9,7 @@ import { createFetch } from "./fetch" import 
{ runtimeHost } from "./host" import { link, prettifyMarkdown } from "./markdown" import { assert, logError, logVerbose, normalizeInt } from "./util" +import { shellRemoveAsciiColors } from "./shell" export interface GithubConnectionInfo { token: string @@ -605,13 +606,13 @@ export class GitHubClient implements GitHub { } function cleanLog(text: string) { - return text - .replace( + return shellRemoveAsciiColors( + text.replace( // timestamps /^?\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{2,}Z /gm, "" ) - .replace(/\x1b\[[0-9;]*m/g, "") // ascii colors + ) } } } diff --git a/packages/core/src/html.ts b/packages/core/src/html.ts index 3feef21163..251b116aba 100644 --- a/packages/core/src/html.ts +++ b/packages/core/src/html.ts @@ -5,8 +5,6 @@ import { convert as convertToText } from "html-to-text" // Import the convert fu import { TraceOptions } from "./trace" // Import TraceOptions for optional logging features -import Turndown from "turndown" // Import Turndown library for HTML to Markdown conversion - import { tabletojson } from "tabletojson" // Import tabletojson for converting HTML tables to JSON /** @@ -52,11 +50,15 @@ export function HTMLToText( * @param options - Optional tracing parameters. * @returns The Markdown representation of the HTML. 
*/ -export function HTMLToMarkdown(html: string, options?: TraceOptions): string { +export async function HTMLToMarkdown( + html: string, + options?: TraceOptions +): Promise<string> { if (!html) return html // Return original content if no HTML is provided const { trace } = options || {} // Extract trace for logging if available try { + const Turndown = (await import("turndown")).default // Import Turndown library for HTML to Markdown conversion const res = new Turndown().turndown(html) // Use Turndown library to convert HTML to Markdown return res } catch (e) { diff --git a/packages/core/src/shell.ts b/packages/core/src/shell.ts index d776ec871c..0452d03ae3 100644 --- a/packages/core/src/shell.ts +++ b/packages/core/src/shell.ts @@ -17,3 +17,7 @@ export function shellParse(cmd: string): string[] { export function shellQuote(args: string[]): string { return quote(args) } + +export function shellRemoveAsciiColors(text: string) { + return text?.replace(/\x1b\[[0-9;]*m/g, "") // ascii colors +}