Skip to content

Commit

Permalink
Add Turndown library; refactor HTMLToMarkdown for dynamic import and …
Browse files Browse the repository at this point in the history
…clean logs
  • Loading branch information
pelikhan committed Sep 27, 2024
1 parent 0e6745b commit 6913415
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 8 deletions.
5 changes: 3 additions & 2 deletions packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
"playwright": "^1.47.2",
"tree-sitter-wasms": "^0.1.11",
"tsx": "^4.19.1",
"turndown": "^7.2.0",
"typescript": "5.6.2",
"vectra": "^0.9.0",
"web-tree-sitter": "^0.22.2",
Expand Down Expand Up @@ -82,8 +83,8 @@
"zx": "^8.1.8"
},
"scripts": {
"compile": "esbuild src/main.ts --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp && node ../../scripts/patch-cli.mjs",
"compile-debug": "esbuild src/main.ts --sourcemap --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp",
"compile": "esbuild src/main.ts --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:vectra && node ../../scripts/patch-cli.mjs",
"compile-debug": "esbuild src/main.ts --sourcemap --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:vectra",
"postcompile": "node built/genaiscript.cjs info help > ../../docs/src/content/docs/reference/cli/commands.md",
"vis:treemap": "npx --yes esbuild-visualizer --metadata esbuild.meta.json --filename esbuild.treemap.html",
"vis:network": "npx --yes esbuild-visualizer --metadata esbuild.meta.json --filename esbuild.network.html --template network",
Expand Down
7 changes: 4 additions & 3 deletions packages/core/src/github.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { createFetch } from "./fetch"
import { runtimeHost } from "./host"
import { link, prettifyMarkdown } from "./markdown"
import { assert, logError, logVerbose, normalizeInt } from "./util"
import { shellRemoveAsciiColors } from "./shell"

export interface GithubConnectionInfo {
token: string
Expand Down Expand Up @@ -605,13 +606,13 @@ export class GitHubClient implements GitHub {
}

function cleanLog(text: string) {
return text
.replace(
return shellRemoveAsciiColors(
text.replace(
// timestamps
/^?\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{2,}Z /gm,
""
)
.replace(/\x1b\[[0-9;]*m/g, "") // ascii colors
)
}
}
}
8 changes: 5 additions & 3 deletions packages/core/src/html.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ import { convert as convertToText } from "html-to-text" // Import the convert fu

import { TraceOptions } from "./trace" // Import TraceOptions for optional logging features

import Turndown from "turndown" // Import Turndown library for HTML to Markdown conversion

import { tabletojson } from "tabletojson" // Import tabletojson for converting HTML tables to JSON

/**
Expand Down Expand Up @@ -52,11 +50,15 @@ export function HTMLToText(
* @param options - Optional tracing parameters.
* @returns The Markdown representation of the HTML.
*/
export function HTMLToMarkdown(html: string, options?: TraceOptions): string {
export async function HTMLToMarkdown(
html: string,
options?: TraceOptions
): Promise<string> {
if (!html) return html // Return original content if no HTML is provided
const { trace } = options || {} // Extract trace for logging if available

try {
const Turndown = (await import("turndown")).default // Import Turndown library for HTML to Markdown conversion
const res = new Turndown().turndown(html) // Use Turndown library to convert HTML to Markdown
return res
} catch (e) {
Expand Down
4 changes: 4 additions & 0 deletions packages/core/src/shell.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,7 @@ export function shellParse(cmd: string): string[] {
/**
 * Builds a single string from a list of arguments by delegating to `quote`.
 *
 * @param args - The arguments to combine.
 * @returns The string that `quote` produces for `args`.
 */
export function shellQuote(args: string[]): string {
    const quoted = quote(args)
    return quoted
}

/**
 * Removes color escape sequences from a piece of text.
 *
 * A sequence is matched as: the ESC character (`\x1b`), a `[`, any run of
 * digits and semicolons, and a terminating `m`. Escape sequences ending in
 * any other letter are left in place.
 *
 * @param text - Text that may contain color escape sequences.
 * @returns The text with every matching sequence removed. A nullish `text`
 * short-circuits through the optional chaining and yields `undefined`.
 */
export function shellRemoveAsciiColors(text: string) {
    const colorSequences = /\x1b\[[0-9;]*m/g // ESC [ <digits and ;> m
    const stripped = text?.replace(colorSequences, "")
    return stripped
}

0 comments on commit 6913415

Please sign in to comment.