Skip to content

Commit

Permalink
feat: 🎨 Add support for rendering PDFs as images
Browse files Browse the repository at this point in the history
  • Loading branch information
pelikhan committed Nov 17, 2024
1 parent 0464df7 commit 492d0fe
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 18 deletions.
4 changes: 2 additions & 2 deletions packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@
"zx": "^8.2.2"
},
"scripts": {
"compile": "esbuild src/main.ts --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:vectra --external:tabletojson --external:html-to-text --external:@octokit/rest --external:@octokit/plugin-throttling --external:@octokit/plugin-retry --external:@octokit/plugin-paginate-rest && node ../../scripts/patch-cli.mjs",
"compile-debug": "esbuild src/main.ts --sourcemap --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:vectra --external:tabletojson --external:html-to-text --external:@octokit/rest --external:@octokit/plugin-throttling --external:@octokit/plugin-retry --external:@octokit/plugin-paginate-rest",
"compile": "esbuild src/main.ts --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:vectra --external:tabletojson --external:html-to-text --external:@octokit/rest --external:@octokit/plugin-throttling --external:@octokit/plugin-retry --external:@octokit/plugin-paginate-rest --external:canvas && node ../../scripts/patch-cli.mjs",
"compile-debug": "esbuild src/main.ts --sourcemap --metafile=./esbuild.meta.json --bundle --platform=node --target=node20 --outfile=built/genaiscript.cjs --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:dockerode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:promptfoo --external:typescript --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:mammoth --external:xlsx --external:mathjs --external:@azure/identity --external:gpt-tokenizer --external:playwright --external:@inquirer/prompts --external:jimp --external:turndown --external:vectra --external:tabletojson --external:html-to-text --external:@octokit/rest --external:@octokit/plugin-throttling --external:@octokit/plugin-retry --external:@octokit/plugin-paginate-rest --external:canvas",
"postcompile": "node built/genaiscript.cjs info help > ../../docs/src/content/docs/reference/cli/commands.md",
"vis:treemap": "npx --yes esbuild-visualizer --metadata esbuild.meta.json --filename esbuild.treemap.html",
"vis:network": "npx --yes esbuild-visualizer --metadata esbuild.meta.json --filename esbuild.network.html --template network",
Expand Down
13 changes: 13 additions & 0 deletions packages/core/src/parsers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,19 @@ describe("parsers", () => {
assert.equal(result.key, "value")
})

// Plain text extraction: the sample document must yield content containing "Lorem".
test("PDF", async () => {
    const source = { filename: "src/rag/loremipsum.pdf" }
    const { file } = await parsers.PDF(source)
    assert(file.content.includes("Lorem"))
})

// Same extraction with page rendering switched on; only the extracted text is checked here.
test("PDF-image", async () => {
    const source = { filename: "src/rag/loremipsum.pdf" }
    const pdfOptions = { renderAsImage: true }
    const { file } = await parsers.PDF(source, pdfOptions)
    assert(file.content.includes("Lorem"))
})

test("CSV", () => {
const result = parsers.CSV("key,value\n1,2")
assert.deepStrictEqual(result, [{ key: "1", value: "2" }])
Expand Down
16 changes: 8 additions & 8 deletions packages/core/src/parsers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,15 @@ export async function createParsers(options: {
trace,
}
const filename = typeof file === "string" ? file : file.filename
const { pages, content } = (await parsePdf(filename, opts)) || {}
const { pages, images, content } =
(await parsePdf(filename, opts)) || {}
return {
file: pages
? <WorkspaceFile>{
filename,
content,
}
: undefined,
file: <WorkspaceFile>{
filename,
content,
},
pages,
images,
}
},
code: async (file, query) => {
Expand All @@ -121,6 +121,6 @@ export async function createParsers(options: {
diff: (f1, f2) => llmifyDiff(createDiff(f1, f2)),
tidyData: (rows, options) => tidyData(rows, options),
hash: async (text, options) => await hash(text, options),
unfence: unfence
unfence: unfence,
})
}
47 changes: 40 additions & 7 deletions packages/core/src/pdf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import { host } from "./host"
import { TraceOptions } from "./trace"
import os from "os"
import { serializeError } from "./error"
import { logVerbose, logWarn } from "./util"
import { log } from "console"

// Declare a global type for SVGGraphics as any
declare global {
Expand All @@ -30,6 +32,17 @@ async function tryImportPdfjs(options?: TraceOptions) {
return pdfjs
}

/**
 * Dynamically imports the `canvas` package and hands back its `createCanvas` export.
 *
 * The import is attempted on every call. When it fails, a warning is logged,
 * the error is sent to the verbose log, and nothing is thrown.
 *
 * @returns the `createCanvas` function, or `undefined` if the import failed
 */
async function tryImportCanvas() {
    try {
        const canvasModule = await import("canvas")
        return canvasModule.createCanvas
    } catch (err) {
        logWarn("Failed to import canvas")
        logVerbose(err)
    }
    return undefined
}

/**
* Installs a shim for Promise.withResolvers if not available.
*/
Expand Down Expand Up @@ -60,9 +73,10 @@ function installPromiseWithResolversShim() {
async function PDFTryParse(
fileOrUrl: string,
content?: Uint8Array,
options?: { disableCleanup?: boolean } & TraceOptions
options?: ParsePDFOptions & TraceOptions
) {
const { disableCleanup, trace } = options || {}
const { disableCleanup, trace, renderAsImage } = options || {}

try {
const pdfjs = await tryImportPdfjs(options)
const { getDocument } = pdfjs
Expand All @@ -75,6 +89,7 @@ async function PDFTryParse(
const doc = await loader.promise
const numPages = doc.numPages
const pages: string[] = []
const images: Buffer[] = []

// Iterate through each page and extract text content
for (let i = 0; i < numPages; i++) {
Expand All @@ -91,8 +106,26 @@ async function PDFTryParse(

// Collapse trailing spaces
pages.push(lines.join("\n"))

if (renderAsImage) {
const viewport = page.getViewport({ scale: 1.5 })
const createCanvas = await tryImportCanvas()
if (createCanvas) {
const canvas = await createCanvas(
viewport.width,
viewport.height
)
const canvasContext = canvas.getContext("2d")
await page.render({
canvasContext: canvasContext as any,
viewport,
}).promise
const buffer = canvas.toBuffer("image/png")
images.push(buffer)
}
}
}
return { ok: true, pages }
return { ok: true, pages, images }
} catch (error) {
trace?.error(`reading pdf`, error) // Log error if tracing is enabled
return { ok: false, error: serializeError(error) }
Expand All @@ -117,14 +150,14 @@ function PDFPagesToString(pages: string[]) {
export async function parsePdf(
filename: string,
options?: ParsePDFOptions & TraceOptions
): Promise<{ pages: string[]; content: string }> {
const { trace, filter } = options || {}
let { pages } = await PDFTryParse(filename, undefined, options)
): Promise<{ pages: string[]; images?: Buffer[]; content: string }> {
const { filter } = options || {}
// NOTE(review): when PDFTryParse fails it returns { ok: false, error } with no
// `pages`/`images`, so both are undefined here and `pages.filter` below would throw.
let { pages, images } = await PDFTryParse(filename, undefined, options)

// Apply filter if provided
// NOTE(review): only `pages` is filtered; `images` is returned unfiltered, so
// after filtering the two arrays no longer line up index-for-index.
if (filter) pages = pages.filter((page, index) => filter(index, page))
// Join the remaining page texts into a single content string
const content = PDFPagesToString(pages)
return { pages, content }
return { pages, images, content }
}

/**
Expand Down
2 changes: 2 additions & 0 deletions packages/core/src/types/prompt_template.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,8 @@ interface XMLParseOptions {
}

/**
 * Options accepted by the PDF parsing functions.
 */
interface ParsePDFOptions {
/** NOTE(review): read by the PDF parser; presumably skips the per-page text cleanup — confirm against pdf.ts. */
disableCleanup?: boolean
/** When true, each page is also rendered through the optional `canvas` package and collected as a PNG buffer. */
renderAsImage?: boolean
/** Called with the page index and the page text; pages for which it returns false are dropped from the result. */
filter?: (pageIndex: number, text?: string) => boolean
}

Expand Down
2 changes: 1 addition & 1 deletion packages/vscode/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@
"vscode:update-dts": "npx @vscode/dts dev && mv vscode.*.d.ts src/",
"vscode:prepublish": "yarn run compile",
"compile:icons": "node updatefonts.mjs",
"compile:extension": "esbuild src/extension.ts --sourcemap --metafile=./esbuild.meta.json --bundle --format=cjs --platform=node --target=node20 --outfile=built/extension.js --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:vscode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:@lvce-editor/ripgrep --external:gpt-3-encoder",
"compile:extension": "esbuild src/extension.ts --sourcemap --metafile=./esbuild.meta.json --bundle --format=cjs --platform=node --target=node20 --outfile=built/extension.js --external:tsx --external:esbuild --external:get-tsconfig --external:resolve-pkg-maps --external:vscode --external:pdfjs-dist --external:web-tree-sitter --external:tree-sitter-wasms --external:@lvce-editor/ripgrep --external:gpt-3-encoder --external:canvas",
"compile": "yarn compile:icons && yarn compile:extension",
"vis:treemap": "npx --yes esbuild-visualizer --metadata esbuild.meta.json --filename esbuild.treemap.html",
"vis:network": "npx --yes esbuild-visualizer --metadata esbuild.meta.json --filename esbuild.network.html --template network",
Expand Down

0 comments on commit 492d0fe

Please sign in to comment.