From ddc245c41f687217732d73fd68e4ced979a6ec7a Mon Sep 17 00:00:00 2001 From: Peli de Halleux Date: Sun, 17 Nov 2024 17:21:07 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20=F0=9F=A7=A9=20enhance=20PDF=20parser?= =?UTF-8?q?=20with=20image=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/core/src/parsers.test.ts | 3 ++- packages/core/src/types/prompt_template.d.ts | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/core/src/parsers.test.ts b/packages/core/src/parsers.test.ts index 30d4aeafe8..4f14038b80 100644 --- a/packages/core/src/parsers.test.ts +++ b/packages/core/src/parsers.test.ts @@ -47,7 +47,8 @@ describe("parsers", () => { }) test("PDF", async () => { - const result = await parsers.PDF({ filename: "src/rag/loremipsum.pdf" }) + const result = await parsers.PDF({ filename: "./src/rag/loremipsum.pdf" }) + console.log(result) assert(result.file.content.includes("Lorem")) }) diff --git a/packages/core/src/types/prompt_template.d.ts b/packages/core/src/types/prompt_template.d.ts index 87398e786b..58427f299f 100644 --- a/packages/core/src/types/prompt_template.d.ts +++ b/packages/core/src/types/prompt_template.d.ts @@ -1326,7 +1326,9 @@ interface Parsers { PDF( content: string | WorkspaceFile, options?: ParsePDFOptions - ): Promise<{ file: WorkspaceFile; pages: string[] } | undefined> + ): Promise< + { file: WorkspaceFile; pages: string[]; images?: Buffer[] } | undefined + > /** * Parses a .docx file