From 962795743c4f21d84c52e47e4f82dcf4ca536dc3 Mon Sep 17 00:00:00 2001 From: zarwan-translate <161089115+zarwan-translate@users.noreply.github.com> Date: Tue, 21 May 2024 22:05:48 -0400 Subject: [PATCH] feat: Add ability to detect patches which are present in a file (#2633) * feat: Add ability to detect patches which are present in a file * chore: export patchDetector function * fix: Make sure we don't attempt to call toJson on binary content --------- Co-authored-by: Christopher Fox --- src/patcher/from-docx.ts | 2 +- src/patcher/index.ts | 1 + src/patcher/patch-detector.spec.ts | 225 +++++++++++++++++++++++++++++ src/patcher/patch-detector.ts | 30 ++++ src/patcher/traverser.ts | 7 +- 5 files changed, 262 insertions(+), 3 deletions(-) create mode 100644 src/patcher/patch-detector.spec.ts create mode 100644 src/patcher/patch-detector.ts diff --git a/src/patcher/from-docx.ts b/src/patcher/from-docx.ts index 97c5400b4dd..ff9b3ea8480 100644 --- a/src/patcher/from-docx.ts +++ b/src/patcher/from-docx.ts @@ -17,7 +17,7 @@ import { appendRelationship, getNextRelationshipIndex } from "./relationship-man import { appendContentType } from "./content-types-manager"; // eslint-disable-next-line functional/prefer-readonly-type -type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream; +export type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream; export const PatchType = { DOCUMENT: "file", diff --git a/src/patcher/index.ts b/src/patcher/index.ts index 466cb3eda73..786e57341f7 100644 --- a/src/patcher/index.ts +++ b/src/patcher/index.ts @@ -1 +1,2 @@ export * from "./from-docx"; +export * from "./patch-detector"; diff --git a/src/patcher/patch-detector.spec.ts b/src/patcher/patch-detector.spec.ts new file mode 100644 index 00000000000..961502c11a0 --- /dev/null +++ b/src/patcher/patch-detector.spec.ts @@ -0,0 +1,225 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import JSZip from "jszip"; +import { patchDetector } from "./patch-detector"; + +const MOCK_XML = ` + + + + + + + + + Hello World + + + + + + Hello {{name}}, + + + how are you? + + + + + + {{paragraph_replace}} + + + + + + {{table}} + + + + + + + + + + + + + + + + + + + + + + {{table_heading_1}} + + + + + + + + + + + + + + + + + + + + + + + + Item: {{item_1}} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {{image_test}} + + + + + + Thank you + + + + + + + + + + + + +`; + +describe("patch-detector", () => { + describe("patchDetector", () => { + describe("document.xml and [Content_Types].xml", () => { + beforeEach(() => { + vi.spyOn(JSZip, "loadAsync").mockReturnValue( + new Promise((resolve) => { + const zip = new JSZip(); + + zip.file("word/document.xml", MOCK_XML); + zip.file("[Content_Types].xml", ``); + resolve(zip); + }), + ); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it("should patch the document", async () => { + const output = await patchDetector({ + data: Buffer.from(""), + }); + expect(output).toMatchObject(["name", "paragraph_replace", "table", "image_test", "table_heading_1", "item_1"]); + }); + }); + }); +}); diff --git a/src/patcher/patch-detector.ts b/src/patcher/patch-detector.ts new file mode 100644 index 00000000000..7ebcede0500 --- /dev/null +++ b/src/patcher/patch-detector.ts @@ -0,0 +1,30 @@ +import JSZip from "jszip"; +import { toJson } from "./util"; +import { traverse } from "./traverser"; +import { InputDataType } from "./from-docx"; + +type PatchDetectorOptions = { + readonly data: InputDataType; +}; + +/** Detects which patches are needed/present in a template */ +export const patchDetector = async ({ data }: PatchDetectorOptions): Promise => { + const zipContent = await JSZip.loadAsync(data); + const patches = new Set(); + + for (const [key, value] of Object.entries(zipContent.files)) { + if (!key.endsWith(".xml") && !key.endsWith(".rels")) { + continue; + } + if (key.startsWith("word/") && !key.endsWith(".xml.rels")) { + const json = toJson(await value.async("text")); + traverse(json).forEach((p) => findPatchKeys(p.text).forEach((patch) => patches.add(patch))); + } + } + return Array.from(patches); +}; + +const findPatchKeys = (text: string): readonly string[] => { + const pattern = /(?<=\{\{).+?(?=\}\})/gs; + return text.match(pattern) ?? []; +}; diff --git a/src/patcher/traverser.ts b/src/patcher/traverser.ts index 52112e10b97..b05279696dc 100644 --- a/src/patcher/traverser.ts +++ b/src/patcher/traverser.ts @@ -15,7 +15,7 @@ const elementsToWrapper = (wrapper: ElementWrapper): readonly ElementWrapper[] = parent: wrapper, })) ?? []; -export const findLocationOfText = (node: Element, text: string): readonly IRenderedParagraphNode[] => { +export const traverse = (node: Element): readonly IRenderedParagraphNode[] => { let renderedParagraphs: readonly IRenderedParagraphNode[] = []; // eslint-disable-next-line functional/prefer-readonly-type @@ -41,5 +41,8 @@ export const findLocationOfText = (node: Element, text: string): readonly IRende } } - return renderedParagraphs.filter((p) => p.text.includes(text)); + return renderedParagraphs; }; + +export const findLocationOfText = (node: Element, text: string): readonly IRenderedParagraphNode[] => + traverse(node).filter((p) => p.text.includes(text));