diff --git a/packages/parser/src/__tests__/parse.test.ts b/packages/parser/src/__tests__/parse.test.ts index 713baf5..932a02f 100644 --- a/packages/parser/src/__tests__/parse.test.ts +++ b/packages/parser/src/__tests__/parse.test.ts @@ -44,8 +44,4 @@ describe('parse', () => { assert.strictEqual(hwpDocument.sections[0].width, 59528) assert.strictEqual(hwpDocument.sections[0].height, 84188) }) - - it('should parse signature', () => { - assert.strictEqual(hwpDocument.header.signature, 'HWP Document File') - }) }) diff --git a/packages/parser/src/doc-info-parser.ts b/packages/parser/src/doc-info-parser.ts index ff4943a..696113c 100644 --- a/packages/parser/src/doc-info-parser.ts +++ b/packages/parser/src/doc-info-parser.ts @@ -191,7 +191,7 @@ export class DocInfoParser { if ( properties.compress === BinDataCompress.COMPRESS - || (properties.compress === BinDataCompress.DEFAULT && this.header.properties.compressed) + || (properties.compress === BinDataCompress.DEFAULT && this.header.flags.compressed) ) { const data = inflate(Uint8Array.from(payload), { windowBits: -15 }) this.result.binData.push(new BinData(properties, extension, data)) diff --git a/packages/parser/src/models/document.ts b/packages/parser/src/models/document.ts index 92f09a0..1f9bb7e 100644 --- a/packages/parser/src/models/document.ts +++ b/packages/parser/src/models/document.ts @@ -19,19 +19,9 @@ import { HWPHeader } from './header.js' import { Section } from './section.js' export class HWPDocument { - header: HWPHeader - - info: DocInfo - - sections: Section[] - constructor( - header: HWPHeader, - info: DocInfo, - sections: Section[], - ) { - this.header = header - this.info = info - this.sections = sections - } + public header: HWPHeader, + public info: DocInfo, + public sections: Section[], + ) {} } diff --git a/packages/parser/src/models/header.ts b/packages/parser/src/models/header.ts index 28b3026..edf6fa6 100644 --- a/packages/parser/src/models/header.ts +++ b/packages/parser/src/models/header.ts @@ -1,7 +1,7 @@ /** * Copyright Han Lee and other contributors * - * Licensed under the Apache License, Version 2.0 (the "License"); + * Licensed under the Apache License, Version 2.0 (the 'License'); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * @@ -14,77 +14,206 @@ * limitations under the License. */ -import { HWPVersion } from './version.js' - -interface Properties { - /** 압축 여부 */ - compressed: boolean - - /** 암호 설정 여부 */ - encrypted: boolean - - /** 배포용 문서 여부 */ - distribution: boolean - - /** 스크립트 저장 여부 */ - script: boolean - - /** DRM 보안 문서 여부 */ - drm: boolean - - /** XMLTemplate 스토리지 존재 여부 */ - hasXmlTemplateStorage: boolean +import { find, type CFB$Container } from 'cfb' - /** 문서 이력 관리 존재 여부 */ - vcs: boolean - - /** 전자 서명 정보 존재 여부 */ - hasElectronicSignatureInfomation: boolean - - /** 공인 인증서 암호화 여부 */ - certificateEncryption: boolean - - /** 전자 서명 예비 저장 여부 */ - prepareSignature: boolean - - /** 공인 인증서 DRM 보안 문서 여부 */ - certificateDRM: boolean - - /** CCL 문서 여부 */ - ccl: boolean - - /** 모바일 최적화 여부 */ - mobile: boolean +import { HWPVersion } from './version.js' +import { ByteReader } from '../utils/byte-reader.js' +import { getBitValue } from '../utils/bit-utils.js' - /** 개인 정보 보안 문서 여부 */ - isPrivacySecurityDocument: boolean +const SIGNATURE = 'HWP Document File' +const FILE_HEADER_BYTES = 256 - /** 변경 추적 문서 여부 */ - trackChanges: boolean +export class HWPHeader { + constructor( + public version: HWPVersion, + public flags: Flags, + public license: License, + public encryptVersion: EncryptVersion, + public kogl: KOGL, + public reserved: ArrayBuffer + ) {} + + static fromCfbContainer(container: CFB$Container): HWPHeader { + const fileHeader = find(container, 'FileHeader') + + if (!fileHeader) { + throw new Error('Cannot find FileHeader') + } + + const reader = new ByteReader(Uint8Array.from(fileHeader.content).buffer) + return HWPHeader.fromByteReader(reader) + } - /** 공공누리(KOGL) 저작권 문서 */ - kogl: boolean + static fromByteReader(reader: ByteReader): HWPHeader { + if (reader.length() !== FILE_HEADER_BYTES) { + throw new Error( + `FileHeader must be ${FILE_HEADER_BYTES} bytes, Received: ${reader.length()}` + ) + } + + const signature = new TextDecoder().decode(reader.read(17)) + if (signature !== SIGNATURE) { + throw new Error( + `hwp file's signature should be ${SIGNATURE}. Received version: ${signature}` + ) + } + + // Reserved + reader.read(15) + + const revision = reader.readUInt8() + const build = reader.readUInt8() + const minor = reader.readUInt8() + const major = reader.readUInt8() + const version = new HWPVersion(major, minor, build, revision) + + const flags = Flags.fromBits(reader.readUInt32()) + const license = License.fromBits(reader.readUInt32()) + const encryptVersion = mapEncryptVersion(reader.readInt32()) + const kogl = mapKogl(reader.readUInt8()) + const reserved = reader.read(207) + + if (!reader.isEOF()) { + throw new Error('FileHeader is not EOF') + } + + return new HWPHeader( + version, + flags, + license, + encryptVersion, + kogl, + reserved + ) + } +} - /** 비디오 컨트롤 포함 여부 */ - hasVideoControl: boolean +export class Flags { + constructor( + /** 압축 여부 */ + public compressed: boolean, + /** 암호 설정 여부 */ + public encrypted: boolean, + /** 배포용 문서 여부 */ + public distributed: boolean, + /** 스크립트 저장 여부 */ + public script: boolean, + /** DRM 보안 문서 여부 */ + public drm: boolean, + /** XMLTemplate 스토리지 존재 여부 */ + public xmlTemplateStorage: boolean, + /** 문서 이력 관리 존재 여부 */ + public vcs: boolean, + /** 전자 서명 정보 존재 여부 */ + public electronicSignatured: boolean, + /** 공인 인증서 암호화 여부 */ + public certificateEncrypted: boolean, + /** 전자 서명 예비 저장 여부 */ + public prepareSignatured: boolean, + /** 공인 인증서 DRM 보안 문서 여부 */ + public certificateDrm: boolean, + /** CCL 문서 여부 */ + public ccl: boolean, + /** 모바일 최적화 여부 */ + public mobileOptimized: boolean, + /** 개인 정보 보안 문서 여부 */ + public privacySecurityDocument: boolean, + /** 변경 추적 문서 여부 */ + public trackChanges: boolean, + /** 공공누리(KOGL) 저작권 문서 */ + public kogl: boolean, + /** 비디오 컨트롤 포함 여부 */ + public videoControl: boolean, + /** 차례 필드 컨트롤 포함 여부 */ + public orderFieldControl: boolean + ) {} + + static fromBits(bits: number): Flags { + return new Flags( + Boolean(getBitValue(bits, 0)), + Boolean(getBitValue(bits, 1)), + Boolean(getBitValue(bits, 2)), + Boolean(getBitValue(bits, 3)), + Boolean(getBitValue(bits, 4)), + Boolean(getBitValue(bits, 5)), + Boolean(getBitValue(bits, 6)), + Boolean(getBitValue(bits, 7)), + Boolean(getBitValue(bits, 8)), + Boolean(getBitValue(bits, 9)), + Boolean(getBitValue(bits, 10)), + Boolean(getBitValue(bits, 11)), + Boolean(getBitValue(bits, 12)), + Boolean(getBitValue(bits, 13)), + Boolean(getBitValue(bits, 14)), + Boolean(getBitValue(bits, 15)), + Boolean(getBitValue(bits, 16)), + Boolean(getBitValue(bits, 17)) + ) + } +} - /** 차례 필드 컨트롤 포함 여부 */ - hasOrderFieldControl: boolean +export class License { + constructor( + public ccl: boolean, + public replicationRestrictions: boolean, + public replicationAlike: boolean, + public reserved: number + ) {} + + static fromBits(bits: number): License { + return new License( + Boolean(getBitValue(bits, 0)), + Boolean(getBitValue(bits, 1)), + Boolean(getBitValue(bits, 2)), + getBitValue(bits, 3, 32) + ) + } } -/** - * @see https://github.com/hahnlee/hwp.js/blob/master/docs/hwp/5.0/FileHeader.md - */ -export class HWPHeader { - version: HWPVersion +export enum EncryptVersion { + None, + /** 한/글 2.5 버전 이하 */ + HWP2_5, + /** 한/글 3.0 버전 Enhanced */ + HWP3Enhanced, + /** 한/글 3.0 버전 Old */ + HWP3Old, + /** 한/글 7.0 버전 이후 */ + HWP7, +} - signature: string +function mapEncryptVersion(byte: number): EncryptVersion { + switch (byte) { + case 0: + return EncryptVersion.None + case 1: + return EncryptVersion.HWP2_5 + case 2: + return EncryptVersion.HWP3Enhanced + case 3: + return EncryptVersion.HWP3Old + case 4: + return EncryptVersion.HWP7 + default: + throw new Error(`Unknown encrypt version: ${byte}`) + } +} - properties: Properties +export enum KOGL { + None, + KOR = 6, + US = 15, +} - constructor(version: HWPVersion, signature: string, properties: Properties) { - this.version = version - this.signature = signature - this.properties = properties +function mapKogl(byte: number): KOGL { + switch (byte) { + case 0: + return KOGL.None + case 6: + return KOGL.KOR + case 15: + return KOGL.US + default: + throw new Error(`Unknown KOGL: ${byte}`) } } diff --git a/packages/parser/src/parse.ts b/packages/parser/src/parse.ts index d878091..5d2a6fb 100644 --- a/packages/parser/src/parse.ts +++ b/packages/parser/src/parse.ts @@ -25,72 +25,9 @@ import { inflate } from 'pako' import { HWPDocument } from './models/document.js' import { DocInfo } from './models/doc-info.js' import { HWPHeader } from './models/header.js' -import { HWPVersion } from './models/version.js' import { Section } from './models/section.js' import { DocInfoParser } from './doc-info-parser.js' import { SectionParser } from './section-parser.js' -import { ByteReader } from './utils/byte-reader.js' -import { getBitValue } from './utils/bit-utils.js' - -// @link https://github.com/hahnlee/hwp.js/blob/master/docs/hwp/5.0/FileHeader.md#%ED%8C%8C%EC%9D%BC-%EC%9D%B8%EC%8B%9D-%EC%A0%95%EB%B3%B4 -const FILE_HEADER_BYTES = 256 - -const SUPPORTED_VERSION = new HWPVersion(5, 1, 0, 0) -const SIGNATURE = 'HWP Document File' - -function parseFileHeader(container: CFB$Container): HWPHeader { - const fileHeader = find(container, 'FileHeader') - - if (!fileHeader) { - throw new Error('Cannot find FileHeader') - } - - const { content } = fileHeader - - if (content.length !== FILE_HEADER_BYTES) { - throw new Error(`FileHeader must be ${FILE_HEADER_BYTES} bytes, Received: ${content.length}`) - } - - const signature = String.fromCharCode(...Array.from(content.slice(0, 17))) - if (SIGNATURE !== signature) { - throw new Error(`hwp file's signature should be ${SIGNATURE}. Received version: ${signature}`) - } - - const [major, minor, build, revision] = Array.from(content.slice(32, 36)).reverse() - const version = new HWPVersion(major, minor, build, revision) - - if (!version.isCompatible(SUPPORTED_VERSION)) { - throw new Error(`hwp.js only support ${SUPPORTED_VERSION.toString()} format. Received version: ${version.toString()}`) - } - - const reader = new ByteReader(Uint8Array.from(content).buffer) - - // signature bytes + version bytes - reader.skipByte(32 + 4) - - const data = reader.readUInt32() - - return new HWPHeader(version, signature, { - compressed: Boolean(getBitValue(data, 0)), - encrypted: Boolean(getBitValue(data, 1)), - distribution: Boolean(getBitValue(data, 2)), - script: Boolean(getBitValue(data, 3)), - drm: Boolean(getBitValue(data, 4)), - hasXmlTemplateStorage: Boolean(getBitValue(data, 5)), - vcs: Boolean(getBitValue(data, 6)), - hasElectronicSignatureInfomation: Boolean(getBitValue(data, 7)), - certificateEncryption: Boolean(getBitValue(data, 8)), - prepareSignature: Boolean(getBitValue(data, 9)), - certificateDRM: Boolean(getBitValue(data, 10)), - ccl: Boolean(getBitValue(data, 11)), - mobile: Boolean(getBitValue(data, 12)), - isPrivacySecurityDocument: Boolean(getBitValue(data, 13)), - trackChanges: Boolean(getBitValue(data, 14)), - kogl: Boolean(getBitValue(data, 15)), - hasVideoControl: Boolean(getBitValue(data, 16)), - hasOrderFieldControl: Boolean(getBitValue(data, 17)), - }) -} function parseDocInfo(container: CFB$Container, header: HWPHeader): DocInfo { const docInfoEntry = find(container, 'DocInfo') @@ -101,7 +38,7 @@ function parseDocInfo(container: CFB$Container, header: HWPHeader): DocInfo { const content = docInfoEntry.content - if (header.properties.compressed) { + if (header.flags.compressed) { const decodedContent = inflate(Uint8Array.from(content), { windowBits: -15 }) return new DocInfoParser(header, decodedContent, container).parse() } else { @@ -118,7 +55,7 @@ function parseSection(container: CFB$Container, header: HWPHeader, sectionNumber const content = section.content - if (header.properties.compressed) { + if (header.flags.compressed) { const decodedContent = inflate(Uint8Array.from(content), { windowBits: -15 }) return new SectionParser(decodedContent).parse() } else { @@ -131,7 +68,7 @@ export function parse(input: CFB$Blob): HWPDocument { type: 'array', }) - const header = parseFileHeader(container) + const header = HWPHeader.fromCfbContainer(container) const docInfo = parseDocInfo(container, header) const sections: Section[] = [] diff --git a/packages/parser/src/utils/byte-reader.ts b/packages/parser/src/utils/byte-reader.ts index 6ba414f..afd74d6 100644 --- a/packages/parser/src/utils/byte-reader.ts +++ b/packages/parser/src/utils/byte-reader.ts @@ -25,37 +25,49 @@ export class ByteReader { readUInt32(): number { const result = this.view.getUint32(this.offsetByte, true) - this.offsetByte += 4 + this.#updateOffsetByte(4) return result } readInt32(): number { const result = this.view.getInt32(this.offsetByte, true) - this.offsetByte += 4 + this.#updateOffsetByte(4) return result } readInt16(): number { const result = this.view.getUint16(this.offsetByte, true) - this.offsetByte += 2 + this.#updateOffsetByte(2) return result } readUInt16(): number { const result = this.view.getUint16(this.offsetByte, true) - this.offsetByte += 2 + this.#updateOffsetByte(2) return result } readInt8(): number { const result = this.view.getInt8(this.offsetByte) - this.offsetByte += 1 + this.#updateOffsetByte(1) return result } readUInt8(): number { const result = this.view.getUint8(this.offsetByte) - this.offsetByte += 1 + this.#updateOffsetByte(1) + return result + } + + readFloat32(): number { + const result = this.view.getFloat32(this.offsetByte, true) + this.#updateOffsetByte(4) + return result + } + + readFloat64(): number { + const result = this.view.getFloat64(this.offsetByte, true) + this.#updateOffsetByte(8) return result } @@ -74,8 +86,11 @@ export class ByteReader { } read(byte: number): ArrayBuffer { - const result = this.view.buffer.slice(this.offsetByte, this.offsetByte + byte) - this.offsetByte += byte + const result = this.view.buffer.slice( + this.offsetByte, + this.offsetByte + byte + ) + this.#updateOffsetByte(byte) return result } @@ -95,10 +110,21 @@ export class ByteReader { } skipByte(offset: number) { - this.offsetByte += offset + this.#updateOffsetByte(offset) } isEOF() { - return this.view.byteLength <= this.offsetByte + return this.view.byteLength === this.offsetByte + } + + length() { + return this.view.byteLength + } + + #updateOffsetByte(offset: number) { + this.offsetByte += offset + if (this.offsetByte > this.view.byteLength) { + throw new Error('Out of range') + } } }