-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.js
61 lines (54 loc) · 1.63 KB
/
main.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
const fs = require('fs');
const fastcsv = require('fast-csv');
const pdfjsLib = require('pdfjs-dist/es5/build/pdf.js');
/**
 * Flattens a PDF's info dictionary and XMP metadata into one summary row.
 *
 * @param {Object} info - Document information dictionary (Title, Author,
 *   Subject, PDFFormatVersion, Creator, IsAcroFormPresent, IsXFAPresent).
 * @param {?Object} metadata - XMP metadata wrapper whose `_metadataMap` is a
 *   Map of XMP keys; may be null/undefined (e.g. a document that carries no
 *   XMP metadata), in which case the XMP-derived fields are left empty.
 * @param {string} [filename] - File name; when truthy it is used as the Title
 *   in place of `info.Title`.
 * @returns {Object} Row with the keys Title, Author, Subject,
 *   PDFFormatVersion, CreationDate, ModifiedDate, Creator, IsAcroFormPresent,
 *   IsXFAPresent and Contact, in that order.
 */
function collapse(info, metadata, filename) {
  // Without this guard a missing metadata object throws a TypeError and the
  // whole file is dropped from the summary.
  const xmp = metadata && metadata._metadataMap ? metadata._metadataMap : new Map();

  // XMP values live in the map, so look the contact up there first; the
  // direct property read is kept as a fallback for plain-object metadata.
  const contact =
    xmp.get('desc:contact') || (metadata && metadata['desc:contact']) || '';

  return {
    Title: filename || info.Title,
    Author: info.Author,
    Subject: info.Subject || '',
    PDFFormatVersion: info.PDFFormatVersion,
    CreationDate: xmp.get('xmp:createdate'),
    ModifiedDate: xmp.get('xmp:modifydate'),
    Creator: info.Creator,
    IsAcroFormPresent: info.IsAcroFormPresent,
    IsXFAPresent: info.IsXFAPresent,
    Contact: contact,
  };
}
/**
 * Loads one PDF and reduces its metadata to a single summary row.
 *
 * Failures are best-effort: any error while loading the document or reading
 * its metadata is logged to stderr and the returned promise resolves to
 * `undefined` instead of rejecting.
 *
 * @param {*} pdf - Document source handed straight to `pdfjsLib.getDocument`
 *   (the visible caller passes a file path).
 * @param {string} [filename] - Forwarded to `collapse` as the row's Title.
 * @returns {Promise<Object|undefined>} The summary row, or `undefined` when
 *   the document could not be processed.
 */
async function summarize(pdf, filename) {
  const loadingTask = pdfjsLib.getDocument(pdf);
  try {
    const pdfDocument = await loadingTask.promise;
    const { info, metadata } = await pdfDocument.getMetadata();
    // Diagnostic dump of the raw values, kept from the original behaviour.
    console.log(info);
    console.log(metadata);
    return collapse(info, metadata, filename);
  } catch (reason) {
    console.error('Error: ' + reason);
    return undefined;
  } finally {
    // Release the document and its worker; otherwise every file processed in
    // a batch keeps its resources alive until the process exits.
    await loadingTask.destroy();
  }
}
/**
 * Summarizes every entry of a directory of PDFs, one file at a time.
 *
 * @param {string} [pdfPath='data/pdf'] - Directory whose entries are passed
 *   to `summarize`.
 * @returns {Promise<Object[]>} One summary row per file that could be
 *   processed, in directory-listing order.
 */
async function getPDFInfo(pdfPath = 'data/pdf') {
  const fileNames = fs.readdirSync(pdfPath);
  const rows = [];
  for (const fileName of fileNames) {
    const row = await summarize(`${pdfPath}/${fileName}`, fileName);
    // summarize() resolves to undefined after logging a failure; leaving that
    // value in the list would hand a non-row entry to the CSV writer.
    if (row) {
      rows.push(row);
    }
  }
  return rows;
}
/**
 * Collects the PDF summaries and writes them as a CSV file with a header row.
 *
 * @param {string} [outputPath='output.csv'] - Name of the CSV file, resolved
 *   inside the `data/` directory. The parameter used to be ignored in favour
 *   of a hard-coded `data/output.csv`; resolving it under `data/` keeps that
 *   location for the existing `'output.csv'` argument.
 * @returns {Promise<void>} Resolves once the file has been fully written;
 *   rejects if formatting or writing fails.
 */
async function writePDFSummary(outputPath = 'output.csv') {
  const data = await getPDFInfo();
  const ws = fs.createWriteStream(`data/${outputPath}`);
  // Wait for the stream to flush so callers know the file is complete, and
  // surface stream errors as a rejection instead of an uncaught 'error' event.
  await new Promise((resolve, reject) => {
    ws.on('finish', resolve);
    ws.on('error', reject);
    const csvStream = fastcsv.write(data, { headers: true });
    csvStream.on('error', reject);
    csvStream.pipe(ws);
  });
}
// Script entry point. The promise must not be left floating: report any
// failure and signal it through the process exit code.
writePDFSummary('output.csv').catch((error) => {
  console.error(error);
  process.exitCode = 1;
});