forked from ezodude/tabula-js
-
Notifications
You must be signed in to change notification settings - Fork 1
/
index.js
52 lines (46 loc) · 1.79 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
'use strict';
// Options:
//
// area <AREA> Portion of the page to analyze (top,left,bottom,right).
// Example: "269.875,12.75,790.5,561". Default is entire page.
//
// columns <COLUMNS> X coordinates of column boundaries. Example "10.1,20.2,30.3"
//
// debug Print detected table areas instead of processing.
//
// guess Guess the portion of the page to analyze per page.
//
// silent Suppress all stderr output.
//
// noSpreadsheet Force PDF not to be extracted using spreadsheet-style extraction
// (if there are ruling lines separating each cell, as in a PDF of an Excel spreadsheet)
//
// pages <PAGES> Comma separated list of ranges, or all.
// Examples: pages: "1-3,5-7", pages: "3" or pages: "all". Default is pages: "1"
//
// spreadsheet Force PDF to be extracted using spreadsheet-style extraction
// (if there are ruling lines separating each cell, as in a PDF
// of an Excel spreadsheet)
//
// password <PASSWORD> Password to decrypt document. Default is empty
//
// useLineReturns Use embedded line returns in cells. (Only in spreadsheet mode.)
const cmd = require('./lib/cmd')
, hp = require('highland-process');
// The Tabula constructor/factory is this module's only export.
// Referencing it ahead of its definition works because function
// declarations are hoisted.
module.exports = Tabula;
/**
 * Wraps a PDF path together with the extraction options to use for it.
 *
 * Can be invoked with or without `new`: a plain call re-invokes the function
 * as a constructor, so both forms return a Tabula instance.
 *
 * @param {string} pdfPath - Location of the PDF (presumably a filesystem
 *   path; stored as given, no validation happens here).
 * @param {Object} [options] - Extraction options, stored as given. The
 *   supported keys are listed in the comment at the top of this file.
 * @returns {Tabula} The new instance when called without `new`.
 */
function Tabula(pdfPath, options) {
  if (this instanceof Tabula) {
    // Regular construction: just remember the inputs for later use.
    this.pdfPath = pdfPath;
    this.options = options;
  } else {
    // Called as a plain function: hand back a properly constructed instance.
    return new Tabula(pdfPath, options);
  }
}
/**
 * Builds the command for this instance's PDF and options, runs it, and wraps
 * the result with `hp.from`.
 *
 * @returns {Object} Whatever `hp.from` yields for the running command —
 *   presumably a Highland stream of the CSV output (extractCsv chains
 *   stream-style methods on it); confirm against highland-process.
 */
Tabula.prototype.streamCsv = function () {
  const { pdfPath, options } = this;
  const running = cmd(pdfPath, options).run();
  return hp.from(running);
};
/**
 * Runs the extraction and reports the outcome through a Node-style callback.
 *
 * Each chunk from `streamCsv()` is converted to a string, the text is passed
 * through `split()`, and the pieces are gathered with `collect()` before
 * being handed to the callback in one go.
 *
 * @param {Function} cb - Invoked as `cb(err, null)` on the first stream error
 *   (after which the stream is stopped), or as `cb(null, data)` with the
 *   collected result — presumably an array of output lines; confirm against
 *   Highland's `split`/`collect`.
 */
Tabula.prototype.extractCsv = function (cb) {
  const reportFailure = (err) => cb(err, null);
  const reportResult = (collected) => cb(null, collected);

  // Keep the stage order as-is: the error handler sits after collect() so a
  // failure anywhere upstream reaches it.
  const gathered = this.streamCsv()
    .map((chunk) => chunk.toString())
    .split()
    .collect();

  gathered.stopOnError(reportFailure).each(reportResult);
};