-
Notifications
You must be signed in to change notification settings - Fork 124
/
pluggable.js
69 lines (56 loc) · 1.95 KB
/
pluggable.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
module.exports = class Pluggable {
constructor(options = {}) {
const {
chromeFlags = [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-accelerated-2d-canvas',
'--disable-gpu',
'--window-size=1920x1080',
'--hide-scrollbars',
'--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3835.0 Safari/537.36',
],
headless = true,
} = options;
this.chromeFlags = chromeFlags;
this.headless = headless;
}
async close_browser() {
await this.browser.close();
}
// Callback invoked after metadata has been gathered
async handle_metadata(args) {
// store scraping metadata somewhere
}
// Callback invoked after all keywords have been scraped
async handle_results(args) {
// store the results somewhere
}
// Callback invoked before a keyword is scraped.
async before_keyword_scraped(args) {
console.log('before keyword scraped.');
}
// Callback invoked after a keyword has been scraped.
// TODO: implement this
async after_keyword_scraped(args) {
console.log('after keyword scraped.')
}
async start_browser(args={}) {
const puppeteer = require('puppeteer');
let launch_args = {
args: args.chromeFlags || this.chromeFlags,
headless: args.headless,
};
if (launch_args.headless === undefined) {
launch_args.headless = this.headless;
}
this.browser = await puppeteer.launch(launch_args);
console.log('Loaded custom function get_browser()');
console.log(launch_args);
return this.browser;
}
async do_work(page) {
// do some scraping work and return results and num_requests
}
};