-
Notifications
You must be signed in to change notification settings - Fork 0
/
spider.js
47 lines (41 loc) · 1.01 KB
/
spider.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
var _ = require('lodash');
var low = require('lowdb');
var async = require('async');
var cheerio = require('cheerio');
var request = require('request');
/* Listing URL for the quotes tag; getPage appends `?page=N` to it. */
const BASE = 'http://www.goodreads.com/quotes/tag/love-quotes';
/* Concurrency limit handed to async.eachLimit when crawling the pages. */
const limit = 5;
/* lowdb store backed by quotes.json, extended with the underscore-db mixin. */
const db = low('quotes.json');
db._.mixin(require('underscore-db'));
/* Seed the top-level collections when they are not present yet. */
db.defaults({ quotes: [], chat: [] })
  .write();
/* start initial pages */
const maxPage = 100;
/* _.range excludes its end value, so add 1 to make maxPage itself the last page crawled. */
const pages = _.range(1, maxPage + 1);
/* end initial pages */
/**
 * Fetch one listing page and store every quote found on it in the `quotes`
 * collection of the database.
 *
 * @param {number} page - Page number appended to BASE as `?page=N`.
 * @param {function(Error=): void} callback - Invoked exactly once when the page
 *   has been processed; receives an Error when the request failed or the
 *   response status was not 200, and no argument on success.
 */
const getPage = function (page, callback) {
  const url = `${BASE}?page=${page}`;
  request(url, (error, response, body) => {
    // Report transport failures to the caller instead of parsing a missing body.
    if (error) {
      callback(error);
      return;
    }
    // A non-200 response is not a quotes listing; surface it rather than
    // silently storing nothing for this page.
    if (!response || response.statusCode !== 200) {
      const status = response ? response.statusCode : 'unknown';
      callback(new Error(`Unexpected status ${status} for ${url}`));
      return;
    }
    const $ = cheerio.load(body);
    $('.quote.mediumText .quoteText').each((index, elem) => {
      const quote = $(elem);
      // Drop embedded <script> nodes so their source is not part of the text.
      quote.find('script').remove();
      db.get('quotes')
        .insert({ text: quote.text() })
        .write();
    });
    callback();
  });
};
/* Run getPage over every entry of `pages`, using `limit` as the concurrency cap. */
async.eachLimit(pages, limit, getPage, (crawlError) => {
  if (!crawlError) {
    /*
     * successful
     */
    return;
  }
  /*
   * TODO
   * handle error
   */
  console.log(crawlError);
});