-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.js
39 lines (36 loc) · 1.12 KB
/
scrape.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
// Scrapes the restaurant index pages of the dining-channel site. For every
// element matching `.restaurant_link` it writes the detail-page URL to
// `file.json` (one single-quoted, comma-terminated URL per line) and collects
// the plain URL in `detailUrls`.
var request = require('request'),
    fs = require('fs'),
    cheerio = require('cheerio'),
    ws = fs.createWriteStream('file.json'),
    restaurants = [],
    // Only the 'a' index page is scraped at the moment.
    alphabet = ['a'],
    website = 'http://san.francisco.diningchannel.com/',
    // Wider letter list kept from the original for reference (it has no 'v' or 'z'):
    //alphabet = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','w','x','y']
    detailUrls = [],
    // Number of index-page requests that have not called back yet.
    pendingPages = alphabet.length;

/**
 * Marks one index-page request as finished and closes the output stream once
 * every request has reported back, so buffered lines are flushed to disk.
 */
function pageDone() {
  pendingPages -= 1;
  if (pendingPages === 0) {
    ws.end();
  }
}

// forEach instead of for...in: for...in walks enumerable keys (as strings),
// not array values, and would also pick up inherited enumerable properties.
alphabet.forEach(function (letter) {
  var pageUrl = website + 'restaurants_' + letter + '.htm';

  request(pageUrl, function (err, resp, body) {
    if (err) {
      console.error('Request for ' + pageUrl + ' failed: ' + err.message);
    } else if (resp.statusCode !== 200) {
      console.error('Request for ' + pageUrl + ' returned HTTP ' + resp.statusCode);
    } else {
      var $ = cheerio.load(body);

      $('.restaurant_link').each(function () {
        // Inside .each, `this` is the raw element, so it has to be wrapped
        // with $() before .attr() can be called on it.
        var detailPath = $(this).attr('href');
        if (!detailPath) {
          // A link without an href would otherwise be recorded as "...undefined".
          return;
        }

        var url = website + detailPath;
        // The file keeps its existing line format: 'URL',<newline>
        ws.write('\'' + url + '\',\n');
        // Store the plain URL so entries can be passed straight to request().
        detailUrls.push(url);
      });
    }

    pageDone();
  });
});
/*for (var url in detailUrls) {
console.log('fail');
request(detailUrls[url], function(err, resp, body){
console.log('almost success');
if(!err && resp.statusCode == 200) {
console.log('success');
//var $ = cheerio.load(body);
}
});
}*/