Node.js 网页抓取与数据采集
// Scrapes the kickass.so search results for "yify" and prints the magnet link
// found on each result's detail page.
var request = require('request');
var cheerio = require('cheerio');
var Promise = require("bluebird");

// Site root; result links scraped from the search page are appended to it.
var baseUrl = 'http://kickass.so';
var url = baseUrl + '/search/yify/';

/**
 * Fetch a page and resolve with its response body.
 *
 * @param {string} pageUrl - URL to request.
 * @returns {Promise} Resolves with the body when the request succeeds with
 *   status 200; rejects with an Error describing the failure otherwise.
 */
function fetchBody(pageUrl) {
  return new Promise(function(resolve, reject) {
    request(pageUrl, function(error, response, body) {
      if (error) {
        // Keep the underlying reason instead of a bare generic message.
        reject(new Error('fetch failed! ' + pageUrl + ': ' + error.message));
        return;
      }
      if (response.statusCode !== 200) {
        reject(new Error('fetch failed! ' + pageUrl + ' responded with status ' + response.statusCode));
        return;
      }
      resolve(body);
    });
  });
}

// Promise for the body of the search results page (requested once, at load).
var getData = fetchBody(url);

/**
 * Fetch a result's detail page.
 *
 * @param {string} url - URL of the detail page.
 * @returns {Promise} Resolves with the page body; rejects if the fetch fails.
 */
function getMagnet(url) {
  return fetchBody(url);
}

getData.then(function(data) {
  // Collect the title and relative link of every search result.
  var $ = cheerio.load(data);
  var arr = [];
  $('.torrentname').each(function() {
    var $link = $(this).find('.cellMainLink');
    arr.push({'title': $link.text(), 'src': $link.attr('href')});
  });
  return arr;
}).then(function(rows) {
  var lookups = rows.filter(function(row) {
    // A result without an href would otherwise yield a bogus URL.
    return Boolean(row.src);
  }).map(function(row) {
    var src = baseUrl + row.src;
    return getMagnet(src).then(function(result) {
      var $ = cheerio.load(result);
      return $('.magnetlinkButton').attr('href');
    }).then(function(magnetLink) {
      console.log('fetch:' + magnetLink + '\n');
    }).catch(function(err) {
      // Handle each row on its own so one failed page does not hide the rest.
      console.error(err.message);
    });
  });
  // Return the combined promise so the chain does not leave lookups floating.
  return Promise.all(lookups);
}).catch(function(err) {
  // Reached when the search page itself cannot be fetched or parsed.
  console.error(err.message);
});
延伸阅读:
暂无内容!
评论列表 (0条):
加载更多评论 Loading...