约 30 行代码实现一个简单的 Node.js 爬虫工具，定时抓取网页数据。
使用的模块：
request——简单的 HTTP 请求客户端（轻量级，通过 npm 安装）。
fs——Node.js 内置的文件模块（无需通过 npm 安装）。
index.js
/*
 * Minimal Node.js crawler.
 *
 * Every 5 seconds it requests the Juejin recommended-entry endpoint, collects
 * the `title` of each entry found under `body.d`, and overwrites
 * ./result.txt with one title per line.
 */
const request = require('request');
const fs = require('fs');

// Endpoint polled by this script.
const JJurl = 'https://recommender-api-ms.juejin.im/v1/get_recommended_entry?suid=6bYFY7IRbfmijiJeeeIQ&ab=welcome_3&src=web';

// Options handed to `request`: a browser-like User-Agent is sent, and
// `json: true` asks `request` to parse the response body as JSON.
const options = {
  uri: JJurl,
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
  },
  json: true,
};

setInterval(function () {
  request(options, function (error, response, body) {
    if (error) {
      console.log('抓取失败');
      return;
    }

    // Guard against an empty or non-JSON body, where `body.d` is missing.
    // NOTE(review): `d` is assumed to be an array (or keyed object) of
    // entries that each carry a `title` -- confirm against the API response.
    const entries = body && body.d ? Object.values(body.d) : [];

    // Build the text fresh on every poll. Accumulating into a module-level
    // string re-appended the same titles on each tick, so the output file
    // grew without bound.
    const title = entries
      .map(function (entry) {
        return entry.title + '\n';
      })
      .join('');

    fs.writeFile('./result.txt', title, function (err) {
      if (err) {
        // Rethrown from an async callback: this surfaces as an uncaught
        // exception and stops the crawler, as the original script did.
        throw err;
      }
    });
  });
}, 5000);
简单实现nodejs爬虫工具
原文地址:https://www.cnblogs.com/Sroot/p/8543897.html