Node.js 爬取数据时出现编码错误（乱码）的问题
可以使用 superagent-charset 和 superagent 模块进行处理
// Small Express server: on GET / it fetches a GBK-encoded page with
// superagent, parses it with cheerio, and responds with an array holding the
// inner HTML of the links found inside `.table_text` cells.
var charset = require('superagent-charset');
var cheerio = require('cheerio');
var superagent = require('superagent');
var express = require('express');

// Extend superagent so that requests expose the .charset() method used below.
charset(superagent);

var url = 'http://acm.hdu.edu.cn/statistic.php?pid=1000';
var app = express();

/**
 * GET / — scrape `url` and send the collected link contents to the client.
 *
 * @param {object} req - Express request (unused).
 * @param {object} res - Express response; receives the array of link HTML.
 * @param {function} next - Express continuation; called with the error when
 *   the upstream request fails.
 */
app.get('/', function (req, res, next) {
  superagent
    .get(url)
    // Decode the response body as GBK instead of the default encoding.
    .charset('gbk')
    .end(function (err, sres) {
      // Without this guard a failed request would crash on `sres.text`
      // and the client would never receive a response.
      if (err) {
        return next(err);
      }

      var html = sres.text;
      var $ = cheerio.load(html, { decodeEntities: false });

      // NOTE(review): the count comes from `.table_text` elements while the
      // items come from `.table_text td a`; when the two counts differ the
      // array is truncated or padded with null — confirm this is intended.
      var len = $('.table_text').length;
      console.log(len);

      // Run the selector once rather than on every loop iteration.
      var links = $('.table_text td a');
      var arr = [];
      for (var i = 0; i < len; i++) {
        arr.push(links.eq(i).html());
      }

      res.send(arr);
      console.log(arr);
    });
});

app.listen(3000, function () {
  console.log('app is listening at port 3000');
});
结果是
node.js之爬虫
原文地址:https://www.cnblogs.com/ldlx-mars/p/8588021.html