//server.js
1 var http = require(‘http‘), 2 ????url = require("url"), 3 ????????superagent = require("superagent"), 4 ????????cheerio = require("cheerio"), 5 ????????async = require("async"), 6 ????????eventproxy = require(‘eventproxy‘), 7 ????????fs = require("fs"), 8 ????nodeExcel = require(‘excel-export‘), 9 ????querystring = require(‘querystring‘);10 11 exports.start=function() {12 ????//request 客户端的HTTP请求 //response服务器端的HTTP回应13 ????function onReqoest(req,response){14 ????????// 设置字符编码(去掉中文会乱码,回应)15 ????????response.writeHead(200, {‘Content-Type‘: ‘text/html;charset=utf-8‘});16 ????????superagent17 ????????????.get(‘https://list.yohobuy.com/boys-new/‘)18 ????????????.set(‘Accept‘,‘application/json‘)19 ????????????.end(function(err,pres){20 ????????????????var startDate = new Date();21 ????????????????//常规错误处理22 ????????????????if (err) {23 ??????????????????console.log(1)24 ??????????????????return;25 ????????????????}26 ????????????????var html=pres.text;27 ????????????????var chapterData;28 ????????????????var chapter = crawlerChapter(html);//229 ????????????????//console.log(chapter);30 ????????????????console.log(chapter);31 ????????????????function crawlerChapter(html) {32 ????????????????????var $ = cheerio.load(html);33 ????????????????????var chapters = $(‘.good-info‘);34 ????????????????????var data1 = [];35 ????????????????????chapterData = {36 ????????????????????????????imgUrl:[],37 ????????????????????????????biaoti: [],38 ????????????????????????????price:[]39 ????????????????????????};40 ????????????????????$(‘.good-detail-img‘).each(function(i, elem) {41 ??????????????????????????chapterData.imgUrl[i] = $(this).text();42 ????????????????????});43 ????????????????????$(‘.good-detail-text>a‘).each(function(i, elem) {44 ??????????????????????????chapterData.biaoti[i] = $(this).text().trim();45 ????????????????????});46 ????????????????????$(‘.good-detail-text .price .market-price‘).each(function(i, elem) {47 ??????????????????????????chapterData.price[i] = 
$(this).text().trim();48 ????????????????????});49 ????????????????};50 ????????????????var conf= {},temp=[];51 ????????????????conf.cols =[52 ????????????????????{caption:‘标题‘,53 ?????????????????????type:‘string‘},54 ??????????????????????{caption:‘价格‘,55 ????????????????????type:‘string‘}, 56 ????????????????????{caption:‘图片地址‘,57 ????????????????????type:‘string‘}58 ????????????????????];59 ????????????????for(var i=0; i<chapterData.biaoti.length;i++) {60 ????????????????????var buffer = [chapterData.biaoti[i], chapterData.price[i]];61 ????????????????????console.log(buffer);62 ????????????????????temp.push(buffer);63 ????????????????}64 ????????????????conf.rows=temp;65 ??????????????????var result = nodeExcel.execute(conf);66 ????????????????var random = Math.floor(Math.random()*10000+0);//用来保证生成不同的文件名67 ????????????????var filePath ="fi" + random + ".xlsx"; ?//文件名68 ????????????????fs.writeFile(filePath, result, ‘binary‘,function(err){69 ????????????????????if(err){70 ????????????????????????console.log(err);71 ????????????????????}72 ????????????????});73 ????????????});74 ????}75 ????http.createServer(onReqoest).listen(3500); ???76 }
//app.js
// Load the local server module (server.js), which exports start().
var server = require("./server");
// Start the HTTP server; without this call the module is loaded but never runs.
server.start();
第一次写,随便找个网页,然后就好了!!!
大家有什么意见一定要发表噢……我要进步呀!
node.js 爬虫以及数据导出到excel
原文地址:http://www.cnblogs.com/zzty/p/7886634.html