// Scrape a static page: fetch the product listing, parse every product entry
// with cheerio, download its picture and insert one row per product into MySQL.
const fs = require('fs');
const path = require('path');
const request = require('request');
const cheerio = require('cheerio');
const mysql = require('mysql');

const PAGE_URL = 'https://www.epet.com/cleargoodsmdog.html';
const IMAGE_DIR = path.join(__dirname, 'downloadimg');

// No explicit conn.connect(): the mysql driver connects implicitly when the
// first query is issued.
const conn = mysql.createConnection({
  host: 'localhost',
  user: 'root',
  password: 'root',
  port: '3306',
  database: 'xiaomi',
});

/**
 * Download one product picture into IMAGE_DIR, named after the last path
 * segment of its URL.
 *
 * @param {string} imageSrc - Picture URL as found in the page; it is resolved
 *   against PAGE_URL so relative and protocol-relative values also work.
 */
function downloadImage(imageSrc) {
  const imageUrl = new URL(imageSrc, PAGE_URL);
  const target = path.join(IMAGE_DIR, path.basename(imageUrl.pathname));

  request(imageUrl.href)
    .on('error', (err) => {
      console.error(`Failed to download ${imageUrl.href}:`, err);
    })
    .pipe(fs.createWriteStream(target))
    .on('error', (err) => {
      console.error(`Failed to write ${target}:`, err);
    });
}

request(PAGE_URL, (err, res, body) => {
  if (err || res.statusCode !== 200) {
    // No query has been issued yet, so the connection was never opened and
    // there is nothing to close here.
    console.error('Failed to fetch the listing page:', err || `HTTP ${res.statusCode}`);
    return;
  }

  const $ = cheerio.load(body);
  const list = $('.qcGoodsBox.bgwhite .fl.rela');
  console.log(`Found ${list.length} products`);

  // The write streams below fail if the target directory does not exist.
  fs.mkdirSync(IMAGE_DIR, { recursive: true });

  list.each((index, element) => {
    const item = $(element);
    // NOTE(review): the picture URL is read from the `src0` attribute, which
    // looks like a lazy-loading placeholder on this site -- confirm.
    const pic = item.find('.cloud-zoom img').attr('src0');
    const title = item.find('.qcGoodsTit a').text().trim();
    const price = item.find('.qcPriceBox .ft20').text().trim();
    const yprice = item.find('.qcPriceBox .ft12').text().trim();

    if (pic) {
      downloadImage(pic);
    }

    // NOTE(review): the `.ft12` price text is stored in the `goodsclass`
    // column, exactly as in the original script -- confirm this is intended.
    conn.query(
      'insert into goods(goodsname,price,pic,goodsclass) values(?,?,?,?)',
      [title, price, pic, yprice],
      (queryErr, results) => {
        if (queryErr) {
          console.error(`Failed to insert "${title}":`, queryErr);
          return;
        }
        console.log(results);
      }
    );
  });

  // end() is queued behind the inserts enqueued above, so they still run
  // before the connection is closed.
  conn.end((endErr) => {
    if (endErr) {
      console.error('Failed to close the MySQL connection:', endErr);
    }
  });
});
// Scrape dynamically rendered data with phantom: open the page in a headless
// PhantomJS instance so its scripts run, then parse the rendered HTML with
// cheerio and print every product title.
(async function () {
  // Required inside the function so these names cannot clash with identically
  // named top-level declarations elsewhere in the same file.
  const phantom = require('phantom');
  const cheerio = require('cheerio');

  const instance = await phantom.create();
  try {
    const page = await instance.createPage();
    await page.on('onResourceRequested', (requestData) => {
      console.info('Requesting', requestData.url);
    });

    const status = await page.open('http://you.163.com/item/list?categoryId=1065000&subCategoryId=1065001');
    if (status !== 'success') {
      throw new Error(`Failed to open the page (status: ${status})`);
    }

    // `content` is the page HTML as rendered by PhantomJS.
    const content = await page.property('content');
    const $ = cheerio.load(content);
    const list = $('.m-itemList.m-itemList-level2Category .item');

    list.each((index, element) => {
      const title = $(element).find('.name a span:nth-of-type(3)').text();
      console.log(title);
    });
  } finally {
    // Always shut the PhantomJS process down, even when scraping fails;
    // otherwise it keeps running after an error.
    await instance.exit();
  }
})().catch((err) => {
  console.error('Scraping failed:', err);
  process.exitCode = 1;
});
// Node.js crawler (Node.js 的爬虫)
// Original article (原文地址): https://www.cnblogs.com/bao2333/p/10142910.html