使用了 ndir 组件(第三方 npm 包)来遍历目录。
没有使用 Node.js 内置的 events 模块,而是继承了上一篇文章中实现的 Event 类。
'use strict';

const ndir = require('ndir');
const fs = require('fs');

/**
 * Minimal publish/subscribe base class, used here instead of Node's built-in
 * `events` module.
 */
class Event {
    constructor() {
        // Event name -> array of listeners. A prototype-less object is used as
        // the dictionary so that event names such as "length" or "constructor"
        // cannot collide with inherited properties.
        this.eventList = Object.create(null);
    }

    /**
     * Register a listener for an event.
     *
     * @param {string} key - Event name.
     * @param {Function} fn - Listener; receives the arguments given to trigger().
     */
    on(key, fn) {
        if (!this.eventList[key]) {
            this.eventList[key] = [];
        }
        this.eventList[key].push(fn);
    }

    /**
     * Call every listener registered for `key`, with `this` bound to the emitter.
     *
     * @param {string} key - Event name.
     * @param {...*} args - Values forwarded to each listener.
     * @returns {false|undefined} `false` when no listener is registered for `key`.
     */
    trigger(key, ...args) {
        const fns = this.eventList[key];
        if (!fns || fns.length === 0) {
            return false;
        }
        for (const fn of fns) {
            // Skip entries that are not callable instead of aborting the dispatch.
            if (typeof fn === 'function') {
                fn.apply(this, args);
            }
        }
    }
}

/**
 * Walks a directory, reads every regular file found as UTF-8 text, collects
 * all substrings matching a regular expression and writes the distinct
 * matches to an output file as a JSON object (each match is both key and value).
 *
 * The work starts as soon as an instance is constructed.
 */
class GetChinese extends Event {
    /**
     * @param {string} path - Directory to crawl.
     * @param {string} saveOutPutPath - File the collected matches are written to.
     * @param {RegExp} regs - Pattern whose matches are collected.
     */
    constructor(path, saveOutPutPath, regs) {
        super();
        this.path = path;
        this.saveOutPutPath = saveOutPutPath;
        this.regs = regs;
        // Matched text -> matched text. Prototype-less so that file content can
        // never collide with inherited keys; JSON.stringify output is unaffected.
        this.outPathObj = Object.create(null);
        this.files = [];
        this.init();
    }

    /**
     * Wire the two pipeline stages together and start the directory walk:
     * 'read-file' fires when the walk ends, 'read-files' when every file
     * has been scanned.
     */
    init() {
        this.on('read-file', (state) => {
            console.log(state);
            if (state === 'finish') {
                this.readFils();
            }
            // state === 'err': nothing more to do for this event.
        });

        this.on('read-files', (state) => {
            if (state !== 'finish') {
                return;
            }
            fs.writeFile(this.saveOutPutPath, JSON.stringify(this.outPathObj), (err) => {
                if (err) {
                    console.error(`Failed to write ${this.saveOutPutPath}:`, err);
                }
            });
        });

        this.readDir();
    }

    /**
     * Collect the paths of all regular files under `this.path` into
     * `this.files`, then signal 'read-file' with 'finish' (or 'err').
     */
    readDir() {
        ndir.walk(
            this.path,
            (dirpath, files) => {
                // Each entry is used as a [path, stats] pair.
                for (const info of files) {
                    if (info[1].isFile()) {
                        this.files.push(info[0]);
                    }
                }
            },
            () => {
                this.trigger('read-file', 'finish');
            },
            (err, errPath) => {
                console.error(`Failed to walk ${errPath}:`, err);
                this.trigger('read-file', 'err');
            }
        );
    }

    /**
     * Read the collected files one after another, starting at `index`, and
     * signal 'read-files' with 'finish' once the list is exhausted.
     * (The method name keeps its original spelling so existing callers still work.)
     *
     * @param {number} [index=0] - Position in `this.files` to read next.
     * @returns {false|undefined} `false` once every file has been processed.
     */
    readFils(index = 0) {
        console.log('index', index);
        console.log('length', this.files.length);

        if (index >= this.files.length) {
            this.trigger('read-files', 'finish');
            return false;
        }

        const file = this.files[index];
        fs.readFile(file, 'utf8', (err, data) => {
            if (err) {
                // An unreadable file is reported and skipped; the crawl continues.
                console.error(`Skipping unreadable file ${file}:`, err);
            } else {
                this.collectMatches(data);
            }
            this.readFils(index + 1);
        });
    }

    /**
     * Internal helper: record every match of `this.regs` found in `data`.
     *
     * @param {string} data - Text content of one file.
     */
    collectMatches(data) {
        // Scan with a fresh global copy of the pattern: exec() on a non-global
        // regex always restarts at index 0 and would loop forever, and a fresh
        // copy starts every file at lastIndex 0.
        const flags = this.regs.flags.includes('g') ? this.regs.flags : `${this.regs.flags}g`;
        const scanner = new RegExp(this.regs.source, flags);

        let result;
        while ((result = scanner.exec(data)) !== null) {
            this.outPathObj[result[0]] = result[0];
            if (result[0] === '') {
                // An empty match does not advance lastIndex; step manually.
                scanner.lastIndex += 1;
            }
        }
    }
}

// Arguments, in order:
//   1. path of the directory to crawl
//   2. path of the file the crawled content is saved to
//   3. regular expression to match
const getChinese = new GetChinese(
    'C:\\Users\\long\\Desktop\\123test',
    'C:\\Users\\long\\Desktop\\123test.txt',
    /[^>]+[\u4E00-\u9FA5]+[^<]+/g
);
<span>要的订单</span>
例子是获取 HTML 标签里面的内容,例如从上面的 <span>要的订单</span> 中可以得到“要的订单”。
简单实现功能,还有些错误没有处理
nodejs 根据正则爬取本地文件夹里面内容
原文地址:http://www.cnblogs.com/longor/p/7700769.html