"""Print the URL and title of every chapter listed on a biquge.info novel index page.

Run as a script, this downloads the index page at ``url`` and prints one
``<chapter url> <chapter title>`` line per chapter link found in the
element with ``id="list"``.
"""
import os
import time
from urllib.parse import urljoin
from urllib.request import Request
from urllib.request import urlopen

import lxml
from lxml import etree

# Index page of the novel whose chapter list is scraped.
url = 'http://www.biquge.info/10_10218/'
# Request headers sent with every fetch (a desktop Chrome User-Agent string).
UA = {"User-Agent": "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24"}

# Offline alternative kept from the original: read a saved copy of the page
# instead of fetching it.
# c = os.path.join(os.path.abspath(os.path.dirname(__name__)), '1.html')
# with open(c, 'r') as f:
#     a = f.read()


def source(url):
    """Fetch *url* and return the raw response body as bytes.

    The request carries the module-level ``UA`` headers and uses a 5-second
    timeout. Errors raised by ``urlopen`` (e.g. ``urllib.error.URLError``)
    propagate to the caller.
    """
    print(UA)
    # Request's positional order is (url, data, headers): the headers dict
    # must be the third argument, and data=None keeps this a GET request.
    request = Request(url, None, UA)
    # The context manager closes the HTTP response even if read() fails.
    with urlopen(request, timeout=5) as response:
        return response.read()


def respon(text, base_url=None):
    """Print the absolute URL and title of each chapter link found in *text*.

    Args:
        text: HTML source of the novel's index page (str or bytes).
        base_url: URL that chapter hrefs are resolved against. Defaults to
            the module-level ``url``.
    """
    if base_url is None:
        base_url = url
    selector = etree.HTML(text)
    # Walk the anchor elements themselves so each href stays paired with the
    # text of the same <a>, even when some anchor has no text node.
    for anchor in selector.xpath("//*[@id='list']/dl/dd/a[@href]"):
        title = "".join(anchor.xpath("text()"))
        # urljoin handles relative, root-relative and absolute hrefs alike.
        print(urljoin(base_url, anchor.get("href")), title)


if __name__ == "__main__":
    # Only hit the network when executed as a script, not on import.
    respon(source(url))
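特别需要注意的是:UA(请求头字典)必须作为 Request 的第三个参数 headers 传入,即 Request(url, None, UA) 或 Request(url, headers=UA);如果写成 Request(url, UA),字典会被当作第二个参数 data(请求体)处理,从而导致请求出错。遇到这类报错时,需要耐心对照函数签名逐项排查。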
【urllib】笔趣阁(抓取部分)
原文地址:https://www.cnblogs.com/Skyda/p/9179420.html