"""Fetch a product listing page and print the text nodes matched by an XPath query."""
from urllib import request

from lxml import etree

# URL to request
url = "http://www.dfenqi.cn/Product/Index"

# Request headers (sets a browser User-Agent string)
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36"
}

# Create the request object
req = request.Request(url, headers=headers)

# Create the handler object
httpHandler = request.HTTPHandler()

# Create the opener
opener = request.build_opener(httpHandler)

# Send the request and read the page source; the `with` block closes the
# response once the body has been read
with opener.open(req) as response:
    html = response.read().decode('utf-8')

# XPath expression selecting the text of each <p> in the listing.
# The predicate must use plain ASCII quotes to be a valid XPath literal.
xpath = "//div[@class='liebiao']/ul/li/p/text()"
# Alternative: get the list of attribute values instead
# xpath = "//div[@class='liebiao']/ul/li/p/@class"

# Convert the HTML text into a parseable object
selector = etree.HTML(html)

# Run the XPath query; returns a list of matches
goodsList = selector.xpath(xpath)

# Print the product titles
for goods in goodsList:
    print(goods)
# 6、通过xpath获取网页数据
# 原文地址: https://www.cnblogs.com/toloy/p/8618007.html