爬取资讯网站的新闻并保存到excel

发布时间：2023-09-06 01:40 | 责任编辑：胡小海 | 关键词：excel

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#author:Jacky

from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from bs4 import BeautifulSoup
import xlwt

# Load the yidianzixun channel page in Firefox, send DOWN key presses to the
# page, then parse the rendered HTML and collect one
# [title, source, comment, link] row per article into `articles`.
driver = webdriver.Firefox()
try:
    driver.implicitly_wait(3)
    first_url = 'http://www.yidianzixun.com/channel/c6'
    driver.get(first_url)
    driver.find_element_by_class_name('icon-refresh').click()
    # Presumably the repeated DOWN key presses scroll the page so that more
    # items get loaded before the HTML is captured -- confirm against the site.
    for _ in range(1, 90):
        driver.find_element_by_class_name('icon-refresh').send_keys(Keys.DOWN)
    soup = BeautifulSoup(driver.page_source, 'lxml')
finally:
    # Always close the browser, even when a lookup above raises.
    driver.quit()
print(soup)

articles = []
# Passing a multi-word string to class_ matches the exact value of the
# element's class attribute.
for article in soup.find_all(class_='item doc style-small-image style-content-middle'):
    title = article.find(class_='doc-title').get_text()
    source = article.find(class_='source').get_text()
    comment = article.find(class_='comment-count').get_text()
    # The href on the item is joined onto the site root to build the link.
    link = 'http://www.yidianzixun.com' + article.get('href')
    articles.append([title, source, comment, link])
print(articles)

# Write the scraped rows to an .xls workbook: a header in row 0, then one
# row per entry of `articles` (each entry is [title, source, comment, link]).
wbk = xlwt.Workbook(encoding='utf-8')
sheet = wbk.add_sheet('yidianzixun')
for col_idx, header in enumerate(('title', 'source', 'comment', 'link')):
    sheet.write(0, col_idx, header)
# Data rows start at 1 because row 0 holds the header.
for row_idx, row in enumerate(articles, 1):
    for col_idx in range(4):
        sheet.write(row_idx, col_idx, row[col_idx])
# NOTE(review): the path uses a Windows-style separator and the `zixun`
# directory is not created here -- presumably it must already exist.
wbk.save(r'zixun\zixun.xls')

爬取资讯网站的新闻并保存到excel

原文地址：http://blog.51cto.com/jackyxin/2066959

爬取资讯网站的新闻并保存到excel

知识推荐