设置代理 IP
方法1:
from selenium import webdriver

# Configure the proxy through PhantomJS command-line switches, which
# Selenium forwards to the PhantomJS process via ``service_args``.
# NOTE(review): ``ip_html`` is not defined in this snippet; it is presumably
# the proxy address as an "IP:port" string -- confirm against the caller.
service_args = [
    '--proxy=%s' % ip_html,      # proxy as IP:port (e.g. 192.168.0.28:808)
    '--proxy-type=http',         # proxy type: http/https
    '--load-images=no',          # disable image loading (optional)
    '--disk-cache=yes',          # enable the disk cache (optional)
    '--ignore-ssl-errors=true',  # ignore HTTPS errors (optional)
]
driver = webdriver.PhantomJS(service_args=service_args)
方法2:
from selenium import webdriver
from selenium.webdriver.common.proxy import ProxyType

# NOTE(review): ``PATH_PHANTOMJS`` is not defined in this snippet; it is
# presumably the path to the PhantomJS executable -- confirm.
browser = webdriver.PhantomJS(PATH_PHANTOMJS)

# Use DesiredCapabilities (proxy settings) to open a new session. In effect
# (as the author understands it) this is like clearing the browser cache and
# then visiting the URL again through the proxy.
proxy = webdriver.Proxy()
proxy.proxy_type = ProxyType.MANUAL
proxy.http_proxy = '1.9.171.51:800'
# Add the proxy settings to webdriver.DesiredCapabilities.PHANTOMJS.
# NOTE: this writes into the shared class-level capabilities dict, so the
# proxy entry stays there until it is overwritten (as done further down).
proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
browser.start_session(webdriver.DesiredCapabilities.PHANTOMJS)
browser.get('http://1212.ip138.com/ic.asp')
print('1: ', browser.session_id)
print('2: ', browser.page_source)
print('3: ', browser.get_cookies())

# Restore the original connection settings (the article calls this
# "restore the system proxy").
# NOTE(review): ProxyType.DIRECT selects a direct connection with no proxy;
# ProxyType.SYSTEM is the value that uses the system proxy settings --
# confirm which one is intended.
proxy = webdriver.Proxy()
proxy.proxy_type = ProxyType.DIRECT
proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
browser.start_session(webdriver.DesiredCapabilities.PHANTOMJS)
browser.get('http://1212.ip138.com/ic.asp')
设置请求头
方法1:
# -*- coding:utf-8 -*-
import random

from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.proxy import ProxyType

# Work on a copy so the shared DesiredCapabilities.PHANTOMJS dict is untouched.
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
# Pick a random User-Agent string to disguise the browser.
# NOTE(review): ``headers.my_headers`` is not defined in this snippet; it is
# presumably a project module holding a list of User-Agent strings -- confirm.
desired_capabilities["phantomjs.page.settings.userAgent"] = (
    random.choice(headers.my_headers)
)
# Do not load images; crawling pages is much faster without them.
desired_capabilities["phantomjs.page.settings.loadImages"] = False
# Launch PhantomJS with the configured capabilities.
# NOTE(review): ``phantomjs_driver`` is not defined in this snippet; it is
# presumably the path to the PhantomJS executable -- confirm.
driver = webdriver.PhantomJS(
    executable_path=phantomjs_driver,
    desired_capabilities=desired_capabilities,
)
# NOTE(review): the constructor already received these capabilities; this
# call starts a session with them again -- confirm it is actually required.
driver.start_session(desired_capabilities)
# Implicit wait of 5 seconds; adjust as needed.
driver.implicitly_wait(5)
# Page-load timeout of 20 seconds, similar to the ``timeout`` option of
# requests.get(); driver.get() itself has no timeout parameter.
# The author previously saw driver.get(url) hang forever without raising,
# which froze the program; setting this timeout resolves that.
driver.set_page_load_timeout(20)
# Script timeout of 20 seconds.
driver.set_script_timeout(20)
方法2:
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Set the request header: a fixed User-Agent string for PhantomJS.
user_agent = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) " +
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36"
)
# Copy the default PhantomJS capabilities into a new dict before editing, so
# the shared DesiredCapabilities.PHANTOMJS dict is not modified.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = user_agent
driver = webdriver.PhantomJS(
    executable_path=r"/home/zhou/phantomjs-2.1.1-linux-x86_64/bin/phantomjs",
    desired_capabilities=dcap,
)
phantomjs和selenium设置proxy、headers
原文地址:http://www.cnblogs.com/zhouxinfei/p/8098809.html