#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/9/26 10:26
# @Author  : Sa.Song
# @Desc    : Scrape Cui Qingcai's Weibo posts by simulating AJAX requests
# @File    : weiBo.py
# @Software: PyCharm
from urllib.parse import urlencode

import requests
from pyquery import PyQuery as pq

# Headers sent with every request so that it looks like the XHR call issued
# by the mobile Weibo page named in the Referer.
headers = {
    'Accept': 'application/json, text/plain, */*',
    'Referer': 'https://m.weibo.cn/u/2830678474',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    ),
    'X-Requested-With': 'XMLHttpRequest',
}
# Endpoint prefix; the url-encoded query string is appended directly to it.
base_url = 'https://m.weibo.cn/api/container/getIndex?'


def get_page(page):
    """Fetch one page of the timeline from the mobile Weibo AJAX endpoint.

    Args:
        page: Pagination index, sent as the ``page`` query parameter.

    Returns:
        The decoded JSON body when the server answers with HTTP 200.
        ``None`` when the status is not 200, the request fails, or the body
        is not valid JSON (the error is printed in the latter two cases).
    """
    parms = {
        'type': 'uid',
        'value': '2830678474',
        'containerid': '1076032830678474',
        'page': page,
    }
    url = base_url + urlencode(parms)
    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(url=url, headers=headers, timeout=10)
        if response.status_code == 200:
            return response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers network failures; ValueError covers a body
        # that cannot be decoded as JSON.
        print('报错:', e)
    return None


def parse_message(json):
    """Extract the id and plain text of every post in an API response.

    Args:
        json: Decoded response as returned by ``get_page``; may be ``None``
            or empty, in which case no posts are extracted.

    Returns:
        A list of dicts with the keys ``'id'`` and ``'text'``, one per card
        that carries an ``mblog`` entry. The same list is also printed.
    """
    weibo_data = []
    if json:
        # Tolerate payloads where 'data' or 'cards' is missing or null
        # instead of failing on a chained .get().
        cards = (json.get('data') or {}).get('cards') or []
        for card in cards:
            mblog = card.get('mblog')
            if mblog is None:
                # Cards without an 'mblog' entry carry no post; skip them.
                continue
            raw_text = mblog.get('text')
            weibo_data.append({
                'id': mblog.get('id'),
                # The text field is an HTML fragment; PyQuery strips the
                # markup. A missing or empty field yields an empty string.
                'text': pq(raw_text).text() if raw_text else '',
            })
    print(weibo_data)
    return weibo_data


if __name__ == '__main__':
    # NOTE(review): the crawl starts at page 0; confirm whether the API
    # treats page 0 and page 1 as the same page.
    for i in range(100):
        page_json = get_page(i)
        parse_message(page_json)

# Simulate AJAX requests to crawl Weibo
# Original article: https://www.cnblogs.com/ss-py/p/9706722.html