需求:
(1) 使用 socket 及 ssl 模块写通用的 web 客户端
(2) 向服务器发起请求
(3) 接收响应内容并解析出状态码、消息报头、响应正文
(4) 最核心的函数: 输入一个 url, 返回状态码、消息报头、响应正文; 当然这也是最后实现的效果
知识储备:
网络基础知识
python的web编程(socket)
最后实现代码:
# __author__ = "wyb"
# date: 2018/6/5
# Code style: high cohesion, low coupling -> wrap each piece of logic in a
# small helper function.
"""Minimal HTTP/HTTPS GET client built directly on ``socket`` and ``ssl``.

Background notes:

* In Python 3, ``str`` and ``bytes`` are converted with
  ``str.encode('utf-8')`` and ``bytes.decode('utf-8')``.  ``send`` takes
  ``bytes`` and ``recv`` returns ``bytes``.

* HTTPS uses port 443 by default and the plain socket has to be wrapped by
  the ``ssl`` module before connecting.

* HTTP 301: requesting ``http://movie.douban.com/top250`` (note the scheme)
  answers with a 301 whose ``Location`` header names the URL to fetch
  instead, so on a 301 the new address is requested and its result returned::

      HTTP/1.1 301 Moved Permanently
      Date: Sun, 05 Jun 2016 12:37:55 GMT
      Content-Type: text/html
      Content-Length: 178
      Connection: keep-alive
      Keep-Alive: timeout=30
      Location: https://movie.douban.com/top250
      Server: dae
      X-Content-Type-Options: nosniff

      <html>
      <head><title>301 Moved Permanently</title></head>
      <body bgcolor="white">
      <center><h1>301 Moved Permanently</h1></center>
      <hr><center>nginx</center>
      </body>
      </html>

* Because the default port differs per scheme, ``get`` picks the default
  port from the protocol of the URL.
"""

import socket
import ssl

# Public API.  The test_* helpers are left out on purpose so that a star
# import does not drag the network-dependent smoke test along.
__all__ = [
    "parsed_url",
    "socket_by_protocol",
    "response_by_socket",
    "parsed_response",
    "get",
    "main",
]

# Default port for each supported scheme.
_DEFAULT_PORTS = {
    "http": 80,
    "https": 443,
}

# Status codes that get() follows as redirects.
_REDIRECT_CODES = (301, 302)


# Helper functions:
# Split a URL into protocol, host, port and path.
def parsed_url(url):
    """
    Split ``url`` into its components.

    :param url: string, possible values include
    'g.cn'
    'g.cn/'
    'g.cn:3000'
    'g.cn:3000/search'
    'http://g.cn'
    'https://g.cn'
    'http://g.cn/'
    :return: tuple (protocol, host, port, path); protocol defaults to
        "http", path defaults to "/", port defaults to 80 / 443
    :raises ValueError: if an explicit port is not an integer
    """
    protocol = "http"
    rest = url
    for scheme in ("http", "https"):
        prefix = scheme + "://"
        if url.startswith(prefix):
            protocol = scheme
            # Strip only the leading scheme; a later "://" (for example a URL
            # inside the query string) must stay part of the path.
            rest = url[len(prefix):]
            break

    # Everything from the first "/" onwards is the path.
    slash_at = rest.find("/")
    if slash_at == -1:
        host = rest
        path = "/"
    else:
        host = rest[:slash_at]
        path = rest[slash_at:]

    # An explicit ":port" overrides the scheme default.
    port = _DEFAULT_PORTS[protocol]
    if ":" in host:
        pieces = host.split(":")
        host = pieces[0]
        port = int(pieces[1])

    return protocol, host, port, path


# Build the socket that matches the protocol.
def socket_by_protocol(protocol, host=None):
    """
    Return an unconnected socket suitable for ``protocol``.

    :param protocol: "http" returns a plain socket; anything else returns a
        TLS-wrapped socket
    :param host: optional server name.  When given, it is sent via SNI and
        the certificate is verified against it.  When omitted, certificate
        verification is disabled, which matches what the removed
        ``ssl.wrap_socket`` helper did by default.
    :return: socket instance
    """
    sock = socket.socket()
    if protocol == "http":
        return sock

    # ssl.wrap_socket() no longer exists in Python 3.12+, so go through an
    # SSLContext instead.
    context = ssl.create_default_context()
    if host is None:
        # Nothing to verify the certificate against; check_hostname must be
        # switched off before verify_mode may be relaxed.
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE
    return context.wrap_socket(sock, server_hostname=host)


# Read everything the peer sends on a socket.
def response_by_socket(s):
    """
    Receive data until the peer closes the connection.

    :param s: object with a ``recv(size)`` method returning bytes
    :return: all received bytes joined together
    """
    buffer_size = 1024
    chunks = []
    while True:
        chunk = s.recv(buffer_size)
        if not chunk:
            # An empty read means the peer closed the connection.
            break
        chunks.append(chunk)
    return b"".join(chunks)


# Parse a response into status code, headers and body.
def parsed_response(r):
    """
    Parse a decoded HTTP response.

    :param r: response text (status line, headers, blank line, body)
    :return: tuple(status_code, headers, body) where status_code is an int,
        headers is a dict (a repeated header keeps its last value) and body
        is everything after the first blank line ("" if there is none)
    :raises ValueError: if the status line is missing or malformed
    """
    head, _, body = r.partition("\r\n\r\n")
    lines = head.split("\r\n")

    # Status line looks like: HTTP/1.1 200 OK
    status_parts = lines[0].split()
    if len(status_parts) < 2:
        raise ValueError("malformed HTTP status line: {!r}".format(lines[0]))
    status_code = int(status_parts[1])

    headers = {}
    for line in lines[1:]:
        # Split on the first colon only: values such as dates or URLs may
        # contain ":" themselves, and the space after it is optional.
        name, colon, value = line.partition(":")
        if not colon:
            continue
        headers[name.strip()] = value.strip()
    return status_code, headers, body


def _find_header(headers, name):
    """Return the value of header ``name`` ignoring case, or None."""
    wanted = name.lower()
    for key, value in headers.items():
        if key.lower() == wanted:
            return value
    return None


def _redirect_target(location, protocol, host, port):
    """Turn a Location value into a URL that parsed_url() understands."""
    if location.startswith("//"):
        # Scheme-relative: keep the current protocol.
        return "{}:{}".format(protocol, location)
    if location.startswith("/"):
        # Path-only: same protocol, host and port as the current request.
        return "{}://{}:{}{}".format(protocol, host, port, location)
    return location


# Main logic:
# Sending an HTTP request and collecting the answer is reusable logic, so it
# lives in one function.
def get(url, max_redirects=10):
    """
    Fetch ``url`` over a raw socket and return the parsed response.

    The body is returned as received; transfer encodings such as chunked are
    not decoded here.

    :param url: address to fetch, for example
    'g.cn'
    'g.cn/'
    'g.cn:3000'
    'g.cn:3000/search'
    'http://g.cn'
    'https://g.cn'
    'http://g.cn/'
    :param max_redirects: how many 301/302 redirects may still be followed
    :return: tuple(status_code, headers, body)
    :raises RuntimeError: if more than ``max_redirects`` redirects occur
    """
    protocol, host, port, path = parsed_url(url)

    # The Host header has to carry the port when it is not the scheme default.
    host_header = host
    if port != _DEFAULT_PORTS[protocol]:
        host_header = "{}:{}".format(host, port)
    request = "GET {} HTTP/1.1\r\nhost: {}\r\nConnection: close\r\n\r\n".format(
        path, host_header
    )
    encoding = "utf-8"

    # Connect, send the request and read the reply; always release the socket.
    s = socket_by_protocol(protocol, host)
    try:
        s.connect((host, port))
        # sendall() keeps writing until the whole request is out; send() may
        # stop after a partial write.
        s.sendall(request.encode(encoding))
        response = response_by_socket(s)
    finally:
        s.close()

    # Bodies are not guaranteed to be UTF-8; replace undecodable bytes rather
    # than failing the whole request.
    r = response.decode(encoding, errors="replace")

    status_code, headers, body = parsed_response(r)

    # 301 and 302 are redirects: fetch the address named by Location.
    if status_code in _REDIRECT_CODES:
        location = _find_header(headers, "Location")
        if location:
            if max_redirects <= 0:
                raise RuntimeError(
                    "too many redirects, last URL: {}".format(url)
                )
            target = _redirect_target(location, protocol, host, port)
            return get(target, max_redirects - 1)

    return status_code, headers, body


# Unit tests:
def test_parsed_url():
    """
    parsed_url is easy to get wrong, so check it against known URLs.
    """
    http = "http"
    https = "https"
    host = "g.cn"
    path = "/"
    test_items = [
        ('http://g.cn', (http, host, 80, path)),
        ('http://g.cn/', (http, host, 80, path)),
        ('http://g.cn:90', (http, host, 90, path)),
        ('http://g.cn:90/', (http, host, 90, path)),
        ('https://g.cn', (https, host, 443, path)),
        ('https://g.cn:233', (https, host, 233, path)),
    ]
    for url, expected in test_items:
        u = parsed_url(url)
        # assert aborts with the message below when the condition is false.
        e = "parsed_url ERROR, ({}) ({}) ({})".format(url, u, expected)
        assert u == expected, e


def test_get():
    """
    Smoke test that both HTTP and HTTPS can be fetched (needs network access).
    """
    urls = [
        'http://movie.douban.com/top250',
        'https://movie.douban.com/top250',
    ]
    for u in urls:
        res = get(u)
        print(res)


# Usage:
def main():
    """Fetch a sample page and print its status code, headers and body."""
    url = 'http://movie.douban.com/top250'
    status_code, headers, body = get(url)
    print("status_code: ", status_code)
    print("headers: ", headers)
    print("body: ", body)


if __name__ == '__main__':
    # Call test_parsed_url() / test_get() here to run the checks instead.
    main()
原生socket请求url获取状态码、消息报头、响应正文
原文地址:https://www.cnblogs.com/wyb666/p/9142278.html