# Requests HTTP library (Requests请求库)
# Project path: 3 requests请求库使用.py
# Project date: 8/6/2019
# Project goal: walk through the basic usage of the requests library.
"""
The 7 main methods of requests:
    requests.request(): build a request; the base method underlying the methods below
    requests.get():     main method for fetching an HTML page (HTTP GET)
    requests.head():    fetch the header information of an HTML page (HTTP HEAD)
    requests.post():    submit a POST request to a page (HTTP POST)
    requests.put():     submit a PUT request to a page (HTTP PUT)
    requests.patch():   submit a partial-modification request (HTTP PATCH)
    requests.delete():  submit a delete request (HTTP DELETE)

The 13 parameters:
    requests.request(method, url, **kwargs)
        method: request method, one of the 7 kinds (get/put/post, ...)
    requests.method(url, **kwargs)
        url: URL of the page to fetch
        **kwargs: parameters controlling the request, 13 in total, all optional:
            params:  dict or bytes, appended to the URL as query parameters
            data:    dict, bytes or file object, sent as the body of the Request
            json:    JSON-serialisable data, sent as the body of the Request
            headers: dict, custom HTTP headers (e.g. to imitate a browser)
            cookies: dict or CookieJar, cookies sent with the Request
            auth:    tuple, enables HTTP authentication
            files:   dict, files to upload
            timeout: timeout, in seconds
            proxies: dict, proxy servers to use; may include login credentials
            allow_redirects: True/False, default True, redirect switch
            stream:  True/False, default False; when False the body is
                     downloaded immediately
            verify:  True/False, default True, SSL certificate verification switch
            cert:    path to a local SSL certificate
"""
import requests

#%% Basic GET request
import requests

response = requests.get('http://www.baidu.com/')
print(type(response))        # type of the response object
print(response.status_code)  # status code of this response
print(response.text)         # response body as text
print(response.headers)      # response headers
print(response.cookies)      # cookies returned with this response

#%% GET with query parameters, headers, etc.
import requests

# NOTE(review): the key "s?tn" looks like a pasted URL fragment ("/s?tn=...")
# and the target host is taobao rather than baidu -- confirm the intended
# endpoint and parameter name before relying on this cell.
data = {
    "s?tn": "02003390_30_hao_pg",
    'wd': '美女',
}
header = {}  # custom request headers; empty means the library defaults are used
response = requests.get('http://www.taobao.com/', params=data, headers=header)
print(response.text)
print(response.url)

#%% Parse JSON
import requests
import json

response = requests.get('http://www.baidu.com/')
try:
    print(response.json())
except ValueError:
    # response.json() raises a ValueError subclass when the body is not JSON
    # (this URL serves an HTML page).
    print("response body is not valid JSON")

#%% Fetch binary data
import requests

response = requests.get('https://hbimg.huabanimg.com/6519f3b9d79be866403eb8d33ea5fa9ca5e3e5a2e40f6-Fzf6yq_fw658')
# The with-statement closes the file; no explicit close() is needed.
with open('tupian.jpg', 'wb') as f:
    f.write(response.content)

#%% Response attributes
import requests

response = requests.get('http://www.baidu.com')
print(response.content)
print(response.url)
print(response.headers)
print(response.text)
print(response.cookies)
print(response.encoding)
print(response.history)
print(response.next)

#%% Status code check
import requests

response = requests.get('http://www.baidu.com')
if response.status_code == requests.codes.ok:
    print("ok")

#%% Read cookies
import requests

response = requests.get('http://www.baidu.com')
print(response.cookies)
for key, value in response.cookies.items():
    print(f"{key}={value}")

#%% Session persistence (session)

#%% Certificate verification
# Most sites are HTTPS and require certificate verification; sites whose
# certificate is not issued by a trusted authority raise an SSL error.
# To avoid that exception, pass verify=False, e.g.
#     requests.get('https://www.12306.cn', verify=False)
import requests

response = requests.get('https://www.12306.cn')
print(response.status_code)
print(response.content)

#%% Proxy settings
# Declare a dict of proxies and pass it as the proxies argument.
import requests

proxies = {
    'http': 'http://127.0.0.1:1080',
    # An 'https' entry (e.g. 'https://127.0.1.7:1060') can be added the same way.
}
response = requests.get('https://www.12306.cn', verify=False, proxies=proxies)
print(response.content)

#%% Exception handling
# The base exception is RequestException, which inherits from IOError.
# Its subclasses can be caught individually as well: ConnectionError,
# URLRequired, TooManyRedirects, HTTPError, ConnectTimeout, ReadTimeout,
# Timeout, SSLError, ProxyError.
import requests
from requests.exceptions import ReadTimeout, HTTPError, RequestException

try:
    response = requests.get('https://www.baidu.com', timeout=0.1)
    print(response.status_code)
except ReadTimeout:
    print("超时错误")
except requests.exceptions.ConnectionError:
    # Must be the requests exception: the builtin ConnectionError is an
    # unrelated class and would never match a requests connection failure.
    print("连接错误")
except RequestException as exc:
    # NOTE(review): the original handler body is cut off in the reviewed
    # text; printing the exception is a placeholder -- confirm.
    print(exc)