当前位置:首页 > Web开发 > 正文

但是不能构造HTTP请求

2024-03-31 Web开发

 

from urllib.request import urlopen
from http.client import HTTPResponse

# Basic urlopen demo: open a URL and dump the response's status, reason,
# final URL, headers and body.
# NOTE(review): the URL literal is empty in this copy of the text (presumably
# stripped during extraction); urlopen('') fails with ValueError, so the
# intended address must be restored before running — TODO confirm.
response = urlopen('')  # for HTTP(S) URLs this is an http.client.HTTPResponse object
print(type(response))

# The response is a context manager; leaving the block closes the connection.
with response:
    print(1, response.status)
    print(2, response.reason)
    print(3, response.geturl())
    print(4, response.info())  # headers
    print(5, response.read())
# urlopen only accepts a URL and data; it cannot build a customised HTTP
# request, so the Request class is used for that.

# The Request constructor builds a request object. A headers dict can be
# supplied, and the data argument decides whether the request is GET or POST.
# add_header(key, value) adds one more key/value pair to the headers.
from urllib.request import Request, urlopen
import random

# NOTE(review): the URL literal is empty in this copy of the text (presumably
# stripped during extraction); Request('') fails with ValueError — TODO restore.
url = ''
ua_list = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11 ",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
]
ua = random.choice(ua_list)  # pick one User-Agent string at random
req = Request(url)  # an instance of <class 'urllib.request.Request'>
req.add_header('User-agent', ua)
print('type_req', type(req))  # prints <class 'urllib.request.Request'>

response = urlopen(req, timeout=20)
print(type(response))

with response:
    print(1, response.status, response.getcode(), response.reason)
    print(2, response.geturl())
    print(3, response.info())  # headers of the response
    print(4, response.read())

# Read the header back from the request object under the same key it was
# added with; the last line shows what str.capitalize() does to that key.
print(5, req.get_header('User-agent'))
print(6, 'user-agent'.capitalize())

# urllib.parse module: URL-encoding and decoding of query strings.
from urllib import parse

# NOTE(review): both values are empty in this copy of the text (presumably
# URLs stripped during extraction) — TODO confirm the intended values.
u = {
    'url': '',
    'p_url': '',
}
x = parse.urlencode(u)
print(x)

u = parse.urlencode({'wd': '中国'})  # encode: non-ASCII text becomes %XX escapes
print(u)
url = "https://www.baidu.com/s?{}".format(u)
print(url)
# The raw UTF-8 bytes, for comparison with the %XX escapes printed above.
print('中国'.encode('utf-8'))

print(parse.unquote(u))  # decode
print(parse.unquote(url))

# Requirement: search Bing for a keyword and save the returned page to an
# HTML file.
from urllib.request import Request, urlopen
from urllib.parse import urlencode
import random

keyword = input("请输入关键字")
data = urlencode({'q': keyword})  # query string: q=<url-encoded keyword>
# NOTE(review): base_url is empty in this copy of the text (presumably the
# Bing search endpoint, stripped during extraction) — TODO restore; with an
# empty base the Request below fails with ValueError.
base_url = ''
url = '{}?{}'.format(base_url, data)
print(url)
ua_list = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11 ",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
]
ua = random.choice(ua_list)
req = Request(url, headers={'User-agent': ua})
response = urlopen(req)
with response:
    # Binary mode: the body is written exactly as received, without decoding.
    with open('1.html', 'wb') as f:
        f.write(response.read())
print("success")

# POST方法
from urllib.request import Request,urlopen
from urllib.parse import urlencode
import simplejson
import random
# POST demo: send the same form data twice, once as a raw (not URL-encoded)
# body and once URL-encoded, then print the second response.
ua_list = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11 ",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
]
ua = random.choice(ua_list)
# NOTE(review): the target URL is empty in this copy of the text (presumably
# stripped during extraction); Request('') fails with ValueError — TODO restore.
req = Request('')
req.add_header('User-agent', ua)
data = urlencode({'name': '张三,@=http://www.mamicode.com/&*', 'age': '6'})
print(data)
# Passing data= to urlopen makes the request a POST.
res1 = urlopen(req, data='name=张三,@=http://www.mamicode.com/&*,&age=6'.encode())  # body sent without URL-encoding
res2 = urlopen(req, data=data.encode())  # POST, form submission with URL-encoded data
# Both responses are entered so that both connections get closed; only the
# URL-encoded one is printed (add print(res1.read()) to inspect the other).
with res1, res2:
    print(res2.read())

# 豆瓣https://movie.douban.com/explore#!type=movie&tag=%E7%83%AD%E9%97%A8&sort=rank&page_limit=20&page_start=0
from urllib.request import Request,urlopen
from urllib.parse import urlencode
import random

# Douban demo: send the same query parameters once as a POST body and once
# as a GET query string, printing the HTTP method and decoded body each time.
ua_list = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11 ",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
]
ua = random.choice(ua_list)
url = 'https://movie.douban.com/explore#!'
req = Request(url)
req.add_header('User-agent', ua)

data = urlencode({
    'type': 'movie',
    'tag': '热门',
    'sort': 'rank',
    'page_limit': 8,
    'page_start': 10,
})

# POST: the encoded parameters travel in the request body.
res = urlopen(req, data=data.encode())
with res:
    print(res._method)  # private attribute of the response object
    print(1, res.read().decode())

# GET: the same parameters are appended to the URL; note that this call
# passes a plain URL string, so the User-agent header set on req is not used.
with urlopen('{}?{}'.format(url, data)) as res:
    print(res._method)
    print(2, res.read().decode())

from urllib.request import Request,urlopen
import ssl
import random
# HTTPS demo: fetch a page while accepting a certificate that would
# otherwise fail verification.
ua_list = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11 ",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
]
ua = random.choice(ua_list)
request = Request('https://www.12306.cn/mormhweb/')
request.add_header('User-agent', ua)

# Accept untrusted certificates.
# SECURITY NOTE: this context performs no certificate verification and
# relies on a private ssl helper; acceptable for a demo only.
context = ssl._create_unverified_context()
res = urlopen(request, context=context)
with res:
    print(res._method)  # private attribute of the response object
    print(res.geturl())
    print(res.read().decode())

# The standard-library urllib lacks some key features; non-standard
# third-party libraries provide them, for example connection-pool management.
import urllib3
import random

url = 'https://movie.douban.com'
ua_list = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11 ",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
]
ua = random.choice(ua_list)
# The pool manager is used as a context manager around the whole exchange.
with urllib3.PoolManager() as http:
    response = http.request('GET', url, headers={'User-agent': ua})
    print(type(response))  # <class 'urllib3.response.HTTPResponse'>
    print(response.status, response.reason)
    print(response.headers)
    print(response.data)

requests库使用了urllib3库,提供了更友好的API

import requests
import random
# requests demo: GET a page, print request/response metadata and the body,
# then save the decoded text to movie.html.
url = 'https://movie.douban.com'
ua_list = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11 ",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
]
ua = random.choice(ua_list)
response = requests.request('GET', url, headers={'User-Agent': ua})
with response:
    print(type(response))
    print(response.url)
    print(response.status_code)
    print(response.request.headers)  # request headers
    print(response.headers)  # response headers
    print(response.text)
    # Text mode with an explicit encoding, since response.text is a str.
    with open('movie.html', 'w', encoding='utf-8') as f:
        f.write(response.text)
requests默认使用了Session对象,是为了在多次与服务器交互时保留会话信息: # 直接使用session
import requests
import random
# Session demo: issue several GETs through one requests.Session and print
# the request headers and cookies seen on each response.
ua_list = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11 ",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
]
ua = random.choice(ua_list)
# The same URL is listed twice so the two requests can be compared.
urls = ['https://www.baidu.com', 'https://www.baidu.com']
session = requests.Session()
print(type(session))
with session:
    for url in urls:
        response = session.get(url, headers={'User-agent': ua})
        with response:
            print(type(response))  # <class 'requests.models.Response'>
            print(response.url)
            print(response.status_code)
            print('headers', response.request.headers)
            print('cookie', response.cookies)
            print(response.text[:20])  # first 20 characters of the body only

urllib库中常见的类和方法

温馨提示: 本文由Jm博客推荐,转载请保留链接: https://www.jmwww.net/file/web/32207.html