urllib.request.urlopen(url, data=None, [timeout, ]*, cafile=None, capath=None, cadefault=False, context=None)
url:地址
data:bytes 类型的内容,可通过 bytes() 函数转化为字节流。它也是可选参数。使用 data 参数后,请求方式变为以 POST 方式提交表单,标准格式是 application/x-www-form-urlencoded。
timeout :设置请求超时时间。单位是s。
cafile、capath: CA 证书和 CA 证书的路径。如果使用HTTPS则需要用到。
context:参数必须是 ssl.SSLContext 类型,用来指定 SSL 设置。
import urllib.request
# Minimal GET example: fetch a page with a 3-second timeout and print it.
url = "http://www.baidu.com"
# The context manager closes the connection once the body has been read.
with urllib.request.urlopen(url, timeout=3) as response:
    html = response.read()
print(html.decode('utf-8'))

# Example: passing the data argument (the request is sent as a POST).
import urllib.parse
import urllib.request
# POST example: form fields are URL-encoded and sent as the request body.
url = "http://www.baidu.com/s?"
params = {
    'wd': 'python',
}
# urlopen() requires bytes for data, so encode the URL-encoded string.
post_data = urllib.parse.urlencode(params).encode('utf8')
# The context manager closes the connection once the body has been read.
with urllib.request.urlopen(url, data=post_data) as response:
    print(response.read().decode('utf-8'))

# Further examples follow.
from urllib import request, parse
def get_request(url):
    """Fetch *url* with a GET request and print the response.

    Prints the status code and reason, then every response header as
    ``name->value``, then the body decoded as UTF-8.

    Args:
        url: Address to open; passed straight to ``request.urlopen``.
    """
    with request.urlopen(url) as f:
        data = f.read()
        print(f'Status-->{f.status}:{f.reason}')
        for k, v in f.getheaders():
            print(f'{k}->{v}')
        print(data.decode('utf-8'))
def post_request(url, post_data):
    """Send *post_data* to *url* as a form-encoded POST and print the response.

    Prints the status code and reason, then every response header as
    ``name->value``, then the body decoded as UTF-8.

    Args:
        url: Address to post to.
        post_data: Form fields in any shape ``parse.urlencode`` accepts
            (a mapping or a sequence of two-item tuples).
    """
    # Keep the encoded payload in its own name instead of rebinding the
    # parameter to a different type.
    body = parse.urlencode(post_data).encode('utf-8')
    req = request.Request(url, data=body, method='POST')
    req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5;Windows NT)')
    with request.urlopen(req) as f:
        print(f'Status-->{f.status}:{f.reason}')
        data = f.read()
        for k, v in f.getheaders():
            print(f'{k}->{v}')
        print(data.decode('utf-8'))
# Run the POST example only when the file is executed as a script.
if __name__ == '__main__':
    # Uncomment to try the GET example instead:
    # get_request('https://www.baidu.com/s')
    data = [
        ('wd', '123'),
        ('r', '123'),
    ]
    post_request('https://www.baidu.com/s', data)
通用的方法
import urllib.request


class reqhelper:
    """Small collection of ``urllib.request`` recipes for fetching a page.

    Every ``get_content*`` method opens the given URL, reads the whole body
    and returns it decoded as UTF-8.
    """

    def __init__(self):
        # Sample User-Agent strings; more can be generated at
        # https://gongjux.com/userAgentGenerator/
        # None of the methods in this class reads this tuple.
        self.useragent = (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 '
            'Safari/537.36',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:83.0) Gecko/20100101 Firefox/83.0',
            'Mozilla/5.0 (Linux; Android 10; ELS-AN00) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.92 Mobile '
            'Safari/537.36 ',
        )
        # Headers sent by the methods that build an explicit Request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/96.0.4664.93 Safari/537.36',
        }

    def get_content(self, url: str) -> str:
        """Fetch *url* with a plain ``urlopen`` call and return the body."""
        print('*' * 3, '直接使用', '*' * 3)
        with urllib.request.urlopen(url) as res:
            return res.read().decode('utf-8')

    def get_content_with_useragent(self, url: str) -> str:
        """Fetch *url* sending ``self.headers`` (a browser User-Agent)."""
        req = urllib.request.Request(url=url, headers=self.headers)
        with urllib.request.urlopen(req) as res:
            return res.read().decode('utf-8')

    def get_content_with_cookie(self, url: str) -> str:
        """Fetch *url* through an opener that keeps cookies in a CookieJar."""
        print('*' * 3, '使用cookie', '*' * 3)
        from http import cookiejar

        cookie_support = urllib.request.HTTPCookieProcessor(cookiejar.CookieJar())
        opener = urllib.request.build_opener(cookie_support, urllib.request.HTTPHandler)
        req = urllib.request.Request(url, headers=self.headers)
        with opener.open(req) as res:
            return res.read().decode('utf-8')

    def get_content_with_proxy(self, url: str) -> str:
        """Fetch *url* through one of three local proxies picked at random.

        The request goes through a private opener rather than
        ``install_opener``, so the process-wide default opener is left
        untouched and the page is downloaded only once.
        """
        print('*' * 3, '使用代理服务器', '*' * 3)
        import random

        # NOTE(review): only the 'http' scheme is mapped here, so https URLs
        # are presumably not routed through these proxies -- confirm intent.
        proxy_list = [
            {'http': '127.0.0.1:8000'},
            {'http': '127.0.0.1:8001'},
            {'http': '127.0.0.1:8002'},
        ]
        proxy_support = urllib.request.ProxyHandler(random.choice(proxy_list))
        opener = urllib.request.build_opener(proxy_support, urllib.request.HTTPHandler)
        # Use the caller's url; it used to be overwritten with a fixed address.
        req = urllib.request.Request(url)
        with opener.open(req) as res:
            return res.read().decode('utf-8')

    def get_content_with_proxy_up(self, url: str) -> str:
        """Fetch *url* with proxy basic-auth credentials registered."""
        print('*' * 3, '使用带账号密码的代码', '*' * 3)
        # Placeholder credentials and proxy address.
        user = "user"
        passwd = "passwd"
        proxyserver = "x.x.x.x:xx"
        # Password manager holding the user name and password; the first
        # argument (realm) is None.
        pwdmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        pwdmgr.add_password(None, proxyserver, user, passwd)
        # Handler that answers proxy authentication using the stored
        # credentials.
        proxyauth_handler = urllib.request.ProxyBasicAuthHandler(pwdmgr)
        # NOTE(review): no ProxyHandler for proxyserver is passed, so traffic
        # only uses a proxy if build_opener detects one in the environment
        # -- confirm whether a ProxyHandler should be added here.
        opener = urllib.request.build_opener(proxyauth_handler)
        req = urllib.request.Request(url)
        with opener.open(req) as response:
            return response.read().decode('utf-8')
# Exercise every reqhelper recipe only when the file is run as a script.
if __name__ == '__main__':
    r = reqhelper()
    url = "https://www.baidu.com/"
    print(r.get_content(url))
    print(r.get_content_with_cookie(url))
    print(r.get_content_with_useragent(url))
    print(r.get_content_with_proxy(url))
    print(r.get_content_with_proxy_up(url))

评论