"""urllib.request notes and recipes.

``urllib.request.urlopen(url, data=None, [timeout, ]*, cafile=None,
capath=None, cadefault=False, context=None)``

* ``url``  -- target address.
* ``data`` -- optional payload of type ``bytes`` (build it with ``bytes()``).
  Supplying ``data`` turns the request into a POST whose body uses the
  standard ``application/x-www-form-urlencoded`` format.
* ``timeout`` -- request timeout, in seconds.
* ``cafile`` / ``capath`` -- CA certificate file / certificate directory;
  needed when using HTTPS.
* ``context`` -- must be an ``ssl.SSLContext``; specifies the SSL settings.
"""
import random
import urllib.parse
import urllib.request
from http import cookiejar


def simple_get(url, timeout=3):
    """Plain GET: fetch *url* and return the body decoded as UTF-8."""
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return response.read().decode('utf-8')


def get_with_data(url, params):
    """POST *params* (a mapping) to *url* as a url-encoded body."""
    post_data = bytes(urllib.parse.urlencode(params), encoding='utf8')
    with urllib.request.urlopen(url, data=post_data) as response:
        return response.read().decode('utf-8')


def get_request(url):
    """GET *url*; print the status line, response headers and body."""
    with urllib.request.urlopen(url) as f:
        data = f.read()
        print(f'Status-->{f.status}:{f.reason}')
        for k, v in f.getheaders():
            print(f'{k}->{v}')
        print(data.decode('utf-8'))


def post_request(url, post_data):
    """POST url-encoded *post_data* to *url*; print status, headers, body."""
    body = urllib.parse.urlencode(post_data)
    req = urllib.request.Request(url, method='POST')
    req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5;Windows NT)')
    with urllib.request.urlopen(req, data=body.encode('utf-8')) as f:
        print(f'Status-->{f.status}:{f.reason}')
        data = f.read()
        for k, v in f.getheaders():
            print(f'{k}->{v}')
        print(data.decode('utf-8'))


class reqhelper:
    """Assorted urllib.request recipes: direct fetch, custom User-Agent,
    cookie jar, plain proxy, and basic-auth proxy."""

    def __init__(self):
        # User-Agent pool; generated with https://gongjux.com/userAgentGenerator/
        self.useragent = (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36',
            'Mozilla/5.0 (Windows NT 10; Win64; x64; rv:83.0) '
            'Gecko/20100101 Firefox/83.0',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:83.0) '
            'Gecko/20100101 Firefox/83.0',
            'Mozilla/5.0 (Linux; Android 10; ELS-AN00) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/96.0.4664.92 Mobile Safari/537.36 ',
        )
        # Default request headers: a desktop Chrome User-Agent.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/96.0.4664.93 Safari/537.36',
        }

    def get_content(self, url):
        """Fetch *url* directly (no extra headers); return the UTF-8 body."""
        print('*' * 3, '直接使用', '*' * 3)
        with urllib.request.urlopen(url) as res:
            return res.read().decode('utf-8')

    def get_content_with_useragent(self, url):
        """Fetch *url* sending the default User-Agent header."""
        req = urllib.request.Request(url=url, headers=self.headers)
        with urllib.request.urlopen(req) as res:
            return res.read().decode('utf-8')

    def get_content_with_cookie(self, url):
        """Fetch *url* through an opener that stores cookies in a CookieJar."""
        print('*' * 3, '使用cookie', '*' * 3)
        cookie_support = urllib.request.HTTPCookieProcessor(cookiejar.CookieJar())
        opener = urllib.request.build_opener(cookie_support,
                                             urllib.request.HTTPHandler)
        req = urllib.request.Request(url, headers=self.headers)
        with opener.open(req) as res:
            return res.read().decode('utf-8')

    def get_content_with_proxy(self, url, proxy_list=None):
        """Fetch *url* through a randomly chosen proxy.

        *proxy_list* is a list of ``ProxyHandler`` mappings; defaults to the
        localhost examples from the article.  The original hard-coded the
        target URL inside this method, ignoring the *url* argument -- fixed.
        """
        print('*' * 3, '使用代理服务器', '*' * 3)
        if proxy_list is None:
            proxy_list = [
                {'http': '127.0.0.1:8000'},
                {'http': '127.0.0.1:8001'},
                {'http': '127.0.0.1:8002'},
            ]
        proxy_support = urllib.request.ProxyHandler(random.choice(proxy_list))
        opener = urllib.request.build_opener(proxy_support,
                                             urllib.request.HTTPHandler)
        # Use the opener explicitly rather than install_opener(), per the
        # article's own recommendation (avoids mutating global state).
        req = urllib.request.Request(url)
        with opener.open(req) as res:
            return res.read().decode('utf-8')

    def get_content_with_proxy_up(self, url, user='user', passwd='passwd',
                                  proxyserver='x.x.x.x:xx'):
        """Fetch *url* through a proxy requiring basic authentication.

        Credentials were hard-coded in the original; they are now keyword
        parameters whose defaults preserve the old behavior.
        """
        print('*' * 3, '使用带账号密码的代码', '*' * 3)
        # Password manager holding the proxy credentials; realm None means
        # the credentials apply to any realm on that server.
        pwdmgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        pwdmgr.add_password(None, proxyserver, user, passwd)
        # ProxyBasicAuthHandler (not a plain ProxyHandler) performs the
        # proxy basic-auth handshake using the password manager.
        proxyauth_handler = urllib.request.ProxyBasicAuthHandler(pwdmgr)
        opener = urllib.request.build_opener(proxyauth_handler)
        req = urllib.request.Request(url)
        with opener.open(req) as response:
            return response.read().decode('utf-8')


if __name__ == '__main__':
    base = 'https://www.baidu.com/'
    # get_request('https://www.baidu.com/s')
    # post_request('https://www.baidu.com/s', [('wd', '123'), ('r', '123')])
    r = reqhelper()
    print(r.get_content(base))
    print(r.get_content_with_cookie(base))
    print(r.get_content_with_useragent(base))
    print(r.get_content_with_proxy(base))
    print(r.get_content_with_proxy_up(base))