"""Fetch the HTML of web pages with a small pool of worker threads.

Originally published as the blog article "Python: a simple way to get a
web page's HTML" (keywords: multithreaded function; fetching pages from a
given URL with several threads so that one slow request does not block
the others).

A shared queue is filled with URLs; each worker thread repeatedly takes one
URL from the queue, downloads it and prints the page source.
"""

import http.client
import queue
import threading
import time
import urllib
from urllib import request

BASE_URL = ''

# Shared work queue consumed by the worker threads. It is filled at import
# time with ten copies of the same demo URL.
URL_QUEUE = queue.Queue()
for _ in range(10):
    URL_QUEUE.put('http://www.baidu.com')


def fetch_url(url_queue, thid, *, timeout=10.0, delay=1.0):
    """Drain *url_queue*, downloading and printing each page.

    Intended to be used as a thread target; several workers may share the
    same queue.

    Args:
        url_queue: A ``queue.Queue`` of URL strings to download.
        thid: Caller-chosen worker number, used only in the log output.
        timeout: Seconds to wait on the network before giving up on a URL.
        delay: Seconds to sleep after each successfully handled URL.

    Returns:
        None. Pages and errors are reported on standard output.
    """
    while True:
        # Take the item directly instead of testing ``empty()`` first:
        # another worker may grab the last URL between the test and the get.
        try:
            url = url_queue.get_nowait()
        except queue.Empty:
            break

        print(f'current thread id:{threading.current_thread().name}-{thid},url:{url}\n')

        try:
            # The context manager closes the connection even if read() fails.
            with request.urlopen(url, timeout=timeout) as response:
                response_code = response.getcode()
                # Raw bytes of the page source; only read on HTTP 200.
                html = response.read() if response_code == 200 else None
        except (OSError, ValueError, http.client.HTTPException) as e:
            # OSError covers URLError/HTTPError and socket timeouts;
            # ValueError is raised for malformed URLs. Format the exception
            # into the message: ``str + exception`` raises TypeError.
            print(f'get{url},error--->{e}')
            continue

        if html is not None:
            # Decode as UTF-8; undecodable bytes are replaced rather than
            # raising and killing the worker thread.
            print(html.decode('utf-8', errors='replace'))
        time.sleep(delay)


def main(thread_num=3):
    """Start *thread_num* workers on ``URL_QUEUE`` and wait for them all."""
    print('-' * 4 + 'ALL START' + '-' * 4)
    threads = [
        threading.Thread(target=fetch_url, args=(URL_QUEUE, x))
        for x in range(thread_num)
    ]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    print('-' * 4 + 'ALL DONE' + '-' * 4)


if __name__ == '__main__':
    main()

# Source page footer (not part of the program): donation widget text,
# author 梦白沙, category "Python", published 2022-04-24, 315 views.
# Copyright notice: this article is the author's original work; it may not
# be reproduced without first obtaining the author's consent.