# 进程池相比于Process效率高,因为创建进程和开启进程需要时间
# 进程数并非越多越好,操作系统调度进程需要耗时
# 进程池与Process效率比对
# Process 50个进程效率测试
# from multiprocessing import Process
# from multiprocessing import Queue
# import time
#
# def func(q):
# ret = q.get()
# ret -= 1
# q.put(ret)
# if __name__ == '__main__':
# q = Queue()
# q.put(50)
# p_lst = []
# start_time = time.time()
# for i in range(50):
# p = Process(target=func,args=(q,))
# p.start()
# p_lst.append(p)
# for p in p_lst:p.join()
# print('50个进程从50减到:',q.get(),'耗时:',time.time() - start_time)
#
# # 进程池效率测试
# from multiprocessing import Pool
#
# if __name__ == '__main__':
# q = Queue()
# # 使用5个进程去跑50个任务
# p = Pool(5)
# q.put(50)
# start_time = time.time()
# for i in range(50):
# p.apply_async(func,args=(q,)) # 异步提交方式
# p.close() # 结束往进程池提交任务
# p.join() # 感知进程池中任务结束
# print('5个进程从50减到:', q.get(), '耗时:', time.time() - start_time)
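'''
50个进程从50减到: 0 耗时: 2.6329572200775146
5个进程从50减到: 50 耗时: 0.09228920936584473
'''
# 注意:进程池的结果仍然是50,说明50个任务实际上没有执行成功——multiprocessing.Queue不能作为参数传给进程池的任务(序列化时抛出RuntimeError,而apply_async的结果没有调用get(),异常被忽略),所以上面的耗时对比并不公平;在进程池中应改用Manager().Queue()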
# 进程池最优进程个数:cpu个数加1
# 获取子进程执行结果
# from multiprocessing import Pool
# import time
#
# def func(i):
# time.sleep(0.5)
# return i*2
# if __name__ == '__main__':
# p = Pool(5)
# for i in range(10):
# res = p.apply_async(func,args=(i,))
# print(res.get()) # get会阻塞等待进程返回结果,放在此处则变为同步执行
# p.close()
# p.join()
# 修改方式让进程异步执行
# from multiprocessing import Pool
# import time
#
# def func(i):
# time.sleep(0.5)
# return i*2
# if __name__ == '__main__':
# p = Pool(5)
# res_lst = []
# for i in range(10):
# res = p.apply_async(func,args=(i,))
# res_lst.append(res)
# for i in res_lst:print(i.get())
# p.close()
# p.join()
# 回调函数
# from multiprocessing import Pool
# import os
#
# def func1(n):
# print('in func1',os.getpid())
# return n*n
#
# def func2(n2):
# print('in func2',os.getpid())
# print(n2)
#
# if __name__ == '__main__':
# p = Pool(5)
# print(os.getpid())
# for i in range(5):
# p.apply_async(func1,args=(i,),callback=func2)
# p.close()
# p.join()
'''
7232
in func1 16532
in func1 16532
in func1 8028
in func1 16532
in func1 16100
in func2 7232
0
in func2 7232
9
in func2 7232
16
in func2 7232
4
in func2 7232
1
'''
# 回调函数接收一个参数(没有限制数据类型),参数是子进程的返回值,且回调函数在主进程中执行
# 回调函数在爬虫案例中使用的比较多,爬取网页网络延迟造成爬取数据时间长,用回调函数去处理数据可以提高效率
# requests简单用法
# import requests
# response = requests.get('http://www.baidu.com')
# print(response) # 直接打印response返回网页状态码
# print(response.content) # 获取网页内容(byte类型)
# print(response.content.decode('utf-8')) # 解码
# 爬虫小例子requests版
# import requests
# from multiprocessing import Pool
# url_lst = [
# 'https://www.cnblogs.com',
# 'https://www.sogou.com',
# 'https://www.baidu.com',
# 'http://www.sohu.com'
# ]
#
# def get(url):
# response = requests.get(url)
# if response.status_code == 200:
# return (url,response.content.decode('utf-8'))
# def coll_back(url_info):
# print(url_info)
#
# if __name__ == '__main__':
# p = Pool(5)
# for url in url_lst:
# p.apply_async(get,args=(url,),callback=coll_back)
# p.close()
# p.join()
# urllib版
# from urllib.request import urlopen
# from multiprocessing import Pool
# url_lst = [
# 'https://www.cnblogs.com',
# 'https://www.sogou.com',
# 'https://www.baidu.com',
# 'http://www.sohu.com'
# ]
# def get(url):
# response = urlopen(url)
# return (url,response.read().decode('utf-8'))
# def coll_back(url_info):
# print(url_info)
#
# if __name__ == '__main__':
# p = Pool(5)
# for url in url_lst:
# p.apply_async(get,args=(url,),callback=coll_back)
# p.close()
# p.join()
# 爬虫小例子2--爬取猫眼电影网top100
# 正则测试网站:http://tool.chinaz.com/regex/
# url = 'https://maoyan.com/board'
# import re
'''content =
<dd>
<i class="board-index board-index-9">9</i>
<a href="/films/247295" title="驯龙高手3" class="image-link" data-act="boarditem-click" data-val="{movieId:247295}">
<img src="//s3plus.meituan.net/v1/mss_e2821d7f0cfe4ac1bf9202ecf9590e67/cdn-prod/file:5788b470/image/loading_2.e3d934bf.png" alt="" class="poster-default" />
<img data-src="https://p0.meituan.net/movie/9ef02a501fee7f62d49d2096b52175d32155331.jpg@160w_220h_1e_1c" alt="驯龙高手3" class="board-img" />
</a>
<div class="board-item-main">
<div class="board-item-content">
<div class="movie-item-info">
<p class="name"><a href="/films/247295" title="驯龙高手3" data-act="boarditem-click" data-val="{movieId:247295}">驯龙高手3</a></p>
<p class="star">
主演:杰伊·巴鲁切尔,刘昊然,亚美莉卡·费雷拉
</p>
<p class="releasetime">上映时间:2019-03-01</p> </div>
<div class="movie-item-number score-num">
<p class="score"><i class="integer">9.</i><i class="fraction">0</i></p>
</div>
</div>
</div>
</dd>
'''
# partten = r'<dd>.*?<i class=.*?>(?P<index>\d+)</i>.*?<p class="name"><a.*?>(?P<movename>.*?)</a></p>.*?<p class="star">(?P<star>.*?)</p>.*?<p class="releasetime">(?P<time>.*?)</p>.*?</dd>'
# regex = re.compile(partten,re.S)
# ret = re.search(regex,content)
# print(ret.group('index'),ret.group('movename'),ret.group('star').strip(),ret.group('time'))
# 以上为正则需要匹配的內容
from multiprocessing import Pool
import re
# 编译正则表达式
# Pattern for one <dd> entry of the Maoyan board page. It is written as raw
# strings so that ``\d`` reaches the regex engine as the digit class; without
# the backslash the group only matched literal "d" characters and no entry
# was ever found.
partten = (
    r'<dd>.*?<i class=.*?>(?P<index>\d+)</i>'
    r'.*?<p class="name"><a.*?>(?P<movename>.*?)</a></p>'
    r'.*?<p class="star">(?P<star>.*?)</p>'
    r'.*?<p class="releasetime">(?P<time>.*?)</p>.*?</dd>'
)
# re.S lets "." match newlines, because one entry spans many lines of HTML.
regex = re.compile(partten, re.S)
# 定义爬取网页内容的函数
import requests
def get_content(url, n, timeout=10):
    """Download one page and return its HTML as text.

    Args:
        url: Full URL of the page to fetch.
        n: Page offset. Currently unused; the commented-out debug dump below
            would use it to name the saved file.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded as UTF-8, or None when the server answers
        with a status code other than 200.
    """
    # Without a timeout a stalled connection would block this worker
    # indefinitely, and the pool could never be joined.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None
    # To avoid requesting the site repeatedly while debugging (and getting
    # the IP blocked), the raw page can be saved to disk instead:
    # filename = 'movelist/test' + str(n)
    # with open(filename, 'wb') as f:
    #     f.write(response.content)
    return response.content.decode('utf-8')
# 定义一个从保存好的文件里获取网页内容的函数
def getContentFromfile(filename):
    """Return the text of a previously saved page.

    Args:
        filename: Path of a file holding the raw page bytes.

    Returns:
        The entire file content decoded as UTF-8.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the bytes are not valid UTF-8.
    """
    # Read the bytes once and decode once, rather than decoding line by line
    # and growing a string with repeated "+=".
    with open(filename, 'rb') as f:
        return f.read().decode('utf-8')
# 定义一个解析网页內容的函数
def parse_content(content):
    """Extract the movie entries from one page of HTML and print each one.

    Used as the ``callback`` of ``Pool.apply_async``, so it receives whatever
    the fetch function returned.

    Args:
        content: Page HTML as a string. May be None or empty when the fetch
            produced nothing.

    Returns:
        A list with one dict per matched entry; empty when nothing matched
        or there was no content.
    """
    # A fetch that produced nothing hands over None; re.findall would raise
    # TypeError on it, so bail out early.
    if not content:
        return []
    move_lst = []
    # The pattern defines four groups, so findall yields 4-tuples in the
    # order index, movie name, actors, release time.
    for rank, title, actors, released in re.findall(regex, content):
        move_dic = {
            '榜单排名': rank.strip(),
            '电影名': title.strip(),
            '演员': actors.strip(),
            '上映时间': released.strip(),
        }
        print(move_dic)
        move_lst.append(move_dic)
    return move_lst
if __name__ == '__main__':
    url = 'https://maoyan.com/board/4?offset='

    def report_failure(exc):
        # Called with the exception when a task raised. Without it the
        # failure would go unnoticed, because the AsyncResult objects are
        # never inspected with .get().
        print('fetch failed:', repr(exc))

    p = Pool(5)
    # Scrape directly from the website: offsets 0, 10, ..., 90 (ten pages).
    for i in range(0, 91, 10):
        p.apply_async(
            get_content,
            args=(url + str(i), i),
            callback=parse_content,
            error_callback=report_failure,
        )
    p.close()  # no further tasks will be submitted
    p.join()   # wait until every submitted task has finished
# filepath = 'movelist/test'
# for i in range(0,91,10):
# p.apply_async(getContentFromfile,args=(filepath + str(i),),callback=parse_content)
# p.close()
# p.join()
# 最后
# 以上就是缥缈手机最近收集整理的关于Python之进程池与回调函数的全部内容,更多相关Python之进程池与回调函数内容请搜索靠谱客的其他文章。
# 本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
# 发表评论 取消回复