概述
# 进程池相比于Process效率高,因为创建进程和开启进程需要时间 # 进程数并非越多越好,操作系统调度进程需要耗时 # 进程池与Process效率比对 # Process 50个进程效率测试 # from multiprocessing import Process # from multiprocessing import Queue # import time # # def func(q): # ret = q.get() # ret -= 1 # q.put(ret) # if __name__ == '__main__': # q = Queue() # q.put(50) # p_lst = [] # start_time = time.time() # for i in range(50): # p = Process(target=func,args=(q,)) # p.start() # p_lst.append(p) # for p in p_lst:p.join() # print('50个进程从50减到:',q.get(),'耗时:',time.time() - start_time) # # # 进程池效率测试 # from multiprocessing import Pool # # if __name__ == '__main__': # q = Queue() # # 使用5个进程去跑50个任务 # p = Pool(5) # q.put(50) # start_time = time.time() # for i in range(50): # p.apply_async(func,args=(q,)) # 异步提交方式 # p.close() # 结束往进程池提交任务 # p.join() # 感知进程池中任务结束 # print('5个进程从50减到:', q.get(), '耗时:', time.time() - start_time) ''' 50个进程从50减到: 0 耗时: 2.6329572200775146 5个进程从50减到: 50 耗时: 0.09228920936584473 ''' # 进程池最优进程个数:cpu个数加1 # 获取子进程执行结果 # from multiprocessing import Pool # import time # # def func(i): # time.sleep(0.5) # return i*2 # if __name__ == '__main__': # p = Pool(5) # for i in range(10): # res = p.apply_async(func,args=(i,)) # print(res.get()) # get会阻塞等待进程返回结果,放在此处则变为同步执行 # p.close() # p.join() # 修改方式让进程异步执行 # from multiprocessing import Pool # import time # # def func(i): # time.sleep(0.5) # return i*2 # if __name__ == '__main__': # p = Pool(5) # res_lst = [] # for i in range(10): # res = p.apply_async(func,args=(i,)) # res_lst.append(res) # for i in res_lst:print(i.get()) # p.close() # p.join() # 回调函数 # from multiprocessing import Pool # import os # # def func1(n): # print('in func1',os.getpid()) # return n*n # # def func2(n2): # print('in func2',os.getpid()) # print(n2) # # if __name__ == '__main__': # p = Pool(5) # print(os.getpid()) # for i in range(5): # p.apply_async(func1,args=(i,),callback=func2) # p.close() # p.join() ''' 7232 in func1 16532 in func1 16532 in func1 8028 in func1 16532 in func1 16100 in func2 7232 0 in func2 
7232 9 in func2 7232 16 in func2 7232 4 in func2 7232 1 ''' # 回调函数接收一个参数(没有限制数据类型),参数是子进程的返回值,且回调函数在主进程中执行 # 回调函数在爬虫案例中使用的比较多,爬取网页网络延迟造成爬取数据时间长,用回调函数取处理数据可以提高效率 # requests简单用法 # import requests # response = requests.get('http://www.baidu.com') # print(response) # 直接打印responce返回网页状态码 # print(response.content) # 获取网页內蓉(byte类型) # print(response.content.decode('utf-8')) # 解码 # 爬虫小例子requests版 # import requests # from multiprocessing import Pool # url_lst = [ # 'https://www.cnblogs.com', # 'https://www.sogou.com', # 'https://www.baidu.com', # 'http://www.sohu.com' # ] # # def get(url): # response = requests.get(url) # if response.status_code == 200: # return (url,response.content.decode('utf-8')) # def coll_back(url_info): # print(url_info) # # if __name__ == '__main__': # p = Pool(5) # for url in url_lst: # p.apply_async(get,args=(url,),callback=coll_back) # p.close() # p.join() # urllib版 # from urllib.request import urlopen # from multiprocessing import Pool # url_lst = [ # 'https://www.cnblogs.com', # 'https://www.sogou.com', # 'https://www.baidu.com', # 'http://www.sohu.com' # ] # def get(url): # response = urlopen(url) # return (url,response.read().decode('utf-8')) # def coll_back(url_info): # print(url_info) # # if __name__ == '__main__': # p = Pool(5) # for url in url_lst: # p.apply_async(get,args=(url,),callback=coll_back) # p.close() # p.join() # 爬虫小例子2--爬取猫眼电影网top100 # 正则测试网站:http://tool.chinaz.com/regex/ # url = 'https://maoyan.com/board' # import re '''content = <dd> <i class="board-index board-index-9">9</i> <a href="/films/247295" title="驯龙高手3" class="image-link" data-act="boarditem-click" data-val="{movieId:247295}"> <img src="//s3plus.meituan.net/v1/mss_e2821d7f0cfe4ac1bf9202ecf9590e67/cdn-prod/file:5788b470/image/loading_2.e3d934bf.png" alt="" class="poster-default" /> <img data-src="https://p0.meituan.net/movie/9ef02a501fee7f62d49d2096b52175d32155331.jpg@160w_220h_1e_1c" alt="驯龙高手3" class="board-img" /> </a> <div class="board-item-main"> <div 
class="board-item-content"> <div class="movie-item-info"> <p class="name"><a href="/films/247295" title="驯龙高手3" data-act="boarditem-click" data-val="{movieId:247295}">驯龙高手3</a></p> <p class="star"> 主演:杰伊·巴鲁切尔,刘昊然,亚美莉卡·费雷拉 </p> <p class="releasetime">上映时间:2019-03-01</p> </div> <div class="movie-item-number score-num"> <p class="score"><i class="integer">9.</i><i class="fraction">0</i></p> </div> </div> </div> </dd> ''' # partten = '<dd>.*?<i class=.*?>(?P<index>d+)</i>.*?<p class="name"><a.*?>(?P<movename>.*?)</a></p>.*?<p class="star">(?P<star>.*?)</p>.*?<p class="releasetime">(?P<time>.*?)</p>.*?</dd>' # regex = re.compile(partten,re.S) # ret = re.search(regex,content) # print(ret.group('index'),ret.group('movename'),ret.group('star').strip(),ret.group('time')) # 以上为正则需要匹配的內容
from multiprocessing import Pool
import re
# Compile the regular expression once at import time so every parse reuses it.
# Named groups, in order: index (board rank), movename (title), star (cast
# line) and time (release-date line). re.S lets '.' also match newlines, so a
# single match can span a whole multi-line <dd>...</dd> entry.
# Raw strings keep the backslash in \d+; without it the group would only
# match literal "d" characters and no entry would ever be found.
partten = (
    r'<dd>.*?<i class=.*?>(?P<index>\d+)</i>'
    r'.*?<p class="name"><a.*?>(?P<movename>.*?)</a></p>'
    r'.*?<p class="star">(?P<star>.*?)</p>'
    r'.*?<p class="releasetime">(?P<time>.*?)</p>'
    r'.*?</dd>'
)
regex = re.compile(partten, re.S)
# 定义爬取网页内容的函数
import requests
def get_content(url, n, timeout=10):
    """Download one page and return its body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        n: Page offset; only used by the optional on-disk cache shown in the
            commented-out lines below.
        timeout: Seconds to wait for the server before ``requests`` raises,
            so a stalled connection cannot block a pool worker forever.

    Returns:
        The decoded page text when the server answers with status 200,
        otherwise ``None``.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None
    # Optionally save the raw page so that repeated debugging runs do not hit
    # the site again and get the IP banned:
    # filename = 'movelist/test' + str(n)
    # with open(filename, 'wb') as f:
    #     f.write(response.content)
    return response.content.decode('utf-8')
# 定义一个从保存好的文件里获取网页内容的函数
def getContentFromfile(filename):
    """Return the full contents of a saved page file decoded as UTF-8.

    The file is opened in binary mode and decoded explicitly, so line
    endings are returned exactly as stored on disk.

    Args:
        filename: Path of the file to read.

    Returns:
        The file's text as a single ``str`` (empty string for an empty file).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the bytes are not valid UTF-8.
    """
    with open(filename, 'rb') as f:
        # One read + one decode instead of growing a string line by line.
        return f.read().decode('utf-8')
# 定义一个解析网页內容的函数
def parse_content(content):
    """Extract the movie entries from one page, print them and return them.

    Used as the ``callback`` of ``Pool.apply_async``, so it receives whatever
    the worker function returned.

    Args:
        content: Page text to scan with the module-level ``regex``, or
            ``None``/empty when the download produced nothing.

    Returns:
        A list with one dict per matched entry (keys: '榜单排名', '电影名',
        '演员', '上映时间'); an empty list when there is nothing to parse.
    """
    # A failed download hands us None; raising inside a pool callback would
    # break the pool's result handling, so bail out quietly instead.
    if not content:
        return []
    move_lst = []
    for move_info in regex.findall(content):
        move_dic = {
            '榜单排名': move_info[0].strip(),
            '电影名': move_info[1].strip(),
            '演员': move_info[2].strip(),
            '上映时间': move_info[3].strip(),
        }
        print(move_dic)
        move_lst.append(move_dic)
    return move_lst
if __name__ == '__main__':
    url = 'https://maoyan.com/board/4?offset='

    def report_error(exc):
        # Called in the parent process when a worker task raises; without it
        # apply_async would drop the exception silently.
        print('task failed:', repr(exc))

    p = Pool(5)
    # Scrape directly from the website: one task per offset 0, 10, ..., 90.
    for i in range(0, 91, 10):
        p.apply_async(
            get_content,
            args=(url + str(i), i),
            callback=parse_content,
            error_callback=report_error,
        )
    p.close()  # no more tasks will be submitted
    p.join()   # wait until every submitted task has finished

    # Offline alternative: parse pages previously saved to disk instead of
    # downloading them again.
    # filepath = 'movelist/test'
    # for i in range(0, 91, 10):
    #     p.apply_async(getContentFromfile, args=(filepath + str(i),),
    #                   callback=parse_content)
    # p.close()
    # p.join()
最后
以上就是缥缈手机为你收集整理的《Python之进程池与回调函数》的全部内容,希望本文能够帮你解决在使用Python进程池与回调函数时遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复