概述
"""Crawl Xianyu (s.2.taobao.com) category listings and store them in MongoDB.

Flow:
1. Open the listing page in a headless browser and click the category
   filter link so the hidden category list is present in the page source.
2. Extract every category link from that page source (``labelparse``).
3. For each category and each page number, download the listing page with
   ``requests`` and insert one MongoDB document per item (``shopparse``).

NOTE(review): the browser session and the MongoDB connection are created at
import time, exactly as in the original script; ``main`` reads the
module-level page source ``r``.
"""
import time

import pymongo
import requests
from lxml import etree
from selenium import webdriver


client = pymongo.MongoClient('localhost', 27017)
DB = client['闲鱼']
shop_info = DB['shop_info']

url = "https://s.2.taobao.com/list/list.htm?spm=2007.1000337.0.0.735ad9c1MhZfTa&st_trust=1&ist=0"

# NOTE(review): PhantomJS support and find_element_by_xpath are deprecated in
# newer Selenium releases -- confirm the pinned Selenium version before upgrading.
broswer = webdriver.PhantomJS()
try:
    broswer.get(url)
    # Click the link inside the category filter box; the page source is read
    # only after this click.
    button = broswer.find_element_by_xpath('//*[@id="J_CategoryFilters"]/div/a')
    button.click()
    r = broswer.page_source
finally:
    # Always shut the driver down, even if navigation or the click fails;
    # the original never released the browser process.
    broswer.quit()


def labelparse(r):
    """Return the ``href`` of every category link found in page source *r*.

    Args:
        r: HTML text of the listing page (the browser's page source).

    Returns:
        A list of ``href`` attribute values, in document order.
    """
    html = etree.HTML(r)
    return html.xpath('//ul[@class="J_HiddenAreaContent clearfix"]/li/a/@href')


def shopparse(url, page):
    """Download one page of a category listing and store its items.

    Args:
        url: Category link as returned by ``labelparse``. Presumably a
            protocol-relative URL ending in ``ist=0`` -- the last five
            characters are dropped and ``https:`` is prepended; verify
            against the live markup.
        page: Page number to request.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    fulurl = '{0}{1}page={2}&ist=0'.format('https:', url[:-5], str(page))
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(fulurl, headers=headers, timeout=10)
    html = etree.HTML(response.text)
    shopname = html.xpath('//h4[@class="item-title"]/a/text()')
    shopprice = html.xpath('//span[@class="price"]/em/text()')
    shoplocation = html.xpath('//div[@class="seller-location"]/text()')
    # The three lists are extracted independently and may differ in length.
    # zip() stops at the shortest one, where the original index loop raised
    # IndexError as soon as a price or location node was missing.
    # NOTE(review): a listing with a missing field can still shift later
    # values by one position; iterating per item container would be more
    # precise if the markup allows it.
    for name, price, location in zip(shopname, shopprice, shoplocation):
        shop_info.insert_one({
            'shopname': name,
            'shopprice': price,
            'shoplocation': location,
        })


def main(page):
    """Store listing page number *page* of every category found in ``r``."""
    for label_url in labelparse(r):
        shopparse(label_url, page)


if __name__ == "__main__":
    for page in range(0, 100):
        main(page)
转载于:https://www.cnblogs.com/realmonkeykingsun/p/8044273.html
最后
以上就是单薄学姐为你收集整理的闲鱼爬虫代码的全部内容，希望文章能够帮你解决在抓取闲鱼数据时遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复