闲鱼

107 阅读 0 评论 71 点赞

我是靠谱客的博主单薄学姐，这篇文章主要介绍如何用 Python（Selenium + requests + lxml）爬取闲鱼的商品信息并存入 MongoDB，现在分享给大家，希望可以做个参考。

复制代码

1 import requests
 2 from lxml import etree
 3 from selenium import webdriver
 4 import time
 5 import pymongo
 6 
 7 
 8 client=pymongo.MongoClient('localhost',27017)
 9 DB=client['闲鱼']
10 #url_list=DB['shop_list']
11 shop_info=DB['shop_info']
12 
13 url="https://s.2.taobao.com/list/list.htm?spm=2007.1000337.0.0.735ad9c1MhZfTa&st_trust=1&ist=0"
14 broswer=webdriver.PhantomJS()
15 broswer.get(url)
16 #time.sleep(2)
17 button=broswer.find_element_by_xpath('//*[@id="J_CategoryFilters"]/div/a')
18 button.click()
19 #time.sleep(1)
20 r=broswer.page_source
21 
22 
def labelparse(r):
    """Return the category link targets found in an HTML document.

    Args:
        r: HTML source of the listing page as a string.

    Returns:
        A list with the ``href`` value of every ``<a>`` that sits directly
        inside an ``<li>`` of the ``<ul>`` whose class attribute is exactly
        "J_HiddenAreaContent clearfix". The list is empty when nothing
        matches.
    """
    html = etree.HTML(r)
    # Only the link targets are needed; the link captions were previously
    # extracted as well but never used.
    return html.xpath('//ul[@class="J_HiddenAreaContent clearfix"]/li/a/@href')
28 
29 
def shopparse(url, page, timeout=10):
    """Fetch one result page of a category and store its items in MongoDB.

    Args:
        url: Category link as returned by labelparse(). Its last five
            characters are dropped and 'https:' is prepended.
            NOTE(review): presumably the href is protocol-relative and ends
            in 'ist=0' -- verify against the live page.
        page: Page number placed in the ``page`` query parameter.
        timeout: Seconds to wait for the HTTP response. Without a timeout a
            single stalled connection would block the crawl indefinitely.

    Each item found on the page is inserted into the module-level
    ``shop_info`` collection as a document with the keys 'shopname',
    'shopprice' and 'shoplocation'.
    """
    fulurl = '{0}{1}page={2}&ist=0'.format('https:', url[:-5], str(page))
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
    response = requests.get(fulurl, headers=headers, timeout=timeout)
    html = etree.HTML(response.text)
    shopname = html.xpath('//h4[@class="item-title"]/a/text()')
    shopprice = html.xpath('//span[@class="price"]/em/text()')
    shoplocation = html.xpath('//div[@class="seller-location"]/text()')
    # The three lists come from independent XPath queries and may differ in
    # length when an item lacks a field. zip() stops at the shortest list, so
    # a short list no longer raises IndexError after part of the page has
    # already been written.
    for name, price, location in zip(shopname, shopprice, shoplocation):
        shop_info.insert_one({
            'shopname': name,
            'shopprice': price,
            'shoplocation': location,
        })
47 
48 
49 
50 
def main(page):
    """Scrape result page ``page`` of every category.

    The category links are parsed from the module-level HTML snapshot ``r``,
    and each link is handed to shopparse() together with ``page``.
    """
    for category_url in labelparse(r):
        shopparse(category_url, page)
56 
57 
58 
if __name__ == "__main__":
    # Walk result pages 0 through 99, scraping every category on each pass.
    for page_number in range(100):
        main(page_number)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
 1 import requests
 2 from lxml import etree
 3 from selenium import webdriver
 4 import time
 5 import pymongo
 6 
 7 
 8 client=pymongo.MongoClient('localhost',27017)
 9 DB=client['闲鱼']
10 #url_list=DB['shop_list']
11 shop_info=DB['shop_info']
12 
13 url="https://s.2.taobao.com/list/list.htm?spm=2007.1000337.0.0.735ad9c1MhZfTa&st_trust=1&ist=0"
14 broswer=webdriver.PhantomJS()
15 broswer.get(url)
16 #time.sleep(2)
17 button=broswer.find_element_by_xpath('//*[@id="J_CategoryFilters"]/div/a')
18 button.click()
19 #time.sleep(1)
20 r=broswer.page_source
21 
22 
def labelparse(r):
    """Return the category link targets found in an HTML document.

    Args:
        r: HTML source of the listing page as a string.

    Returns:
        A list with the ``href`` value of every ``<a>`` that sits directly
        inside an ``<li>`` of the ``<ul>`` whose class attribute is exactly
        "J_HiddenAreaContent clearfix". The list is empty when nothing
        matches.
    """
    html = etree.HTML(r)
    # Only the link targets are needed; the link captions were previously
    # extracted as well but never used.
    return html.xpath('//ul[@class="J_HiddenAreaContent clearfix"]/li/a/@href')
28 
29 
def shopparse(url, page, timeout=10):
    """Fetch one result page of a category and store its items in MongoDB.

    Args:
        url: Category link as returned by labelparse(). Its last five
            characters are dropped and 'https:' is prepended.
            NOTE(review): presumably the href is protocol-relative and ends
            in 'ist=0' -- verify against the live page.
        page: Page number placed in the ``page`` query parameter.
        timeout: Seconds to wait for the HTTP response. Without a timeout a
            single stalled connection would block the crawl indefinitely.

    Each item found on the page is inserted into the module-level
    ``shop_info`` collection as a document with the keys 'shopname',
    'shopprice' and 'shoplocation'.
    """
    fulurl = '{0}{1}page={2}&ist=0'.format('https:', url[:-5], str(page))
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
    response = requests.get(fulurl, headers=headers, timeout=timeout)
    html = etree.HTML(response.text)
    shopname = html.xpath('//h4[@class="item-title"]/a/text()')
    shopprice = html.xpath('//span[@class="price"]/em/text()')
    shoplocation = html.xpath('//div[@class="seller-location"]/text()')
    # The three lists come from independent XPath queries and may differ in
    # length when an item lacks a field. zip() stops at the shortest list, so
    # a short list no longer raises IndexError after part of the page has
    # already been written.
    for name, price, location in zip(shopname, shopprice, shoplocation):
        shop_info.insert_one({
            'shopname': name,
            'shopprice': price,
            'shoplocation': location,
        })
47 
48 
49 
50 
def main(page):
    """Scrape result page ``page`` of every category.

    The category links are parsed from the module-level HTML snapshot ``r``,
    and each link is handed to shopparse() together with ``page``.
    """
    for category_url in labelparse(r):
        shopparse(category_url, page)
56 
57 
58 
if __name__ == "__main__":
    # Walk result pages 0 through 99, scraping every category on each pass.
    for page_number in range(100):
        main(page_number)