Simulating pagination with selenium + beautifulsoup

#coding=utf-8
import unittest
from selenium import webdriver
from bs4 import BeautifulSoup


class douyuSelenium(unittest.TestCase):
    def setUp(self):
        self.driver = webdriver.PhantomJS()

    def testDouyu(self):
        self.driver.get('http://www.douyu.com/directory/all')
        while True:
            # Parse the page source that the headless browser has rendered
            soup = BeautifulSoup(self.driver.page_source, "html.parser")
            titles = soup.select('h3.ellipsis')
            nums = soup.select('span.dy-num.fr')
            for num, title in zip(nums, titles):
                print u"观众人数:" + num.get_text().strip(), u"\t房间标题:" + title.get_text().strip()
            # Stop on the last page (the "next" button is disabled), otherwise click "next"
            if self.driver.page_source.find('shark-pager-disable-next') != -1:
                break
            self.driver.find_element_by_class_name('shark-pager-next').click()

    def tearDown(self):
        print 'finish load ...'
        self.driver.quit()


if __name__ == '__main__':
    unittest.main()
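PhantomJS has been deprecated and its support was removed in Selenium 4, so on a current setup the same crawl is usually driven by headless Chrome instead. Below is a minimal Python 3 / Selenium 4 sketch of the same pagination loop; it assumes chromedriver is installed and that Douyu still exposes the h3.ellipsis, span.dy-num.fr and shark-pager-* class names used in the example above:

# coding=utf-8
# Hedged sketch: the pagination loop above, ported to Python 3 + Selenium 4
# with headless Chrome. Assumes chromedriver is available and the page
# structure matches the original example.
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

options = Options()
options.add_argument("--headless")           # run Chrome without opening a window
driver = webdriver.Chrome(options=options)
driver.get("http://www.douyu.com/directory/all")

try:
    while True:
        soup = BeautifulSoup(driver.page_source, "html.parser")
        nums = soup.select("span.dy-num.fr")
        titles = soup.select("h3.ellipsis")
        for num, title in zip(nums, titles):
            print(u"观众人数:" + num.get_text().strip(),
                  u"\t房间标题:" + title.get_text().strip())
        # On the last page the pager carries the "disabled next" class
        if "shark-pager-disable-next" in driver.page_source:
            break
        driver.find_element(By.CLASS_NAME, "shark-pager-next").click()
finally:
    driver.quit()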
Simulating login with scrapy
# -*- coding: utf-8 -*-
import scrapy


class Renren2Spider(scrapy.Spider):
    name = "renren2"
    allowed_domains = ["renren.com"]
    start_urls = (
        "http://www.renren.com/PLogin.do",
    )

    # Handle the response of the login URL in start_urls and, if needed,
    # extract the parameters required for logging in
    def parse(self, response):
        # Extract the parameters needed for login (if any)
        #_xsrf = response.xpath("//_xsrf").extract()[0]

        # Submit the login form and hand the response to the given callback
        yield scrapy.FormRequest.from_response(
            response,
            formdata={"email": "mr_mao_hacker@163.com", "password": "axxxxxxxe"},  # , "_xsrf": _xsrf},
            callback=self.parse_page
        )

    # Logged in: request a page that is only reachable after login
    def parse_page(self, response):
        url = "http://www.renren.com/422167102/profile"
        yield scrapy.Request(url, callback=self.parse_newpage)

    # Handle the response content
    def parse_newpage(self, response):
        with open("xiao.html", "w") as filename:
            filename.write(response.body)
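The spider above relies on FormRequest.from_response, which needs an actual form in the HTML of the page it loads. Another common Scrapy login pattern is to skip start_urls, override start_requests and POST the credentials straight to the login endpoint; Scrapy then carries the session cookies on every later request. The sketch below only illustrates that variant: the URL and field names simply mirror the example above and would need to match whatever the target site actually expects, and it writes the response body in binary mode so it also runs under Python 3.

# -*- coding: utf-8 -*-
# Hedged sketch: direct-POST login instead of FormRequest.from_response.
# The endpoint and field names are copied from the example above purely
# for illustration.
import scrapy


class RenrenDirectLoginSpider(scrapy.Spider):
    name = "renren_direct"
    allowed_domains = ["renren.com"]

    def start_requests(self):
        # Post the credentials directly; Scrapy keeps the resulting session
        # cookies for all subsequent requests made by this spider.
        yield scrapy.FormRequest(
            "http://www.renren.com/PLogin.do",
            formdata={"email": "mr_mao_hacker@163.com", "password": "axxxxxxxe"},
            callback=self.after_login,
        )

    def after_login(self, response):
        # Now fetch a page that requires the logged-in session
        yield scrapy.Request(
            "http://www.renren.com/422167102/profile",
            callback=self.parse_profile,
        )

    def parse_profile(self, response):
        # response.body is bytes, so open the file in binary mode
        with open("xiao.html", "wb") as f:
            f.write(response.body)

Either spider goes into a Scrapy project's spiders directory and is started with scrapy crawl renren2 (or scrapy crawl renren_direct).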
Finally
That is all of the material on simulating pagination with selenium + beautifulsoup that 独特铃铛 has recently collected and organized. For more on selenium + beautifulsoup pagination, please search the other articles on 靠谱客.