概述
#coding=utf-8
import unittest
from selenium import webdriver
from bs4 import BeautifulSoup
class douyuSelenium(unittest.TestCase):
def setUp(self):
self.driver = webdriver.PhantomJS()
def testDouyu(self):
self.driver.get('http://www.douyu.com/directory/all')
while True:
#print self.driver.page_source
soup = BeautifulSoup(self.driver.page_source, "html.parser")
titles = soup.select('h3.ellipsis')
nums = soup.select('span.dy-num.fr')
for title, num in zip(nums, titles):
print u"观众人数:"+ title.get_text().strip(), u"t房间标题:"+num.get_text().strip()
if self.driver.page_source.find('shark-pager-disable-next') != -1:
break
self.driver.find_element_by_class_name('shark-pager-next').click()
def tearDown(self):
print 'finish load ...'
self.driver.quit()
if __name__ == '__main__':
unittest.main()
scrapy 模拟登录
# -*- coding: utf-8 -*-
import scrapy
class Renren2Spider(scrapy.Spider):
    """Log in to renren.com through its login form, then fetch a page
    that requires an authenticated session and save it to disk."""
    name = "renren2"
    allowed_domains = ["renren.com"]
    start_urls = (
        "http://www.renren.com/PLogin.do",
    )

    def parse(self, response):
        # Handle the response of the login URL: fill in the login form
        # and submit it. Extract hidden tokens here if the form needs
        # them, e.g.:
        #_xsrf = response.xpath("//_xsrf").extract()[0]
        yield scrapy.FormRequest.from_response(
            response,
            # Add "_xsrf": _xsrf to formdata if the site requires it.
            formdata = {"email" : "mr_mao_hacker@163.com", "password" : "axxxxxxxe"},
            callback = self.parse_page
        )

    def parse_page(self, response):
        # Login succeeded; request a page only visible when authenticated.
        url = "http://www.renren.com/422167102/profile"
        yield scrapy.Request(url, callback = self.parse_newpage)

    def parse_newpage(self, response):
        # response.body is bytes: open in binary mode ("wb", not the
        # original "w") so the write works on Python 3 as well as 2.
        # Also renamed the handle from the misleading `filename`.
        with open("xiao.html", "wb") as f:
            f.write(response.body)
最后
以上就是独特铃铛为你收集整理的selenium+beautifulsoup模拟翻页的全部内容,希望文章能够帮你解决selenium+beautifulsoup模拟翻页所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复