I'm 狂野苗条, a blogger at 靠谱客. This article, which I collected during recent development work, introduces the usage of Selenium for web scraping in Python 3; I found it quite useful and am sharing it here as a reference.

Overview

# Basic Selenium example
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
browser = webdriver.Chrome()
try:
    browser.get('http://www.baidu.com')  # request the page
    input = browser.find_element(By.ID, 'kw')  # locate the search box by its ID
    input.send_keys('Python')
    input.send_keys(Keys.ENTER)
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
    print(browser.current_url)
    print(browser.get_cookies())
    print(browser.page_source)  # print the page source
finally:
    # browser.close()
    pass
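If Chrome fails to start because the chromedriver binary cannot be found, Selenium 4 lets you point to it explicitly through a Service object. A minimal sketch, with a purely illustrative driver path (recent Selenium releases can also fetch the driver automatically, so this is only a fallback):

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
service = Service('/path/to/chromedriver')  # illustrative path, replace with your own
browser = webdriver.Chrome(service=service)
browser.get('http://www.baidu.com')
browser.quit()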
# Finding nodes
# A single node can be located by ID, CSS selector, or XPath
from selenium import webdriver
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
# Single node
first = browser.find_element(By.ID, 'q')
second = browser.find_element(By.CSS_SELECTOR, '#q')
third = browser.find_element(By.XPATH, '//*[@id="q"]')
# Multiple nodes
four = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
print(first, '\n', second, '\n', third, '\n\n', four)
# <selenium.webdriver.remote.webelement.WebElement (session="23f60c7606e1f26b9148218fe48bfce4", element="2cfebc87-e9f5-44be-9a2f-39913bd5ab23")>
# <selenium.webdriver.remote.webelement.WebElement (session="23f60c7606e1f26b9148218fe48bfce4", element="2cfebc87-e9f5-44be-9a2f-39913bd5ab23")>
# <selenium.webdriver.remote.webelement.WebElement (session="23f60c7606e1f26b9148218fe48bfce4", element="2cfebc87-e9f5-44be-9a2f-39913bd5ab23")>
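Besides ID, CSS selector, and XPath, the By class also supports strategies such as NAME, CLASS_NAME, TAG_NAME, and LINK_TEXT. A minimal sketch against Baidu's search box, assuming the input still carries id="kw" and name="wd":

from selenium import webdriver
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
browser.get('https://www.baidu.com')
by_name = browser.find_element(By.NAME, 'wd')         # by the name attribute (assumed value)
inputs = browser.find_elements(By.TAG_NAME, 'input')  # every <input> element on the page
print(by_name, '\n', len(inputs))
browser.close()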
# Node interaction
# send_keys(), clear(), click()
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
input = browser.find_element(By.ID, 'q')
input.send_keys('iPhone 13')
time.sleep(1)
input.clear()
time.sleep(3)
input.send_keys('iPad')
button = browser.find_element(By.CLASS_NAME, 'btn-search')
button.click()
# Action chains
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import time
browser = webdriver.Chrome()
url='https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
browser.switch_to.frame('iframeResult')
source = browser.find_element(By.CSS_SELECTOR,'#draggable')
target = browser.find_element(By.CSS_SELECTOR,'#droppable')
actions = ActionChains(browser)
actions.drag_and_drop(source,target)
actions.perform()
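ActionChains calls can also be chained and executed in a single perform(), for example hovering over one element and then clicking another. A minimal sketch; #menu and #item are hypothetical selectors, not taken from the page above:

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
browser.get('https://www.example.com')                 # placeholder page
menu = browser.find_element(By.CSS_SELECTOR, '#menu')  # hypothetical element
item = browser.find_element(By.CSS_SELECTOR, '#item')  # hypothetical element
ActionChains(browser).move_to_element(menu).click(item).perform()  # hover, then click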
# Executing JavaScript
# execute_script()
from selenium import webdriver
browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
browser.execute_script('alert("To Bottom")')
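execute_script() also returns whatever value the JavaScript snippet returns, which is handy for reading data straight out of the page. A small sketch reusing the browser object above:

title = browser.execute_script('return document.title')               # page title
height = browser.execute_script('return document.body.scrollHeight')  # full page height in pixels
print(title, height)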
# Getting attributes with get_attribute() and text content with .text
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
url = 'https://www.zhihu.com/explore'
browser.get(url)
logo = browser.find_element(By.ID, 'root')
print(logo.text)                    # text content
print(logo.get_attribute('class'))  # attribute value
print(logo.id)        # internal element id
print(logo.location)  # position on the page
print(logo.tag_name)  # tag name
print(logo.size)      # size
browser.close()
# Switching frames
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
browser.switch_to.frame('iframeResult')
try:
    logo = browser.find_element(By.CLASS_NAME, 'logo')
except NoSuchElementException:
    print('NO LOGO')
browser.switch_to.parent_frame()
logo = browser.find_element(By.CLASS_NAME, 'logo')
print(logo)
print(logo.text)
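Besides parent_frame(), which moves up one level, switch_to.default_content() jumps straight back to the top-level document from any nesting depth:

browser.switch_to.default_content()  # return to the top-level page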
# Waiting for elements (implicit and explicit waits)
from selenium import webdriver
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
browser.implicitly_wait(10)  # implicit wait
browser.get('https://www.zhihu.com/explore')
input = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
print(input)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait  # explicit wait
from selenium.webdriver.support import expected_conditions as EC
browser = webdriver.Chrome()
browser.get('https://www.taobao.com/')
wait = WebDriverWait(browser, 10)
input = wait.until(EC.presence_of_element_located((By.ID, 'q')))
button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
print(input, '\n', button)
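If the condition is not met within the timeout, until() raises a TimeoutException, so explicit waits are usually wrapped in try/except. A minimal sketch using a deliberately non-existent ID:

from selenium.common.exceptions import TimeoutException
try:
    wait.until(EC.presence_of_element_located((By.ID, 'no-such-id')))  # hypothetical ID that never appears
except TimeoutException:
    print('timed out waiting for the element')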
# Going back and forward
import time
from selenium import webdriver
browser = webdriver.Chrome()
browser.get('https://www.baidu.com/')
browser.get('https://www.taobao.com/')
browser.get('https://www.python.org/')
browser.back()
time.sleep(1)
browser.forward()
browser.close()
# Cookies
from selenium import webdriver
browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
print(browser.get_cookies())
browser.add_cookie({'name':'name','domain':'www.zhihu.com','value':'germey'})
print(browser.get_cookies())
browser.delete_all_cookies()
print(browser.get_cookies())
browser.close()
# [{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49', 'path': '/', 'secure': False, 'value': '1660980908'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'JOID', 'path': '/', 'secure': False, 'value': 'WloWBkpXuqcwA0exIFvZv4H41Nw-Gu7sVmUkwXUO2JZlQDTJbPozpV4ATLYi23jHHkOk4SXADwKFCoiclQvPJtE='}, {'domain': '.zhihu.com', 'expiry': 1692516908, 'httpOnly': False, 'name': 'Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49', 'path': '/', 'secure': False, 'value': '1660980908'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'KLBRSID', 'path': '/', 'secure': False, 'value': '76ae5fb4fba0f519d97e594f1cef9fab|1660980902|1660980900'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'osd', 'path': '/', 'secure': False, 'value': 'UVwQAk5cvKE0B0y3Jl_dtIf-0Ng1HOjoUm4ix3EK05BjRDDCavw3oVUGSrIm0H7BGkev5yPECwmDDIyYng3JItU='}, {'domain': '.zhihu.com', 'expiry': 1695540905, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': 'fe594060-fef6-4fca-b59c-4cafe146b257'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': '46dc1123-1966-4906-b7fc-c7462df65abe'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'SESSIONID', 'path': '/', 'secure': False, 'value': 'GUMboiqSPh0SGzrMLFRjrdl34WhUBf3zpMD5lGohNvW'}, {'domain': '.zhihu.com', 'expiry': 1695540905, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': 'AMDXsveebhWPTiudfRrzU69qi3k-JbQMQgo=|1660980900'}]
# [{'domain': '.www.zhihu.com', 'httpOnly': False, 'name': 'name', 'path': '/', 'secure': True, 'value': 'germey'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49', 'path': '/', 'secure': False, 'value': '1660980908'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'JOID', 'path': '/', 'secure': False, 'value': 'WloWBkpXuqcwA0exIFvZv4H41Nw-Gu7sVmUkwXUO2JZlQDTJbPozpV4ATLYi23jHHkOk4SXADwKFCoiclQvPJtE='}, {'domain': '.zhihu.com', 'expiry': 1692516908, 'httpOnly': False, 'name': 'Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49', 'path': '/', 'secure': False, 'value': '1660980908'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'KLBRSID', 'path': '/', 'secure': False, 'value': '76ae5fb4fba0f519d97e594f1cef9fab|1660980902|1660980900'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'osd', 'path': '/', 'secure': False, 'value': 'UVwQAk5cvKE0B0y3Jl_dtIf-0Ng1HOjoUm4ix3EK05BjRDDCavw3oVUGSrIm0H7BGkev5yPECwmDDIyYng3JItU='}, {'domain': '.zhihu.com', 'expiry': 1695540905, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': 'fe594060-fef6-4fca-b59c-4cafe146b257'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': '46dc1123-1966-4906-b7fc-c7462df65abe'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': 'SESSIONID', 'path': '/', 'secure': False, 'value': 'GUMboiqSPh0SGzrMLFRjrdl34WhUBf3zpMD5lGohNvW'}, {'domain': '.zhihu.com', 'expiry': 1695540905, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': 'AMDXsveebhWPTiudfRrzU69qi3k-JbQMQgo=|1660980900'}]
# []
# Tab management
import time
from selenium import webdriver
browser = webdriver.Chrome()
browser.get('https://www.baidu.com')
browser.execute_script('window.open()')
print(browser.window_handles)
browser.switch_to.window(browser.window_handles[1])
browser.get('https://www.taobao.com')
time.sleep(1)
browser.switch_to.window(browser.window_handles[0])
browser.get('https://python.org')
time.sleep(3)
browser.close()
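Note that close() only closes the current tab, so the session stays alive in the remaining tab; you have to switch to one of the surviving handles before issuing further commands, and quit() shuts the whole browser down. A small follow-up sketch:

browser.switch_to.window(browser.window_handles[0])  # switch to the remaining tab
print(browser.current_url)
browser.quit()  # close all windows and end the session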

The code above is excerpted from 《Python 3网络爬虫开发实战》 (Python 3 web crawler development in practice).
