复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# Selenium usage examples: a sequence of independent demos, each driving its
# own Chrome session created with webdriver.Chrome(). The demos visit live
# sites, so element IDs / class names used below may stop matching if those
# pages change.
#
# Every demo ends its session with quit() in a `finally` clause so that a
# failed lookup or a wait timeout does not leave a browser window and driver
# process behind when the next demo starts.

import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


# --- Basic example: type "Python" into Baidu's search box and submit ---
browser = webdriver.Chrome()
try:
    browser.get('http://www.baidu.com')  # request the page
    search_box = browser.find_element(By.ID, 'kw')  # look up by element ID
    search_box.send_keys('Python')
    search_box.send_keys(Keys.ENTER)
    wait = WebDriverWait(browser, 10)
    # Wait (10 s timeout) until the element with ID "content_left" is present.
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
    print(browser.current_url)
    print(browser.get_cookies())
    print(browser.page_source)  # print the page source
finally:
    browser.quit()


# --- Locating nodes: by ID, CSS selector and XPath ---
browser = webdriver.Chrome()
try:
    browser.get('https://www.taobao.com')
    # Single node: three different locators for the same id="q" element.
    first = browser.find_element(By.ID, 'q')
    second = browser.find_element(By.CSS_SELECTOR, '#q')
    third = browser.find_element(By.XPATH, '//*[@id="q"]')
    # Multiple nodes: find_elements returns a list.
    four = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
    print(first, '\n', second, '\n', third, '\n\n', four)
    # Sample output recorded by the original author (the three single-node
    # lookups printed the same element id):
    # <selenium.webdriver.remote.webelement.WebElement
    #  (session="23f60c7606e1f26b9148218fe48bfce4",
    #   element="2cfebc87-e9f5-44be-9a2f-39913bd5ab23")>
finally:
    browser.quit()


# --- Interacting with nodes: send_keys(), clear(), click() ---
browser = webdriver.Chrome()
try:
    browser.get('https://www.taobao.com')
    search_box = browser.find_element(By.ID, 'q')
    search_box.send_keys('iPone13')
    time.sleep(1)
    search_box.clear()
    time.sleep(3)
    search_box.send_keys('iPad')
    button = browser.find_element(By.CLASS_NAME, 'btn-search')
    button.click()
finally:
    browser.quit()


# --- Action chains: drag one element onto another ---
browser = webdriver.Chrome()
try:
    url = 'https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
    browser.get(url)
    # The draggable/droppable elements are looked up inside this frame.
    browser.switch_to.frame('iframeResult')
    source = browser.find_element(By.CSS_SELECTOR, '#draggable')
    target = browser.find_element(By.CSS_SELECTOR, '#droppable')
    actions = ActionChains(browser)
    actions.drag_and_drop(source, target)
    actions.perform()  # queued actions only run when perform() is called
finally:
    browser.quit()


# --- Running JavaScript: execute_script() ---
browser = webdriver.Chrome()
try:
    browser.get('https://www.zhihu.com/explore')
    browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    # NOTE: the session is torn down right after this call; add a pause here
    # if you want to watch the alert.
    browser.execute_script('alert("To Bottom")')
finally:
    browser.quit()


# --- Reading element data: get_attribute(), text and other properties ---
browser = webdriver.Chrome()
try:
    url = 'https://www.zhihu.com/explore'
    browser.get(url)
    logo = browser.find_element(By.ID, 'root')
    # Also available: logo.text and logo.get_attribute('class').
    print(logo.id)        # id
    print(logo.location)  # position
    print(logo.tag_name)  # tag name
    print(logo.size)      # size
finally:
    browser.quit()


# --- Switching frames ---
browser = webdriver.Chrome()
try:
    url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
    browser.get(url)
    browser.switch_to.frame('iframeResult')
    try:
        logo = browser.find_element(By.CLASS_NAME, 'logo')
    except NoSuchElementException:
        # A failed find_element raises NoSuchElementException;
        # NoSuchFrameException is only raised when switching to a frame.
        print('NO LOGO')
    browser.switch_to.parent_frame()
    logo = browser.find_element(By.CLASS_NAME, 'logo')
    print(logo)
    print(logo.text)
finally:
    browser.quit()


# --- Waiting: implicit wait ---
browser = webdriver.Chrome()
try:
    browser.implicitly_wait(10)  # implicit wait, in seconds
    browser.get('https://www.zhihu.com/explore')
    question_button = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
    print(question_button)
finally:
    browser.quit()


# --- Waiting: explicit wait ---
browser = webdriver.Chrome()
try:
    browser.get('https://www.taobao.com/')
    wait = WebDriverWait(browser, 10)
    search_box = wait.until(EC.presence_of_element_located((By.ID, 'q')))
    button = wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search'))
    )
    print(search_box, '\n', button)
finally:
    browser.quit()


# --- History navigation: back() and forward() ---
browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com/')
    browser.get('https://www.taobao.com/')
    browser.get('https://www.python.org/')
    browser.back()
    time.sleep(1)
    browser.forward()
finally:
    browser.quit()


# --- Cookies: read, add, delete ---
browser = webdriver.Chrome()
try:
    browser.get('https://www.zhihu.com/explore')
    print(browser.get_cookies())
    browser.add_cookie(
        {'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'}
    )
    print(browser.get_cookies())
    browser.delete_all_cookies()
    print(browser.get_cookies())
    # Sample output recorded by the original author (abridged):
    # [{'domain': '.zhihu.com', 'httpOnly': False, 'name': '_xsrf',
    #   'path': '/', 'secure': False, 'value': '...'}, ...]
    # [{'domain': '.www.zhihu.com', 'httpOnly': False, 'name': 'name',
    #   'path': '/', 'secure': True, 'value': 'germey'}, ...]
    # []
finally:
    browser.quit()


# --- Tab management ---
browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com')
    browser.execute_script('window.open()')  # open a second tab
    print(browser.window_handles)
    browser.switch_to.window(browser.window_handles[1])
    browser.get('https://www.taobao.com')
    time.sleep(1)
    browser.switch_to.window(browser.window_handles[0])
    browser.get('https://python.org')
    time.sleep(3)
    browser.close()  # closes only the current tab; quit() ends the session
finally:
    browser.quit()
代码摘抄自《Python 3网络爬虫开发实战》。
最后
以上就是狂野苗条最近收集整理的关于Python 3网络爬虫之Selenium用法的全部内容,更多相关Python内容请搜索靠谱客的其他文章。
本图文内容由网友提供或来自网络收集整理,仅供学习参考使用,版权归原作者所有。
发表评论 取消回复