解决selenium下被反爬的情况
复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# -*- encoding:utf-8 -*-
"""Scrape iwencai stock-screener results by replaying cookies from a real Chrome.

Flow: launch Chrome with a remote-debugging port, attach Selenium to it,
harvest the anti-bot cookies (``cid``, ``ComputerID``, ``ta_random_userid``,
``PHPSESSID``, ``v``), then replay them (plus the ``hexin-v`` header) in a
plain ``requests`` call against the stockpick load-data endpoint.

NOTE(review): the original paste lost every backslash, so the Windows paths
below ("C:Program Files...") were reconstructed — confirm they match the
local Chrome / chromedriver install before running.
"""
import subprocess
import threading
import time


def get_data(cookies):
    """Fetch raw screener rows from iwencai with the supplied cookie values.

    :param cookies: dict with keys ``cid``, ``ComputerID``, ``ta_random_userid``,
                    ``PHPSESSID`` and ``v`` harvested from a live browser session.
    :return: the ``data.result.result`` row list from the JSON response,
             or ``None`` when the server does not answer 200.
    """
    import requests  # local import keeps the pure helpers importable without requests

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36",
        'Accept-Language': 'zh-CN,zh;q=0.9',
        # The Cookie string mirrors exactly what the browser sends; only the
        # dynamic values are interpolated.
        "Cookie": f"cid={cookies['cid']}; "
                  f"ComputerID={cookies['ComputerID']}; "
                  "ver_mark=c; "
                  "other_uid=Ths_iwencai_Xuangu_9fnth9iibkd8zrtya1yj4ll48wx683s8; "
                  f"ta_random_userid={cookies['ta_random_userid']}; "
                  "WafStatus=0; "
                  f"PHPSESSID={cookies['PHPSESSID']}; "
                  f"v={cookies['v']}",
        # iwencai's anti-bot check requires the 'v' cookie echoed in this header.
        "hexin-v": f"{cookies['v']}"
    }
    print("开始爬取数据".center(30, "*"))
    print("请求头为:", headers)
    # Adjacent string literals concatenate: the query text 'w=...' is part of
    # the same URL (the original relied on a line continuation here).
    url = ('http://www.iwencai.com/stockpick/load-data?typed=0&preParams=&ts=1&f=1&qs=result_original&selfsectsn=&querytype=stock&searchfilter=&tid=stockpick&'
           'w=股价下跌,超大单净流入大于1000万元,大单净流入大于1000万元,市值小于400亿,非ST,非创业板,非科创板,股价大于10元')
    res = requests.get(url, headers=headers)
    print("res为:", res)
    if res.status_code != 200:
        print(res.text)
        return
    result_data = res.json()
    print('爬取数据的结果为:', result_data)
    return result_data['data']['result']['result']


def handle_data(result_data):
    """Convert raw row lists into dicts keyed by field name.

    :param result_data: iterable of rows; each row is indexable with at least
                        8 elements (index 6 is unused by this scraper).
    :return: list of dicts, one per input row.
    """
    result_list_data = []
    for one_data in result_data:
        result_dict_data = {
            # Stock code / name.
            'stock_code': one_data[0],
            'stock_name': one_data[1],
            # Daily percentage change.
            'stock_upanddown': one_data[2],
            # Extra-large order net inflow. NOTE(review): key keeps the
            # original 'extea' typo so downstream consumers keep working.
            'stock_extea_large_flow': one_data[3],
            # Large order net inflow.
            'stock_large_flow': one_data[4],
            # Market capitalisation.
            'stock_market_value': one_data[5],
            # Price (index 6 is deliberately skipped, as in the original).
            'stock_price': one_data[7],
        }
        result_list_data.append(result_dict_data)
    print("处理后的结果数据为:", result_list_data)
    return result_list_data


def handle_business(cookies):
    """Scrape, normalise, then report rows matching the alert criteria.

    Criteria applied: |change| > 2%, market cap < 200亿 (2e10), extra-large
    net inflow >= 2000万 (2e7). NOTE(review): the original comments/messages
    say "5%以下" but the code tests ``> 2`` — behaviour kept as coded; confirm
    which threshold is intended.
    """
    result_data = get_data(cookies)
    if not result_data:
        return
    result_list_data = handle_data(result_data)
    for one_data in result_list_data:
        if (abs(float(one_data['stock_upanddown'])) > 2
                and float(one_data['stock_market_value']) < 20000000000
                and float(one_data['stock_extea_large_flow']) >= 20000000):
            print('满足跌幅在5%以下的,市值小于200亿,特大单净流入大于2000万的的股票为:', one_data)
            # TODO: send a notification e-mail here.


def main():
    """Attach Selenium to a debug-mode Chrome, harvest cookies, run the scrape."""
    from selenium import webdriver

    def open_browser():
        # Launch Chrome with a remote-debugging port so Selenium can attach.
        # Argument-list form avoids shell quoting issues.
        try:
            subprocess.call(
                [r'C:\Program Files\Google\Chrome\Application\chrome.exe',
                 '--remote-debugging-port=9222',
                 r'--user-data-dir=C:\selenum\AutomationProfile'],
                timeout=15)
        except subprocess.TimeoutExpired:
            # Chrome intentionally keeps running past the 15 s launch window;
            # the original let this exception kill the thread.
            pass
        print("打开浏览器结束了")

    th = threading.Thread(target=open_browser)
    th.start()

    options = webdriver.ChromeOptions()
    # Attach to the already-running Chrome instead of starting a fresh
    # (easily fingerprinted) automated instance — this is the anti-bot trick.
    options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
    driver = webdriver.Chrome(
        executable_path=r'D:\Python\Python38\chromedriver.exe', options=options)
    driver.get('https://www.iwencai.com')

    cookies = driver.get_cookies()
    print(cookies)
    # Keep only the cookies get_data() needs.
    wanted = {'v', 'ta_random_userid', 'cid', 'ComputerID', 'PHPSESSID'}
    res_cookies = {c['name']: c['value'] for c in cookies if c['name'] in wanted}
    time.sleep(3)
    handle_business(res_cookies)
    driver.close()
    print("关闭页签")


if __name__ == "__main__":
    main()
最后
以上就是痴情鸡翅最近收集整理的关于python3 windows下的几乎万能爬虫方法的全部内容,更多相关python3内容请搜索靠谱客的其他文章。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复