1.验证手机号码(共11位:第一位是1,第二位是[34578]中的一位,其余9位均为数字)
复制代码
1
2
3
4
5
6
7
8
9
10
11
12
import re

# A valid number is exactly 11 characters: a leading 1, a second digit drawn
# from 3/4/5/7/8, then nine more digits.  The pattern must be a raw string so
# that "\d" reaches the regex engine as the digit class.
PHONE_PATTERN = re.compile(r'1[34578]\d{9}')


def match_phone(number):
    """Return the match object when *number* is a well-formed mobile number.

    Args:
        number: Candidate phone number as a string.

    Returns:
        The match object spanning the whole string, or ``None`` when the
        string is not exactly the 11 expected digits.
    """
    # fullmatch anchors both ends, so trailing characters cannot slip through.
    return PHONE_PATTERN.fullmatch(number)


text = '1357032****'  # masked sample value; replace it with your own number
if len(text) != 11:
    print("phone number'length is error!")
else:
    ret = match_phone(text)
    if ret is None:
        # Test for None explicitly instead of letting ret.group() raise
        # AttributeError and catching it.
        print('匹配到非数字字符', text)
    else:
        print(ret.group())
        print('正确手机号码:', ret.group())
2.验证邮箱
复制代码
1
2
3
4
import re

# Deliberately simple tutorial-level pattern:
#   \w+                     local part: letters, digits and underscores
#   [a-z0-9]+(?:\.[a-z0-9]+)*   one or more dot-separated domain labels
#   \.[a-z]+                letters-only top-level domain
# The dots are escaped so they match a literal "." rather than any character.
EMAIL_PATTERN = re.compile(r'\w+@[a-z0-9]+(?:\.[a-z0-9]+)*\.[a-z]+')


def match_email(address):
    """Return the match object when *address* looks like an e-mail address.

    Args:
        address: Candidate e-mail address as a string.

    Returns:
        The match object spanning the whole string, or ``None`` when the
        string does not fit the pattern.
    """
    return EMAIL_PATTERN.fullmatch(address)


text = '594398***@163.com'  # masked sample value; replace it with your own address
ret = match_email(text)
if ret is None:
    # re returns None on failure; calling .group() on it would raise
    # AttributeError, so report the mismatch instead.
    print('邮箱格式不正确:', text)
else:
    print(ret.group())
3.验证URL
复制代码
1
2
3
4
import re

# Scheme (captured as group 1), "://", then any run of non-whitespace
# characters.  "\S" must keep its backslash: a bare "[^s]" would stop the
# match at the first letter "s" and truncate the URL.
URL_PATTERN = re.compile(r'(https?|ftp)://\S+')


def match_url(candidate):
    """Return the match object when *candidate* is an http/https/ftp URL.

    Args:
        candidate: String to validate.

    Returns:
        The match object spanning the whole string (group 1 is the scheme),
        or ``None`` when the string is not a URL of a supported scheme or
        contains whitespace.
    """
    return URL_PATTERN.fullmatch(candidate)


text = 'https://www.runoob.com/python/python-exceptions.html'
ret = match_url(text)
if ret is None:
    # Avoid AttributeError from calling .group() on a failed match.
    print('不是合法的URL:', text)
else:
    print(ret.group())
4.验证身份证号码
复制代码
1
2
3
4
import re

# 18 characters: seventeen digits followed by a final digit or x/X.
# The "\d" classes need their backslashes, hence the raw string.
ID_CARD_PATTERN = re.compile(r'\d{17}[\dxX]')


def match_id_card(number):
    """Return the match object when *number* has the shape of an ID number.

    Only the format is checked (17 digits plus a digit or x/X).

    Args:
        number: Candidate ID-card number as a string.

    Returns:
        The match object spanning the whole string, or ``None`` when the
        string does not fit the format.
    """
    return ID_CARD_PATTERN.fullmatch(number)


text = '44262319980425***X'  # masked sample value; replace it with your own number
ret = match_id_card(text)
if ret is None:
    # Avoid AttributeError from calling .group() on a failed match.
    print('身份证号码格式不正确:', text)
else:
    print(ret.group())
5.综合实战(古诗文爬虫)
复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import re

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely.
REQUEST_TIMEOUT = 10

# The patterns are compiled once instead of on every page.  re.DOTALL lets
# ".*?" run across line breaks inside the markup.
TITLE_RE = re.compile(r'<div class="sons">.*?<b>(.*?)</b>', re.DOTALL)
DYNASTY_RE = re.compile(r'<p class="source">.*?<a.*?>(.*?)</a>.*?<span>', re.DOTALL)
AUTHOR_RE = re.compile(r'<p class="source">.*?</span>.*?<a.*?>(.*?)</a>', re.DOTALL)
CONTENT_RE = re.compile(r'<div class="contson".*?>(.*?)</div>', re.DOTALL)
# Used to strip inline tags from a poem body.
TAG_RE = re.compile(r'<.*?>')


def extract_poems(html):
    """Pull the poems out of one list page's HTML.

    Args:
        html: Page source as a string.

    Returns:
        A list of dicts with the keys ``title``, ``dynasty``, ``author`` and
        ``content``, in page order.  Empty when nothing matches.
    """
    titles = TITLE_RE.findall(html)
    dynasties = DYNASTY_RE.findall(html)
    authors = AUTHOR_RE.findall(html)
    contents = [TAG_RE.sub('', raw).strip() for raw in CONTENT_RE.findall(html)]
    # zip() pairs the fields positionally and stops at the shortest list, so
    # a page where one pattern matched fewer times cannot raise IndexError
    # the way indexing with range(len(...)) would.
    return [
        {
            'title': title,
            'dynasty': dynasty,
            'author': author,
            'content': content,
        }
        for title, dynasty, author, content in zip(titles, dynasties, authors, contents)
    ]


def prase_page(url):
    """Download one list page and print every poem found on it.

    Args:
        url: Address of the page to fetch.

    Raises:
        requests.RequestException: On connection problems, on timeout, or
            when the server answers with an HTTP error status.
    """
    # Imported here so that extract_poems() stays importable without the
    # third-party dependency.
    import requests

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
    }
    resp = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    resp.raise_for_status()
    for poem in extract_poems(resp.text):
        print(poem)
        print('#' * 80)


# Correctly spelled alias; the original name is kept for existing callers.
parse_page = prase_page


def main():
    """Crawl list pages 1 through 10 and print the poems of each."""
    for page in range(1, 11):
        url = "https://www.gushiwen.org/default_%s.aspx" % page
        prase_page(url)


if __name__ == '__main__':
    main()
最后
以上就是自信金毛最近收集整理的关于正则小案例(手机号、邮箱、URL、身份证、古诗文爬虫)的全部内容,更多相关正则小案例(手机号、邮箱、URL、身份证、古诗文爬虫)内容请搜索靠谱客的其他文章。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复