# coding: utf-8
"""Strip shared page chrome from every JSP file under a folder, in place.

For each ``*.jsp`` file found below ``folderPath`` the script removes the
first ``<header>`` element, the first element with ``role="banner"`` and the
first element with class ``col-xs-3``, then overwrites the file with the
prettified markup encoded as UTF-8.
"""
import os

from bs4 import BeautifulSoup

# Root folder that holds the JSP files.
folderPath = "E:/whm/google/src_jsp"

for dirPath, dirNames, fileNames in os.walk(folderPath):
    for fileName in fileNames:
        if not fileName.endswith(".jsp"):
            continue
        jspPath = os.path.join(dirPath, fileName)

        # Read raw bytes so BeautifulSoup works out the encoding itself, and
        # close the handle before the same file is reopened for writing.
        with open(jspPath, "rb") as source:
            soup = BeautifulSoup(source, "html.parser")

        header = soup.header
        if header is not None:
            header.extract()

        # Attribute selectors: find() only returns the first matching
        # element, so at most one element is removed per selector.
        for selector in ({"role": "banner"}, {"class": "col-xs-3"}):
            element = soup.find(attrs=selector)
            if element is not None:
                element.extract()

        # prettify() returns the re-indented HTML as text; formatter=None
        # writes strings out unmodified. The text-mode handle performs the
        # UTF-8 encoding, so no bytes are written to a text stream.
        with open(jspPath, "w", encoding="utf-8") as target:
            target.write(soup.prettify(formatter=None))
最后
以上就是甜蜜服饰最近收集整理的关于“python is beautiful_python BeautifulSoup基本用法”的全部内容,更多相关 Python 内容请搜索靠谱客的其他文章。
发表评论 取消回复