概述
import requests
from bs4 import BeautifulSoup
import time
# Scrape the first ten result pages of Anjuke's Beijing second-hand housing
# listings and print one line per listing (title, prices, layout, broker,
# address, tags).
#
# NOTE(review): the pasted source had lost its indentation, the continuation
# of the User-Agent string, the backslashes in the address clean-up and
# (apparently) the hyphens in several CSS class names. They are restored
# below; confirm the class names against the live page markup before relying
# on them.

# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64)AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36'
    )
}

for i in range(1, 11):
    link = 'https://beijing.anjuke.com/sale/p' + str(i)
    # A timeout keeps a single stalled connection from hanging the crawl.
    r = requests.get(link, headers=headers, timeout=10)
    print('现在爬取的是第', i, '页')

    soup = BeautifulSoup(r.text, 'lxml')
    house_list = soup.find_all('li', class_="list-item")

    for house in house_list:
        name = house.find('div', class_='house-title').a.text.strip()
        price = house.find('span', class_='price-det').text.strip()
        price_area = house.find('span', class_='unit-price').text.strip()

        # Look the details container up once instead of once per field.
        details = house.find('div', class_='details-item')
        no_room = details.span.text
        # Odd indexes presumably skip the whitespace text nodes that sit
        # between the child tags -- verify against the page markup.
        area = details.contents[3].text
        floor = details.contents[5].text
        year = details.contents[7].text

        broker = house.find('span', class_='brokername').text
        # Drop the first character of the broker text (presumably a leading
        # icon/whitespace character -- verify).
        broker = broker[1:]

        address = house.find('span', class_='comm-address').text.strip()
        # Collapse every whitespace run (including the non-breaking spaces
        # and newline between the two address parts) into a single space.
        address = ' '.join(address.split())

        tag_list = house.find_all('span', class_='item-tags')
        tags = [tag.text for tag in tag_list]

        print(name, price, price_area, no_room, area, floor, year, broker, address, tags)

    # Pause between pages to avoid hammering the site.
    time.sleep(5)
最后
以上就是危机小熊猫为你收集整理的《BeautifulSoup 实践:房屋价格数据》的全部内容,希望本文能够帮你解决在 BeautifulSoup 实践(房屋价格数据)中遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复