概述
初始化
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import folium
# Directory that holds the data files (absolute path).
# NOTE(review): the previous literal ended in a lone backslash, which escapes
# the closing quote and makes the file unparsable; the other path separators
# appear to have been lost as well. Forward slashes are used here so no
# escaping is needed. TODO confirm the real folder on your machine.
INPUT_PATH = 'G:/DCICData/'
# Number of rows to read from a file.
MAX_ROWS = 100000
读取数据
# Cruising-taxi GPS data, read from taxiGps20190531.csv.
# Compact dtypes are requested for the numeric columns.
gps_dtypes = {
    'DRIVING_DIRECTION': np.uint16,
    'OPERATING_STATUS': np.uint8,
    'LONGITUDE': np.float32,
    'LATITUDE': np.float32,
    'GPS_SPEED': np.float16,
}
taxigps2019 = pd.read_csv(INPUT_PATH + 'taxiGps20190531.csv', dtype=gps_dtypes)
# Reverse the column order.
taxigps2019 = taxigps2019.iloc[:, ::-1]
# Order rows by car number, then by GPS_TIME, and renumber the index from 0.
taxigps2019 = (
    taxigps2019
    .sort_values(by=['CARNO', 'GPS_TIME'])
    .reset_index(drop=True)
)
数据可视化
- 24小时巡游车统计数量
# Parse the timestamp column once, then derive the hour of day from it.
gps_time = pd.to_datetime(taxigps2019['GPS_TIME'])
taxigps2019['GPS_TIME'] = gps_time
taxigps2019['GPS_TIME_hour'] = gps_time.dt.hour
# Number of distinct cars seen in each hour.
cars_per_hour = taxigps2019.groupby(['GPS_TIME_hour'])['CARNO'].nunique()
cars_per_hour.plot()
plt.ylabel('Car Count')
从图中可以看出,0时与12-15时的巡游车数量最少,其他时段的巡游车数量相近,均在6600辆左右。
- 24小时巡游车平均GPS速度
# Limit GPS speeds to the range [0, 150].
# Series.clip(lower, upper) is used because np.clip expects the data as its
# FIRST argument; the previous call passed the data last, so the lower bound
# of 0 was never applied. The column is widened from its float16 load dtype
# first so the hourly means below are not computed in half precision.
taxigps2019['GPS_SPEED'] = taxigps2019['GPS_SPEED'].astype(np.float32).clip(0, 150)

# Mean speed per hour over all records (zero-speed records included).
taxigps2019.groupby(['GPS_TIME_hour'])['GPS_SPEED'].mean().plot()

# Mean speed per hour over records whose speed is non-zero.
moving = taxigps2019[taxigps2019['GPS_SPEED'] != 0]
moving.groupby(['GPS_TIME_hour'])['GPS_SPEED'].mean().plot()

plt.legend(['Mean GPS Speed, contain 0', 'Mean GPS Speed, not contain 0'])
- 24小时内各运营状态的巡游车数量
import matplotlib.font_manager as fm

# For each operating status of interest, plot the number of distinct cars
# seen per hour; one line is drawn per status, in legend order.
for status in (1, 8):
    df = taxigps2019[taxigps2019['OPERATING_STATUS'] == status]
    df.groupby(['GPS_TIME_hour'])['CARNO'].nunique().plot()
plt.legend(['STATUS 1', 'STATUS 8'])
- 某个时间巡游车位置分布
from folium import plugins
from folium.plugins import HeatMap

# Base map for the heat layer.
map_hooray = folium.Map(location=[24.482426, 118.157606], zoom_start=14)
# Heat layer built from the first 1000 (LATITUDE, LONGITUDE) rows.
heat_points = taxigps2019[['LATITUDE', 'LONGITUDE']].iloc[:1000].values
heat_layer = HeatMap(heat_points)
heat_layer.add_to(map_hooray)
# Trailing expression so a notebook cell renders the map.
map_hooray
- 某个巡游车具体的路线
# Create a map and draw one car's route on it.
m = folium.Map(location=[24.482426, 118.157606], zoom_start=12)
# First 50 (LATITUDE, LONGITUDE) rows recorded for the selected car.
car_rows = taxigps2019[taxigps2019['CARNO'] == '0006d282be70d06881a7513b69fcaa60']
route_points = car_rows[['LATITUDE', 'LONGITUDE']].iloc[:50].values
my_PolyLine = folium.PolyLine(locations=route_points, weight=5)
m.add_child(my_PolyLine)
- 20190531-20190609 期间巡游车早上9点的平均速度变化
# Mean GPS speed during hour 9 for each daily file, 0531 through 0609.
l_name = ['0531', '0601', '0602', '0603', '0604', '0605', '0606', '0607', '0608', '0609']
l_mean_speed = []
for i in l_name:
    f_name = 'taxiGps2019' + i + '.csv'
    # NOTE(review): usecols=[1, 3, 6] presumably selects the GPS_SPEED,
    # GPS_TIME and CARNO columns used below; verify against the file header.
    taxigps2019 = pd.read_csv(INPUT_PATH + f_name, usecols=[1, 3, 6],
                              dtype={'GPS_SPEED': np.float32})
    # Keep only the hour of day of each record.
    taxigps2019['GPS_TIME'] = pd.to_datetime(taxigps2019['GPS_TIME']).dt.hour
    # Average the speed column explicitly. Taking the mean of the whole frame
    # and then element 0 depends on column position and on non-numeric
    # columns being dropped silently, which pandas >= 2.0 no longer does.
    mean_speed = taxigps2019.loc[taxigps2019['GPS_TIME'] == 9, 'GPS_SPEED'].mean()
    l_mean_speed.append(mean_speed)
    # Release the frame before the next file is loaded.
    del taxigps2019
plt.plot(l_name, l_mean_speed)
- 20190531-20190609 平均每辆巡游车运营时间(一天之内活跃的分钟个数)变化
# Average number of distinct active minutes per car for each daily file,
# 0531 through 0609.
l_name = ['0531', '0601', '0602', '0603', '0604', '0605', '0606', '0607', '0608', '0609']
l_mean_run_time = []
for i in l_name:
    f_name = 'taxiGps2019' + i + '.csv'
    # NOTE(review): usecols=[1, 3, 6] presumably selects the GPS_SPEED,
    # GPS_TIME and CARNO columns used below; verify against the file header.
    taxigps2019 = pd.read_csv(INPUT_PATH + f_name, usecols=[1, 3, 6],
                              dtype={'GPS_SPEED': np.float32})
    gps_time = pd.to_datetime(taxigps2019['GPS_TIME'])
    # Minute-of-day key (0..1439). Concatenating the unpadded hour and minute
    # strings is ambiguous (1:23 and 12:03 both become "123"), which merged
    # distinct minutes and under-counted the active time.
    taxigps2019['GPS_TIME'] = gps_time.dt.hour * 60 + gps_time.dt.minute
    # Distinct active minutes per car, averaged over all cars.
    mean_run_time = taxigps2019.groupby(['CARNO'])['GPS_TIME'].nunique().mean()
    l_mean_run_time.append(mean_run_time)
    # Release the frame before the next file is loaded.
    del taxigps2019
plt.plot(l_name, l_mean_run_time)
最后
以上就是发嗲饼干为你收集整理的《A城市巡游车与网约车运营特征对比分析—数据可视化》(初始化、读取数据、数据可视化)的全部内容,希望文章能够帮你解决在《A城市巡游车与网约车运营特征对比分析—数据可视化》中所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复