我是靠谱客的博主暴躁溪流。最近在开发中收集了这篇文章,主要介绍《A城市巡游车与网约车运营特征对比分析——地图及订单数据统计》,内容包括初始化、地图数据统计和订单数据统计三部分,觉得挺不错的,现在分享给大家,希望可以做个参考。

概述

初始化

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import folium 

# Data directory (absolute path). The backslashes are escaped so that the
# literal is valid Python and ends with a path separator; file names are
# appended to it directly with `+`.
INPUT_PATH = 'G:\\DCICData\\'

# Number of rows to read from a file
MAX_ROWS = 100000

地图数据统计:

# Load the 2019-05-31 taxi GPS log, giving the numeric columns compact dtypes.
taxigps2019 = pd.read_csv(
    INPUT_PATH + 'taxiGps20190531.csv',
    dtype={
        'DRIVING_DIRECTION': np.uint16,
        'OPERATING_STATUS': np.uint8,
        'LONGITUDE': np.float32,
        'LATITUDE': np.float32,
        'GPS_SPEED': np.float32,
    },
)

# Reverse the column order and parse the timestamp strings.
taxigps2019 = taxigps2019[taxigps2019.columns[::-1].tolist()]
taxigps2019['GPS_TIME'] = pd.to_datetime(taxigps2019['GPS_TIME'])

# Order the rows by vehicle, then by time, and renumber them from 0.
taxigps2019 = taxigps2019.sort_values(by=['CARNO', 'GPS_TIME']).reset_index(drop=True)
taxigps2019.head()
                              CARNO   LATITUDE   LONGITUDE             GPS_TIME  DRIVING_DIRECTION  GPS_SPEED  OPERATING_STATUS
0  0006d282be70d06881a7513b69fcaa60  24.479755  118.146935  2019-05-31 01:31:20                292        0.0                 1
1  0006d282be70d06881a7513b69fcaa60  24.479755  118.146935  2019-05-31 01:31:35                292        0.0                 1
2  0006d282be70d06881a7513b69fcaa60  24.479755  118.146935  2019-05-31 01:31:50                292        0.0                 1
3  0006d282be70d06881a7513b69fcaa60  24.479755  118.146935  2019-05-31 01:32:05                292        0.0                 1
4  0006d282be70d06881a7513b69fcaa60  24.479755  118.146935  2019-05-31 01:32:20                292        0.0                 1

统计每辆巡游车最早、最晚出现的记录:

# first() takes, per vehicle, the first non-null value of every column in row
# order; the frame is expected to be sorted by GPS_TIME within each CARNO.
df_first = taxigps2019.groupby(by=['CARNO']).first()
print('最早出现的记录:')
df_first.head(5)
最早出现的记录:
LATITUDELONGITUDEGPS_TIMEDRIVING_DIRECTIONGPS_SPEEDOPERATING_STATUS
CARNO
0006d282be70d06881a7513b69fcaa6024.479755118.1469352019-05-31 01:31:202920.0000001
000e8886a7b27ca761e34d59b1dee35c24.550379118.1030122019-05-31 01:31:3814872.1999976
001df76bfa67259259f596c6dd353e6a24.499088118.1411822019-05-31 01:31:0927837.0000006
001e3756542dc796b402dfd1b56fd4ec24.471125118.1055602019-05-31 01:31:1434635.0999986
002b23a3762ea245f18cc896a55579d224.544592118.1024632019-05-31 01:31:211990.3000001
# last() takes, per vehicle, the last non-null value of every column in row
# order; the frame is expected to be sorted by GPS_TIME within each CARNO.
df_last = taxigps2019.groupby(by=['CARNO']).last()
print('最晚出现的记录:')
df_last.head(5)
最晚出现的记录:
LATITUDELONGITUDEGPS_TIMEDRIVING_DIRECTIONGPS_SPEEDOPERATING_STATUS
CARNO
0006d282be70d06881a7513b69fcaa6024.498701118.0301822019-05-31 23:59:585444.4000021
000e8886a7b27ca761e34d59b1dee35c24.538876118.1298902019-05-31 23:59:492320.0000001
001df76bfa67259259f596c6dd353e6a24.488588118.1571962019-05-31 23:59:4725230.1000006
001e3756542dc796b402dfd1b56fd4ec24.524464118.1470952019-05-31 23:59:498012.9000006
002b23a3762ea245f18cc896a55579d224.497768118.1803742019-05-31 23:59:4500.0000001

统计每辆巡游车最早最晚的时间间隔:

# Per-vehicle span between the first and last GPS record of the file.
df_first = taxigps2019.groupby(['CARNO']).first()
df_last = taxigps2019.groupby(['CARNO']).last()

# Timedelta per CARNO, turned into a frame with CARNO as a regular column.
df = (df_last['GPS_TIME'] - df_first['GPS_TIME']).reset_index()

# Convert the span to whole hours. total_seconds() covers the full timedelta,
# whereas `.dt.seconds` only holds the sub-day remainder and would
# under-report any span of 24 hours or more.
df['GPS_HOUR'] = (df['GPS_TIME'].dt.total_seconds() / 3600).astype(int)
df.set_index('CARNO', inplace=True)
df.head()
                                         GPS_TIME  GPS_HOUR
CARNO
0006d282be70d06881a7513b69fcaa60  0 days 22:28:38        22
000e8886a7b27ca761e34d59b1dee35c  0 days 22:28:11        22
001df76bfa67259259f596c6dd353e6a  0 days 22:28:38        22
001e3756542dc796b402dfd1b56fd4ec  0 days 22:28:35        22
002b23a3762ea245f18cc896a55579d2  0 days 22:28:24        22

统计每辆巡游车的经纬度和速度极差:

# Discard GPS rows whose latitude or longitude is exactly 0.
has_position = (taxigps2019['LATITUDE'] != 0) & (taxigps2019['LONGITUDE'] != 0)
taxigps2019 = taxigps2019[has_position]

# Per-vehicle range (max - min) of latitude, longitude and speed; the results
# are aligned to df by its CARNO index.
per_car = taxigps2019.groupby(['CARNO'])
for column in ('LATITUDE', 'LONGITUDE', 'GPS_SPEED'):
    df[column + '_PTP'] = per_car[column].apply(np.ptp)

df.head()
GPS_TIMEGPS_HOURLATITUDE_PTPLONGITUDE_PTPGPS_SPEED_PTP
CARNO
0006d282be70d06881a7513b69fcaa600 days 22:28:38220.0824240.16724479.599998
000e8886a7b27ca761e34d59b1dee35c0 days 22:28:11220.2180860.22843990.699997
001df76bfa67259259f596c6dd353e6a0 days 22:28:38220.0892410.11176374.099998
001e3756542dc796b402dfd1b56fd4ec0 days 22:28:35220.2089610.17615598.099998
002b23a3762ea245f18cc896a55579d20 days 22:28:24220.1137220.11389278.099998
# Count the vehicles whose latitude, longitude and speed ranges are all 0.
df[(df[['LATITUDE_PTP', 'LONGITUDE_PTP', 'GPS_SPEED_PTP']] == 0).all(axis=1)].count()
GPS_TIME         39
GPS_HOUR         39
LATITUDE_PTP     39
LONGITUDE_PTP    39
GPS_SPEED_PTP    39
dtype: int64

通过统计经纬度以及GPS速度的极差是否为0,可以剔除全天GPS异常的车辆。

计算每辆巡游车的平均经纬度,并绘制热力图:

from folium import plugins
from folium.plugins import HeatMap

# Mean GPS position of every vehicle, aligned to df by CARNO; vehicles with
# any missing value in df are then dropped.
car_groups = taxigps2019.groupby(['CARNO'])
df['LONGITUDE_MEAN'] = car_groups['LONGITUDE'].mean()
df['LATITUDE_MEAN'] = car_groups['LATITUDE'].mean()
df = df.dropna()

# Heat map of the first 1000 vehicles' mean positions as (lat, lon) pairs.
map_hooray = folium.Map(location=[24.482426, 118.157606], zoom_start=14)
heat_points = df[['LATITUDE_MEAN', 'LONGITUDE_MEAN']].iloc[:1000].values
HeatMap(heat_points).add_to(map_hooray)
map_hooray

在这里插入图片描述

对比分析2019,2020年端午节前一天巡游车平均速度的变化:

# Dragon Boat Festival fell on 2019-06-07 and 2020-06-25; compare the mean
# taxi GPS speed on the day before each (2019-06-06 vs 2020-06-24).
taxigps2019 = pd.read_csv(INPUT_PATH + 'taxiGps20190606.csv',
                         dtype = {
                             'DRIVING_DIRECTION': np.uint16,
                             'OPERATING_STATUS': np.uint8,
                             'LONGITUDE': np.float16,
                             'LATITUDE': np.float16,
                             'GPS_SPEED': np.float32
                         })
dwq2019 = taxigps2019['GPS_SPEED'].mean()
del taxigps2019  # release the frame before the next file is loaded
taxigps2020 = pd.read_csv(INPUT_PATH + 'taxiGps20200624.csv',
                         dtype = {
                             'DRIVING_DIRECTION': np.uint16,
                             'OPERATING_STATUS': np.uint8,
                             'LONGITUDE': np.float16,
                             'LATITUDE': np.float16,
                             'GPS_SPEED': np.float32
                         })
dwq2020 = taxigps2020['GPS_SPEED'].mean()
del taxigps2020
# Print the speed means computed above. The names dw2019/dw2020 are not
# defined at this point; they are the waiting-time means of a later section.
print(dwq2019)
print(dwq2020)
243.19984
218.5537
(注:以上两个数值与下文"订单平均等待时间"一节的输出完全相同,应为订单平均等待时间而非巡游车平均速度;实际的平均速度需重新运行后得到。)

对比分析2019年端午假期前、端午假期中和假期后,巡游车日平均速度变化:

# Mean GPS speed of each daily file from 2019-06-06 to 2019-06-09, plotted
# against the date label.
l_name = ['0606', '0607', '0608', '0609']
speed2019 = []
for day in l_name:
    taxigps2019 = pd.read_csv(
        INPUT_PATH + 'taxiGps2019' + day + '.csv',
        dtype={
            'DRIVING_DIRECTION': np.uint16,
            'OPERATING_STATUS': np.uint8,
            'LONGITUDE': np.float16,
            'LATITUDE': np.float16,
            'GPS_SPEED': np.float32,
        },
    )
    daily_mean_speed = taxigps2019['GPS_SPEED'].mean()
    speed2019.append(daily_mean_speed)
plt.plot(l_name, speed2019)

在这里插入图片描述

订单数据统计

del taxigps2019

# Load the 2019-05-31 taxi orders with compact float dtypes, rename CAR_NO to
# CARNO, then order the rows by vehicle and pick-up time with a fresh index.
taxiorder2019 = pd.read_csv(
    INPUT_PATH + 'taxiOrder20190531.csv',
    dtype={
        'GETON_LONGITUDE': np.float32,
        'GETON_LATITUDE': np.float32,
        'GETOFF_LONGITUDE': np.float32,
        'GETOFF_LATITUDE': np.float32,
        'PASS_MILE': np.float16,
        'NOPASS_MILE': np.float16,
        'WAITING_TIME': np.float32,
    },
)
taxiorder2019 = (
    taxiorder2019
    .rename(columns={'CAR_NO': 'CARNO'})
    .sort_values(by=['CARNO', 'GETON_DATE'])
    .reset_index(drop=True)
)
taxiorder2019.head()
CARNOGETON_DATEGETON_LONGITUDEGETON_LATITUDEGETOFF_DATEGETOFF_LONGITUDEGETOFF_LATITUDEPASS_MILENOPASS_MILEWAITING_TIME
00006d282be70d06881a7513b69fcaa602019-05-31 00:08:00118.15506024.5060352019-05-31 00:13:00118.17666624.5098953.0996091.29980562.0
10006d282be70d06881a7513b69fcaa602019-05-31 00:27:00118.15470924.4887732019-05-31 00:32:00118.16300224.5047612.9003914.10156234.0
20006d282be70d06881a7513b69fcaa602019-05-31 00:44:00118.15508324.5060902019-05-31 00:55:00118.15660924.4508097.6015622.90039171.0
30006d282be70d06881a7513b69fcaa602019-05-31 01:57:00118.11774424.4792752019-05-31 02:01:00118.12435224.4720861.7998058.70312517.0
40006d282be70d06881a7513b69fcaa602019-05-31 02:14:00118.11769124.4824372019-05-31 02:29:00118.15497624.4886114.6992195.101562425.0

上下客点分布密度:

  • 上车经纬度分布热力图:
# Mean pick-up position per vehicle, indexed by CARNO.
# Orders whose pick-up longitude or latitude is exactly 0 are left out of the
# average: such rows pull a vehicle's mean far away from the rest of the data.
valid_geton = taxiorder2019[
    (taxiorder2019['GETON_LONGITUDE'] != 0) & (taxiorder2019['GETON_LATITUDE'] != 0)
]
df = valid_geton.groupby(['CARNO'])[['GETON_LONGITUDE', 'GETON_LATITUDE']].mean()
# Drop vehicles without a usable mean so no NaN reaches the heat map.
df = df.dropna()
df.head()
GETON_LONGITUDEGETON_LATITUDE
CARNO
0006d282be70d06881a7513b69fcaa60118.11631024.496180
000e8886a7b27ca761e34d59b1dee35c118.06875624.568338
001df76bfa67259259f596c6dd353e6a102.00956021.137037
001e3756542dc796b402dfd1b56fd4ec118.12536624.499846
002b23a3762ea245f18cc896a55579d2100.29605120.792345
# Heat map of the per-vehicle mean pick-up points, passed as (lat, lon) pairs.
map_hooray = folium.Map(location=[24.482426, 118.157606], zoom_start=14)
geton_points = df[['GETON_LATITUDE', 'GETON_LONGITUDE']].values
HeatMap(geton_points).add_to(map_hooray)
map_hooray

在这里插入图片描述

  • 下车经纬度分布热力图:
# Mean drop-off position per vehicle, aligned to df by its CARNO index.
# Orders whose drop-off longitude or latitude is exactly 0 are left out of the
# average: such rows pull a vehicle's mean far away from the rest of the data.
valid_getoff = taxiorder2019[
    (taxiorder2019['GETOFF_LONGITUDE'] != 0) & (taxiorder2019['GETOFF_LATITUDE'] != 0)
]
getoff_mean = valid_getoff.groupby(['CARNO'])[['GETOFF_LONGITUDE', 'GETOFF_LATITUDE']].mean()
df['GETOFF_LONGITUDE'] = getoff_mean['GETOFF_LONGITUDE']
df['GETOFF_LATITUDE'] = getoff_mean['GETOFF_LATITUDE']
# Vehicles without any valid drop-off get NaN above; drop them so no NaN
# reaches the heat map.
df = df.dropna(subset=['GETOFF_LONGITUDE', 'GETOFF_LATITUDE'])
df.head()
GETON_LONGITUDEGETON_LATITUDEGETOFF_LONGITUDEGETOFF_LATITUDE
CARNO
0006d282be70d06881a7513b69fcaa60118.11631024.496180118.11814124.496452
000e8886a7b27ca761e34d59b1dee35c118.06875624.568338113.53883423.626139
001df76bfa67259259f596c6dd353e6a102.00956021.137037118.11827124.478399
001e3756542dc796b402dfd1b56fd4ec118.12536624.499846118.12139124.505577
002b23a3762ea245f18cc896a55579d2100.29605120.792345100.29344220.792120
# Heat map of the per-vehicle mean drop-off points, passed as (lat, lon) pairs.
map_hooray = folium.Map(location=[24.482426, 118.157606], zoom_start=14)
getoff_points = df[['GETOFF_LATITUDE', 'GETOFF_LONGITUDE']].values
HeatMap(getoff_points).add_to(map_hooray)
map_hooray

在这里插入图片描述

上车点位于经度 118.155060±0.01、纬度 24.506035±0.01 范围内的巡游车订单的平均等待时间:

# Mean WAITING_TIME of the orders picked up strictly inside the box
# longitude 118.155060 +/- 0.01, latitude 24.506035 +/- 0.01.
geton_lon = taxiorder2019['GETON_LONGITUDE']
geton_lat = taxiorder2019['GETON_LATITUDE']
in_box = (
    (geton_lon > 118.155060-0.01) & (geton_lon < 118.155060+0.01)
    & (geton_lat > 24.506035-0.01) & (geton_lat < 24.506035+0.01)
)
taxiorder2019[in_box]['WAITING_TIME'].mean()
276.4932

统计出租订单平均等待时间超过600秒的经纬度:

def f(x):
    """Return x rounded to 3 decimals, formatted as a string."""
    return str(round(x, 3))


# Label every order with its pick-up position: 'longitude,latitude', each
# rounded to 3 decimals.
taxiorder2019['GETON_POSITION'] = (
    taxiorder2019['GETON_LONGITUDE'].apply(f)
    + ','
    + taxiorder2019['GETON_LATITUDE'].apply(f)
)

# Mean waiting time per position; keep only the positions above 600.
mean_wait = taxiorder2019.groupby(['GETON_POSITION'])['WAITING_TIME'].mean()
df = mean_wait[mean_wait > 600].reset_index()
df['GETON_POSITION'].value_counts()
118.147,24.456    1
118.158,24.492    1
117.997,24.615    1
118.158,24.488    1
118.122,24.621    1
                 ..
118.091,24.523    1
118.149,24.489    1
118.186,24.505    1
118.074,24.612    1
118.156,24.452    1
Name: GETON_POSITION, Length: 631, dtype: int64

统计 2019-05-31 至 2019-06-09 期间,单日平均等待时间超过600秒的上车位置,以及每个位置满足该条件的天数(按天数从多到少排列,下面的输出只列到出现 5 天的位置):

# For every daily order file from 2019-05-31 to 2019-06-09, find the pick-up
# positions whose mean waiting time that day exceeds 600, then count on how
# many days each position qualified.
l_name = ['0531','0601','0602','0603','0604','0605','0606','0607','0608','0609']
from collections import Counter
counter = Counter()
for i in l_name:
    taxiorder2019 = pd.read_csv(INPUT_PATH + 'taxiOrder2019'+i+'.csv',
                           dtype = {
                               'GETON_LONGITUDE': np.float32,
                               'GETON_LATITUDE': np.float32,
                               'GETOFF_LONGITUDE': np.float32,
                               'GETOFF_LATITUDE': np.float32,
                               'PASS_MILE': np.float16,
                               'NOPASS_MILE': np.float16,
                               'WAITING_TIME': np.float32
                           })
    taxiorder2019 = taxiorder2019.rename(columns={'CAR_NO':'CARNO'})
    # Skip orders whose pick-up longitude or latitude is exactly 0; they would
    # otherwise all collapse into one bogus '0.0,0.0' position.
    taxiorder2019 = taxiorder2019[(taxiorder2019['GETON_LONGITUDE'] != 0)
                                  & (taxiorder2019['GETON_LATITUDE'] != 0)]
    # Sort by vehicle and pick-up time and renumber the rows.
    taxiorder2019 = taxiorder2019.sort_values(by=['CARNO','GETON_DATE']).reset_index(drop=True)
    # Position label 'longitude,latitude'; f is the 3-decimal formatter defined earlier.
    taxiorder2019['GETON_POSITION'] = taxiorder2019['GETON_LONGITUDE'].apply(f)+','+taxiorder2019['GETON_LATITUDE'].apply(f)
    # Compute the per-position mean once and reuse it for the filter.
    mean_wait = taxiorder2019.groupby(['GETON_POSITION'])['WAITING_TIME'].mean()
    counter.update(mean_wait[mean_wait > 600].index)
# (position, number of days) pairs, most frequent first.
counter.most_common()
[('0.0,0.0', 10),
 ('117.939,24.492', 8),
 ('118.13,24.486', 7),
 ('118.023,24.48', 6),
 ('118.109,24.431', 6),
 ('118.226,24.569', 6),
 ('118.019,24.482', 5),
 ('118.052,24.5', 5),
 ('118.166,24.481', 5),
 ('118.167,24.48', 5),
 ('118.228,24.568', 5),
 ('118.11,24.489', 5),
 ('118.036,24.491', 5),
 ('118.068,24.464', 5),
 ...]

对比2019年和2020年出租车端午节订单的平均等待时间,是如何变化的:

# 2019年端午节6月7日,2020年端午节6月25日
taxiorder2019 = pd.read_csv(INPUT_PATH + 'taxiOrder2019'+'0607'+'.csv',
                           dtype = {
                               'GETON_LONGITUDE': np.float32,
                               'GETON_LATITUDE': np.float32,
                               'GETOFF_LONGITUDE': np.float32,
                               'GETOFF_LATITUDE': np.float32,
                               'PASS_MILE': np.float16,
                               'NOPASS_MILE': np.float16,
                               'WAITING_TIME': np.float32
                           })
taxiorder2019 = taxiorder2019.rename(columns={'CAR_NO':'CARNO'})

taxiorder2020 = pd.read_csv(INPUT_PATH + 'taxiOrder2020'+'0625'+'.csv',
                           dtype = {
                               'GETON_LONGITUDE': np.float32,
                               'GETON_LATITUDE': np.float32,
                               'GETOFF_LONGITUDE': np.float32,
                               'GETOFF_LATITUDE': np.float32,
                               'PASS_MILE': np.float16,
                               'NOPASS_MILE': np.float16,
                               'WAITING_TIME': np.float32
                           })
taxiorder2020 = taxiorder2020.rename(columns={'CAR_NO':'CARNO'})

dw2019 = taxiorder2019['WAITING_TIME'].mean()
dw2020 = taxiorder2020['WAITING_TIME'].mean()
print(dw2019)
print(dw2020)
243.19984
218.5537

巡游车订单距离与时间的对比:

import matplotlib.animation as animation
import glob, gc

def barlist(n):
    """Return the mean PASS_MILE per pick-up hour for the order file paths[n].

    Args:
        n: Index into the module-level ``paths`` list of order CSV files.

    Returns:
        A NumPy array of 24 values, where element ``h`` is the mean PASS_MILE
        of the orders whose GETON_DATE falls in hour ``h``. Hours without any
        order are reported as 0, so the array always lines up with
        ``range(24)``.
    """
    orders = pd.read_csv(paths[n], nrows=None,
                         dtype = {
                             'GETON_LONGITUDE': np.float32,
                             'GETON_LATITUDE': np.float32,
                             'GETOFF_LONGITUDE': np.float32,
                             'GETOFF_LATITUDE': np.float32,
                             'PASS_MILE': np.float16,
                             'NOPASS_MILE': np.float16,
                             'WAITING_TIME': np.float16
                         })
    orders['GETON_DATE'] = pd.to_datetime(orders['GETON_DATE'])
    orders['GETON_Hour'] = orders['GETON_DATE'].dt.hour
    hourly_mean = orders.groupby(['GETON_Hour'])['PASS_MILE'].mean()
    # groupby only yields the hours present in the data; reindex to the full
    # 0-23 range so the result never has fewer entries than the 24 bars.
    return hourly_mean.reindex(range(24), fill_value=0).values

fig = plt.figure()

# Daily order files matching taxiOrder20190*.csv, in sorted name order.
paths = sorted(glob.glob(INPUT_PATH + 'taxiOrder20190*.csv'))
n = len(paths)  # number of order files found
x = range(24)

# Initial bars come from the first file; the y axis is fixed to 0-8.
barcollection = plt.bar(x, barlist(0))
plt.ylim(0, 8)

def animate(i):
    """Redraw the bar chart for animation frame ``i``.

    Frame ``i`` shows the order file ``paths[i + 1]``: the bar heights are
    replaced with that file's hourly means from ``barlist`` and the title is
    set to the file name.

    Args:
        i: Zero-based frame number supplied by FuncAnimation.
    """
    import os

    y = barlist(i + 1)
    for idx, b in enumerate(barcollection):
        b.set_height(y[idx])
    plt.ylim(0, 8)
    # basename() strips the directory with the platform's own separator rules;
    # splitting on '/' leaves the whole path in the title when the paths use
    # backslashes.
    plt.title(os.path.basename(paths[i + 1]))
    plt.ylabel('PASS_MILE / KM')
    plt.xlabel('Hour')
# One frame per remaining file (n - 1 frames), 500 ms apart, played once.
anim = animation.FuncAnimation(
    fig,
    animate,
    frames=n - 1,
    interval=500,
    repeat=False,
    blit=False,
)

# Write the animation to order.gif.
anim.save('order.gif', dpi=150)

在这里插入图片描述

巡游车空驶率对比:

# For every order file, print the mean share of NOPASS_MILE in the total
# mileage (NOPASS_MILE + PASS_MILE) across its orders.
for path in paths:
    taxiorder2019 = pd.read_csv(
        path,
        nrows=None,
        dtype={
            'GETON_LONGITUDE': np.float32,
            'GETON_LATITUDE': np.float32,
            'GETOFF_LONGITUDE': np.float32,
            'GETOFF_LATITUDE': np.float32,
            'PASS_MILE': np.float16,
            'NOPASS_MILE': np.float16,
            'WAITING_TIME': np.float16,
        },
    )

    # Keep only the two mileage columns and drop rows where either is missing.
    taxiorder2019 = taxiorder2019[['NOPASS_MILE', 'PASS_MILE']].dropna()
    total_mile = taxiorder2019['NOPASS_MILE'] + taxiorder2019['PASS_MILE']
    taxiorder2019['NOPASS_Ratio'] = taxiorder2019['NOPASS_MILE'] / total_mile
    # The ratio is cast to float32 before averaging.
    mean_ratio = taxiorder2019['NOPASS_Ratio'].astype(np.float32).mean()
    print(path, mean_ratio)
G:\DCICData\taxiOrder20190531.csv 0.27126783
G:\DCICData\taxiOrder20190601.csv 0.27297953
G:\DCICData\taxiOrder20190602.csv 0.30302802
G:\DCICData\taxiOrder20190603.csv 0.31049386
G:\DCICData\taxiOrder20190604.csv 0.3039471
G:\DCICData\taxiOrder20190605.csv 0.2933384
G:\DCICData\taxiOrder20190606.csv 0.2547359
G:\DCICData\taxiOrder20190607.csv 0.28453994
G:\DCICData\taxiOrder20190608.csv 0.304996
G:\DCICData\taxiOrder20190609.csv 0.3115026

最后

以上就是暴躁溪流为你收集整理的《A城市巡游车与网约车运营特征对比分析——地图及订单数据统计》(初始化、地图数据统计、订单数据统计)的全部内容,希望文章能够帮你解决在相关程序开发中遇到的问题。

如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(35)

评论列表共有 0 条评论

立即
投稿
返回
顶部