计算公交车出行OD的整体思路如下:
本方法基于以下数据:公交车出行轨迹数据、设备线路对照表和乘客刷卡数据表。
公交车轨迹原始数据包含公交车车牌号、定位时间和经纬度等信息;
乘客刷卡数据包含卡号、刷卡时间和交易终端号等信息。
公交车乘客出行OD提取规则:采用“聚类中心-站点”两阶段预测模型。该模型基于两个假设:乘客的公共交通出行路径选择具有规律性;上车站点与前一次下车站点通常位于相对邻近的空间范围内。首先对出行站点进行空间聚类,构建乘客出行链;然后利用一种基于部分匹配预测算法的变阶马尔科夫模型对出行链进行预测,进而提取出公交车乘客的完整出行OD。
数据处理部分请参考:
python_根据规土委乘客刷卡数据_找出用户的上车、下车站点经纬度
# -*- coding: utf-8 -*-
import os
import pandas as pd
from utils.calculateDistance import calculator_gid
from utils.read_write import writeOneCSV
# Switch the process working directory to the data folder.
os.chdir(r'D:\data\\')
# "OD_-00.txt"
# In that text file the fields are: origin grid id, destination grid id,
# OD trip count, e.g. 45581009024, 45581009024, 3
def on_out_bus(name, group):
    """Write one origin/destination grid row per boarding record of a card.

    For every row of ``group`` whose longitude and latitude cells are
    truthy, the coordinates are converted to a grid id with
    ``calculator_gid`` and paired with the grid id of the next row. The
    last row of a multi-row group is paired with the first row instead,
    and that destination is left out when both fall in the same grid.
    A single-row group produces a row without a destination. Each row is
    passed to ``writeOneCSV`` together with a file name built from two
    slices of the record's trade time.

    Args:
        name: Group key (the ``CARDID`` value when called from
            ``get_bus_station``); written as the first field of each row.
        group: DataFrame holding one card's records, accessed by position:
            column 1 is the trade time string, column 2 the longitude and
            column 3 the latitude.

    Reads the module-level ``on_dir`` as the output path prefix.
    """
    length = group.shape[0]
    for index in range(length):
        # Skip records whose coordinates are falsy (0 or empty).
        # NOTE(review): NaN is truthy, so missing values are not filtered
        # here; they have to be dropped before grouping.
        if not (group.iat[index, 2] and group.iat[index, 3]):
            continue

        origin_gid = calculator_gid(
            float(group.iat[index, 2]), float(group.iat[index, 3])
        )
        # Characters 0-9 and 11-12 of the trade time go into the file name;
        # presumably the date and the hour of 'YYYY-MM-DD HH:MM:SS' -- confirm.
        trade_time = group.iat[index, 1]
        up_station_day = trade_time[0:10]
        up_station_time = trade_time[11:13]
        on_gid_list = [name, origin_gid]

        if length > index + 1:
            # Destination is the grid of the card's next record.
            on_gid_list.append(
                calculator_gid(
                    float(group.iat[index + 1, 2]),
                    float(group.iat[index + 1, 3]),
                )
            )
        elif length > 1:
            # Last record of the card: link it back to the first record,
            # unless both lie in the same grid.
            first_gid = calculator_gid(
                float(group.iat[0, 2]), float(group.iat[0, 3])
            )
            if first_gid != origin_gid:
                on_gid_list.append(first_gid)

        # NOTE(review): the write sits inside the coordinate check because
        # the file-name parts are only defined for records that pass it.
        writeOneCSV(
            on_gid_list,
            on_dir + 'OD_' + up_station_day + '-' + up_station_time + '.csv',
        )
def get_bus_station():
    """Run ``on_out_bus`` once for every group in ``grouped_upstation``.

    Reads the module-level ``grouped_upstation`` groupby object, which the
    ``__main__`` section assigns before calling this function.
    """
    for name, group in grouped_upstation:
        on_out_bus(name, group)
if __name__ == '__main__':
    # Presumably the columns of deg.txt:
    # FID,Id,XZQDM,XZQMC,lsq,Longitude,Latitude,GID
    rec_file_250 = 'deg.txt'
    # NOTE(review): `nets` is not referenced by any other code visible in
    # this file; the read is kept so the script still requires deg.txt.
    nets = pd.read_table(rec_file_250, sep=',', encoding='gbk')

    # Alternative input files:
    # bus_taxi = 'all_card_up_stations_del.csv'
    # bus_taxi = 'up_stations_5_add.csv'
    # bus_taxi = 'add_up_stations.csv'
    # bus_taxi = 'up_stations_5_.csv'
    # bus_taxi = 'up_stations_5_del.csv'
    src = 'up_stations_5.csv'

    # Output path prefix read by on_out_bus. Backslashes are escaped
    # explicitly ('\d' is an invalid escape sequence); the value is unchanged.
    on_dir = 'D:\\data\\'

    all_upstation = pd.read_csv(src, engine='python', usecols=[0, 2, 3, 4])
    colum = ['CARDID', 'TRADEDATE', 'longitude', 'latitude']
    all_upstation.columns = colum
    # Drop records that lack either coordinate.
    all_upstation = all_upstation.dropna(subset=['longitude', 'latitude'])

    # Optional CARDID range filters (disabled):
    # all_upstation = all_upstation.loc[~all_upstation.where(all_upstation['CARDID'] > '330975418').any(axis=1)]
    # all_upstation = all_upstation.loc[~all_upstation.where(all_upstation['CARDID'] < '328684975').any(axis=1)]

    # get_bus_station iterates this module-level groupby object.
    grouped_upstation = all_upstation.groupby("CARDID")
    get_bus_station()
如需数据示例和分析结果,请私信联系我。