def fix_data(engine, tb_name, stnm, stcd, *, if_exists='replace'):
    """Fill the gaps in an hourly observation table and write it back.

    The table ``tb_name`` is read through ``engine``; its ``tm`` column is
    parsed and localized to Asia/Shanghai.  Rows with a duplicated ``tm`` are
    collapsed (the last one wins) and the frame is reindexed onto a complete
    60-minute range running from the earliest to the latest ``tm``.  Rows
    created for missing hours get ``val`` = 0 plus the given ``stnm`` and
    ``stcd``.  Finally a ``stamp`` column (Unix seconds) and an hour-based
    ``ID`` index are derived and the frame is written to ``tb_name``.

    The row count before filling, the length of the full hourly range and
    the final frame are printed to stdout.

    Args:
        engine: Connection object accepted by ``pd.read_sql`` and
            ``DataFrame.to_sql``.
        tb_name: Name of the table to read and to write.
        stnm: Value stored in the ``stnm`` column of inserted rows.
        stcd: Value stored in the ``stcd`` column of inserted rows.
        if_exists: Forwarded to ``DataFrame.to_sql``.  Defaults to
            ``'replace'`` because the target is the table that was just
            read; with ``'fail'`` pandas raises ``ValueError`` whenever that
            table already exists.

    Returns:
        None.  If the table holds no rows, nothing is written.
    """
    tz_name = 'Asia/Shanghai'
    seconds_per_step = 60 * 60  # source data has hourly resolution
    # Unix seconds of 2015-11-13 00:00:00 Asia/Shanghai; that hour gets ID 1.
    id_origin = 1447344000

    df = pd.read_sql(tb_name, engine)
    if df.empty:
        # No observations means there is no time span to fill.
        return

    df['tm'] = pd.to_datetime(df['tm']).dt.tz_localize(tz_name)
    print(df['tm'].shape[0])  # number of rows before gap filling

    # Span the true earliest/latest timestamps instead of trusting the row
    # order of the table, which is not guaranteed to be chronological.
    full_range = pd.date_range(
        start=df['tm'].min(),
        end=df['tm'].max(),
        freq='60min',
        tz=tz_name,
    )
    print(full_range.shape[0])  # number of rows after gap filling

    # One row per timestamp, indexed by time, then expanded to every hour.
    df = df.drop_duplicates(subset=['tm'], keep='last')
    df = df.set_index('tm', drop=True)
    df = df.reindex(full_range)

    # Rows introduced by the reindex are all-NaN; give them default values.
    df = df.fillna({'val': 0, 'stnm': stnm, 'stcd': stcd})
    df = df.reset_index(names='tm')  # keep the time as a regular column

    # Whole seconds since the Unix epoch, computed without a per-row lambda.
    # The epoch is expressed in the same zone as ``tm`` so the subtraction
    # is valid on every pandas version.
    unix_epoch = pd.Timestamp(0, unit='s', tz=tz_name)
    df['stamp'] = (df['tm'] - unix_epoch) // pd.Timedelta(seconds=1)

    # Integer division keeps the IDs exact and strictly increasing by one
    # per hourly row.
    df['ID'] = (df['stamp'] - id_origin) // seconds_per_step + 1
    df = df.set_index('ID', drop=True)
    print(df)
    df.to_sql(tb_name, engine, if_exists=if_exists, index=True)
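# Source article: "[Python] Reading SQLite with pandas and handling missing observations"
# Reposted from blog.csdn.net/qq_25262697/article/details/131470850
# (The remaining lines were page-navigation labels from the scraped site:
# "You may also like", "Today's recommendations", "Weekly ranking".)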