Python 获取时间序列中断开的时间,并进行插值处理

获取断开的时间

import time
from datetime import datetime, timedelta, date
import numpy as np
import pandas as pd
import math
from scipy import interpolate


# Sample daily series. The dates jump from 06-05 to 06-11 and from 06-14 to
# 06-18, so there are missing days for the code below to detect.
dicts = [
    {'date_time': '2021-06-01', 'ecpm_tom': 13},
    {'date_time': '2021-06-02', 'ecpm_tom': 25},
    {'date_time': '2021-06-03', 'ecpm_tom': 9},
    {'date_time': '2021-06-04', 'ecpm_tom': 0.2},
    {'date_time': '2021-06-05', 'ecpm_tom': 0},
    {'date_time': '2021-06-11', 'ecpm_tom': 7.9},
    {'date_time': '2021-06-12', 'ecpm_tom': 3.7},
    {'date_time': '2021-06-13', 'ecpm_tom': 9.7},
    {'date_time': '2021-06-14', 'ecpm_tom': 0},
    {'date_time': '2021-06-18', 'ecpm_tom': 6},
]

data = pd.DataFrame(dicts)

xs = data['date_time'].values
ys = data['ecpm_tom'].values

# Collect every calendar day that is missing between two consecutive dates.
insert_xs = []
for prev_day, next_day in zip(xs[:-1], xs[1:]):
    # Parse the strings directly; going through time.mktime() would make the
    # day arithmetic depend on the local timezone.
    start = datetime.strptime(prev_day, "%Y-%m-%d")
    end = datetime.strptime(next_day, "%Y-%m-%d")
    # range() is empty when the two dates are at most one day apart, so
    # equal or out-of-order neighbours are skipped instead of looping forever.
    for offset in range(1, (end - start).days):
        insert_xs.append((start + timedelta(days=offset)).strftime("%Y-%m-%d"))

print(insert_xs)

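输出结果:

['2021-06-06', '2021-06-07', '2021-06-08', '2021-06-09', '2021-06-10', '2021-06-15', '2021-06-16', '2021-06-17']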

插值处理

def interpolation_data(x, y, kind):
    """Fill the missing days of a daily time series by interpolation.

    Every calendar day lying strictly between two consecutive entries of
    ``x`` is treated as missing. Days are numbered 1, 2, 3, ... starting at
    the first date, an interpolant is fitted through the known points, and
    it is evaluated at the numbers of the missing days.

    Args:
        x: Dates formatted as ``"%Y-%m-%d"``, in strictly ascending order.
        y: Values aligned one-to-one with ``x``.
        kind: Interpolation kind forwarded to
            ``scipy.interpolate.interp1d``, e.g. ``"linear"`` or ``"cubic"``.

    Returns:
        ``0`` when ``x`` has no missing days. Otherwise a dict, sorted by
        date string, that maps every original and every filled-in date to
        its value. Original values are kept as given; interpolated values
        are rounded to 4 decimals and passed through ``abs()``. The dict is
        also printed.

    Raises:
        ValueError: If the dates are not strictly ascending.
    """
    x, y = list(x), list(y)
    days = [datetime.strptime(day, "%Y-%m-%d") for day in x]

    # Values are matched to dates by position, so duplicate or out-of-order
    # dates would misalign them; reject such input explicitly.
    if any(later <= earlier for earlier, later in zip(days, days[1:])):
        raise ValueError("x must contain strictly ascending dates")

    # Every day strictly between two consecutive known dates is missing.
    missing = []
    for earlier, later in zip(days, days[1:]):
        for offset in range(1, (later - earlier).days):
            missing.append(earlier + timedelta(days=offset))

    # No gaps means there is nothing to interpolate.
    if not missing:
        return 0

    # Number the days 1, 2, 3, ... from the first date; these numbers are
    # the x-coordinates used for the interpolation.
    first = days[0]
    known_pos = [(day - first).days + 1 for day in days]
    missing_pos = [(day - first).days + 1 for day in missing]

    # kind="linear": linear interpolation; kind="cubic": cubic spline.
    interpolant = interpolate.interp1d(known_pos, y, kind=kind)
    raw_estimates = np.asarray(interpolant(missing_pos)).tolist()
    estimates = [abs(round(value, 4)) for value in raw_estimates]

    # Merge the known and the interpolated points, then order them by date.
    resx_dict = dict(zip(x, y))
    for day, value in zip(missing, estimates):
        resx_dict[day.strftime("%Y-%m-%d")] = value
    resx_dict = dict(sorted(resx_dict.items(), key=lambda item: item[0]))

    print(resx_dict)
    return resx_dict
# Fill the gaps of the sample series using cubic interpolation.
interpolation_data(x=xs, y=ys, kind='cubic')

(此处原为运行结果截图:程序打印按日期排序的字典,其中缺失日期 2021-06-06 至 2021-06-10、2021-06-15 至 2021-06-17 的值由三次样条插值得到。)

插值还可参考:

https://blog.csdn.net/qq_42363032/article/details/117995960

三次样条插值可参考:

https://blog.csdn.net/qq_42363032/article/details/118017126

猜你喜欢

转载自blog.csdn.net/qq_42363032/article/details/118656862