使用 scipy.interpolate 对数据进行样条插值;但当缺失点出现在极值附近时,插值效果并不好。

import numpy as np
import math
import matplotlib.pyplot as plt
import pandas as pd
import datetime 
from scipy import interpolate
from pandas import DataFrame,Series
#num_pi为要产生几个π的sin数据,num_ex为异常点的个数,num_gap为段缺失数据的个数,num_bk为单个缺失值的个数
def test_data_gen(num_pi,num_ex,num_gap,num_bk):
    if (num_pi>0) :
        num_point=72*num_pi
        x=np.linspace(0,np.pi*num_pi,num_point)
        signal1=[(math.sin(i)+1) for i in x] #产生测试用的num_pi个sin数据
        noise=0.2*(np.random.rand(num_point)-0.5)
        signal1=signal1+noise#在sin数据上添加噪声
    else:
        print("Please input valid num_pi")
        return

    if (num_ex>0) :
        #随机添加异常值
        point_ex=[]
        for i in range(num_ex):
            point_ex.append(np.random.randint(0,len(signal1))) #异常值的位置
        for _ in point_ex:
            signal1[_]=signal1[_]*1.8
    else:
        pass
    if (num_gap>0) :    
        #随机添加段数据缺失
        longth_gap=np.random.randint(15)+5 #缺口大小5~20

        point_gap=[]   #缺口的位置
        for i in range(num_gap):
            point_gap.append(np.random.randint(num_point-20))

        for i in point_gap:
            for j in range(longth_gap):  
                signal1[i+j]=None
    else:
        pass
    if (num_bk>0) :        
        #随机添加单点缺失值
        point_break=[]
        for i in range(num_bk):
            point_break.append(np.random.randint(num_point))        
        for _ in point_break:
            signal1[_]=None
    else:
        pass
    #产生时间序列,每隔5分钟一个点
    date_need=[]
    start_dt = datetime.datetime(2017, 1, 1) 
    interval = datetime.timedelta(seconds=300) 
    for i in range(num_point): 
        date_need.append(start_dt + interval * i)

    df = DataFrame(signal1,index = date_need[0:num_point])
    df.to_excel('data_test.xlsx')        
    plt.figure(figsize=(10,5))
    plt.plot(signal1)
    plt.show()

    return signal1
# Example run: 8*pi of signal with 10 outliers, 5 gaps and 15 single missing points.
test_data = test_data_gen(num_pi=8, num_ex=10, num_gap=5, num_bk=15)

(图:test_data_gen 生成的测试信号,含异常值、缺失段和单点缺失)

'''
若要产生不同起始时间的序列,可在日期后面加上自己想设定的时、分、秒:
start_dt = datetime.datetime(2017, 1, 1, hour, minute, second)
若要模拟不同采样时间间隔的序列,修改 timedelta 的值即可:
interval = datetime.timedelta(seconds=300)
'''

def data_interpolate(array):
    """Fill missing samples of a 1-D series with an interpolating B-spline.

    Leading and trailing runs of missing values are first replaced by the
    nearest valid value, so the spline is never asked to extrapolate. The
    remaining gaps are filled by fitting ``scipy.interpolate.splrep`` (with
    its default settings) through the known samples, using sample positions
    ``0 .. n-1`` as the x axis, and evaluating it at every position.

    The input is not modified. A figure is shown with the input as received
    (top) and the interpolated curve (bottom).

    Args:
        array: One-dimensional sequence of numbers; missing samples are
            marked with NaN or ``None``.

    Returns:
        NumPy array of the same length holding the spline evaluated at every
        sample position.

    Raises:
        ValueError: If the input is not one-dimensional or contains no valid
            sample at all (this includes empty input).
    """
    # Work on a float copy: the caller's data stays untouched and any None
    # entries become NaN.
    values = np.array(array, dtype=float)
    if values.ndim != 1:
        raise ValueError("data_interpolate expects a one-dimensional sequence")

    known = np.flatnonzero(~np.isnan(values))
    if known.size == 0:
        raise ValueError("data_interpolate needs at least one non-missing value")

    # Head/tail fill: hold the first/last valid value across edge gaps.
    first, last = known[0], known[-1]
    filled = values.copy()
    filled[:first] = values[first]
    filled[last + 1:] = values[last]

    # Spline through every sample that is known after the edge fill.
    # splrep itself raises if too few points remain for the default degree.
    support = np.flatnonzero(~np.isnan(filled))
    tck = interpolate.splrep(support, filled[support])
    x_new = np.arange(filled.size)
    y_bspline = interpolate.splev(x_new, tck)

    # Plot input (top) against the interpolated result (bottom).
    fig = plt.figure(figsize=(10, 8))
    ax1 = fig.add_subplot(211)
    ax1.plot(values)
    ax2 = fig.add_subplot(212)
    ax2.plot(y_bspline)
    plt.show()

    return y_bspline
# Interpolate the generated test signal and keep the filled-in curve.
data1 = data_interpolate(array=test_data)

(图:上图为含缺失段的原始数据,下图为样条插值后的曲线)

猜你喜欢

转载自blog.csdn.net/elite666/article/details/80636754