python实现二维函数高次拟合

在参加“数据挖掘”比赛中遇到了关于函数高次拟合的问题，然后就整理了一下源码，以便后期的学习与改进。

在本次“数据挖掘”比赛中，感觉收获最大的还是对神经网络的认识：在接近一周的时间里，研究了近40种神经网络模型。虽然持续一周的挖掘比赛把自己折磨得惨不忍睹，但是收获颇丰。现在想想，也挺欣慰自己在这段时间里接受新知识的能力。关于神经网络方面的理解会在后续博文中补充（刚提交完论文，还没来得及整理），这里先分享一下高次拟合方面的知识。

# coding=utf-8
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import csv
from scipy.stats import norm
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model

def loadDataSet(fileName):
    """Load the x values and labels used for curve fitting from a CSV file.

    Every row is read with ``csv.reader``.  A row whose column 50 is an
    empty string is skipped and counted; the count is printed once the whole
    file has been read.  For every other row, column 41 is parsed as a float
    and rescaled as ``value / 100 * 20 + 30``, and column 25 is parsed as a
    float and multiplied by 100.

    Args:
        fileName: Path of the CSV file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple of two equally long lists of floats:
        the rescaled column-41 values and the scaled column-25 values.

    Raises:
        IndexError: If a non-blank row has fewer than 51 columns.
        ValueError: If column 41 or column 25 of a kept row is not numeric.
    """
    import sys

    dataMat = []
    labelMat = []
    b = 0  # number of rows skipped because column 50 is empty

    # The csv module wants a binary handle on Python 2 but a text handle
    # opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvfile = open(fileName, 'rb')
    else:
        csvfile = open(fileName, 'r', newline='')

    # The context manager closes the file even when a row fails to parse.
    with csvfile:
        for line in csv.reader(csvfile):
            if not line:
                # csv.reader yields [] for blank lines; nothing to index.
                continue
            # Compare by value: identity (`is`) on strings only works by
            # accident of interning.
            if line[50] == '':
                b += 1
            else:
                dataMat.append(float(line[41]) / 100 * 20 + 30)
                labelMat.append(float(line[25]) * 100)

    print("absence time number: %d" % b)
    return dataMat, labelMat

# Read both value lists from data.csv and turn them into NumPy arrays so
# they can be fed to the fitting code.
xArr, yArr = loadDataSet('data.csv')
x, y = np.array(xArr), np.array(yArr)
# Synthetic alternative input (Gaussian noise added to x ** 2), left disabled:
# x = np.arange(0, 1, 0.002)
# y = norm.rvs(0, size=500, scale=0.1)
# y = y + x ** 2

def rmse(y_test, y):
    """Return the root-mean-square error between two sets of values.

    Args:
        y_test: Predicted values (array-like).
        y: Reference values (array-like), broadcast-compatible with
            ``y_test``.

    Returns:
        ``sqrt(mean((y_test - y) ** 2))`` as a NumPy scalar.
    """
    # Use NumPy directly: the ``scipy.sqrt`` / ``scipy.mean`` aliases are
    # deprecated re-exports that newer SciPy releases no longer provide.
    residual = np.asarray(y_test) - np.asarray(y)
    return np.sqrt(np.mean(residual ** 2))

def R2(y_test, y_true):
    """Return the coefficient of determination of a set of predictions.

    Args:
        y_test: Predicted values (NumPy array).
        y_true: Observed values (NumPy array).

    Returns:
        One minus the ratio of the residual sum of squares to the total sum
        of squares of ``y_true`` around its mean.
    """
    residual = y_test - y_true
    centered = y_true - y_true.mean()
    ss_res = (residual ** 2).sum()
    ss_tot = (centered ** 2).sum()
    return 1 - ss_res / ss_tot

def R22(y_test, y_true):
    """Return one minus the ratio of the model RMSE to a mean-baseline RMSE.

    The baseline predicts the mean of ``y_true`` for every sample.

    Args:
        y_test: Predicted values (array-like).
        y_true: Observed values (array-like).

    Returns:
        ``1 - rmse(y_test, y_true) / rmse(baseline, y_true)``.
    """
    y_true = np.asarray(y_true)
    # Build the baseline as floats.  Copying y_true and assigning the mean in
    # place would truncate the mean whenever y_true has an integer dtype.
    y_mean = np.full(y_true.shape, y_true.mean(), dtype=float)
    return 1 - rmse(y_test, y_true) / rmse(y_mean, y_true)


# Raw observations; the fitted curves are drawn on top of this scatter plot.
plt.scatter(x, y, s=5)

# Fit polynomials of degree 1, 2, 3 and 6.
degree = [1, 2, 3, 6]

# The pipeline is fitted on a column vector (one feature per sample); build
# it once instead of on every fit/predict/score call.
x_col = x[:, np.newaxis]
# plt.plot joins points in the order given, so draw each curve from left to
# right; in raw row order an unsorted x would produce a zig-zag.
plot_order = np.argsort(x)

for d in degree:
    # Ridge regression on polynomial features.  For an unregularised fit,
    # swap the final step for LinearRegression(fit_intercept=False).
    clf = Pipeline([('poly', PolynomialFeatures(degree=d)),
                    ('linear', linear_model.Ridge())])
    clf.fit(x_col, y)
    y_test = clf.predict(x_col)

    print('多项式参数%s' % clf.named_steps['linear'].coef_)
    print('rmse=%.2f, R2=%.2f, R22=%.2f, clf.score=%.2f' %
          (rmse(y_test, y),
           R2(y_test, y),
           R22(y_test, y),
           clf.score(x_col, y)))

    # Label each curve with its degree so the legend follows `degree`.
    plt.plot(x[plot_order], y_test[plot_order], linewidth=2, label=str(d))

plt.grid()
plt.legend(loc='upper left')
plt.show()

python实现二维函数高次拟合

猜你喜欢