Python k-nearest neighbors: the KNeighborsClassifier and KNeighborsRegressor models.

Environment: Windows 10 64-bit, Python 3.6, PyCharm 2018.1.1.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors,datasets,cross_validation

def load_classification_data():
    """Load the scikit-learn digits dataset and split it into train/test sets.

    Returns:
        X_train, X_test, y_train, y_test: a stratified 75/25 split of the
        flattened 8x8 digit images and their class labels (random_state=0
        for reproducibility).
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split now lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    digits = datasets.load_digits()
    X_train = digits.data
    y_train = digits.target
    # stratify keeps the per-digit class proportions identical in both splits.
    return train_test_split(X_train, y_train, test_size=0.25,
                            random_state=0, stratify=y_train)

def create_regression_data(n):
    """Create a noisy 1-D sine-wave regression dataset and split it.

    Args:
        n: number of samples to generate.

    Returns:
        X_train, X_test, y_train, y_test: a 75/25 split of n points with
        X uniform in [0, 5) and y = sin(X), with noise added to every
        5th target.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split now lives in sklearn.model_selection.
    from sklearn.model_selection import train_test_split

    X = 5 * np.random.rand(n, 1)
    y = np.sin(X).ravel()
    # Add uniform noise in (-0.5, 0.5] to every 5th target. Size the noise
    # vector from the slice itself: the original int(n / 5) under-counts
    # when n is not a multiple of 5 (y[::5] has ceil(n/5) elements) and
    # would raise a broadcasting ValueError.
    noisy = y[::5]          # view into y, so += mutates y in place
    noisy += 1 * (0.5 - np.random.rand(noisy.size))
    return train_test_split(X, y, test_size=0.25, random_state=0)
#使用KneighborsClassifier分类
def test_KNeighborsClassifier(*data):
    """Train a default KNeighborsClassifier and print its accuracy on the
    training and testing splits.

    Args:
        *data: X_train, X_test, y_train, y_test as produced by
            load_classification_data().
    """
    X_train, X_test, y_train, y_test = data
    model = neighbors.KNeighborsClassifier()
    model.fit(X_train, y_train)
    train_acc = model.score(X_train, y_train)
    test_acc = model.score(X_test, y_test)
    print('Training Score:%f' % train_acc)
    print('Testing score:%f' % test_acc)
# Run the basic classifier demo on a fresh stratified digits split.
test_KNeighborsClassifier(*load_classification_data())

[Figure omitted: console output showing the classifier's training and testing accuracy.]

#考察p值,即距离函数的形式对于预测性能的影响
def test_KNeighborsClassifier_k_p(*data):
    """Plot train/test accuracy of KNeighborsClassifier versus the number
    of neighbors k, for several Minkowski distance exponents p.

    Args:
        *data: X_train, X_test, y_train, y_test as produced by
            load_classification_data().
    """
    X_train, X_test, y_train, y_test = data
    # 50 integer k values (np.linspace's default num) spread over
    # [1, n_train); endpoint=False keeps n_neighbors below the
    # training-set size.
    Ks = np.linspace(1, y_train.size, endpoint=False, dtype='int')
    Ps = [1, 2, 10]  # p=1 Manhattan, p=2 Euclidean, larger p -> max-like

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for p in Ps:
        training_scores = []
        testing_scores = []
        for k in Ks:
            clf = neighbors.KNeighborsClassifier(p=p, n_neighbors=k)
            clf.fit(X_train, y_train)
            testing_scores.append(clf.score(X_test, y_test))
            training_scores.append(clf.score(X_train, y_train))
        ax.plot(Ks, testing_scores, label='testing score:p=%d:' % p)
        ax.plot(Ks, training_scores, label='training score:p=%d:' % p)
    ax.legend(loc='best')
    ax.set_xlabel('K')
    ax.set_ylabel('score')
    ax.set_ylim(0, 1.05)
    # Fixed typo in the original title ('KNeighborsClassifiter').
    ax.set_title('KNeighborsClassifier')
    plt.show()
# Plot accuracy vs. k for each distance exponent p on a fresh digits split.
test_KNeighborsClassifier_k_p(*load_classification_data())

[Figure omitted: training/testing accuracy curves over k for p = 1, 2, 10.]

#KNN回归
def test_KNeighborsRegressor(*data):
    """Fit a default KNeighborsRegressor and print its score on both the
    training and the testing split.

    Args:
        *data: X_train, X_test, y_train, y_test as produced by
            create_regression_data().
    """
    X_train, X_test, y_train, y_test = data
    model = neighbors.KNeighborsRegressor().fit(X_train, y_train)
    print('Training Score:%f' % model.score(X_train, y_train))
    print('Testing score:%f' % model.score(X_test, y_test))
# Run the basic regressor demo on a fresh 1000-sample sine dataset.
test_KNeighborsRegressor(*create_regression_data(1000))
#考虑K值与投票策略对预测性能的影响
def test_KNeighborsRegressor_k_w(*data):
    """Plot train/test score of KNeighborsRegressor versus the number of
    neighbors k, for both neighbor-weighting strategies.

    Args:
        *data: X_train, X_test, y_train, y_test as produced by
            create_regression_data().
    """
    X_train, X_test, y_train, y_test = data
    # 100 integer k values in [1, n_train); endpoint=False keeps
    # n_neighbors below the training-set size.
    Ks = np.linspace(1, y_train.size, num=100, endpoint=False, dtype='int')
    weights = ['uniform', 'distance']

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for weight in weights:
        training_scores = []
        testing_scores = []
        for k in Ks:
            regr = neighbors.KNeighborsRegressor(weights=weight, n_neighbors=k)
            regr.fit(X_train, y_train)
            testing_scores.append(regr.score(X_test, y_test))
            training_scores.append(regr.score(X_train, y_train))
        ax.plot(Ks, testing_scores, label='testing score:weight=%s:' % weight)
        # Fixed typo in the original legend label ('traing').
        ax.plot(Ks, training_scores, label='training score:weight=%s:' % weight)
    ax.legend(loc='best')
    ax.set_xlabel('K')
    ax.set_ylabel('score')
    ax.set_ylim(0, 1.05)
    ax.set_title('KNeighborsRegressor')
    plt.show()
# Plot score vs. k for each weighting strategy on a fresh sine dataset.
test_KNeighborsRegressor_k_w(*create_regression_data(1000))

[Figure omitted: training/testing score curves over k for uniform vs. distance weighting.]

#考查p值(即距离函数形式)对预测性能的影响
def test_KNeighborsRegressor_k_p(*data):
    """Plot train/test score of KNeighborsRegressor versus the number of
    neighbors k, for several Minkowski distance exponents p.

    Args:
        *data: X_train, X_test, y_train, y_test as produced by
            create_regression_data().
    """
    X_train, X_test, y_train, y_test = data
    # 50 integer k values (np.linspace's default num) in [1, n_train);
    # endpoint=False keeps n_neighbors below the training-set size.
    Ks = np.linspace(1, y_train.size, endpoint=False, dtype='int')
    Ps = [1, 2, 10]  # p=1 Manhattan, p=2 Euclidean, larger p -> max-like

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for p in Ps:
        training_scores = []
        testing_scores = []
        for k in Ks:
            regr = neighbors.KNeighborsRegressor(p=p, n_neighbors=k)
            regr.fit(X_train, y_train)
            testing_scores.append(regr.score(X_test, y_test))
            training_scores.append(regr.score(X_train, y_train))
        ax.plot(Ks, testing_scores, label='testing score:p=%d' % p)
        # Fixed typo in the original legend label ('traing').
        ax.plot(Ks, training_scores, label='training score:p=%d' % p)
    ax.legend(loc='best')
    ax.set_xlabel('K')
    ax.set_ylabel('score')
    ax.set_ylim(0, 1.05)
    ax.set_title('KNeighborsRegressor')
    plt.show()
# Plot score vs. k for each distance exponent p on a fresh sine dataset.
test_KNeighborsRegressor_k_p(*create_regression_data(1000))

[Figure omitted: training/testing score curves over k for p = 1, 2, 10.]

Source: reprinted from blog.csdn.net/dingming001/article/details/80740181.