版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u013317445/article/details/85100884
部分依赖图(partial dependence plot)是从复杂模型中提取洞见(extract insights from complex models)的好方法。
部分依赖图显示了目标响应和一组"目标"特征之间的依赖关系,并在所有其他特征的取值上进行边缘化(即排除其他特征的影响)。直观地说,可以将部分依赖解释为预期的目标响应关于目标特征的函数。
key code:
# Key code: draw partial dependence plots with the supported sklearn API.
# NOTE: `sklearn.ensemble.partial_dependence` was deprecated in sklearn 0.21
# and removed in 0.24; `sklearn.inspection` is the replacement, and
# `PartialDependenceDisplay.from_estimator` is the current entry point.
from sklearn.inspection import PartialDependenceDisplay

my_plots = PartialDependenceDisplay.from_estimator(
    my_model,
    X=X,                                                      # raw predictors data
    features=[0, 2],                                          # column numbers of plots we want to show
    feature_names=['Distance', 'Landsize', 'BuildingArea'],   # labels on graphs
    grid_resolution=10)                                       # number of values to plot on x axis
import pandas as pd
# NOTE: `sklearn.preprocessing.Imputer` was deprecated in 0.20 and removed in
# 0.22 (see the pasted warning below); `SimpleImputer` is the replacement and
# its default strategy ('mean') matches the old Imputer default.
from sklearn.impute import SimpleImputer

# Load the Melbourne housing data. Use a raw string for the Windows path so
# backslashes are never interpreted as escape sequences.
melb_data= pd.read_csv(r'G:\kaggle\melb_data.csv')
y= melb_data.Price  # prediction target: sale price

# Feature columns used for the partial-dependence demo.
clo_to_use=['Distance','Landsize','BuildingArea','Rooms']
X=melb_data[clo_to_use]

# Fill missing values with each column's mean.
my_imputer= SimpleImputer()
imputed_X= my_imputer.fit_transform(X)
d:\python27\lib\site-packages\sklearn\utils\deprecation.py:58: DeprecationWarning: Class Imputer is deprecated; Imputer was deprecated in version 0.20 and will be removed in 0.22. Import impute.SimpleImputer from sklearn instead.
warnings.warn(msg, category=DeprecationWarning)
# Gradient tree boosting: fit a regressor on the imputed features.
from sklearn.ensemble import GradientBoostingRegressor
my_model= GradientBoostingRegressor()
# Train on the imputed predictors against the sale price target.
my_model.fit(imputed_X, y)
GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
learning_rate=0.1, loss='ls', max_depth=3, max_features=None,
max_leaf_nodes=None, min_impurity_decrease=0.0,
min_impurity_split=None, min_samples_leaf=1,
min_samples_split=2, min_weight_fraction_leaf=0.0,
n_estimators=100, n_iter_no_change=None, presort='auto',
random_state=None, subsample=1.0, tol=0.0001,
validation_fraction=0.1, verbose=0, warm_start=False)
# Draw partial dependence plots to see how the target y relates to each feature.
# NOTE: `sklearn.ensemble.partial_dependence` was removed in sklearn 0.24;
# `sklearn.inspection.PartialDependenceDisplay.from_estimator` is the
# supported replacement.
from sklearn.inspection import PartialDependenceDisplay

# Default grid resolution (100 points per feature on the x axis).
my_plots= PartialDependenceDisplay.from_estimator(
    my_model,
    X=imputed_X,
    features=[0, 2],
    feature_names=clo_to_use)

# Coarser grid: only 10 values per feature on the x axis.
my_plots1= PartialDependenceDisplay.from_estimator(
    my_model,
    X=imputed_X,
    features=[0, 2],
    feature_names=clo_to_use,
    grid_resolution=10)