#!/usr/bin/env python
# coding: utf-8

# Common sklearn recipes: preprocessing, feature selection, dimensionality
# reduction, model construction, and evaluation.
# NOTE: this is a notebook-style snippet collection — names such as
# data_train, X_train, iris, epsilon, and model are assumed to exist in the
# interactive session; the cells are templates, not a runnable pipeline.

import numpy as np

# --- Standardization (zero mean, unit variance) ---------------------------
# Use when similarity is distance-based or before PCA.
# Fit on the TRAINING data only, then apply the same transform to the test
# data — fitting a second scaler on the test set (as the original did) leaks
# test statistics and makes train/test features inconsistent.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(data_train)
data_train = scaler.transform(data_train)
data_test = scaler.transform(data_test)

# --- Min-max scaling to [0, 1] --------------------------------------------
from sklearn.preprocessing import MinMaxScaler

data = MinMaxScaler().fit_transform(data)

# --- Row-wise normalization to unit norm ----------------------------------
from sklearn.preprocessing import Normalizer

data = Normalizer().fit_transform(data)

# --- Binarization: 1 if feature > threshold else 0 ------------------------
from sklearn.preprocessing import Binarizer

data = Binarizer(threshold=epsilon).fit_transform(data)

# --- Categorical -> numeric: keep numeric columns, one-hot the categories -
from sklearn.feature_extraction import DictVectorizer

vec = DictVectorizer(sparse=False)
# was orient='recoed' — a typo that raises ValueError in pandas
X_train = vec.fit_transform(X_train.to_dict(orient='records'))

# --- Chi-squared test: keep the K best features ---------------------------
# was `from sklearn.feature_extraction import selectKBest` — wrong module
# (feature_selection) and wrong capitalization (SelectKBest).
from sklearn.feature_selection import SelectKBest, chi2

skb = SelectKBest(chi2, k=10).fit(X_train, Y_train)
X_train = skb.transform(X_train)
X_test = skb.transform(X_test)

# --- Mutual-information-style selection via MINE's MIC statistic ----------
from sklearn.feature_selection import SelectKBest
from minepy import MINE


def mic(x, y):
    """Return (MIC score, dummy p-value 0.5).

    MINE's API is not functional, so wrap it to match the
    (score, p_value) shape SelectKBest's score_func expects.
    """
    m = MINE()
    m.compute_score(x, y)
    return (m.mic(), 0.5)


# Keep the K=2 best features by MIC.  On Python 3 `map` is lazy, so it must
# be materialized with list() before np.array() (original was Py2-style
# `array(map(...))` with `array` undefined).  `iris.taget` typo fixed.
SelectKBest(
    lambda X, Y: np.array(list(map(lambda x: mic(x, Y), X.T))).T,
    k=2,
).fit_transform(iris.data, iris.target)

# --- PCA: project onto the top 2 principal components ---------------------
from sklearn.decomposition import PCA

estimator = PCA(n_components=2)
X_pca = estimator.fit_transform(X_data)

# --- Train/test split ------------------------------------------------------
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.25, random_state=7)

# --- Generic fit/predict template (AlgorithmXXX is a placeholder) ---------
from sklearn.XXXXXXX import AlgorithmXXX

alg = AlgorithmXXX()
alg.fit(X_train, Y_train)
y_predict = alg.predict(X_test)  # was `x_test` — undefined name

# --- Linear models ---------------------------------------------------------
from sklearn.linear_model import SGDClassifier

sgd = SGDClassifier()

from sklearn.linear_model import SGDRegressor

# NOTE(review): loss='squared_loss' was renamed to 'squared_error' in
# sklearn 1.0 and removed in 1.2 — update if running on a modern release.
sgd = SGDRegressor(loss='squared_loss', penalty=None, random_state=7)

# --- Support vector machines ----------------------------------------------
from sklearn.svm import SVR

svr = SVR(kernel='linear')  # kernel: linear / poly / rbf

from sklearn.svm import SVC

svc = SVC(kernel='linear')

# --- Naive Bayes -----------------------------------------------------------
from sklearn.naive_bayes import MultinomialNB

mnb = MultinomialNB()

# --- Trees and ensembles ---------------------------------------------------
from sklearn.tree import DecisionTreeClassifier

# max_depth / min_samples_leaf cap tree growth to guard against overfitting
dtc = DecisionTreeClassifier(criterion='entropy', max_depth=3,
                             min_samples_leaf=5)

from sklearn.ensemble import RandomForestClassifier

rfc = RandomForestClassifier(max_depth=3, min_samples_leaf=5)

from sklearn.ensemble import GradientBoostingClassifier

gbc = GradientBoostingClassifier(max_depth=3, min_samples_leaf=5)

from sklearn.ensemble import ExtraTreesClassifier

etr = ExtraTreesClassifier()

# --- Evaluation ------------------------------------------------------------
from sklearn import metrics

# was `y_test` — the split above produced `Y_test`
accuracy_rate = metrics.accuracy_score(Y_test, y_predict)
metrics.classification_report(Y_test, y_predict,
                              target_names=data.target_names)

# --- 10-fold cross-validation ---------------------------------------------
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# shuffle=True is required when passing random_state (modern sklearn raises
# a ValueError for random_state without shuffling).
kfold = KFold(n_splits=10, shuffle=True, random_state=7)
result = cross_val_score(model, X, Y, cv=kfold)
# Reprinted from: www.cnblogs.com/2019-02-11/p/10674096.html