1、查看输出y分类是否均衡?
# Bar chart of how many training rows fall into each target class (class-balance check).
# `x` is passed by keyword: recent seaborn releases take `data` as the only positional
# parameter, so a positional series is no longer interpreted as the x variable.
sns.countplot(x=train.target)
pyplot.xlabel('target')
# The trailing semicolon only suppresses the notebook's echo of the returned Text object.
pyplot.ylabel('Number of occurrences');
2、代码
# The four essential imports: pandas, numpy, matplotlib's pyplot and seaborn
import pandas as pd
import numpy as np
from matplotlib import pyplot
import seaborn as sns
# IPython/Jupyter magic (not plain Python syntax): render matplotlib figures inline
%matplotlib inline
# Load the training data
# NOTE(review): `dpath` is not defined in this snippet; it is concatenated directly with
# the file name, so it presumably ends with a path separator -- confirm where it is set.
train = pd.read_csv(dpath + "Otto_train.csv")
# Encode the labels: drop the first 6 characters of each target string (presumably a
# "Class_" prefix -- verify against the CSV), parse the rest as an int, make it 0-based.
y_train = train['target'].map(lambda label: int(label[6:]) - 1)
# The features are every column except the row id and the label.
X_train = train.drop(columns=["id", "target"])
# Standardize the features
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training features first, then replace X_train with its scaled
# version; the fitted scaler remains available as ss_X.
ss_X = StandardScaler().fit(X_train)
X_train = ss_X.transform(X_train)
# Model training
# no1: plain LogisticRegression, constructed with the library's default settings
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
# no2: LogisticRegression with regularization