from sklearn.preprocessing import OneHotEncoder
# Toy frame: numeric feature f1, string-valued feature f2, binary label y.
ddd2 = pd.DataFrame({
'f1': [1, 2, 3, 4, 5, 6], 'f2': ['你', '我', '他', '你', '我', '她'], 'y': [1, 1, 1, 0, 0, 0]})
# Bare name: presumably a notebook-cell display of the frame; it has no effect in a plain script.
ddd2
# One-hot encode f2; reshape(-1, 1) turns the column into the single-column 2-D input passed to the encoder.
one = OneHotEncoder()
# NOTE(review): the encoder result is assigned straight into one column. indexapply later calls
# .toarray() on each cell, so every cell presumably ends up holding the whole encoded matrix — confirm.
ddd2['f2'] = one.fit_transform(ddd2['f2'].values.reshape(-1, 1))
ddd2
# Keep each row's index label in a helper column so the per-row apply can pick its own encoded row.
ddd2['index'] = ddd2.index
def indexapply(index, x):
    """Return one row of an encoded matrix as a flat float64 array.

    Args:
        index: Position of the row to extract.
        x: Sparse matrix-like object exposing ``tocsr()`` (for example the
            SciPy sparse result of a one-hot encoder).

    Returns:
        1-D ``numpy.ndarray`` of dtype float64 holding row ``index`` of ``x``.
    """
    # Slice the row first so only that row is densified, instead of
    # converting the whole matrix to a dense array on every call.
    row = x.tocsr()[index, :].toarray()
    # reshape(-1) drops the leading length-1 axis that 2-D row slices keep.
    return np.asarray(row, dtype=np.float64).reshape(-1)
# Replace each f2 cell with the encoded row selected by that row's 'index' value.
ddd2['f2'] = ddd2.apply(lambda row: indexapply(row['index'], row['f2']), axis=1)
# The helper column has served its purpose; drop it again.
ddd2 = ddd2.drop('index', axis=1)
ddd2
from sklearn.ensemble import GradientBoostingClassifier
# Features are every column except the last; the label is the last column.
x2, y2 = ddd2.iloc[:, :-1].values, ddd2.iloc[:, -1].values
# At this point each x2 row is a pair [f1, one-hot array], not yet a flat numeric row.
print(x2)
print(y2)
def flatten_x_cols(lst):
    """Flatten rows of ``[value, sequence]`` into plain flat lists.

    Args:
        lst: Iterable of rows. ``row[0]`` is a single value and ``row[1]``
            is an iterable whose items are spliced in after it. Any further
            elements of a row are ignored.

    Returns:
        A list with one flat list ``[row[0], *row[1]]`` per input row, in
        input order.
    """
    return [[row[0], *row[1]] for row in lst]
# Expand each [f1, one-hot array] row into a single flat list of values before fitting.
x2 = flatten_x_cols(x2)
# Fit a gradient-boosting classifier, constructed with no arguments, on the toy data.
model = GradientBoostingClassifier()
model.fit(x2, y2)
# Output of print(x2), print(y2) and model.fit(x2, y2) above:
# [[1 array([0., 1., 0., 0.])]
#  [2 array([0., 0., 0., 1.])]
#  [3 array([1., 0., 0., 0.])]
#  [4 array([0., 1., 0., 0.])]
#  [5 array([0., 0., 0., 1.])]
#  [6 array([0., 0., 1., 0.])]]
# [1 1 1 0 0 0]
# GradientBoostingClassifier()
sklearn 将onehot之后的结果拼接回原来的dataframe
https://blog.csdn.net/qq_42363032/article/details/121377220