充电桩故障分类与检测(入门赛f1-1.0)源代码

第一次接触数据类比赛,本比赛是个新人入门赛。

数据采用原始特征,模型采用GBDT,并使用网格搜索进行模型调参

历程:一开始用了tensorflow搭建了全连接神经网络,但是NN效果并不好,0.89封顶,后来采用了XGB和LGB,线上只能达到0.9999,最后换成GBDT开始了玄学调参的路程(调包侠。。)

数据下载地址:https://download.csdn.net/download/qq_39622065/10456270

模型训练和预测:

from sklearn.ensemble import GradientBoostingClassifier
from sklearn import cross_validation, metrics
from sklearn.model_selection import GridSearchCV
import return_data2
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")
import time
# Load the labelled training data and hold out 20% of it for a local check.
dataset_X, dataset_Y = return_data2.return_tarin_data()
X_train, X_test, y_train, y_test = train_test_split(
    dataset_X, dataset_Y, test_size=0.2, random_state=21
)

# max_features="sqrt" is what the former "auto" setting meant for this
# classifier; "auto" is no longer accepted by recent scikit-learn releases.
clf = GradientBoostingClassifier(
    learning_rate=0.1,
    n_estimators=1500,
    min_samples_split=350,
    min_samples_leaf=20,
    max_depth=8,
    max_features="sqrt",
    subsample=0.8,
    random_state=0,
)
clf.fit(X_train, y_train)

# Predict the unlabelled test set and write the submission file, one
# "<row number>,<label>" line per sample with row numbers starting at 1.
X_pre = return_data2.return_test_data()
y_pre = clf.predict(X_pre)

with open("baidu_sub25(2).csv", "w") as f:
    f.writelines(
        "{},{}\n".format(row, int(label))
        for row, label in enumerate(y_pre, start=1)
    )
print("pre over..")


# Report accuracy on the held-out 20% split.
y_pred = clf.predict(X_test)
acc = metrics.accuracy_score(y_test, y_pred)
print(acc)
print(y_pred)

数据处理:

import pandas as pd
import numpy as np
from sklearn import preprocessing
def return_tarin_data():
    """Load the training set from ``data_train.csv``.

    The file is read from the current working directory with explicit
    column names: an ID, six signal columns and a trailing ``label``.

    Returns:
        tuple: ``(dataset_X, dataset_Y)`` where ``dataset_X`` is a 2-D
        array holding the six signal columns (one row per sample) and
        ``dataset_Y`` is a 1-D array holding the ``label`` column.
    """
    feature_cols = ["K1K2驱动信号", "电子锁驱动信号", "急停信号", "门禁信号", "THDV-M", "THDI-M"]
    col_names = ["ID"] + feature_cols + ["label"]
    data = pd.read_csv("data_train.csv", names=col_names)

    # ``.values`` replaces ``DataFrame.as_matrix()``, which no longer exists
    # in current pandas releases.
    dataset_X = data[feature_cols].values
    # Selecting the single column as a Series already yields a flat array.
    dataset_Y = np.asarray(data["label"].values).reshape(len(data))
    return dataset_X, dataset_Y

def return_test_data():
    """Load the unlabelled test set from ``data_test.csv``.

    The file is read from the current working directory with explicit
    column names: an ID followed by six signal columns.

    Returns:
        A 2-D array holding the six signal columns, one row per sample,
        in the same column order used for the training features.
    """
    feature_cols = ["K1K2驱动信号", "电子锁驱动信号", "急停信号", "门禁信号", "THDV-M", "THDI-M"]
    col_names = ["ID"] + feature_cols
    data = pd.read_csv("data_test.csv", names=col_names)

    # ``.values`` replaces ``DataFrame.as_matrix()``, which no longer exists
    # in current pandas releases.
    datasett_X = data[feature_cols].values
    return datasett_X

猜你喜欢

转载自blog.csdn.net/qq_39622065/article/details/80471195