第一步:收集数据
系统目标为预测混凝土抗压强度;所用数据集文件为 Concrete_Data.xls(由下文代码读取)。
第二步:加载数据
import pandas as pd
from sklearn import preprocessing,linear_model #导入模型
from sklearn.metrics import r2_score #性能评价得分接近1模型性能越好
from sklearn.model_selection import train_test_split
import numpy as np
import warnings
# Silence every warning category from here on; note that this also hides
# deprecation notices raised by the libraries used below.
warnings.filterwarnings('ignore')

# Read the raw spreadsheet; head() returns the first rows (value not stored).
dataset = pd.read_excel('Concrete_Data.xls')
dataset.head()

# Rename the columns, then separate the features from the value to predict:
# pop() removes 'concrete' from `dataset` in place and returns that column.
dataset.columns = [
    'cement',
    'blash',
    'fly',
    'water',
    'superplastic',
    'coarse',
    'fine',
    'age',
    'concrete',
]
target = dataset.pop('concrete')
第三步:选择模型
使用自己编写的交叉验证器
def CV(X, y, n):
    """Score a standardized linear regression with n-fold cross-validation.

    The samples are split, in their given order (no shuffling), into ``n``
    contiguous folds of ``len(X) // n`` rows each; the last fold additionally
    takes the remainder rows. For every fold a ``StandardScaler`` is fitted
    on the training rows only, applied to both parts, and a
    ``LinearRegression`` is trained and then scored with ``r2_score`` on the
    held-out rows.

    Args:
        X: 2-D array-like of features, one row per sample. ``np.matrix``
            input is accepted and converted to a plain ``ndarray``.
        y: Targets, in any layout that can be reshaped to ``(len(X), -1)``.
        n: Number of folds; must satisfy ``2 <= n <= len(X)``.

    Returns:
        A list with one R^2 score per fold, in fold order.

    Raises:
        ValueError: If ``n`` is smaller than 2 or larger than ``len(X)``.
    """
    # Recent scikit-learn releases reject np.matrix, so always work on
    # plain ndarrays regardless of what the caller passed in.
    X = np.asarray(X)
    length = len(X)
    if n < 2 or n > length:
        # Fail early: n == 0 would divide by zero, n == 1 leaves no training
        # rows, and n > length produces empty test folds.
        raise ValueError(
            'n must be between 2 and the number of samples (%d), got %r'
            % (length, n)
        )
    y = np.reshape(np.asarray(y), (length, -1))

    fold_size = length // n
    regr = linear_model.LinearRegression()
    scoreList = []
    for i in range(n):
        start = i * fold_size
        # The last fold runs to the end so the remainder rows are tested too.
        stop = length if i == n - 1 else start + fold_size

        test_X, test_y = X[start:stop], y[start:stop]
        # For the last fold the second slice is empty, which leaves just the
        # rows before the test fold.
        train_X = np.concatenate((X[:start], X[stop:]), axis=0)
        train_y = np.concatenate((y[:start], y[stop:]), axis=0)

        # Fit the scaler on the training rows only so no statistics of the
        # held-out rows leak into training.
        scaler = preprocessing.StandardScaler().fit(train_X)
        regr.fit(scaler.transform(train_X), train_y)
        predicts = regr.predict(scaler.transform(test_X))
        scoreList.append(r2_score(test_y, predicts))
    return scoreList
# Mean R^2 over 10 folds. Plain ndarrays are passed instead of np.matrix,
# which NumPy discourages and recent scikit-learn estimators reject.
np.mean(CV(np.asarray(dataset), np.asarray(target), 10))
第四步:保存模型
# Re-read the spreadsheet and rebuild the feature frame and the target column.
dataset = pd.read_excel('Concrete_Data.xls')
dataset.columns =['cement','blash','fly','water','superplastic','coarse','fine','age','concrete']
target = dataset.pop('concrete')
# Hold out 10% of the rows; the fixed random_state makes the split repeatable.
X_train,X_test,y_train,y_test = train_test_split(dataset,target,test_size = 0.1,random_state = 88)
# Fit the scaler on the training rows only and reuse it for the held-out rows.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_standard = scaler.transform(X_train)
# NOTE: the held-out rows are scaled here but not scored anywhere in this step.
X_test_standard = scaler.transform(X_test)
regr = linear_model.LinearRegression()
regr.fit(X_train_standard,y_train)
# Save the model and the scaler it was trained with.
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23, so prefer the standalone joblib package and only fall back to the
# vendored copy on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
joblib.dump(regr,'linear_regression_concrete.pkl')
joblib.dump(scaler,'linear_regression_concrete_scaler.pkl')
第五步:加载模型
# Restore the persisted regression model and its matching scaler from disk
# (model first, then scaler).
predictor, scaler = (
    joblib.load('linear_regression_concrete.pkl'),
    joblib.load('linear_regression_concrete_scaler.pkl'),
)
第六步:构建预测系统
def concrete_System():
    """Prompt for one concrete mix on stdin and print its predicted strength.

    Reads eight numeric values with ``input`` (the prompts are in Chinese),
    scales them with the module-level ``scaler``, passes them to the
    module-level ``predictor`` and prints the prediction with two decimals,
    centered in a 60-character line padded with ``=``.

    Raises:
        ValueError: If an entered value cannot be parsed by ``float``.
    """
    # Prompt order must match the feature column order the scaler and the
    # model were fitted with: cement, slag, fly ash, water, superplasticizer,
    # coarse aggregate, fine aggregate, age.
    prompts = (
        '水泥,单位 千克每立方米:',
        '高炉炉渣,单位 千克每立方米:',
        '飞灰,单位 千克每立方米:',
        '水,单位 千克每立方米:',
        '减水剂,单位 千克每立方米:',
        '粗骨料,单位 千克每立方米:',
        '细骨料,单位 千克每立方米:',
        '年龄,单位 天数:',
    )
    values = [float(input(prompt)) for prompt in prompts]

    # A single sample has to be presented as one row of features.
    inputArray = np.array(values).reshape((1, -1))
    standard_input = scaler.transform(inputArray)
    result = predictor.predict(standard_input)

    # predict() returns an array even for a single row. Extract the scalar
    # explicitly: implicit conversion of a size-1 array with ndim > 0 to
    # float is deprecated in NumPy.
    strength = float(np.ravel(result)[0])

    # Format first, then pad, so the banner is always exactly 60 characters
    # wide no matter how many digits the prediction has.
    print(('预测混凝土抗压强度是%.2f' % strength).center(60, '='))
系统运行如下所示: