1.模型的保存与恢复：

#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Time:       2019/11/30 16:52
# @Author:     weiz
# @File:       09_up_and_running_with_tensorflow.py
# @Description:

# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import tensorflow as tf

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Replace the default TensorFlow graph and reseed TensorFlow and NumPy.

    Args:
        seed: Value handed to both ``np.random.seed`` and
            ``tf.set_random_seed``.
    """
    # NumPy's generator is independent of the graph, so it can be seeded first.
    np.random.seed(seed)
    # The TensorFlow seed is set only after the default graph has been reset.
    tf.reset_default_graph()
    tf.set_random_seed(seed)


from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
# Load the California housing data; m is the number of rows, n the number of
# feature columns.
housing = fetch_california_housing()
m, n = housing.data.shape
# Fit the scaler on the features, then apply it to the same data.
scaler = StandardScaler().fit(housing.data)
scaled_housing_data = scaler.transform(housing.data)
# Prepend a column of ones so the bias is learned as part of theta.
scaled_housing_data_plus_bias = np.hstack([np.ones((m, 1)), scaled_housing_data])

# Start from a fresh default graph with fixed seeds (see reset_graph).
reset_graph()

n_epochs = 1000  # number of iterations of the training loop
learning_rate = 0.01  # passed to GradientDescentOptimizer

# The whole data set is embedded in the graph as constants, so every training
# step uses all m rows.
X = tf.constant(scaled_housing_data_plus_bias, dtype=tf.float32, name="X")
y = tf.constant(housing.target.reshape(-1, 1), dtype=tf.float32, name="y")  # targets as one column
# One parameter per feature plus one for the bias column, randomly initialised
# between -1.0 and 1.0 with a fixed op seed.
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")  # linear model: X times theta
error = y_pred - y  # per-row residual
mse = tf.reduce_mean(tf.square(error), name="mse")  # mean squared error (the loss)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)  # running this op performs one update step

init = tf.global_variables_initializer()
# Saver with default arguments; it is used by the training loop to write checkpoints.
saver = tf.train.Saver()

import os

# saver.save() can fail when the target directory is missing (the original
# note asked the reader to create "tmp" by hand), so create it up front.
checkpoint_dir = "./tmp"
if not os.path.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)

with tf.Session() as sess:
    # To resume from a finished run instead of starting from freshly
    # initialised variables, replace the next line with:
    #     saver.restore(sess, "./tmp/my_model_final.ckpt")
    sess.run(init)

    for epoch in range(n_epochs):
        if epoch % 100 == 0:     # checkpoint every 100 epochs
            print("Epoch", epoch, "MSE =", mse.eval())
            # To save only selected variables, build the saver as
            # tf.train.Saver({"weights": theta}) instead.
            save_path = saver.save(sess, "./tmp/my_model.ckpt")
        sess.run(training_op)

    # Read the trained parameters while the session is still open.
    best_theta = theta.eval()
    save_path = saver.save(sess, "./tmp/my_model_final.ckpt")

print(best_theta)

2.启用tensorboard：

#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Time:       2019/11/30 16:52
# @Author:     weiz
# @File:       09_up_and_running_with_tensorflow.py
# @Description:

# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import tensorflow as tf

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Replace the default TensorFlow graph and reseed TensorFlow and NumPy.

    Args:
        seed: Value handed to both ``np.random.seed`` and
            ``tf.set_random_seed``.
    """
    # NumPy's generator is independent of the graph, so it can be seeded first.
    np.random.seed(seed)
    # The TensorFlow seed is set only after the default graph has been reset.
    tf.reset_default_graph()
    tf.set_random_seed(seed)


def fetch_batch(epoch, batch_index, batch_size):
    """Return a reproducible random mini-batch of the housing data.

    Rows are drawn with ``np.random.randint`` (so a row may appear more than
    once) after reseeding NumPy's global generator; the same
    ``(epoch, batch_index)`` pair therefore always yields the same rows.

    Args:
        epoch: Index of the current epoch.
        batch_index: Index of the batch within the epoch.
        batch_size: Number of rows to draw.

    Returns:
        A tuple ``(X_batch, y_batch)``: the selected rows of
        ``scaled_housing_data_plus_bias`` and the matching targets as a
        single-column array.
    """
    # Seed with the global step number so each batch is deterministic.
    global_step = epoch * n_batches + batch_index
    np.random.seed(global_step)
    rows = np.random.randint(m, size=batch_size)
    targets = housing.target.reshape(-1, 1)
    return scaled_housing_data_plus_bias[rows], targets[rows]


from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
# Load the California housing data; m is the number of rows, n the number of
# feature columns.
housing = fetch_california_housing()
m, n = housing.data.shape
# Fit the scaler on the features, then apply it to the same data.
scaler = StandardScaler().fit(housing.data)
scaled_housing_data = scaler.transform(housing.data)
# Prepend a column of ones so the bias is learned as part of theta.
scaled_housing_data_plus_bias = np.hstack([np.ones((m, 1)), scaled_housing_data])

# Start from a fresh default graph with fixed seeds (see reset_graph).
reset_graph()

from datetime import datetime                       # tensorboard
# Every run logs into its own timestamped sub-directory of tf_logs.
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")    # tensorboard
root_logdir = "tf_logs"                             # tensorboard
logdir = "{}/run-{}/".format(root_logdir, now)      # tensorboard

# Hyper-parameters, each defined exactly once so the value the training loop
# uses is the value shown here.
n_epochs = 10
learning_rate = 0.01
batch_size = 100
n_batches = int(np.ceil(m / batch_size))  # batches needed to cover m rows once

# Placeholders are fed one mini-batch at a time; None leaves the batch
# dimension unspecified.
X = tf.placeholder(tf.float32, shape=(None, n + 1), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n + 1, 1], -1.0, 1.0, seed=42), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")
error = y_pred - y
mse = tf.reduce_mean(tf.square(error), name="mse")
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)

init = tf.global_variables_initializer()

# Scalar summary of the loss, and a writer that stores the graph definition
# plus any summaries added later under logdir.
mse_summary = tf.summary.scalar('MSE', mse)                             # tensorboard
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())     # tensorboard

# The try/finally guarantees the event file is closed even if training is
# interrupted or raises.
try:
    with tf.Session() as sess:
        sess.run(init)

        for epoch in range(n_epochs):
            for batch_index in range(n_batches):
                X_batch, y_batch = fetch_batch(epoch, batch_index, batch_size)
                # Writing summaries slows training down, so log only every
                # tenth batch rather than every step.
                if batch_index % 10 == 0:
                    summary_str = mse_summary.eval(feed_dict={X: X_batch, y: y_batch})    # tensorboard
                    step = epoch * n_batches + batch_index
                    file_writer.add_summary(summary_str, step)                            # tensorboard
                sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        # Read the trained parameters while the session is still open.
        best_theta = theta.eval()
finally:
    file_writer.close()

print(best_theta)

# To view the logs:
#   1. In a terminal, change to the directory the program was run from
#      (check that the right virtual environment is active).
#   2. Run: tensorboard --logdir tf_logs/
#   3. Open the address printed by that command in a browser.
#   4. Press CTRL + C to stop the server.

3.命名作用域

# Name scopes can be used to keep the graph readable when the network gets
# complex. Here the error and MSE ops are defined inside a name scope
# called "loss".
with tf.name_scope("loss") as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name="mse")

4.模块化

不友好的代码：

# Hand-written version: each ReLU repeats the same weights / bias / matmul /
# maximum code with a different numeric suffix.
reset_graph()

n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")

# One weight column and one scalar bias per ReLU.
w1 = tf.Variable(tf.random_normal((n_features, 1)), name="weights1")
w2 = tf.Variable(tf.random_normal((n_features, 1)), name="weights2")
b1 = tf.Variable(0.0, name="bias1")
b2 = tf.Variable(0.0, name="bias2")

# Linear part of each unit.
z1 = tf.add(tf.matmul(X, w1), b1, name="z1")
z2 = tf.add(tf.matmul(X, w2), b2, name="z2")

relu1 = tf.maximum(z1, 0., name="relu1")
# NOTE: relu2 is built from z1, not z2. This copy-and-paste mistake is part of
# the example: it shows how error-prone the repetitive style is.
relu2 = tf.maximum(z1, 0., name="relu2")  # Oops, cut&paste error! Did you spot it?

output = tf.add(relu1, relu2, name="output")

易维护的代码：

# Start from a fresh default graph with fixed seeds (see reset_graph).
reset_graph()


def relu(X):
    """Add one ReLU unit, ``max(X * weights + bias, 0)``, to the default graph.

    All ops are created inside a name scope called "relu". Each call creates
    its own weights and bias variables.

    Args:
        X: A 2-D tensor whose second dimension is statically known.

    Returns:
        The output tensor of the unit.
    """
    with tf.name_scope("relu"):
        # One weight per input column.
        n_inputs = int(X.get_shape()[1])
        weights = tf.Variable(tf.random_normal((n_inputs, 1)), name="weights")
        bias = tf.Variable(0.0, name="bias")
        pre_activation = tf.add(tf.matmul(X, weights), bias, name="z")
        return tf.maximum(pre_activation, 0., name="max")


n_features = 3
X = tf.placeholder(tf.float32, shape=(None, n_features), name="X")
# Five independent units built from the same function, summed element-wise.
relus = [relu(X) for _ in range(5)]
output = tf.add_n(relus, name="output")

# Write the graph definition to disk so it can be inspected in TensorBoard.
file_writer = tf.summary.FileWriter("logs/relu2", tf.get_default_graph())
file_writer.close()

当创建一个节点时，TensorFlow会检查这个名字是否已经存在；如果已经存在，它会为其添加一个下划线和一个索引以保证唯一性。比如上面的五个ReLU都包含名为“weights”“bias”“z”和“max”的节点，它们所在的命名作用域依次为“relu”“relu_1”……“relu_4”：第一个ReLU的节点名为“relu/weights”“relu/bias”；第二个为“relu_1/weights”“relu_1/bias”；后面依次类推。

5.scikit-learn模型保存、载入

5.1 利用pickle

import pickle

# Save: fit the model, serialise it to bytes and write them to disk.
model.fit(train_X, train_y)
s = pickle.dumps(model)
# The file must be opened in binary mode because pickle produces bytes; the
# context manager closes it even if the write fails.
with open('svm.model', "wb+") as f:
    f.write(s)
print("Done\n")

# Load: read the bytes back (binary mode again) and rebuild the model.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
with open('svm.model', 'rb') as f2:
    s2 = f2.read()
model1 = pickle.loads(s2)
expected = test_y
predicted = model1.predict(test_X)

5.2 利用joblib（推荐）

# sklearn.externals.joblib has been removed from recent scikit-learn releases;
# prefer the standalone joblib package and fall back to the bundled copy only
# on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Save: fit the model and dump it to a file.
model.fit(train_X, train_y)
joblib.dump(model, "train1_model.m")
print("Done\n")

# Load: rebuild the model from the file and use it for prediction.
# NOTE: joblib files are pickle-based -- only load files you trust.
model1 = joblib.load("train1_model.m")
expected = test_y
predicted = model1.predict(test_X)

SeventhBlue

发布了82 篇原创文章 · 获赞 126 · 访问量 6万+

私信关注

tensorflow系统学习（2）：模型保存、恢复和可视化