版权声明:本文为博主原创文章,未经博主允许不得转载,希望能在相互交流中共同成长。【大红色:一级标题 绿色:二级标题 二红色:三级标题 黄色:四级标题】 https://blog.csdn.net/admin_maxin/article/details/85868501
1.CSV文件(提供图像地址和标签)
1.1 创建
import os
# Build train_list.csv: one "<image path>,<label>" row per entry in `path`.
path = "pic"
filenames = os.listdir(path=path)
with open(file="train_list.csv", mode="w") as fid:
    # Iterate over the names directly instead of indexing with range(len(...)).
    for filename in filenames:
        # The label is the part of the file name before the first "_"
        # (e.g. "1_0.jpg" -> "1").
        label = filename.split(sep="_")[0]
        # os.path.join inserts the platform separator between folder and name.
        strText = os.path.join(path, filename) + "," + label + "\n"
        fid.write(strText)
1.2 读取
import os
import cv2
import tensorflow as tf
# img_add_list = []
# img_label_list = []
# with open("train_list.csv") as f:
# for img in f.readlines():
# # strip:移除首尾的空白字符(包括换行符)
# img_add_list.append(img.strip().split(sep=",")[0])
# img_label_list.append(img.strip().split(sep=",")[1])
#
# # tf.image.decode_jpeg:将JPEG编码的图片数据解码为uint8类型的图像张量
# # tf.image.convert_image_dtype: 转换图像张量的数据类型(此处转换为float32,像素值会被缩放到[0, 1]区间)
# img = tf.image.convert_image_dtype(tf.image.decode_jpeg(tf.read_file("pic\\1_0.jpg"), channels=1), dtype=tf.float32)
# print(img)
# Load the image paths and labels stored in train_list.csv into two
# parallel lists: img_add_list[i] is the path, img_label_list[i] its label.
img_add_list = []
img_label_list = []
with open("train_list.csv", "r") as f:
    # Iterate the file object directly; readlines() would load every line
    # into memory first for no benefit.
    for line in f:
        line = line.strip()
        if not line:
            # A blank line has no "path,label" pair; indexing field 1 would
            # raise IndexError, so skip it.
            continue
        # Strip and split once, then reuse the fields.
        fields = line.split(",")
        img_add_list.append(fields[0])
        img_label_list.append(fields[1])
def get_img(img_path):
    """Build the ops that load a JPEG file as a single-channel float32 tensor.

    Args:
        img_path: Path of the JPEG file, passed to ``tf.read_file``.

    Returns:
        The tensor returned by ``tf.image.convert_image_dtype`` for the
        decoded image; it has to be evaluated in a session to get pixels.
    """
    raw_bytes = tf.read_file(img_path)
    # channels=1 requests a single colour channel; the original note lists
    # 0, 1, 3 or 4 as the accepted values.
    decoded = tf.image.decode_jpeg(raw_bytes, channels=1)
    as_float = tf.image.convert_image_dtype(decoded, dtype=tf.float32)
    return as_float
# Evaluate the decoding graph for one sample image and preview it.
with tf.Session() as sess:
    # Build the path with os.path.join rather than a literal "pic\\1_1.jpg",
    # which only resolves on Windows.
    cv2Img = sess.run(get_img(os.path.join("pic", "1_1.jpg")))
    img2 = cv2.resize(cv2Img, (200, 200))
    cv2.imshow("img2", img2)
    # Block until a key is pressed, then release the preview window.
    cv2.waitKey()
    cv2.destroyAllWindows()
2.TFRecords文件(提供图像特征与标签)
TFRecords是TensorFlow专用的数据文件格式。其中包含了tf.train.Example协议内存块(protocol buffer,也译作"协议缓冲区"),它是一种包含特征值和数据内容的数据格式。先将数据填入Example协议内存块并序列化,再通过tf.python_io.TFRecordWriter类写入,最终生成TFRecords文件。
换句话说,tf.train.Example包含着若干数据特征(Features),而Features中又包含一个Feature字典。更进一步地说,每一个Feature又包含FloatList、BytesList或Int64List这三种数据格式之一。TFRecords就是通过一个二进制数据文件将特征和标签保存起来,以便于TensorFlow读取。
2.1 TFRecords文件写入
tf.python_io.TFRecordWriter(path, options):仅能写入二进制数据
import tensorflow as tf
import numpy as np
# Write 100 random 1x3 arrays, each with label 0, to trainArray.tfrecords.
#
# The original call passed options="error", which is not a compression
# option; the argument is omitted here so the records are written
# uncompressed. The `with` block closes the writer even if a write fails.
with tf.python_io.TFRecordWriter("trainArray.tfrecords") as writer:
    for _ in range(100):
        randomArray = np.random.random((1, 3))
        # Serialise the array to its raw bytes (np.random.random yields
        # float64, so the reader must decode with the same dtype).
        array_raw = randomArray.tobytes()
        example = tf.train.Example(features=tf.train.Features(feature={
            "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),
            "img_raw": tf.train.Feature(bytes_list=tf.train.BytesList(value=[array_raw])),
        }))
        # SerializeToString turns the Example message into a binary string.
        writer.write(example.SerializeToString())
2.2 TFRecords文件读取
import tensorflow as tf
import numpy as np
import os
# Set TensorFlow's C++ log-level variable (presumably "2" hides INFO and
# WARNING output -- confirm against the TensorFlow documentation).
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

# Queue that feeds the list of file names into the input pipeline.
filename_queue = tf.train.string_input_producer(string_tensor=["trainArray.tfrecords"], num_epochs=None)

# The reader is symbolic: nothing is read until the ops run in a session.
reader = tf.TFRecordReader()
key, serialized_example = reader.read(filename_queue)

# Parse one serialized Example into its fixed-length features.
features = tf.parse_single_example(serialized_example, features={
    "label": tf.FixedLenFeature([], tf.int64),
    "img_raw": tf.FixedLenFeature([], tf.string),
})
label = features["label"]
img_raw = features["img_raw"]

# tf.decode_raw reinterprets the bytes of a string as a numeric vector.
# The writer stored np.random.random((1, 3)).tobytes(), i.e. three float64
# values, so decode as float64 and restore the (1, 3) shape. Decoding as
# uint8 with shape [3, 8] only exposes the raw bytes, not the saved numbers.
img = tf.decode_raw(img_raw, tf.float64)
img = tf.reshape(img, [1, 3])

# tf.train.shuffle_batch builds batches by randomly sampling a filled queue.
label_batch, img_batch = tf.train.shuffle_batch([label, img], batch_size=1, capacity=200, min_after_dequeue=100, num_threads=6)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    # Start every queue runner registered in the graph and keep the thread
    # handles so they can be stopped and joined afterwards.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        label_val, img_val = sess.run([label_batch, img_batch])
    finally:
        # Shut the background threads down before the session closes.
        coord.request_stop()
        coord.join(threads)

print(label_val)
print("*" * 100)
print(img_val)
3. 图片文件的创建与读取
图片存储和命名形式如下:
3.1 图片文件的创建
# 创建
import tensorflow as tf
import os
from PIL import Image
# Convert the images under `path` into train.tfrecords. Each sub-folder of
# `path` is one class; its name is converted with int() and used as label.
path = "pic"
filenames = os.listdir(path=path)
# The `with` block closes the writer even if an image fails to load.
with tf.python_io.TFRecordWriter("train.tfrecords") as writer:
    for name in filenames:
        class_path = os.path.join(path, name)
        if not os.path.isdir(class_path):
            # Only class folders hold images; os.listdir would raise on a
            # plain file.
            continue
        for img_name in os.listdir(class_path):
            img_path = os.path.join(class_path, img_name)
            # Close the image file as soon as its pixels have been extracted.
            with Image.open(img_path) as img:
                # Force three channels so every record holds exactly
                # 500*500*3 bytes, the shape the reader reshapes to.
                img_raw = img.convert("RGB").resize((500, 500)).tobytes()
            example = tf.train.Example(features=tf.train.Features(feature={
                "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[int(name)])),
                "image": tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
            }))
            writer.write(example.SerializeToString())
3.2 图片文件的循环读取
import tensorflow as tf
import cv2
import os
# Set TensorFlow's C++ log-level variable (presumably "2" hides INFO and
# WARNING output -- confirm against the TensorFlow documentation).
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
def read_and_decode(filename, image_shape=(500, 500, 3)):
    """Build the ops that read one (image, label) pair from a TFRecords file.

    Args:
        filename: Path of the TFRecords file to read.
        image_shape: Shape the decoded uint8 image is reshaped to. The
            default matches the previously hard-coded ``[500, 500, 3]``.

    Returns:
        A tuple ``(img, label)``: the reshaped uint8 image tensor and the
        int64 label tensor parsed from the same record.
    """
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    # read() returns a (key, value) pair; only the serialized value is used.
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example, features={
        "label": tf.FixedLenFeature([], tf.int64),
        "image": tf.FixedLenFeature([], tf.string),
    })
    label = features["label"]
    img_raw = features["image"]
    # Reinterpret the stored byte string as a flat uint8 vector, then give
    # it the requested image shape.
    img = tf.decode_raw(img_raw, tf.uint8)
    img = tf.reshape(img, shape=list(image_shape))
    return img, label
if "__main__" == __name__:
    # Show 20 randomly sampled images from train.tfrecords with their labels.
    filename = "train.tfrecords"
    img, label = read_and_decode(filename=filename)
    # tf.train.shuffle_batch samples randomly from the filled queue.
    img_batch, label_batch = tf.train.shuffle_batch([img, label], batch_size=1, capacity=10, min_after_dequeue=1)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for _ in range(20):
                # Fetch image and label in ONE run call. Two separate calls
                # each dequeue a new batch, so the printed label would
                # belong to a different image than the one displayed.
                img_val, label_val = sess.run([img_batch, label_batch])
                # batch_size is 1, so the batch holds a single image.
                frame = img_val[0]
                # NOTE(review): the records are assumed to hold RGB pixels
                # (written from PIL), while cv2.imshow expects BGR order.
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                cv2.imshow("cool", frame)
                cv2.waitKey()
                print(label_val)
        finally:
            # Stop and join the queue-runner threads, then close the window.
            coord.request_stop()
            coord.join(threads)
            cv2.destroyAllWindows()