1.CSV文件（提供图像地址和标签）

1.1 创建

import os


# Build train_list.csv with one "<image path>,<label>" row per entry in `path`.
path = "pic"
filenames = os.listdir(path=path)

with open(file="train_list.csv", mode="w") as fid:
    for filename in filenames:
        # The label is the part of the file name before the first "_",
        # e.g. "1_0.jpg" -> "1".
        label = filename.split(sep="_")[0]
        fid.write(os.path.join(path, filename) + "," + label + "\n")

1.2 读取

import os
import cv2
import tensorflow as tf


# img_add_list = []
# img_label_list = []
# with open("train_list.csv") as f:
#     for img in f.readlines():
#         # strip：移出空格
#         img_add_list.append(img.strip().split(sep=",")[0])
#         img_label_list.append(img.strip().split(sep=",")[1])
#
# # tf.image.decode_jpeg: 将JPEG编码的图片解码为uint8类型的张量
# # tf.image.convert_image_dtype: 对图像进行转换，将图像矩阵转化成TensorFlow需要的张量格式
# img = tf.image.convert_image_dtype(tf.image.decode_jpeg(tf.read_file("pic\\1_0.jpg"), channels=1), dtype=tf.float32)
# print(img)

# Load train_list.csv into two parallel lists: image paths and their
# labels (both kept as strings).
img_add_list = []
img_label_list = []
with open("train_list.csv", "r") as f:
    for row in f:
        # Each row is "<image path>,<label>"; strip the trailing newline first.
        fields = row.strip().split(",")
        img_add_list.append(fields[0])
        img_label_list.append(fields[1])


def get_img(img_path):
    """Build the ops that load one JPEG file as a float32 image tensor.

    img_path: path of the JPEG file to read.
    Returns a symbolic tensor; run it in a session to get the pixel data.
    """
    encoded = tf.read_file(img_path)
    # channels=1 asks the decoder for a single (grayscale) channel.
    decoded = tf.image.decode_jpeg(encoded, channels=1)
    return tf.image.convert_image_dtype(decoded, dtype=tf.float32)


# Evaluate the image tensor and preview it in an OpenCV window.
with tf.Session() as sess:
    # Join the path portably: a hard-coded "pic\\1_1.jpg" only resolves on Windows.
    cv2Img = sess.run(get_img(os.path.join("pic", "1_1.jpg")))
    img2 = cv2.resize(cv2Img, (200, 200))
    cv2.imshow("img2", img2)
    # Block until a key is pressed so the window stays visible.
    cv2.waitKey()

2.TFRecords文件（提供图像特征与标签）

TFRecords是TensorFlow专用的二进制数据文件格式。文件中存放的是序列化后的tf.train.Example协议缓冲区（protocol buffer）消息，每条消息都包含若干特征及其数据内容。通过tf.python_io.TFRecordWriter类，可以把填好数据的Example序列化后逐条写入，最终生成TFRecords文件。

换句话说，tf.train.Example包含一个Features对象，而Features中又包含一个从特征名到Feature的字典。进一步说，每个Feature只能是FloatList、BytesList、Int64List这三种数据格式之一。TFRecords就是通过一个二进制数据文件将特征和标签一并保存，以便于TensorFlow读取。

2.1 TFRecords文件写入

tf.python_io.TFRecordWriter(path, options):仅能写入二进制数据

import tensorflow as tf
import numpy as np


# Create the TFRecords writer. `options` is left at its default (no
# compression): "error" is not a valid TFRecord option / compression type.
# The `with` block closes the file even if a write fails part-way.
with tf.python_io.TFRecordWriter("trainArray.tfrecords") as writer:
    for _ in range(100):
        # One record: a (1, 3) float64 array of random values.
        randomArray = np.random.random((1, 3))

        # Serialize the array to raw bytes (3 values * 8 bytes each).
        array_raw = randomArray.tobytes()

        example = tf.train.Example(features=tf.train.Features(feature={
            "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),
            "img_raw": tf.train.Feature(bytes_list=tf.train.BytesList(value=[array_raw])),
        }))

        # SerializeToString turns the protocol message into a binary string.
        writer.write(example.SerializeToString())

2.2 TFRecords文件读取

import tensorflow as tf
import numpy as np
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"


# Feed the file name(s) into the input pipeline's filename queue.
filename_queue = tf.train.string_input_producer(string_tensor=["trainArray.tfrecords"], num_epochs=None)

# Create the TFRecords reader.
# The reader is symbolic: nothing is read until it is run in a session.
reader = tf.TFRecordReader()

key, serialized_example = reader.read(filename_queue)

# tf.parse_single_example: parse one serialized Example.
# tf.FixedLenFeature: describes a fixed-length input feature.
features = tf.parse_single_example(serialized_example, features={"label": tf.FixedLenFeature([], tf.int64),
                                                                 "img_raw": tf.FixedLenFeature([], tf.string)})
label = features["label"]
img_raw = features["img_raw"]

# tf.decode_raw reinterprets the string's bytes as a numeric vector.
# trainArray.tfrecords stores each record as the raw bytes of a (1, 3)
# float64 array, so decode as float64 and restore that shape; decoding as
# uint8 would only expose the 24 individual bytes of the floats.
img = tf.decode_raw(img_raw, tf.float64)
img = tf.reshape(img, [1, 3])

# tf.train.shuffle_batch: build batches by randomly sampling from a queue.
label_batch, img_batch = tf.train.shuffle_batch([label, img], batch_size=1, capacity=200, min_after_dequeue=100, num_threads=6)

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

coord = tf.train.Coordinator()
# tf.train.start_queue_runners: start every queue runner in the graph.
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

try:
    label_val, img_val = sess.run([label_batch, img_batch])
    print(label_val)
    print("*" * 100)
    print(img_val)
finally:
    # Stop the queue-runner threads and release the session; otherwise the
    # background threads are still running when the script ends.
    coord.request_stop()
    coord.join(threads)
    sess.close()

3. 图片文件的创建与读取

图片存储和命名形式如下：pic目录下的每个子目录以整数类别标签命名（如 pic/0、pic/1），子目录中存放属于该类别的图片文件。

3.1 图片文件的创建

# 创建
import tensorflow as tf
import os
from PIL import Image


# Write every image found under pic/<label>/ into train.tfrecords.
path = "pic"
filenames = os.listdir(path=path)

# The `with` block closes the TFRecords file even if an image fails to load.
with tf.python_io.TFRecordWriter("train.tfrecords") as writer:
    for name in filenames:
        # Each sub-directory name is used as the integer class label.
        class_path = os.path.join(path, name)
        for img_name in os.listdir(class_path):
            img_path = os.path.join(class_path, img_name)
            # Force 3-channel RGB so every record holds exactly
            # 500 * 500 * 3 bytes; grayscale or RGBA inputs would otherwise
            # produce a different byte count and break a [500, 500, 3] reshape.
            with Image.open(img_path) as img:
                img_raw = img.convert("RGB").resize((500, 500)).tobytes()
            example = tf.train.Example(features=tf.train.Features(feature={
                "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[int(name)])),
                "image": tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
            }))
            writer.write(example.SerializeToString())

3.2 图片文件的循环读取

import tensorflow as tf
import cv2
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"


def read_and_decode(filename):
    """Build the ops that read one (image, label) pair from a TFRecords file.

    filename: path of the TFRecords file to read.
    Returns (img, label): the decoded uint8 image tensor reshaped to
    [500, 500, 3], and the int64 label tensor.
    """
    queue = tf.train.string_input_producer([filename])

    # TFRecordReader.read(queue) yields a (key, value) pair; only the
    # serialized value is needed here.
    record_reader = tf.TFRecordReader()
    _, serialized = record_reader.read(queue)

    feature_spec = {
        "label": tf.FixedLenFeature([], tf.int64),
        "image": tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized, features=feature_spec)

    # Reinterpret the raw bytes as uint8 pixels and restore the image shape.
    pixels = tf.decode_raw(parsed["image"], tf.uint8)
    return tf.reshape(pixels, shape=[500, 500, 3]), parsed["label"]


if __name__ == "__main__":
    filename = "train.tfrecords"
    img, label = read_and_decode(filename=filename)

    # tf.train.shuffle_batch: randomly sample batches from the queue.
    img_batch, label_batch = tf.train.shuffle_batch([img, label], batch_size=1, capacity=10, min_after_dequeue=1)

    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for i in range(20):
            # Fetch both tensors in ONE run call: two separate calls each
            # dequeue a new batch, so the printed label would not belong to
            # the displayed image.
            img_val, label_val = sess.run([img_batch, label_batch])
            # Drop the batch dimension (batch_size=1) -> [500, 500, 3].
            frame = img_val[0]
            # cv2.imshow interprets 3-channel data as BGR; this assumes the
            # records hold RGB bytes (as PIL's tobytes() gives for RGB images).
            cv2.imshow("cool", cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
            cv2.waitKey()
            print(label_val)
    finally:
        # Stop the queue-runner threads and release the session.
        coord.request_stop()
        coord.join(threads)
        sess.close()

python3深度学习：TensorFlow数据的生成与读取（主要针对图像处理）

1.CSV文件（提供图像地址和标签）

1.1 创建

1.2 读取

2.TFRecords文件（提供图像特征与标签）

2.1 TFRecords文件写入

2.2 TFRecords文件读取

3. 图片文件的创建与读取

3.1 图片文件的创建

猜你喜欢

python3__深度学习：TensorFlow__数据的生成与读取（主要针对图像处理）

1.CSV文件（提供图像地址和标签）

1.1 创建

1.2 读取

2.TFRecords文件（提供图像特征与标签）

2.1 TFRecords文件写入

2.2 TFRecords文件读取

3. 图片文件的创建与读取

3.1 图片文件的创建

猜你喜欢

python3深度学习：TensorFlow数据的生成与读取（主要针对图像处理）