tf.train.Example

tf.train.Example 是 TensorFlow 定义的一种 Protocol Buffer 消息,主要用于把数据序列化成二进制格式(例如写入 TFRecord 文件),一般是为了提升 IO 效率和方便管理数据。

 # Build a tf.train.Example whose Features map holds three named entries:
 # 'image/encoded', 'image/label' and 'image/roi'.
 # NOTE(review): _bytes_feature, _int64_feature, _float_feature and the
 # variables image_buffer, class_label, roi are not defined in this snippet;
 # presumably each helper wraps its argument in a tf.train.Feature of the
 # matching list type (bytes / int64 / float) -- confirm against their
 # definitions.
 example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': _bytes_feature(image_buffer),
        'image/label': _int64_feature(class_label),
        'image/roi': _float_feature(roi)
    }))

用法如下

假设我们有一个data.txt文件,其内容为

21
This is a test data file.
We will convert this text file to bin file.

文件中第一行是个整数,第二行和第三行都是字符串。这是我们处理的原始数据。
 

import struct
import tensorflow as tf


def read_text_file(text_file):
    """Read ``text_file`` and return its lines as a list of strings.

    Every line has its leading and trailing whitespace (including the
    newline) stripped. Blank lines are kept as empty strings.
    """
    with open(text_file, "r") as handle:
        return [raw_line.strip() for raw_line in handle]


def text_to_binary(in_file, out_file):
    """Convert a text file into one length-prefixed tf.train.Example record.

    The first line of ``in_file`` is parsed as an integer and stored under
    the "data_id" feature. The remaining lines are joined with single
    spaces, UTF-8 encoded and stored under the "data" feature. ``out_file``
    then receives the length of the serialized Example packed with struct
    format 'H', followed by the serialized Example bytes.

    Args:
        in_file: Path of the text file to convert.
        out_file: Path of the binary file to write (opened with mode 'wb').

    Raises:
        IndexError: If ``in_file`` contains no lines.
        ValueError: If the first line cannot be parsed as an integer.
        struct.error: If the serialized length does not fit the 'H'
            (unsigned short) prefix.
    """
    inputs = read_text_file(in_file)

    # Build, serialize and pack everything before opening out_file, so that a
    # malformed input or an oversized record fails without leaving a
    # truncated output file behind.
    # `value` is the repeated field of Int64List / BytesList that carries
    # the actual data.
    data_id = tf.train.Int64List(value=[int(inputs[0])])
    data = tf.train.BytesList(value=[' '.join(inputs[1:]).encode('utf-8')])

    feature_dict = {
        "data_id": tf.train.Feature(int64_list=data_id),
        "data": tf.train.Feature(bytes_list=data)
    }
    features = tf.train.Features(feature=feature_dict)

    example = tf.train.Example(features=features)
    example_str = example.SerializeToString()

    # Length prefix lets a reader know how many bytes of Example follow.
    length_prefix = struct.pack('H', len(example_str))

    with open(out_file, 'wb') as writer:
        writer.write(length_prefix)
        # example_str is already a bytes object of exactly that length, so it
        # is written as-is (same bytes as packing it with '%ds').
        writer.write(example_str)


if __name__ == '__main__':
    # Script entry point: convert the sample data.txt into data.bin.
    text_to_binary(in_file='data.txt', out_file='data.bin')

猜你喜欢

转载自blog.csdn.net/weixin_38145317/article/details/89603620