tf.train.Example 主要用于把数据序列化成二进制格式,一般是为了提升 IO 效率和方便管理数据。它的基本构造方式如下:
# Illustrative snippet: the _bytes_feature / _int64_feature / _float_feature
# helpers and the image_buffer / class_label / roi inputs are not defined in
# this file.
feature_map = {
    'image/encoded': _bytes_feature(image_buffer),
    'image/label': _int64_feature(class_label),
    'image/roi': _float_feature(roi),
}
example = tf.train.Example(features=tf.train.Features(feature=feature_map))
完整的用法示例如下。
假设我们有一个 data.txt 文件,其内容为:
21
This is a test data file.
We will convert this text file to bin file.
文件中第一行是个整数,第二行和第三行都是字符串。这是我们处理的原始数据。
import struct
import tensorflow as tf
def read_text_file(text_file, encoding="utf-8"):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        text_file: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        A list with one string per line of the file, in file order, each
        stripped of leading and trailing whitespace (including the newline).
        Blank lines are kept as empty strings.
    """
    with open(text_file, "r", encoding=encoding) as f:
        return [line.strip() for line in f]
def text_to_binary(in_file, out_file):
    """Serialize a text file into one length-prefixed ``tf.train.Example`` record.

    The first line of ``in_file`` is parsed as an integer and stored in the
    ``data_id`` int64 feature. The remaining lines are joined with single
    spaces, UTF-8 encoded and stored in the ``data`` bytes feature. The output
    file contains the record length packed with the native ``'H'`` (unsigned
    short) struct format, followed by the serialized Example bytes.

    Args:
        in_file: Path of the text file to convert.
        out_file: Path of the binary file to create or overwrite.

    Raises:
        ValueError: If ``in_file`` contains no lines, or its first line is
            not an integer.
        struct.error: If the serialized Example is too long for the ``'H'``
            length prefix (normally more than 65535 bytes).
    """
    inputs = read_text_file(in_file)
    if not inputs:
        raise ValueError(
            '%s is empty; expected an integer id on the first line' % in_file
        )

    # ``value`` is the repeated field of the list message (not a method); it
    # carries the actual payload.
    data_id = tf.train.Int64List(value=[int(inputs[0])])
    data = tf.train.BytesList(value=[' '.join(inputs[1:]).encode('utf-8')])
    feature_dict = {
        "data_id": tf.train.Feature(int64_list=data_id),
        "data": tf.train.Feature(bytes_list=data),
    }
    features = tf.train.Features(feature=feature_dict)
    example = tf.train.Example(features=features)
    example_str = example.SerializeToString()

    # Build the length header before opening the output so that any failure
    # above (bad id, oversized record) leaves an existing out_file untouched.
    header = struct.pack('H', len(example_str))

    with open(out_file, 'wb') as writer:
        writer.write(header)
        # Packing bytes with an exact-length '%ds' format returns the same
        # bytes, so the payload is written directly.
        writer.write(example_str)
# Script entry point: convert the sample data.txt into data.bin when this file
# is run directly (not when it is imported).
if __name__ == '__main__':
    text_to_binary('data.txt', 'data.bin')