TensorFlow:从文件读入数据的流程及解释

日期:2018-04-07

参考文档:(原文中的参考链接已失效)

#coding=utf8
import tensorflow as tf

# CSV files to feed into the input pipeline.
csv_files = ['csv/A.csv', 'csv/B.csv', 'csv/C.csv']

# string_input_producer builds a FIFO queue of filenames and registers a
# QueueRunner in the graph's QUEUE_RUNNER collection; its background threads
# keep the queue filled, so producing filenames never blocks the reader.
# shuffle defaults to True — shuffle=False visits the files in list order.
fname_queue = tf.train.string_input_producer(csv_files, shuffle=False)

# TextLineReader emits one newline-delimited line of a file per read() call.
line_reader = tf.TextLineReader()

# read() returns the next (key, value) record pair produced by the reader.
record_key, record_value = line_reader.read(fname_queue)

# decode_csv converts one CSV record into tensors, one tensor per column.
example, label = tf.decode_csv(record_value, record_defaults=[['none'], ['none']])

# shuffle_batch dequeues batch_size elements at random from an internal
# RandomShuffleQueue; the outputs keep the same [example, label] pairing
# as the inputs.
example_batch, label_batch = tf.train.shuffle_batch(
    [example, label],
    batch_size=1,
    capacity=200,
    min_after_dequeue=100,
    num_threads=1)


with tf.Session() as sess:
  # The Coordinator provides a simple mechanism to coordinate the clean
  # termination of the queue-runner threads.
  coord = tf.train.Coordinator()

  # Launch every QueueRunner registered in the graph.
  threads = tf.train.start_queue_runners(sess=sess, coord=coord)
  for step in range(9):
    e_val, l_val = sess.run([example_batch, label_batch])
    print(e_val, l_val)

  coord.request_stop()  # signal all threads to stop
  coord.join(threads)   # wait until all threads have terminated

猜你喜欢

转载自blog.csdn.net/z2539329562/article/details/79844406