import os, random, shutil
# Create the output directory that every generated list file is written to.
test_path = './test_path/'
try:
    # Attempt the creation directly instead of checking os.path.exists()
    # first: this avoids the window between the check and the creation.
    os.makedirs(test_path)
except FileExistsError:
    # The path is already present; message text is kept as originally written.
    print('test_path is exit')
else:
    print('test_path is Ok')
new_train_list_path = './test_path/new_train_list.txt'
old_train_list_path = './train_list.txt'

# Rewrite every "<dir>/<img> <pid>" line of the old list as "<img> <pid>"
# (i.e. drop the leading "train/" directory) and store the result in
# new_train_list.txt.
# Both files are managed by `with`, so they are closed even if a line fails
# to parse; the input is opened first so a missing train_list.txt does not
# leave an empty output file behind.
with open(old_train_list_path, 'r') as f_old_train, \
        open(new_train_list_path, 'w') as f_new_train:
    for line in f_old_train:
        fields = line.split()
        if not fields:
            # Blank line (e.g. a trailing newline at the end of the file).
            continue
        # First field is the image path, second field is the pid.
        img = fields[0].split('/')[1]
        pid = fields[1]
        f_new_train.write(img + ' ' + pid + '\n')
## 划分test_split.txt
import pandas as pd
import numpy as np
# Load the "<img> <pid>" list stored at new_train_list_path.
new_train_df = pd.read_table(new_train_list_path, sep=' ', header=None)
new_train_df.columns = ['img', 'pid']

# Output locations. These must be bound before the clean-up below touches
# them (using them before assignment raises NameError).
finally_train_list = './test_path/finally_train_list.txt'
test_split_100 = './test_path/test_split_1200.txt'

# Remove the outputs of a previous run, if any.
for stale_path in (finally_train_list, test_split_100):
    if os.path.exists(stale_path):
        os.remove(stale_path)

# Draw 1200 distinct pids at random. random.sample raises ValueError when the
# list holds fewer than 1200 unique pids.
all_random_pid = random.sample(list(new_train_df['pid'].unique()), 1200)

# Test split: all rows of the sampled pids, grouped pid by pid in sampling
# order. DataFrame.append was removed in pandas 2.0, so the per-pid pieces are
# combined with a single pd.concat; groupby avoids rescanning the whole frame
# once per pid.
pid_groups = new_train_df.groupby('pid', sort=False)
test_split_100_df = pd.concat(
    [pid_groups.get_group(random_pid) for random_pid in all_random_pid]
)

# Final training list: every row whose pid was not sampled.
new_train_df = new_train_df[~new_train_df['pid'].isin(all_random_pid)]

# DataFrame.to_csv defaults are used (comma separated, header and index
# written), exactly as before.
test_split_100_df.to_csv(test_split_100)
new_train_df.to_csv(finally_train_list)

# Notebook-style preview of the split; has no visible effect in a plain script.
test_split_100_df.head(10)
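# Article title: splitting train/test datasets -- select the rows with given
# ids from a DataFrame and save them to a txt file.
# Page navigation residue: "You may also like"
# Reposted from blog.csdn.net/c2250645962/article/details/103253996
# Page navigation residue: "Today's picks"
# Page navigation residue: "Weekly ranking"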