# encoding: utf-8
"""Sort text files (and their same-named .jpg images) by keyword match.

The script walks a four-level directory tree, looks at every file whose name
contains ``C.txt`` or ``CA.txt`` and checks whether its text (with spaces
removed) contains any of the configured keywords.  Matching files are copied,
together with the ``.jpg`` of the same base name, to
``<matched_root>/<keyword>/<third-level dir>``; files that match no keyword
are copied to ``<unknown_root>/<third-level dir>``.
"""
import argparse
import os
import random
import re
import shutil
import sys
import time
from glob import glob
from shutil import copy

# The third-party modules below are not used by any code in this file.  The
# names are kept for compatibility, but the imports are optional so that the
# script still starts on machines where the packages are not installed.
try:
    import numpy as np
except ImportError:
    np = None
try:
    from scipy import misc
except ImportError:
    misc = None
try:
    import send2trash
except ImportError:
    send2trash = None

# Default locations and keywords (unchanged from the original script).
DEFAULT_DATASET_PATH = "E:\\文件路径"
DEFAULT_MATCHED_ROOT = "E:\\工作\\清洗的干净数据"
DEFAULT_UNKNOWN_ROOT = "E:\\我最帅了人见人爱"
# NOTE(review): the second keyword ends with a typographic quote character
# (’) inside the string, which looks like a typo -- confirm before changing.
DEFAULT_KEY_LIST = ['狗', '猫’', '老叔']


def save_classes_index(classes, txt_file):
    """Write one ``index name image_count`` line per class to *txt_file*.

    Args:
        classes: iterable of objects exposing ``index``, ``name`` and
            ``image_paths`` attributes (e.g. ``ImageClass``).
        txt_file: path of the UTF-8 text file to (over)write.
    """
    with open(txt_file, 'w', encoding='UTF-8') as f:
        for cls in classes:
            f.write('%s %s %s\n' % (cls.index, cls.name, len(cls.image_paths)))


class ImageClass():
    "Stores the paths to images for a given class"

    def __init__(self, index, name, image_paths):
        self.index = index
        self.name = name
        self.image_paths = image_paths

    def __str__(self):
        # %-formatting accepts a non-string index (plain ``+`` concatenation
        # raised TypeError for e.g. an int index).
        return '%s, %s, %d images' % (self.index, self.name,
                                      len(self.image_paths))

    def __len__(self):
        return len(self.image_paths)


def context_clean(context):
    """Return *context* with every newline and space character removed.

    The original body also ran a loop over names that are not defined
    anywhere in this file, so every call raised ``NameError``; that loop did
    not affect the returned value and has been dropped.
    """
    return context.replace('\n', '').replace(' ', '')


def save_flie(old_dir, new_dir):
    """Rename (move) *old_dir* to *new_dir* via ``os.rename``."""
    os.rename(old_dir, new_dir)


def index_image(dir, text_name):
    """Placeholder: only prints ``a``; both arguments are ignored."""
    print("a")


def naive_match(s, p):
    """Return 1 if *p* occurs as a substring of *s*, otherwise 0."""
    return 1 if p in s else 0


def _copy_text_and_image(file_dir, file_dir1, dirs4, dest_dir):
    """Copy a text file and its same-named ``.jpg`` into *dest_dir*.

    Args:
        file_dir: full path of the text file.
        file_dir1: directory that holds the text file and its image.
        dirs4: file name of the text file.
        dest_dir: destination directory; created if it does not exist.

    Raises:
        OSError: if either source file is missing or cannot be copied
            (``shutil.copy`` errors are not caught).
    """
    os.makedirs(dest_dir, exist_ok=True)
    shutil.copy(file_dir, os.path.join(dest_dir, dirs4))
    # The image shares the text file's name up to the first ".txt".
    image_name = dirs4.split(".txt")[0] + '.jpg'
    shutil.copy(os.path.join(file_dir1, image_name),
                os.path.join(dest_dir, image_name))


def key_confirm(j, key, sentence, file_dir, dirs4, file_dir1, dirs3, dirs2,
                dirs1, datasetpath, output_root=DEFAULT_MATCHED_ROOT):
    """Copy a text file and its image if the text contains *key*.

    Spaces are removed from every line before matching, so a keyword that is
    split by spaces in the text still matches.

    Args:
        j: initial match counter (the caller in this file passes 0); the
            number of matching lines is added to it.
        key: keyword to look for.
        sentence: open text file (any iterable of lines).
        file_dir: full path of the text file being examined.
        dirs4: file name of the text file.
        file_dir1: directory that holds the text file and its image.
        dirs3: name of the third-level directory; used as destination
            sub-folder.
        dirs2, dirs1, datasetpath: unused; kept for interface compatibility.
        output_root: root directory for matched files.

    Returns:
        1 if the counter is non-zero after scanning (files were copied to
        ``<output_root>/<key>/<dirs3>``), otherwise 0.

    Raises:
        OSError: if the text file or its ``.jpg`` cannot be copied.
    """
    for line in sentence:
        j += naive_match(line.replace(' ', ''), key)
    if j == 0:
        return 0
    _copy_text_and_image(file_dir, file_dir1, dirs4,
                         os.path.join(output_root, key, dirs3))
    return 1


def main(datasetpath=DEFAULT_DATASET_PATH, key_list=None,
         matched_root=DEFAULT_MATCHED_ROOT, unknown_root=DEFAULT_UNKNOWN_ROOT):
    """Classify every ``*C.txt`` / ``*CA.txt`` file under *datasetpath*.

    The tree is expected to be exactly four levels deep
    (``datasetpath/dirs1/dirs2/dirs3/<file>``).

    Args:
        datasetpath: root of the directory tree to scan.
        key_list: keywords to test; defaults to ``DEFAULT_KEY_LIST``.
        matched_root: root directory for files that match a keyword.
        unknown_root: root directory for files that match no keyword.

    Returns:
        Tuple ``(total, recognized, unrecognized)`` of file counts.
    """
    if key_list is None:
        key_list = DEFAULT_KEY_LIST
    total = 0
    recognized = 0
    unrecognized = 0
    for dirs1 in os.listdir(datasetpath):
        level1 = os.path.join(datasetpath, dirs1)
        for dirs2 in os.listdir(level1):
            level2 = os.path.join(level1, dirs2)
            for dirs3 in os.listdir(level2):
                file_dir1 = os.path.join(level2, dirs3)
                for dirs4 in os.listdir(file_dir1):
                    if not ('C.txt' in dirs4 or 'CA.txt' in dirs4):
                        continue
                    file_dir = os.path.join(file_dir1, dirs4)
                    total += 1
                    matched_keys = 0
                    for key in key_list:
                        # Re-open per keyword: key_confirm consumes the file.
                        with open(file_dir, 'r', encoding='UTF-8') as f:
                            matched_keys += key_confirm(
                                0, key, f, file_dir, dirs4, file_dir1,
                                dirs3, dirs2, dirs1, datasetpath,
                                output_root=matched_root)
                    if matched_keys == 0:
                        unrecognized += 1
                        _copy_text_and_image(
                            file_dir, file_dir1, dirs4,
                            os.path.join(unknown_root, dirs3))
                        print('%s---->>>>>这个文件是一个不知道的类别' % (file_dir))
                    else:
                        recognized += 1
                        print(">>>>>>>>>>>>>>>>>>>>>>识别成功")
    print("total_number--->%d" % total)
    print("regoncize_number--->%d" % recognized)
    print("unregoncize_number--->%d" % unrecognized)
    return total, recognized, unrecognized


if __name__ == '__main__':
    main()
# 基于文本模板匹配的数据清洗
# 猜你喜欢
# 转载自blog.csdn.net/zhouguangfei0717/article/details/80534079
# 今日推荐
# 周排行