在进行训练之前,我们首先要保证数据的可用性,因此需要先进行数据清洗等操作。
为了保证训练模型的准确率,还需要对训练数据进行数据扩增(data augmentation),
比如:几何变换、颜色变化、通道变换等。
# Collect the training image paths in a deterministic (name-sorted) order.
train_path = sorted(glob.glob('../input/train/*.png'))

# Load the annotation file; the context manager closes the handle promptly.
with open('../input/train.json', encoding='utf-8') as train_json_file:
    train_json = json.load(train_json_file)

# One label entry per annotation record, in the JSON file's key order.
# NOTE(review): labels follow the JSON key order while paths are sorted by
# file name; this assumes the two orders agree -- confirm against the data.
train_label = [train_json[x]['label'] for x in train_json]

# Sanity check: both counts are expected to match.
print(len(train_path), len(train_label))
# Training-time preprocessing: resize, then apply random crop / color jitter /
# small rotation as augmentation before converting to a normalized tensor.
train_transform = transforms.Compose([
    transforms.Resize((64, 128)),
    transforms.RandomCrop((60, 120)),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2),
    transforms.RandomRotation(degrees=5),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

train_dataset = SVHNDataset(train_path, train_label, train_transform)

# Shuffled mini-batches of 40 samples, loaded by 10 worker processes.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=40,
    shuffle=True,
    num_workers=10,
)
# Collect the validation image paths in a deterministic (name-sorted) order.
val_path = sorted(glob.glob('../input/val/*.png'))

# Load the annotation file; the context manager closes the handle promptly.
with open('../input/val.json', encoding='utf-8') as val_json_file:
    val_json = json.load(val_json_file)

# One label entry per annotation record, in the JSON file's key order.
# NOTE(review): labels follow the JSON key order while paths are sorted by
# file name; this assumes the two orders agree -- confirm against the data.
val_label = [val_json[x]['label'] for x in val_json]

# Sanity check: both counts are expected to match.
print(len(val_path), len(val_label))
# Validation-time preprocessing: a fixed resize to (60, 120) followed by tensor
# conversion and normalization. No random augmentation (color jitter,
# rotation) is applied here.
val_transform = transforms.Compose([
    transforms.Resize((60, 120)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

val_dataset = SVHNDataset(val_path, val_label, val_transform)

# Unshuffled mini-batches of 40 samples, loaded by 10 worker processes.
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=40,
    shuffle=False,
    num_workers=10,
)