Pytorch自定义dataloader以及在迭代过程中返回image的name

pytorch官方给的加载数据的方式是已经定义好的dataset以及loader,如何加载自己本地的图片以及label?

形如数据格式为

image1 label1

image2 label2

...

imagen labeln

实验中我采用的数据的格式如下,一个图片的名字对应一个label,每一个label是一个9维的向量

1_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.304295635957 0.952577642997 0.0614006041909 0.0938333659301 -0.995587916479 0.126405046864 -0.999368204665 0.0355414055005 0.382030624629 0.0
1_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.271224474168 0.962516121742 0.061399602839 0.128727689658 -0.991679979588 0.126495313272 -0.999999890616 0.000467726796359 0.381981952872 0.0
1_2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.237868729379 0.971297311632 0.0614713240576 0.163626102983 -0.986522426721 0.1265439964 -0.999400990041 -0.0346072406472 0.382020891324 0.0
1.1_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.303575822293 0.95280728383 0.0675229548933 0.0939225945957 -0.995579502714 0.138745857429 -0.999376861795 0.0352971402251 0.410670255038 0.1
1.1_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.270745576918 0.962650940154 0.0674654115238 0.128659340525 -0.991688849436 0.138685653232 -0.999999909615 0.000425170029598 0.410739827476 0.1
1.1_2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.23757921143 0.971368168253 0.0674866175928 0.16322766122 -0.986588430204 0.138789623782 -0.999406504329 -0.0344476284471 0.410661183171 0.1
1.2_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.305474635089 0.952200213882 0.0736939767933 0.0939968709874 -0.995572492712 0.150981626608 -0.999370773952 0.0354690875311 0.437620875774 0.2
1.2_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.270346113421 0.962763199836 0.073518963401 0.128433455959 -0.991718129002 0.150964425444 -0.999999924062 0.000389711583812 0.437667827367 0.2
1.2_2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.237337349604 0.971427291403 0.0734898449879 0.162895476227 -0.986643331617 0.150931800731 -0.999411541516 -0.0343011761519 0.437608139736 0.2
1.3_-2_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.305514664536 0.952187371137 0.0795990377393 0.0941741911595 -0.995555735115 0.162914965783 -0.999378340534 0.0352552474342 0.462816755558 0.3
1.3_0_pitch_100_yaw_0_lat_29.7553171_lng_-95.3675684.jpg 0.272366931798 0.962193459998 0.0796135882128 0.128398130503 -0.991722703221 0.162940731132 -0.999999935257 0.000359841646368 0.462733965419 0.3

...

源程序如下

import torch
import torch.nn as nn
import math
import os
from PIL import Image
import random
from torchvision import datasets, transforms
import torch.utils.data as data
from torch.autograd import Variable

torch.cuda.set_device(0)
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"
kwargs = {'num_workers': 1, 'pin_memory': True}
batch_size = 8
# load the data
def random_choose_data(label_path):
    random.seed(1)
    file = open(label_path)
    lines = file.readlines()
    slice_initial = random.sample(lines, 200000)  # if don't change this ,it will be all the same
    slice = list(set(lines)-set(slice_initial))
    random.shuffle(slice)

    train_label = slice[:150000]
    test_label = slice[150000:200000]
    return train_label, test_label  # output the list and delvery it into ImageFolder


# def my data loader, return the data and corresponding label
def default_loader(path):
    return Image.open(path).convert('RGB')  # operation object is the PIL image object


class myImageFloder(data.Dataset):  # Class inheritance
    def __init__(self, root, label, transform=None, target_transform=None, loader=default_loader):
        # fh = open(label)
        c = 0
        imgs = []
        class_names = ['regression']
        for line in label:  # label is a list
            cls = line.split()  # cls is a list
            fn = cls.pop(0)
            if os.path.isfile(os.path.join(root, fn)):
                imgs.append((fn, tuple([float(v) for v in cls[:len(cls)-1]])))
                # access the last label
                # images is the list,and the content is the tuple, every image corresponds to a label
                # despite the label's dimension
                # we can use the append way to append the element for list
            c = c + 1
        print('the total image is',c)
        print(class_names)
        self.root = root
        self.imgs = imgs
        self.classes = class_names
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader
    def __getitem__(self, index):
        fn, label = self.imgs[index]  # even though the imgs is just a list, it can return the elements of it
        # in a proper way
        img = self.loader(os.path.join(self.root, fn))
        if self.transform is not None:
            img = self.transform(img)
        return img, torch.Tensor(label), fn

    def __len__(self):
        return len(self.imgs)

    def getName(self):
        return self.classes

mytransform = transforms.Compose([transforms.ToTensor()])  # transform [0,255] to [0,1]
test_data_root = "/home/ying/data/google_streetview_train_test1"
data_label = "/home/ying/data/google_streetview_train_test1/label.txt"
# test_label="/home/ying/data/google_streetview_train_test1/label.txt"
train_label, test_label = random_choose_data(data_label)
test_loader = torch.utils.data.DataLoader(
        myImageFloder(root=test_data_root, label=test_label, transform=mytransform),batch_size=batch_size, shuffle=True, **kwargs)


def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                    padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)  # decrease the channel, does't change size
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                              padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=9):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                              bias=False)  # the size become 1/2
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # the size become 1/2
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7)
        # self.fc = nn.Linear(512 * block.expansion, num_classes)
        self.fc = nn.Linear(2048, num_classes)


        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        #  block: object, planes: output channel, blocks: the num of blocks
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion  # the input channel num become 4 times
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


def resnet50(pretrained = True):
    """Constructs a ResNet-50 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3])
    # model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
    model.load_state_dict(torch.load('./resnet50_20170907_state_dict.pth'))
    return model
cnn = resnet50(pretrained=True)  # the output number is 9
cnn.cuda()
cnn.eval()
criterion = nn.MSELoss().cuda()

for i, (test_images, test_labels, fn) in enumerate(test_loader):  # the first i in index, and the () is the content
    test_images = Variable(test_images.cuda())
    test_labels = Variable(test_labels.cuda())
    outputs = cnn(test_images)
    print(outputs.data[0])
    print(fn)
    loss = criterion(outputs, test_labels)
    print("Iter [%d/%d] Test_Loss: %.4f" % (i + 1, 781, loss.data[0]))

着重看定义dataloader以及返回图像名称的一段代码:

def random_choose_data(label_path):
    random.seed(1)
    file = open(label_path)
    lines = file.readlines()
    slice_initial = random.sample(lines, 200000)  # if don't change this ,it will be all the same
    slice = list(set(lines)-set(slice_initial))
    random.shuffle(slice)

    train_label = slice[:150000]
    test_label = slice[150000:200000]
    return train_label, test_label  # output the list and delvery it into ImageFolder


# def my data loader, return the data and corresponding label
def default_loader(path):
    return Image.open(path).convert('RGB')  # operation object is the PIL image object


class myImageFloder(data.Dataset):  # Class inheritance,继承Dataset类
    def __init__(self, root, label, transform=None, target_transform=None, loader=default_loader):
        # fh = open(label)
        c = 0
        imgs = []
        class_names = ['regression']
        for line in label:  # label is a list
            cls = line.split()  # cls is a list
            fn = cls.pop(0)
            if os.path.isfile(os.path.join(root, fn)):
                imgs.append((fn, tuple([float(v) for v in cls[:len(cls)-1]])))
                # access the last label
                # images is the list,and the content is the tuple, every image corresponds to a label
                # despite the label's dimension
                # we can use the append way to append the element for list
            c = c + 1
        print('the total image is',c)
        print(class_names)
        self.root = root
        self.imgs = imgs
        self.classes = class_names
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader
    def __getitem__(self, index):
        fn, label = self.imgs[index]  # even though the imgs is just a list, it can return the elements of it
        # in a proper way
        img = self.loader(os.path.join(self.root, fn))
        if self.transform is not None:
            img = self.transform(img)
        return img, torch.Tensor(label), fn  # 在这里返回图像数据以及对应的label以及对应的名称

    def __len__(self):
        return len(self.imgs)

    def getName(self):
        return self.classes

实际上是继承Dataset这个类中的两个函数__getitem____len__,并且返回的变量类型是torch.Tensor即可

看dataloader定义方式以及如何在dataloader中加载数据

mytransform = transforms.Compose([transforms.ToTensor()])  # transform [0,255] to [0,1]
test_data_root = "/home/ying/data/google_streetview_train_test1"
data_label = "/home/ying/data/google_streetview_train_test1/label.txt"
# test_label="/home/ying/data/google_streetview_train_test1/label.txt"
train_label, test_label = random_choose_data(data_label)
test_loader = torch.utils.data.DataLoader(
        myImageFloder(root=test_data_root, label=test_label, transform=mytransform),batch_size=batch_size, shuffle=True, **kwargs)
...
for i, (test_images, test_labels, fn) in enumerate(test_loader):  # the first i in index, and the () is the content
    test_images = Variable(test_images.cuda())
    test_labels = Variable(test_labels.cuda())
    outputs = cnn(test_images)
    print(outputs.data[0])
    print(fn)
    loss = criterion(outputs, test_labels)
    print("Iter [%d/%d] Test_Loss: %.4f" % (i + 1, 781, loss.data[0]))

实际上刚刚在myImageFloder中定义的__getitem__实际上就是i, (test_images, test_labels, fn) in enumerate(test_loader): 中返回的对象, 其中第一个i是与enumberate相关的index

这样就能够在模型test的时候观察哪些数据误差比较大并且进行输出

猜你喜欢

转载自www.linuxidc.com/Linux/2017-09/147183.htm