EvoNorms

Evolving Normalization-Activation Layers

This paper studies the co-design of normalization layers and activation functions by formulating the two as a single building block.

The paper also visualizes the learning dynamics under very large batch sizes. Under identical training settings on ResNet-50, EvoNorm-B0 shows better generalization than BN-ReLU despite a higher training loss; in all other cases, EvoNorm improves both optimization and generalization.
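
For reference, the two layers implemented below compute the following (the formulas follow the EvoNorm paper; the plain-text notation is mine):

EvoNorm-B0: y = x / max(sqrt(var_batch(x) + eps), v * x + std_instance(x)) * gamma + beta, where var_batch is the per-channel batch variance and std_instance is the per-sample, per-channel spatial standard deviation.

EvoNorm-S0: y = x * sigmoid(v * x) / std_group(x) * gamma + beta, where std_group is the standard deviation over groups of channels. In both cases gamma, beta and v are learned per-channel parameters.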

Code:

https://github.com/lonePatient/EvoNorms_PyTorch/blob/master/models/normalization.py

import torch
import torch.nn as nn
from torch.nn import init
from torch.nn.parameter import Parameter

def instance_std(x, eps=1e-5):
    # Per-sample, per-channel standard deviation over the spatial dims (the s_I term in EvoNorm-B0).
    N, C, H, W = x.size()
    x1 = x.reshape(N * C, -1)
    var = x1.var(dim=-1, keepdim=True) + eps
    return var.sqrt().reshape(N, C, 1, 1)

def group_std(x, groups, eps=1e-5):
    # Despite the name, this returns x divided by its per-group standard deviation
    # (i.e. the group-normalized activations used by EvoNorm-S0).
    N, C, H, W = x.size()
    x1 = x.reshape(N, groups, -1)
    var = x1.var(dim=-1, keepdim=True) + eps
    return (x1 / var.sqrt()).reshape(N, C, H, W)


class BatchNorm2dRelu(nn.Module):
    # Baseline BatchNorm2d + ReLU block, kept for comparison with the EvoNorm layers.
    def __init__(self, in_channels):
        super(BatchNorm2dRelu, self).__init__()
        self.layer = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True))
    def forward(self, x):
        output = self.layer(x)
        return output


class EvoNorm2dB0(nn.Module):
    # EvoNorm-B0: batch-dependent variant; combines batch variance with an instance-based term.
    def __init__(self, in_channels, nonlinear=True, momentum=0.9, eps=1e-5):
        super(EvoNorm2dB0, self).__init__()
        self.nonlinear = nonlinear
        self.momentum = momentum
        self.eps = eps
        self.gamma = Parameter(torch.Tensor(1,in_channels,1,1))
        self.beta = Parameter(torch.Tensor(1,in_channels,1,1))
        if nonlinear:
            self.v = Parameter(torch.Tensor(1,in_channels,1,1))
        self.register_buffer('running_var', torch.ones(1, in_channels, 1, 1))
        self.reset_parameters()

    def reset_parameters(self):
        init.ones_(self.gamma)
        init.zeros_(self.beta)
        if self.nonlinear:
            init.ones_(self.v)

    def forward(self, x):
        N, C, H, W = x.size()
        if self.training:
            # Per-channel batch variance, computed over the N, H, W dimensions.
            x1 = x.permute(1, 0, 2, 3).reshape(C, -1)
            var = x1.var(dim=1).reshape(1, C, 1, 1)
            # Update the running estimate used at inference time (detach to keep it out of the graph).
            self.running_var.copy_(self.momentum * self.running_var + (1 - self.momentum) * var.detach())
        else:
            var = self.running_var
        if self.nonlinear:
            # EvoNorm-B0: x / max(sqrt(var_batch + eps), v * x + instance_std(x)), followed by the affine transform.
            den = torch.max((var + self.eps).sqrt(), self.v * x + instance_std(x))
            return x / den * self.gamma + self.beta
        else:
            return x * self.gamma + self.beta


class EvoNorm2dS0(nn.Module):
    # EvoNorm-S0: batch-independent variant built on group statistics (no running stats needed).
    def __init__(self, in_channels, groups=8, nonlinear=True):
        super(EvoNorm2dS0, self).__init__()
        self.nonlinear = nonlinear
        self.groups = groups
        self.gamma = Parameter(torch.Tensor(1,in_channels,1,1))
        self.beta = Parameter(torch.Tensor(1,in_channels,1,1))
        if nonlinear:
            self.v = Parameter(torch.Tensor(1,in_channels,1,1))
        self.reset_parameters()

    def reset_parameters(self):
        init.ones_(self.gamma)
        init.zeros_(self.beta)
        if self.nonlinear:
            init.ones_(self.v)

    def forward(self, x):
        if self.nonlinear:
            # EvoNorm-S0: sigmoid(v * x) * x / group_std(x), followed by the affine transform.
            num = torch.sigmoid(self.v * x)
            # group_std() already returns x divided by its per-group standard deviation.
            std = group_std(x, self.groups)
            return num * std * self.gamma + self.beta
        else:
            return x * self.gamma + self.beta
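
A minimal usage sketch (my own addition, not part of the original repository code): a convolution followed by an EvoNorm layer in place of the usual BatchNorm2d + ReLU pair. The block name and shapes are arbitrary, and the snippet assumes it runs after the definitions above.

class ConvEvoNormBlock(nn.Module):
    # Hypothetical block: Conv2d followed by EvoNorm instead of BN + ReLU.
    def __init__(self, in_channels, out_channels, use_s0=False):
        super(ConvEvoNormBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False)
        # EvoNorm replaces both the normalization layer and the activation.
        self.norm_act = EvoNorm2dS0(out_channels) if use_s0 else EvoNorm2dB0(out_channels)

    def forward(self, x):
        return self.norm_act(self.conv(x))

if __name__ == "__main__":
    block = ConvEvoNormBlock(3, 16, use_s0=True)  # 16 channels is divisible by the default 8 groups
    x = torch.randn(2, 3, 32, 32)                 # N, C, H, W
    print(block(x).shape)                         # torch.Size([2, 16, 32, 32])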

https://mp.weixin.qq.com/s?__biz=MzA5ODEzMjIyMA==&mid=2247499200&idx=1&sn=180d334794d395311ff161c92a8a68b5

Reposted from blog.csdn.net/jacke121/article/details/105925923