#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time : 2022-09-29 15:18
# @Author : Lyt
# @IDE : PyCharm
# @FileName : transformer.py
# @Blog : https://blog.csdn.net/m0_53292725?type=blog
import torch
import torch.nn as nn
from einops import rearrange

class Attention(nn.Module):
    """Multi-head self-attention."""
    def __init__(self, dim, head_dim=64, heads=8, dropout=0.):
        super(Attention, self).__init__()
        inner_dim = head_dim * heads        # total width across all heads
        self.heads = heads
        self.scale = head_dim ** -0.5       # 1/sqrt(d_k) scaling for the dot products
        self.softmax = nn.Softmax(dim=-1)
        self.to_qkv = nn.Linear(dim, inner_dim * 3)   # project to Q, K, V in one matmul
        self.to_out = nn.Sequential(nn.Linear(inner_dim, dim),
                                    nn.Dropout(dropout))

    def forward(self, x):
        # x: (batch, tokens, dim) -> three chunks of shape (batch, tokens, inner_dim)
        qkv = self.to_qkv(x).chunk(3, dim=-1)
        # split out the heads: (batch, heads, tokens, head_dim)
        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=self.heads), qkv)
        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale   # scaled dot-product scores
        attend = self.softmax(dots)                                # attention weights
        out = torch.matmul(attend, v)
        out = rearrange(out, 'b h n d -> b n (h d)')               # merge the heads back
        return self.to_out(out)

class Norm(nn.Module):  # pre-norm wrapper: apply LayerNorm first, then the wrapped layer (attention or feed-forward)
    def __init__(self, dim, fn):
        super(Norm, self).__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)

class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout=0.):
        super(FeedForward, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)

class Transformer(nn.Module):
    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.):
        super(Transformer, self).__init__()
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                # Attention expects the keyword head_dim (not dim_head)
                Norm(dim, Attention(dim, head_dim=dim_head, heads=heads, dropout=dropout)),
                Norm(dim, FeedForward(dim, mlp_dim, dropout=dropout))
            ]))

    def forward(self, x):
        for att, ff in self.layers:
            x = att(x) + x   # residual connection around attention
            x = ff(x) + x    # residual connection around feed-forward
        return x
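
# A minimal smoke test, not part of the original post: it assumes the model is fed
# token embeddings of shape (batch, num_tokens, dim) and returns the same shape.
# The hyperparameter values below are arbitrary illustrative choices.
if __name__ == '__main__':
    model = Transformer(dim=128, depth=4, heads=8, dim_head=64, mlp_dim=256, dropout=0.1)
    x = torch.randn(2, 50, 128)   # (batch=2, tokens=50, dim=128)
    out = model(x)
    print(out.shape)              # expected: torch.Size([2, 50, 128])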