Table of Contents

- Self-Attention
- Transformer Block
- Encoder
- Decoder Block
- Decoder
- The Full Transformer
- References
- Complete Code (Runnable)

## Self-Attention

Computation formula (scaled dot-product attention, computed per head):

$$\mathrm{Attention}(Q, K, V) = \mathrm{softmax}\!\left(\frac{QK^{\top}}{\sqrt{d_k}}\right)V$$

Code implementation:

```python
class SelfAttention(nn.Module):
    def __init__(self, embed_size, heads):
        super(SelfAttention, self).__init__()
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads

        assert (self.head_dim * heads == embed_size), "Embed size needs to be div by heads"

        self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.fc_out = nn.Linear(heads * self.head_dim, embed_size)

    def forward(self, values, keys, query, mask):
        N = query.shape[0]  # the number of training examples
        value_len, key_len, query_len = values.shape[1], keys.shape[1], query.shape[1]

        # Split the embedding into self.heads pieces
        values = values.reshape(N, value_len, self.heads, self.head_dim)
        keys = keys.reshape(N, key_len, self.heads, self.head_dim)
        queries = query.reshape(N, query_len, self.heads, self.head_dim)

        values = self.values(values)
        keys = self.keys(keys)
        queries = self.queries(queries)

        # Matrix multiplication written in Einstein notation (einsum)
        energy = torch.einsum("nqhd,nkhd->nhqk", [queries, keys])
        # queries shape: (N, query_len, heads, head_dim)
        # keys shape:    (N, key_len, heads, head_dim)
        # energy shape:  (N, heads, query_len, key_len)

        if mask is not None:
            # Fill positions where mask == 0 with a very large negative value
            # so they receive ~0 weight after the softmax
            energy = energy.masked_fill(mask == 0, float("-1e20"))

        # Note: this scales by sqrt(embed_size); the original paper scales by sqrt(d_k) per head
        attention = torch.softmax(energy / (self.embed_size ** (1 / 2)), dim=3)

        out = torch.einsum("nhql,nlhd->nqhd", [attention, values]).reshape(
            N, query_len, self.heads * self.head_dim
        )
        # attention shape: (N, heads, query_len, key_len)
        # values shape:    (N, value_len, heads, head_dim)
        # after einsum:    (N, query_len, heads, head_dim), then flatten the last two dimensions

        out = self.fc_out(out)
        return out
```
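As a quick sanity check on the module above, here is a minimal usage sketch. It assumes the `SelfAttention` class defined above is in scope; the batch size, sequence length, and embedding size are arbitrary values chosen only for illustration:

```python
import torch

# Hypothetical sizes, for illustration only.
N, seq_len, embed_size, heads = 2, 9, 256, 8

attention = SelfAttention(embed_size, heads)  # assumes the class above is defined
x = torch.randn(N, seq_len, embed_size)       # a batch of token representations
mask = torch.ones(N, 1, 1, seq_len)           # no padding: every position is visible

out = attention(x, x, x, mask)                # self-attention: values = keys = queries = x
print(out.shape)                              # expected: torch.Size([2, 9, 256])
```

Because the mask broadcasts against the (N, heads, query_len, key_len) energy tensor, the same (N, 1, 1, seq_len) mask shape works for any number of heads.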
## Transformer Block

We define the Transformer block as the structure shown in the figure from the original paper: multi-head attention, followed by add & layer norm, a position-wise feed-forward network, and another add & layer norm. This block appears in both the encoder and the decoder.

Code implementation:

```python
class TransformerBlock(nn.Module):
    def __init__(self, embed_size, heads, dropout, forward_expansion):
        super(TransformerBlock, self).__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm1 = nn.LayerNorm(embed_size)
        self.norm2 = nn.LayerNorm(embed_size)
        self.feed_forward = nn.Sequential(
            nn.Linear(embed_size, forward_expansion * embed_size),
            nn.ReLU(),
            nn.Linear(forward_expansion * embed_size, embed_size),
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, value, key, query, mask):
        attention = self.attention(value, key, query, mask)
        x = self.dropout(self.norm1(attention + query))  # residual connection + layer norm
        forward = self.feed_forward(x)
        out = self.dropout(self.norm2(forward + x))      # residual connection + layer norm
        return out
```

## Encoder

The encoder works as follows: the inputs pass through an input embedding and positional encoding, and then through a stack of Transformer blocks. A usage sketch follows the code below.

Code implementation:

```python
class Encoder(nn.Module):
    def __init__(self, src_vocab_size, embed_size, num_layers, heads, device,
                 forward_expansion, dropout, max_length):
        super(Encoder, self).__init__()
        self.embed_size = embed_size
        self.device = device
        self.word_embedding = nn.Embedding(src_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)
        self.layers = nn.ModuleList(
            [
                TransformerBlock(
                    embed_size,
                    heads,
                    dropout=dropout,
                    forward_expansion=forward_expansion,
                )
                for _ in range(num_layers)
            ]
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        out = self.dropout(self.word_embedding(x) + self.position_embedding(positions))
        for layer in self.layers:
            # In the encoder the values, keys and queries are all the same tensor
            out = layer(out, out, out, mask)
        return out
```
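Here is a small usage sketch for the encoder, again assuming the classes above are in scope. The token ids, vocabulary size, and layer count are made-up illustration values; the padding mask is built the same way the full model does later, with shape (N, 1, 1, src_len):

```python
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
src_pad_idx = 0  # hypothetical: 0 is treated as the padding token

# Two toy source sequences, the first one padded with a trailing 0.
src = torch.tensor([[1, 5, 6, 4, 3, 9, 5, 2, 0],
                    [1, 8, 7, 3, 4, 5, 6, 7, 2]]).to(device)

encoder = Encoder(src_vocab_size=10, embed_size=256, num_layers=2, heads=8,
                  device=device, forward_expansion=4, dropout=0.1,
                  max_length=100).to(device)

src_mask = (src != src_pad_idx).unsqueeze(1).unsqueeze(2).to(device)  # (N, 1, 1, src_len)
enc_out = encoder(src, src_mask)
print(enc_out.shape)  # expected: torch.Size([2, 9, 256])
```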
## Decoder Block

The decoder block consists of masked self-attention over the target with an add & layer norm, followed by a Transformer block whose keys and values come from the encoder output.

Code implementation:

```python
class DecoderBlock(nn.Module):
    def __init__(self, embed_size, heads, forward_expansion, dropout, device):
        super(DecoderBlock, self).__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm = nn.LayerNorm(embed_size)
        self.transformer_block = TransformerBlock(embed_size, heads, dropout, forward_expansion)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, value, key, src_mask, trg_mask):
        attention = self.attention(x, x, x, trg_mask)  # masked self-attention on the target
        query = self.dropout(self.norm(attention + x))
        out = self.transformer_block(value, key, query, src_mask)
        return out
```

## Decoder

The decoder consists of a word embedding and a positional embedding followed by a stack of decoder blocks, with a final linear layer that projects to the target vocabulary.

Code implementation:

```python
class Decoder(nn.Module):
    def __init__(self, trg_vocab_size, embed_size, num_layers, heads,
                 forward_expansion, dropout, device, max_length):
        super(Decoder, self).__init__()
        self.device = device
        self.word_embedding = nn.Embedding(trg_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)
        self.layers = nn.ModuleList(
            [
                DecoderBlock(embed_size, heads, forward_expansion, dropout, device)
                for _ in range(num_layers)
            ]
        )
        self.fc_out = nn.Linear(embed_size, trg_vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_out, src_mask, trg_mask):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        x = self.dropout(self.word_embedding(x) + self.position_embedding(positions))
        for layer in self.layers:
            # Keys and values come from the encoder output
            x = layer(x, enc_out, enc_out, src_mask, trg_mask)
        out = self.fc_out(x)
        return out
```

## The Full Transformer

The full model combines the encoder and the decoder, building a padding mask for the source and a lower-triangular (causal) mask for the target.

Code implementation:

```python
class Transformer(nn.Module):
    def __init__(self, src_vocab_size, trg_vocab_size, src_pad_idx, trg_pad_idx,
                 embed_size=256, num_layers=6, forward_expansion=4, heads=8,
                 dropout=0, device="cuda", max_length=100):
        super(Transformer, self).__init__()
        self.encoder = Encoder(src_vocab_size, embed_size, num_layers, heads,
                               device, forward_expansion, dropout, max_length)
        self.decoder = Decoder(trg_vocab_size, embed_size, num_layers, heads,
                               forward_expansion, dropout, device, max_length)
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device

    def make_src_mask(self, src):
        # Hide padding tokens in the source
        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)  # (N, 1, 1, src_len)
        return src_mask.to(self.device)

    def make_trg_mask(self, trg):
        # Lower-triangular mask so position i can only attend to positions <= i
        N, trg_len = trg.shape
        trg_mask = torch.tril(torch.ones((trg_len, trg_len))).expand(N, 1, trg_len, trg_len)
        return trg_mask.to(self.device)

    def forward(self, src, trg):
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)
        enc_src = self.encoder(src, src_mask)
        out = self.decoder(trg, enc_src, src_mask, trg_mask)
        return out
```
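To make `make_trg_mask` concrete, the sketch below prints the lower-triangular mask for a target length of 4: row i has ones only up to column i, so position i can attend to itself and earlier positions but not to future ones.

```python
import torch

trg_len = 4
trg_mask = torch.tril(torch.ones((trg_len, trg_len)))
print(trg_mask)
# tensor([[1., 0., 0., 0.],
#         [1., 1., 0., 0.],
#         [1., 1., 1., 0.],
#         [1., 1., 1., 1.]])
# Expanded to (N, 1, trg_len, trg_len), the zero entries are exactly the
# positions that masked_fill replaces with -1e20 before the softmax.
```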
## References

[1] https://www.youtube.com/watch?v=U0s0f995w14
[2] https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/more_advanced/transformer_from_scratch/transformer_from_scratch.py
[3] https://arxiv.org/abs/1706.03762
[4] https://www.youtube.com/watch?v=pkVwUVEHmfI

## Complete Code (Runnable)

```python
import torch
import torch.nn as nn


class SelfAttention(nn.Module):
    def __init__(self, embed_size, heads):
        super(SelfAttention, self).__init__()
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads

        assert (self.head_dim * heads == embed_size), "Embed size needs to be div by heads"

        self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.fc_out = nn.Linear(heads * self.head_dim, embed_size)

    def forward(self, values, keys, query, mask):
        N = query.shape[0]  # the number of training examples
        value_len, key_len, query_len = values.shape[1], keys.shape[1], query.shape[1]

        # Split embedding into self.heads pieces
        values = values.reshape(N, value_len, self.heads, self.head_dim)
        keys = keys.reshape(N, key_len, self.heads, self.head_dim)
        queries = query.reshape(N, query_len, self.heads, self.head_dim)

        values = self.values(values)
        keys = self.keys(keys)
        queries = self.queries(queries)

        energy = torch.einsum("nqhd,nkhd->nhqk", [queries, keys])
        # queries shape: (N, query_len, heads, head_dim)
        # keys shape:    (N, key_len, heads, head_dim)
        # energy shape:  (N, heads, query_len, key_len)

        if mask is not None:
            # Fill positions where mask == 0 with a very large negative value
            energy = energy.masked_fill(mask == 0, float("-1e20"))

        attention = torch.softmax(energy / (self.embed_size ** (1 / 2)), dim=3)

        out = torch.einsum("nhql,nlhd->nqhd", [attention, values]).reshape(
            N, query_len, self.heads * self.head_dim
        )
        # attention shape: (N, heads, query_len, key_len)
        # values shape:    (N, value_len, heads, head_dim)
        # after einsum:    (N, query_len, heads, head_dim), then flatten the last two dimensions

        out = self.fc_out(out)
        return out


class TransformerBlock(nn.Module):
    def __init__(self, embed_size, heads, dropout, forward_expansion):
        super(TransformerBlock, self).__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm1 = nn.LayerNorm(embed_size)
        self.norm2 = nn.LayerNorm(embed_size)
        self.feed_forward = nn.Sequential(
            nn.Linear(embed_size, forward_expansion * embed_size),
            nn.ReLU(),
            nn.Linear(forward_expansion * embed_size, embed_size),
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, value, key, query, mask):
        attention = self.attention(value, key, query, mask)
        x = self.dropout(self.norm1(attention + query))
        forward = self.feed_forward(x)
        out = self.dropout(self.norm2(forward + x))
        return out


class Encoder(nn.Module):
    def __init__(self, src_vocab_size, embed_size, num_layers, heads, device,
                 forward_expansion, dropout, max_length):
        super(Encoder, self).__init__()
        self.embed_size = embed_size
        self.device = device
        self.word_embedding = nn.Embedding(src_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)
        self.layers = nn.ModuleList(
            [
                TransformerBlock(
                    embed_size,
                    heads,
                    dropout=dropout,
                    forward_expansion=forward_expansion,
                )
                for _ in range(num_layers)
            ]
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        out = self.dropout(self.word_embedding(x) + self.position_embedding(positions))
        for layer in self.layers:
            out = layer(out, out, out, mask)
        return out


class DecoderBlock(nn.Module):
    def __init__(self, embed_size, heads, forward_expansion, dropout, device):
        super(DecoderBlock, self).__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm = nn.LayerNorm(embed_size)
        self.transformer_block = TransformerBlock(embed_size, heads, dropout, forward_expansion)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, value, key, src_mask, trg_mask):
        attention = self.attention(x, x, x, trg_mask)
        query = self.dropout(self.norm(attention + x))
        out = self.transformer_block(value, key, query, src_mask)
        return out


class Decoder(nn.Module):
    def __init__(self, trg_vocab_size, embed_size, num_layers, heads,
                 forward_expansion, dropout, device, max_length):
        super(Decoder, self).__init__()
        self.device = device
        self.word_embedding = nn.Embedding(trg_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)
        self.layers = nn.ModuleList(
            [
                DecoderBlock(embed_size, heads, forward_expansion, dropout, device)
                for _ in range(num_layers)
            ]
        )
        self.fc_out = nn.Linear(embed_size, trg_vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_out, src_mask, trg_mask):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        x = self.dropout(self.word_embedding(x) + self.position_embedding(positions))
        for layer in self.layers:
            x = layer(x, enc_out, enc_out, src_mask, trg_mask)
        out = self.fc_out(x)
        return out


class Transformer(nn.Module):
    def __init__(self, src_vocab_size, trg_vocab_size, src_pad_idx, trg_pad_idx,
                 embed_size=256, num_layers=6, forward_expansion=4, heads=8,
                 dropout=0, device="cuda", max_length=100):
        super(Transformer, self).__init__()
        self.encoder = Encoder(src_vocab_size, embed_size, num_layers, heads,
                               device, forward_expansion, dropout, max_length)
        self.decoder = Decoder(trg_vocab_size, embed_size, num_layers, heads,
                               forward_expansion, dropout, device, max_length)
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device

    def make_src_mask(self, src):
        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)  # (N, 1, 1, src_len)
        return src_mask.to(self.device)

    def make_trg_mask(self, trg):
        N, trg_len = trg.shape
        trg_mask = torch.tril(torch.ones((trg_len, trg_len))).expand(N, 1, trg_len, trg_len)
        return trg_mask.to(self.device)

    def forward(self, src, trg):
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)
        enc_src = self.encoder(src, src_mask)
        out = self.decoder(trg, enc_src, src_mask, trg_mask)
        return out


if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    x = torch.tensor([[1, 5, 6, 4, 3, 9, 5, 2, 0], [1, 8, 7, 3, 4, 5, 6, 7, 2]]).to(device)
    trg = torch.tensor([[1, 7, 4, 3, 5, 9, 2, 0], [1, 5, 6, 2, 4, 7, 6, 2]]).to(device)

    src_pad_idx = 0
    trg_pad_idx = 0
    src_vocab_size = 10
    trg_vocab_size = 10
    model = Transformer(src_vocab_size, trg_vocab_size, src_pad_idx, trg_pad_idx, device=device).to(device)
    out = model(x, trg[:, :-1])
    print(out.shape)
```
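The test block above only checks output shapes. As a follow-up, here is a minimal training-step sketch that is not part of the original code. It assumes `model`, `x`, `trg`, and `trg_pad_idx` from the `__main__` block are in scope, that index 0 should be ignored by the loss, and that the learning rate is just a placeholder:

```python
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss(ignore_index=trg_pad_idx)  # skip padding positions in the loss
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)  # lr is an arbitrary placeholder

# Teacher forcing: feed trg[:, :-1] to the decoder, predict trg[:, 1:].
output = model(x, trg[:, :-1])                              # (N, trg_len - 1, trg_vocab_size)
loss = criterion(output.reshape(-1, output.shape[-1]),      # (N * (trg_len - 1), vocab)
                 trg[:, 1:].reshape(-1))                     # (N * (trg_len - 1),)

optimizer.zero_grad()
loss.backward()
optimizer.step()
print(loss.item())
```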