1.1 Where to stitch
Test file? ×
Training file? ×
Model file? √
1.2 Stitching a module into the backbone network
Take Vision Transformer as an example. The model file contains many classes, and we only add the module to the final, top-level class that ties everything together. Next we prepare the module we want to stitch in, for example the SE-Net module, and first create a test file to make sure the module runs on its own:
import numpy as np
import torch
from torch import nn
from torch.nn import init


class SEAttention(nn.Module):
    # SE module: channel is the number of channels, reduction is the dimensionality-reduction ratio
    def __init__(self, channel=512, reduction=16):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)  # adaptive average pooling squeezes the spatial dimensions to 1x1
        self.fc = nn.Sequential(  # two fully connected layers form the excitation step: reduce then restore the dimensionality to weigh channel importance
            nn.Linear(channel, channel // reduction, bias=False),  # reduce dimensions to cut parameters and computation
            nn.ReLU(inplace=True),  # ReLU introduces non-linearity
            nn.Linear(channel // reduction, channel, bias=False),  # restore the original number of channels
            nn.Sigmoid()  # Sigmoid outputs an importance coefficient for each channel
        )

    # weight initialization
    def init_weights(self):
        for m in self.modules():  # iterate over all submodules
            if isinstance(m, nn.Conv2d):  # convolution layers
                init.kaiming_normal_(m.weight, mode='fan_out')  # Kaiming initialization
                if m.bias is not None:
                    init.constant_(m.bias, 0)  # zero the bias if present
            elif isinstance(m, nn.BatchNorm2d):  # batch-normalization layers
                init.constant_(m.weight, 1)  # weights initialized to 1
                init.constant_(m.bias, 0)  # biases initialized to 0
            elif isinstance(m, nn.Linear):  # fully connected layers
                init.normal_(m.weight, std=0.001)  # weights drawn from a normal distribution
                if m.bias is not None:
                    init.constant_(m.bias, 0)  # biases initialized to 0

    # forward pass
    def forward(self, x):
        b, c, _, _ = x.size()  # batch size b and channel count c of the input
        y = self.avg_pool(x).view(b, c)  # pool, then reshape to match the fully connected layers
        y = self.fc(y).view(b, c, 1, 1)  # compute channel importance, reshape to match the feature map
        return x * y.expand_as(x)  # recalibrate the features with the channel importance coefficients


# example usage
if __name__ == '__main__':
    input = torch.randn(50, 512, 7, 7)  # randomly generated input feature map
    se = SEAttention(channel=512, reduction=8)  # instantiate the SE module with a reduction ratio of 8
    output = se(input)  # run the input feature map through the SE module
    print(output.shape)  # print the output shape to verify that the module works

Running this prints the shape of the processed feature map. The key point when stitching a module is the first feature dimension after the batch dimension, i.e. the channel dimension: it has to stay consistent with the backbone. As long as the input and output channel counts are aligned, the module can be stitched in successfully.
Copy the module into the backbone network file and then stitch it in. Before stitching, test whether the channel counts match; otherwise it is guaranteed to throw an error.
How to check the channel count
Find the forward-pass section of the backbone and add print(x.shape) at the place where you want to insert the module, then run the training file.
With the print placed at the very beginning of the forward pass, the channel count is 3 and 8 is the batch size.
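A minimal sketch of this check, using a toy stand-in for the backbone (the class name Backbone, the 224x224 input size and the batch size of 8 are assumptions for illustration):

import torch
from torch import nn

class Backbone(nn.Module):
    # Toy stand-in for the real backbone; only the debug print matters here.
    def __init__(self):
        super().__init__()
        self.patch_embed = nn.Conv2d(3, 768, kernel_size=16, stride=16)

    def forward(self, x):
        print(x.shape)  # insertion point: prints torch.Size([8, 3, 224, 224]) -> channel count 3, batch size 8
        return self.patch_embed(x)

if __name__ == '__main__':
    Backbone()(torch.randn(8, 3, 224, 224))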
Adding the module to the backbone network
In the backbone's __init__ method, instantiate the module (Ctrl+P shows the constructor parameters); the channel count must match the one we checked earlier. Then call the module in the forward pass and check whether training still runs normally. If it does, the module has been stitched in successfully.
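A minimal sketch of the two edits, again using a toy stand-in rather than the full ViT model file (SEAttention is the class from the test snippet above and is assumed to be in scope; reduction=1 is used here so that 3 // reduction stays at least 1):

import torch
from torch import nn
# assumes SEAttention from the test snippet above is importable or defined in this file

class VisionTransformerWithSE(nn.Module):
    # Toy stand-in for the ViT model file, showing only the two stitching edits.
    def __init__(self):
        super().__init__()
        # 1) __init__: instantiate the module; channel must equal the printed channel count (3 here)
        self.se = SEAttention(channel=3, reduction=1)
        self.patch_embed = nn.Conv2d(3, 768, kernel_size=16, stride=16)

    def forward(self, x):
        # 2) forward: call the module at the position where print(x.shape) was added
        x = self.se(x)
        return self.patch_embed(x)

if __name__ == '__main__':
    out = VisionTransformerWithSE()(torch.randn(8, 3, 224, 224))
    print(out.shape)  # torch.Size([8, 768, 14, 14])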
Printing the model structure after stitching
This is done in the model file. Printing the model gives the structure below (blocks 1 through 11 are identical to block 0):

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (fc2): Linear(in_features=3072, out_features=768, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
    (1): Block( ... )
    ...
    (11): Block( ... )
  )
  (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
  (pre_logits): Sequential(
    (fc): Linear(in_features=768, out_features=768, bias=True)
    (act): Tanh()
  )
  (head): Linear(in_features=768, out_features=21843, bias=True)
  (se): SEAttention(
    (avg_pool): AdaptiveAvgPool2d(output_size=1)
    (fc): Sequential(
      (0): Linear(in_features=3, out_features=0, bias=False)
      (1): ReLU(inplace=True)
      (2): Linear(in_features=0, out_features=3, bias=False)
      (3): Sigmoid()
    )
  )
)

We can see an extra SEAttention at the end, which means the module has been stitched in.

1.3 Stitching modules to each other
Take the SE-Net and ECA modules as an example.
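The ECA code itself is not listed here; a commonly used implementation that matches the gap / conv / sigmoid layers in the printouts below would look roughly like this (a sketch, not necessarily the exact file used in this post):

import torch
from torch import nn

class ECAAttention(nn.Module):
    # Efficient Channel Attention: a 1D convolution over the pooled channel descriptor.
    def __init__(self, kernel_size=3):
        super().__init__()
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.conv = nn.Conv1d(1, 1, kernel_size=kernel_size, padding=(kernel_size - 1) // 2)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        y = self.gap(x)                       # (b, c, 1, 1): squeeze the spatial dimensions
        y = y.squeeze(-1).permute(0, 2, 1)    # (b, 1, c): treat the channels as a 1D sequence
        y = self.sigmoid(self.conv(y))        # per-channel importance coefficients
        y = y.permute(0, 2, 1).unsqueeze(-1)  # back to (b, c, 1, 1)
        return x * y.expand_as(x)             # recalibrate the input feature map

if __name__ == '__main__':
    x = torch.randn(1, 64, 32, 32)
    print(ECAAttention(kernel_size=3)(x).shape)  # torch.Size([1, 64, 32, 32])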
Connecting the modules in series

Method 1
Same as in 1.2: follow the same pattern, and keep the channel counts consistent. Printing the model structure gives:

ECAAttention(
  (gap): AdaptiveAvgPool2d(output_size=1)
  (conv): Conv1d(1, 1, kernel_size=(3,), stride=(1,), padding=(1,))
  (sigmoid): Sigmoid()
  (se): SEAttention(
    (avg_pool): AdaptiveAvgPool2d(output_size=1)
    (fc): Sequential(
      (0): Linear(in_features=64, out_features=4, bias=False)
      (1): ReLU(inplace=True)
      (2): Linear(in_features=4, out_features=64, bias=False)
      (3): Sigmoid()
    )
  )
)
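A minimal sketch of what this nesting could look like (assuming SEAttention and ECAAttention from the snippets above are in scope; where exactly the SE call sits inside the forward pass is a design choice, not something stated in the post):

import torch

class ECAAttentionWithSE(ECAAttention):
    # SE module stitched inside the ECA module, mirroring the printed structure above.
    def __init__(self, channel=64, kernel_size=3):
        super().__init__(kernel_size=kernel_size)
        self.se = SEAttention(channel=channel, reduction=16)  # 64 // 16 = 4, matching the printout

    def forward(self, x):
        x = self.se(x)             # assumed placement: recalibrate the channels first
        return super().forward(x)  # then apply the ECA branch

if __name__ == '__main__':
    x = torch.randn(1, 64, 32, 32)
    print(ECAAttentionWithSE(channel=64)(x).shape)  # torch.Size([1, 64, 32, 32])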
Method 2

We define a cascade wrapper that chains the modules one after another, instantiate it, and inspect the model structure. The output is:

torch.Size([1, 63, 64, 64])
torch.Size([1, 63, 64, 64])
Cascade(
  (se): SEAttention(
    (avg_pool): AdaptiveAvgPool2d(output_size=1)
    (fc): Sequential(
      (0): Linear(in_features=63, out_features=3, bias=False)
      (1): ReLU(inplace=True)
      (2): Linear(in_features=3, out_features=63, bias=False)
      (3): Sigmoid()
    )
  )
  (eca): ECAAttention(
    (gap): AdaptiveAvgPool2d(output_size=1)
    (conv): Conv1d(1, 1, kernel_size=(63,), stride=(1,), padding=(31,))
    (sigmoid): Sigmoid()
  )
)
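A sketch of such a wrapper (assuming the SEAttention and ECAAttention classes above are in scope; the printout above was produced with kernel_size=63 for the ECA branch, while kernel_size=3 is used here as a more common default):

import torch
from torch import nn

class Cascade(nn.Module):
    # Series connection: the output of SE feeds straight into ECA.
    def __init__(self, channel=63):
        super().__init__()
        self.se = SEAttention(channel=channel)  # default reduction=16 -> Linear(63, 3) as printed above
        self.eca = ECAAttention(kernel_size=3)  # the kernel size is a free choice

    def forward(self, x):
        return self.eca(self.se(x))

if __name__ == '__main__':
    x = torch.randn(1, 63, 64, 64)
    model = Cascade(channel=63)
    out = model(x)
    print(x.shape)    # torch.Size([1, 63, 64, 64])
    print(out.shape)  # torch.Size([1, 63, 64, 64])
    print(model)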
Connecting the modules in parallel

For parallel connection there are many options; the tensors produced by the two modules can be:
(1) added element-wise; (2) multiplied element-wise; (3) concatenated along the channel dimension; (4) combined in other ways.

The output (here using concatenation) is:

torch.Size([1, 63, 64, 64])
torch.Size([1, 126, 64, 64])
Cascade(
  (se): SEAttention(
    (avg_pool): AdaptiveAvgPool2d(output_size=1)
    (fc): Sequential(
      (0): Linear(in_features=63, out_features=3, bias=False)
      (1): ReLU(inplace=True)
      (2): Linear(in_features=3, out_features=63, bias=False)
      (3): Sigmoid()
    )
  )
  (eca): ECAAttention(
    (gap): AdaptiveAvgPool2d(output_size=1)
    (conv): Conv1d(1, 1, kernel_size=(63,), stride=(1,), padding=(31,))
    (sigmoid): Sigmoid()
  )
)

Note that the channel count doubles from 63 to 126 because the two branch outputs are concatenated along the channel dimension.
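A sketch of a parallel wrapper with the three fusion options (the class name ParallelFusion and the mode argument are hypothetical; SEAttention and ECAAttention from above are assumed to be in scope):

import torch
from torch import nn

class ParallelFusion(nn.Module):
    # Parallel connection: both modules see the same input, then their outputs are fused.
    def __init__(self, channel=63, mode='concat'):
        super().__init__()
        self.se = SEAttention(channel=channel)
        self.eca = ECAAttention(kernel_size=3)
        self.mode = mode

    def forward(self, x):
        a = self.se(x)
        b = self.eca(x)
        if self.mode == 'add':    # (1) element-wise addition, channel count unchanged
            return a + b
        if self.mode == 'mul':    # (2) element-wise multiplication, channel count unchanged
            return a * b
        return torch.cat([a, b], dim=1)  # (3) concatenation: 63 -> 126 channels

if __name__ == '__main__':
    x = torch.randn(1, 63, 64, 64)
    model = ParallelFusion(channel=63, mode='concat')
    print(x.shape)         # torch.Size([1, 63, 64, 64])
    print(model(x).shape)  # torch.Size([1, 126, 64, 64])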
1.4 Thoughts

Do not restrict yourself to pure series or pure parallel connections; the two can be combined. Part of the modules can run in parallel, and that group can then be connected in series with other modules, and so on. Somewhere among all these combinations there will be a model that does what you want.
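One hypothetical example of such a combination (a sketch only, assuming the classes above are in scope): SE and ECA run in parallel, their outputs are added, and the result is passed through another ECA in series.

import torch
from torch import nn

class HybridBlock(nn.Module):
    # Hypothetical series-parallel combination of the two modules above.
    def __init__(self, channel=63):
        super().__init__()
        self.se = SEAttention(channel=channel)
        self.eca1 = ECAAttention(kernel_size=3)
        self.eca2 = ECAAttention(kernel_size=3)

    def forward(self, x):
        y = self.se(x) + self.eca1(x)  # parallel stage, fused by element-wise addition
        return self.eca2(y)            # series stage

if __name__ == '__main__':
    x = torch.randn(1, 63, 64, 64)
    print(HybridBlock(channel=63)(x).shape)  # torch.Size([1, 63, 64, 64])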