哪家手表网站,html网页制作个人主页制作教程,北京建设信源网站 怎么打不开,亚马逊雨林面积有多大本篇文章将介绍一个新的改进机制——WTConv（小波卷积），并阐述如何将其应用于YOLOv11中，显著提升模型性能。YOLOv11模型相比较于前几个模型在检测精度和速度上有显著提升，但其仍然受卷积核感受野大小的限制。因此…… 本篇文章将介绍一个新的改进机制——WTConv小波卷积并阐述如何将其应用于YOLOv11中显著提升模型性能。YOLOv11模型相比较于前几个模型在检测精度和速度上有显著提升但其仍然受卷积核感受野大小的限制。因此我们引入了小波卷积模块旨在扩大卷积的感受野并有效捕捉图像中的低频信息。其对多尺度问题和小目标问题上有很好的效果。 首先我们将解析WTConv的工作原理它通过小波变换将输入图像分解为不同的频率成分并在每个频率层上进行小尺寸卷积最后通过逆小波变换将结果重新组合从而实现对图像的多尺度分析。随后我们会详细说明如何将该模块与YOLOv11相结合展示代码实现细节及其使用方法最终展现这一改进对目标检测效果的积极影响。
1. Wavelet Convolutions(WTConv)结构介绍 1. 首先WTConv利用二维Haar小波变换对输入图像进行多级分解。Haar小波变换使用四个滤波器将图像分解为四个子带低频分量 (LL)捕捉图像的低频信息如整体形状或轮廓。水平高频分量 (LH)捕捉图像中的水平边缘信息。垂直高频分量 (HL)捕捉图像中的垂直边缘信息。对角线高频分量 (HH)捕捉图像的对角线细节。在每一级的小波变换中图像被下采样空间分辨率减半但频率信息得到了更细的分解。递归地执行小波变换称为多级分解可以得到不同尺度下的频率分量。 2. 在WTConv中卷积并非直接在原始输入图像上进行而是分别在每个频率子带上应用小尺寸的深度卷积核。通常使用3x3或5x5的小卷积核对分解后的各个子带进行卷积操作。由于小波变换降低了每个子带的空间分辨率小尺寸的卷积核可以覆盖更大的原始图像区域即感受野增大。 低频子带 (LL) 主要包含了图像的大尺度信息因此在其上应用卷积有助于捕捉全局的特征。 高频子带 (LH, HL, HH) 则包含局部边缘和细节信息卷积操作可以捕捉这些细节。 3. 在完成卷积后使用逆小波变换Inverse Wavelet Transform, IWT将各个子带的卷积结果重新合成为一个完整的输出。这一过程类似于将不同频率层次的特征融合在一起。值得注意的是IWT操作是线性的因此可以无损地将卷积结果重构到原始空间。 2. YOLOv11与WTConv的结合
1. 改进C3k2本文使用WTConv卷积改进C3k2构建C3k2_WT模块然后使用C3k2_WT替换原有的C3k2这样就可以利用WTConv扩大模型的感受野。
2. 在head中使用DSConvWithWT卷积本文使用WTConv卷积构建DSConvWithWT深度可分离卷积然后将它替换head中的深度可分离卷积扩大head层的感受野。
3. Wavelet Convolutions(WTConv)代码部分
import pywt
import pywt.data
import torch
from torch import nn
from functools import partial
import torch.nn.functional as F

from .conv import Conv
from .block import C2f, C3, Bottleneck


def create_wavelet_filter(wave, in_size, out_size, type=torch.float):
    """Build fixed depthwise filter banks for a 2D wavelet (de)composition.

    Args:
        wave: PyWavelets wavelet name, e.g. ``'db1'`` (Haar).
        in_size: number of input channels (decomposition filters are
            replicated once per channel for a grouped conv).
        out_size: number of output channels (reconstruction filters are
            replicated likewise).
        type: torch dtype of the filters. NOTE: the parameter name shadows
            the ``type`` builtin; kept for backward compatibility.

    Returns:
        (dec_filters, rec_filters): tensors of shape
        ``(in_size * 4, 1, k, k)`` and ``(out_size * 4, 1, k, k)`` holding
        the LL/LH/HL/HH analysis and synthesis kernels.
    """
    w = pywt.Wavelet(wave)
    dec_hi = torch.tensor(w.dec_hi[::-1], dtype=type)
    dec_lo = torch.tensor(w.dec_lo[::-1], dtype=type)
    # Outer products of the 1D taps give the four 2D sub-band kernels:
    # LL (low/low), LH, HL and HH.
    dec_filters = torch.stack([
        dec_lo.unsqueeze(0) * dec_lo.unsqueeze(1),
        dec_lo.unsqueeze(0) * dec_hi.unsqueeze(1),
        dec_hi.unsqueeze(0) * dec_lo.unsqueeze(1),
        dec_hi.unsqueeze(0) * dec_hi.unsqueeze(1),
    ], dim=0)
    # Replicate for every input channel -> depthwise (grouped) convolution.
    dec_filters = dec_filters[:, None].repeat(in_size, 1, 1, 1)

    rec_hi = torch.tensor(w.rec_hi[::-1], dtype=type).flip(dims=[0])
    rec_lo = torch.tensor(w.rec_lo[::-1], dtype=type).flip(dims=[0])
    rec_filters = torch.stack([
        rec_lo.unsqueeze(0) * rec_lo.unsqueeze(1),
        rec_lo.unsqueeze(0) * rec_hi.unsqueeze(1),
        rec_hi.unsqueeze(0) * rec_lo.unsqueeze(1),
        rec_hi.unsqueeze(0) * rec_hi.unsqueeze(1),
    ], dim=0)
    rec_filters = rec_filters[:, None].repeat(out_size, 1, 1, 1)

    return dec_filters, rec_filters


def wavelet_transform(x, filters):
    """One level of 2D wavelet analysis via a strided depthwise conv.

    Args:
        x: input of shape ``(b, c, h, w)``.
        filters: decomposition bank from :func:`create_wavelet_filter`.

    Returns:
        Tensor of shape ``(b, c, 4, h // 2, w // 2)`` — the four sub-bands
        (index 0 is LL, 1:4 are LH/HL/HH) at half resolution.
    """
    b, c, h, w = x.shape
    pad = (filters.shape[2] // 2 - 1, filters.shape[3] // 2 - 1)
    x = F.conv2d(x, filters, stride=2, groups=c, padding=pad)
    x = x.reshape(b, c, 4, h // 2, w // 2)
    return x


def inverse_wavelet_transform(x, filters):
    """One level of 2D wavelet synthesis via a transposed depthwise conv.

    Args:
        x: sub-band tensor of shape ``(b, c, 4, h/2, w/2)``.
        filters: reconstruction bank from :func:`create_wavelet_filter`.

    Returns:
        Reconstructed tensor of shape ``(b, c, h, w)``.
    """
    b, c, _, h_half, w_half = x.shape
    pad = (filters.shape[2] // 2 - 1, filters.shape[3] // 2 - 1)
    x = x.reshape(b, c * 4, h_half, w_half)
    x = F.conv_transpose2d(x, filters, stride=2, groups=c, padding=pad)
    return x
# Wavelet Transform Conv (WTConv2d)
class WTConv2d(nn.Module):
    """Depthwise convolution with an enlarged effective receptive field.

    The input is decomposed with a multi-level 2D wavelet transform; a small
    depthwise convolution is applied to every frequency sub-band at every
    level; the filtered sub-bands are recombined with the inverse transform
    and added to a plain depthwise convolution of the original input.

    Constraint: ``in_channels`` must equal ``out_channels`` (depthwise design).
    """

    def __init__(self, in_channels, out_channels, kernel_size=5, stride=1,
                 bias=True, wt_levels=1, wt_type='db1'):
        super(WTConv2d, self).__init__()
        assert in_channels == out_channels

        self.in_channels = in_channels
        self.wt_levels = wt_levels
        self.stride = stride
        self.dilation = 1

        # Fixed (non-trainable) analysis / synthesis wavelet filter banks.
        self.wt_filter, self.iwt_filter = create_wavelet_filter(wt_type, in_channels, in_channels, torch.float)
        self.wt_filter = nn.Parameter(self.wt_filter, requires_grad=False)
        self.iwt_filter = nn.Parameter(self.iwt_filter, requires_grad=False)

        self.wt_function = partial(wavelet_transform, filters=self.wt_filter)
        self.iwt_function = partial(inverse_wavelet_transform, filters=self.iwt_filter)

        # Spatial-domain path: depthwise conv applied directly to the input.
        self.base_conv = nn.Conv2d(in_channels, in_channels, kernel_size, padding='same',
                                   stride=1, dilation=1, groups=in_channels, bias=bias)
        self.base_scale = _ScaleModule([1, in_channels, 1, 1])

        # One depthwise conv per decomposition level, over the 4 sub-bands.
        self.wavelet_convs = nn.ModuleList(
            [nn.Conv2d(in_channels * 4, in_channels * 4, kernel_size, padding='same',
                       stride=1, dilation=1, groups=in_channels * 4, bias=False)
             for _ in range(self.wt_levels)])
        self.wavelet_scale = nn.ModuleList(
            [_ScaleModule([1, in_channels * 4, 1, 1], init_scale=0.1)
             for _ in range(self.wt_levels)])

        if self.stride > 1:
            # Subsample the output with a fixed depthwise 1x1 "identity" conv.
            self.stride_filter = nn.Parameter(torch.ones(in_channels, 1, 1, 1), requires_grad=False)
            self.do_stride = lambda x_in: F.conv2d(x_in, self.stride_filter, bias=None,
                                                   stride=self.stride, groups=in_channels)
        else:
            self.do_stride = None

    def forward(self, x):
        x_ll_in_levels = []
        x_h_in_levels = []
        shapes_in_levels = []

        # Analysis: recursively decompose the LL band, filtering every level.
        curr_x_ll = x
        for i in range(self.wt_levels):
            curr_shape = curr_x_ll.shape
            shapes_in_levels.append(curr_shape)
            if (curr_shape[2] % 2 > 0) or (curr_shape[3] % 2 > 0):
                # Pad odd spatial dims so the transform can halve them cleanly.
                curr_pads = (0, curr_shape[3] % 2, 0, curr_shape[2] % 2)
                curr_x_ll = F.pad(curr_x_ll, curr_pads)

            curr_x = self.wt_function(curr_x_ll)  # (b, c, 4, h/2, w/2)
            curr_x_ll = curr_x[:, :, 0, :, :]     # LL band feeds the next level

            shape_x = curr_x.shape
            curr_x_tag = curr_x.reshape(shape_x[0], shape_x[1] * 4, shape_x[3], shape_x[4])
            curr_x_tag = self.wavelet_scale[i](self.wavelet_convs[i](curr_x_tag))
            curr_x_tag = curr_x_tag.reshape(shape_x)

            x_ll_in_levels.append(curr_x_tag[:, :, 0, :, :])
            x_h_in_levels.append(curr_x_tag[:, :, 1:4, :, :])

        # Synthesis: recompose from the deepest level back to full resolution.
        next_x_ll = 0
        for i in range(self.wt_levels - 1, -1, -1):
            curr_x_ll = x_ll_in_levels.pop()
            curr_x_h = x_h_in_levels.pop()
            curr_shape = shapes_in_levels.pop()

            curr_x_ll = curr_x_ll + next_x_ll
            curr_x = torch.cat([curr_x_ll.unsqueeze(2), curr_x_h], dim=2)
            next_x_ll = self.iwt_function(curr_x)
            # Crop any padding that was added during analysis.
            next_x_ll = next_x_ll[:, :, :curr_shape[2], :curr_shape[3]]

        x_tag = next_x_ll
        assert len(x_ll_in_levels) == 0

        # Combine the spatial-domain and wavelet-domain paths.
        x = self.base_scale(self.base_conv(x))
        x = x + x_tag

        if self.do_stride is not None:
            x = self.do_stride(x)

        return x


class _ScaleModule(nn.Module):
    """Learnable element-wise scaling by a weight of shape ``dims``."""

    def __init__(self, dims, init_scale=1.0, init_bias=0):
        super(_ScaleModule, self).__init__()
        self.dims = dims
        self.weight = nn.Parameter(torch.ones(*dims) * init_scale)
        self.bias = None  # bias is unused; kept for interface stability

    def forward(self, x):
        return torch.mul(self.weight, x)


class DSConvWithWT(nn.Module):
    """Depthwise-separable convolution whose depthwise stage is a WTConv2d."""

    def __init__(self, in_channels, out_channels, kernel_size=3):
        super(DSConvWithWT, self).__init__()
        # Depthwise: WTConv2d replaces the usual 3x3 depthwise convolution.
        self.depthwise = WTConv2d(in_channels, in_channels, kernel_size=kernel_size)
        # Pointwise: 1x1 convolution to mix channels.
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1,
                                   stride=1, padding=0, bias=False)

    def forward(self, x):
        x = self.depthwise(x)
        x = self.pointwise(x)
        return x


class Bottleneck_WT(nn.Module):
    """Standard bottleneck with its second convolution replaced by WTConv2d."""

    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
        """Initializes a standard bottleneck module with optional shortcut connection and configurable parameters."""
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, k[0], 1)
        self.cv2 = WTConv2d(c_, c2)  # NOTE: WTConv2d requires c_ == c2
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Applies the YOLO FPN to input data."""
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class C3k_WT(C3):
    """C3k is a CSP bottleneck module with customizable kernel sizes for feature extraction in neural networks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, k=3):
        """Initializes the C3k module with specified channels, number of layers, and configurations."""
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels
        # self.m = nn.Sequential(*(RepBottleneck(c_, c_, shortcut, g, k=(k, k), e=1.0) for _ in range(n)))
        self.m = nn.Sequential(*(Bottleneck_WT(c_, c_, shortcut, g, k=(k, k), e=1.0) for _ in range(n)))

# With c3k=True the C3k2 variant uses Bottleneck_WT for feature fusion;
# with c3k=False the plain Bottleneck is used for feature extraction.
class C3k2_WT(C2f):
    """Faster Implementation of CSP Bottleneck with 2 convolutions."""

    def __init__(self, c1, c2, n=1, c3k=False, e=0.5, g=1, shortcut=True):
        """Initializes the C3k2 module, a faster CSP Bottleneck with 2 convolutions and optional C3k blocks."""
        super().__init__(c1, c2, n, shortcut, g, e)
        # c3k=True -> wavelet C3k_WT blocks; c3k=False -> plain Bottleneck.
        self.m = nn.ModuleList(
            C3k_WT(self.c, self.c, 2, shortcut, g) if c3k else Bottleneck(self.c, self.c, shortcut, g)
            for _ in range(n)
        )


if __name__ == '__main__':
    # Smoke test: run one forward pass through the WT depthwise-separable conv.
    DW = DSConvWithWT(256, 128)

    # Create an input tensor.
    batch_size = 8
    input_tensor = torch.randn(batch_size, 256, 64, 64)

    # Run the model and print the input and output shapes.
    output_tensor = DW(input_tensor)
    print('Input shape:', input_tensor.shape)
    print('Output shape:', output_tensor.shape)

# 4. Integrating WTConv into YOLOv11 (article section header)
第一：将下面的核心代码复制到D:\bilibili\model\YOLO11\ultralytics-main\ultralytics\nn路径下，如下图所示。 第二：在task.py中导入WTConv包。 第三：在task.py中的模型配置部分添加下面代码。
第一个改进需修改的地方 第二个改进需修改的地方 将DWConv改成DSConvWithWT 第四：将模型配置文件复制到YOLOv11的YAML文件中
第一个修改的配置文件
# Ultralytics YOLO, AGPL-3.0 license
# YOLO11 object detection model with P3-P5 outputs. For Usage examples see https://docs.ultralytics.com/tasks/detect

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. model=yolo11n.yaml will call yolo11.yaml with scale 'n'
  # [depth, width, max_channels]
  n: [0.50, 0.25, 1024] # summary: 319 layers, 2624080 parameters, 2624064 gradients, 6.6 GFLOPs
  s: [0.50, 0.50, 1024] # summary: 319 layers, 9458752 parameters, 9458736 gradients, 21.7 GFLOPs
  m: [0.50, 1.00, 512] # summary: 409 layers, 20114688 parameters, 20114672 gradients, 68.5 GFLOPs
  l: [1.00, 1.00, 512] # summary: 631 layers, 25372160 parameters, 25372144 gradients, 87.6 GFLOPs
  x: [1.00, 1.50, 512] # summary: 631 layers, 56966176 parameters, 56966160 gradients, 196.0 GFLOPs

# YOLO11n backbone
backbone:
  # [from, repeats, module, args]
  - [-1, 1, Conv, [64, 3, 2]] # 0-P1/2
  - [-1, 1, Conv, [128, 3, 2]] # 1-P2/4
  - [-1, 2, C3k2_WT, [256, False, 0.25]]
  - [-1, 1, Conv, [256, 3, 2]] # 3-P3/8
  - [-1, 2, C3k2_WT, [512, False, 0.25]]
  - [-1, 1, Conv, [512, 3, 2]] # 5-P4/16
  - [-1, 2, C3k2_WT, [512, True]]
  - [-1, 1, Conv, [1024, 3, 2]] # 7-P5/32
  - [-1, 2, C3k2_WT, [1024, True]]
  - [-1, 1, SPPF, [1024, 5]] # 9
  - [-1, 2, C2PSA, [1024]] # 10

# YOLO11n head
head:
  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 6], 1, Concat, [1]] # cat backbone P4
  - [-1, 2, C3k2, [512, False]] # 13

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [[-1, 4], 1, Concat, [1]] # cat backbone P3
  - [-1, 2, C3k2, [256, False]] # 16 (P3/8-small)

  - [-1, 1, Conv, [256, 3, 2]]
  - [[-1, 13], 1, Concat, [1]] # cat head P4
  - [-1, 2, C3k2, [512, False]] # 19 (P4/16-medium)

  - [-1, 1, Conv, [512, 3, 2]]
  - [[-1, 10], 1, Concat, [1]] # cat head P5
  - [-1, 2, C3k2, [1024, True]] # 22 (P5/32-large)

  - [[16, 19, 22], 1, Detect, [nc]] # Detect(P3, P4, P5)

# (Article section header) The config file for the second modification
第二个不需要修改配置文件
# Step 5: run the training script successfully.
from ultralytics.models import NAS, RTDETR, SAM, YOLO, FastSAM, YOLOWorld

if __name__ == '__main__':
    # Build the model from the custom YOLOv11 YAML and load pretrained weights,
    # then fine-tune on the custom VOC dataset.
    model = YOLO(r'D:\bilibili\model\YOLO11\ultralytics-main\ultralytics\cfg\models\11\yolo11_WTConv.yaml') \
        .load(r'D:\bilibili\model\YOLO11\ultralytics-main\yolo11n.pt')  # build from YAML and transfer weights

    results = model.train(data=r'D:\bilibili\model\ultralytics-main\ultralytics\cfg\datasets\VOC_my.yaml',
                          epochs=100, imgsz=640, batch=8)