RT-DETR融合GhostModule V3(GhostNetV3)及相关改进思路
RT-DETR使用教程: RT-DETR使用教程
RT-DETR改进汇总贴:RT-DETR更新汇总贴
《GhostNetV3: Exploring the Training Strategies for Compact Models》
一、 模块介绍
论文链接:https://arxiv.org/pdf/2404.11202v1
代码链接:https://github.com/huawei-noah/Efficient-AI-Backbones/....
论文速览:
紧凑型神经网络专为边缘设备上的应用而设计,具有更快的推理速度和适中的性能。然而,目前紧凑模型的训练策略直接借鉴自传统模型,忽略了二者在模型容量上的差异,从而可能限制紧凑模型的性能。在本文中,通过系统地研究不同训练要素的影响,我们提出了一种针对紧凑模型的强大训练策略。我们发现,适当的重参数化和知识蒸馏设计对于训练高性能紧凑模型至关重要,而一些常用于训练常规模型的数据增强方法(如 Mixup 和 CutMix)反而会导致性能变差。我们在 ImageNet-1K 数据集上的实验表明,这一针对紧凑模型的专门训练策略适用于各种架构,包括 GhostNetV2、MobileNetV2 和 ShuffleNetV2。具体来说,采用我们的策略后,GhostNetV3 1.3× 在移动设备上仅以 269M FLOPs 和 14.46ms 的延迟实现了 79.1% 的 top-1 准确率,大幅超过按常规方式训练的同类模型。此外,我们的结论还可以推广到目标检测场景。
总结:Ghost Net V3。
二、 加入到RT-DETR中
2.1 创建脚本文件
首先在ultralytics->nn路径下创建blocks.py脚本,用于存放模块代码。
2.2 复制代码
复制代码粘贴到刚刚创建的blocks.py脚本中,如下图所示:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from typing import Tuple


def _make_divisible(v, divisor, min_value=None):
    """Round ``v`` to a nearby multiple of ``divisor``, never below ``min_value``.

    This function is taken from the original tf repo. It ensures that all
    layers have a channel number that is divisible by the given divisor.
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py

    :param v: Requested (possibly fractional) channel count.
    :param divisor: The result is a multiple of this value.
    :param min_value: Lower bound for the result; defaults to ``divisor``.
    :return: The adjusted channel count.
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


def hard_sigmoid(x, inplace: bool = False):
    """Piecewise-linear sigmoid approximation: ``relu6(x + 3) / 6``.

    :param x: Input tensor.
    :param inplace: When True, ``x`` itself is overwritten and returned.
    :return: Tensor with values in ``[0, 1]``.
    """
    if inplace:
        return x.add_(3.).clamp_(0., 6.).div_(6.)
    return F.relu6(x + 3.) / 6.


class SqueezeExcite(nn.Module):
    """Squeeze-and-excitation gate: global pool -> 1x1 reduce -> act -> 1x1 expand -> gate."""

    def __init__(self, in_chs, se_ratio=0.25, reduced_base_chs=None,
                 act_layer=nn.ReLU, gate_fn=hard_sigmoid, divisor=4, **_):
        super(SqueezeExcite, self).__init__()
        self.gate_fn = gate_fn
        # The bottleneck width is derived from reduced_base_chs when given, else from in_chs.
        reduced_chs = _make_divisible((reduced_base_chs or in_chs) * se_ratio, divisor)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True)
        self.act1 = act_layer(inplace=True)
        self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True)

    def forward(self, x):
        """Scale ``x`` channel-wise by the gate computed from its pooled features."""
        x_se = self.avg_pool(x)
        x_se = self.conv_reduce(x_se)
        x_se = self.act1(x_se)
        x_se = self.conv_expand(x_se)
        return x * self.gate_fn(x_se)


class ConvBnAct(nn.Module):
    """Bias-free convolution followed by BatchNorm2d and an activation."""

    def __init__(self, in_chs, out_chs, kernel_size,
                 stride=1, act_layer=nn.ReLU):
        super(ConvBnAct, self).__init__()
        self.conv = nn.Conv2d(in_chs, out_chs, kernel_size, stride, kernel_size // 2, bias=False)
        self.bn1 = nn.BatchNorm2d(out_chs)
        self.act1 = act_layer(inplace=True)

    def forward(self, x):
        """Apply conv -> batch norm -> activation."""
        x = self.conv(x)
        x = self.bn1(x)
        x = self.act1(x)
        return x


def gcd(a, b):
    """Return the greatest common divisor of ``a`` and ``b`` (Euclid's algorithm).

    When the smaller argument is 0 the larger one is returned instead of
    raising ``ZeroDivisionError``.
    """
    if a < b:
        a, b = b, a
    while b:
        a, b = b, a % b
    return a


def MyNorm(dim):
    """Return a single-group ``nn.GroupNorm`` over ``dim`` channels."""
    return nn.GroupNorm(1, dim)


class GhostModuleV3(nn.Module):
    """Ghost module with re-parameterizable multi-branch training structure.

    A "primary" convolution produces ``ceil(oup / ratio)`` channels and a
    depthwise "cheap" operation derives the remaining channels from them; the
    two results are concatenated and truncated to ``oup`` channels.

    During training each of the two stages is the sum of
    ``num_conv_branches`` conv+BN branches, an optional 1x1 conv+BN "scale"
    branch and an optional BN-only "skip" branch. :meth:`reparameterize`
    folds all of them into a single convolution per stage.

    Supported modes:

    * ``'ori'`` - plain ghost features.
    * ``'ori_shortcut_mul_conv15'`` - ghost features multiplied by a gate
      computed by ``short_conv`` on a 2x average-pooled copy of the input.

    :param inp: Number of input channels.
    :param oup: Number of output channels.
    :param kernel_size: Kernel size of the primary convolution.
    :param ratio: Ratio between ``oup`` and the primary channel count.
    :param dw_size: Kernel size of the depthwise cheap operation.
    :param stride: Stride of the primary convolution.
    :param relu: Whether each stage is followed by a ReLU.
    :param mode: One of the supported modes listed above.
    :param args: Unused; kept for signature compatibility.
    :raises ValueError: If ``mode`` is not a supported mode.
    """

    _MODES = ('ori', 'ori_shortcut_mul_conv15')

    # Training-time attributes removed once the branches have been fused.
    _RPR_ATTRS = (
        'primary_rpr_conv', 'primary_rpr_scale', 'primary_rpr_skip',
        'cheap_rpr_conv', 'cheap_rpr_scale', 'cheap_rpr_skip',
    )

    def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True, mode='ori', args=None):
        super(GhostModuleV3, self).__init__()
        if mode not in self._MODES:
            # Previously an unknown mode built no layers and forward() returned None.
            raise ValueError(f"Unsupported mode {mode!r}; expected one of {self._MODES}")
        # Pass mode='ori_shortcut_mul_conv15' to enable the gated shortcut variant.
        self.mode = mode
        self.gate_loc = 'before'
        self.inter_mode = 'nearest'
        self.scale = 1.0
        self.infer_mode = False
        self.num_conv_branches = 3
        self.dconv_scale = True
        self.gate_fn = nn.Sigmoid()

        self.oup = oup
        init_channels = math.ceil(oup / ratio)
        new_channels = init_channels * (ratio - 1)

        if self.mode == 'ori_shortcut_mul_conv15':
            # Gate branch: dense conv followed by 1x5 and 5x1 depthwise convs.
            self.short_conv = nn.Sequential(
                nn.Conv2d(inp, oup, kernel_size, stride, kernel_size // 2, bias=False),
                nn.BatchNorm2d(oup),
                nn.Conv2d(oup, oup, kernel_size=(1, 5), stride=1, padding=(0, 2), groups=oup, bias=False),
                nn.BatchNorm2d(oup),
                nn.Conv2d(oup, oup, kernel_size=(5, 1), stride=1, padding=(2, 0), groups=oup, bias=False),
                nn.BatchNorm2d(oup),
            )

        if self.infer_mode:
            # infer_mode is fixed to False above, so this path is currently
            # unreachable at construction time; it is kept for parity with the
            # fused layout produced by reparameterize().
            self._build_plain_stages(inp, init_channels, new_channels, kernel_size, dw_size, stride, relu)
        else:
            self._build_rpr_stages(inp, init_channels, new_channels, kernel_size, dw_size, stride, relu)

    def _build_plain_stages(self, inp, init_channels, new_channels, kernel_size, dw_size, stride, relu):
        """Create single-branch conv+BN(+ReLU) stages."""
        self.primary_conv = nn.Sequential(
            nn.Conv2d(inp, init_channels, kernel_size, stride, kernel_size // 2, bias=False),
            nn.BatchNorm2d(init_channels),
            nn.ReLU(inplace=True) if relu else nn.Sequential(),
        )
        self.cheap_operation = nn.Sequential(
            nn.Conv2d(init_channels, new_channels, dw_size, 1, dw_size // 2, groups=init_channels, bias=False),
            nn.BatchNorm2d(new_channels),
            nn.ReLU(inplace=True) if relu else nn.Sequential(),
        )

    def _build_rpr_stages(self, inp, init_channels, new_channels, kernel_size, dw_size, stride, relu):
        """Create the multi-branch (re-parameterizable) primary and cheap stages."""
        # ---- primary stage ----
        # A BN-only skip is only valid when the stage keeps shape and channel count.
        self.primary_rpr_skip = nn.BatchNorm2d(inp) if inp == init_channels and stride == 1 else None
        self.primary_rpr_conv = nn.ModuleList([
            self._conv_bn(inp, init_channels, kernel_size, stride, kernel_size // 2, bias=False)
            for _ in range(self.num_conv_branches)
        ])
        # Re-parameterizable scale branch. It must use the same stride as the
        # conv branches, otherwise the branch outputs cannot be summed.
        self.primary_rpr_scale = (
            self._conv_bn(inp, init_channels, 1, stride, 0, bias=False) if kernel_size > 1 else None
        )
        self.primary_activation = nn.ReLU(inplace=True) if relu else None

        # ---- cheap (depthwise) stage ----
        self.cheap_rpr_skip = nn.BatchNorm2d(init_channels) if init_channels == new_channels else None
        self.cheap_rpr_conv = nn.ModuleList([
            self._conv_bn(init_channels, new_channels, dw_size, 1, dw_size // 2,
                          groups=init_channels, bias=False)
            for _ in range(self.num_conv_branches)
        ])
        # Re-parameterizable scale branch
        self.cheap_rpr_scale = (
            self._conv_bn(init_channels, new_channels, 1, 1, 0, groups=init_channels, bias=False)
            if dw_size > 1 else None
        )
        self.cheap_activation = nn.ReLU(inplace=True) if relu else None

        # Geometry of the cheap stage (fallback for _fuse_bn_tensor without ref_conv).
        self.in_channels = init_channels
        self.groups = init_channels
        self.kernel_size = dw_size

    def _sum_branches(self, x, conv_branches, scale_branch, skip_branch):
        """Return skip(x) + scale(x) + sum of conv branches for one stage."""
        identity_out = skip_branch(x) if skip_branch is not None else 0
        scale_out = scale_branch(x) if scale_branch is not None and self.dconv_scale else 0
        out = scale_out + identity_out
        for branch in conv_branches:
            out += branch(x)
        return out

    def _ghost_features(self, x):
        """Run the primary and cheap stages and concatenate their outputs."""
        if self.infer_mode:
            x1 = self.primary_conv(x)
            x2 = self.cheap_operation(x1)
        else:
            x1 = self._sum_branches(x, self.primary_rpr_conv, self.primary_rpr_scale, self.primary_rpr_skip)
            if self.primary_activation is not None:
                x1 = self.primary_activation(x1)
            x2 = self._sum_branches(x1, self.cheap_rpr_conv, self.cheap_rpr_scale, self.cheap_rpr_skip)
            if self.cheap_activation is not None:
                x2 = self.cheap_activation(x2)
        return torch.cat([x1, x2], dim=1)

    def forward(self, x):
        """Compute the ghost features (gated in the shortcut mode).

        The result always has ``oup`` channels: the concatenation may be wider
        when ``oup`` is not divisible by ``ratio`` and is truncated.
        """
        if self.mode == 'ori':
            return self._ghost_features(x)[:, :self.oup, :, :]

        # 'ori_shortcut_mul_conv15': gate computed at half resolution.
        res = self.short_conv(F.avg_pool2d(x, kernel_size=2, stride=2))
        out = self._ghost_features(x)
        if self.gate_loc == 'before':
            # Gate first, then upsample to the feature resolution.
            gate = F.interpolate(self.gate_fn(res / self.scale), size=out.shape[-2:], mode=self.inter_mode)
        else:
            # Upsample first, then gate.
            gate = self.gate_fn(F.interpolate(res, size=out.shape[-2:], mode=self.inter_mode))
        return out[:, :self.oup, :, :] * gate

    @staticmethod
    def _fused_conv(ref_conv, kernel, bias):
        """Build a biased conv with ``ref_conv``'s geometry holding the fused weights."""
        conv = nn.Conv2d(in_channels=ref_conv.in_channels,
                         out_channels=ref_conv.out_channels,
                         kernel_size=ref_conv.kernel_size,
                         stride=ref_conv.stride,
                         padding=ref_conv.padding,
                         dilation=ref_conv.dilation,
                         groups=ref_conv.groups,
                         bias=True)
        conv.weight.data = kernel
        conv.bias.data = bias
        return conv

    def reparameterize(self):
        """ Following works like `RepVGG: Making VGG-style ConvNets Great Again` -
        https://arxiv.org/pdf/2101.03697.pdf. We re-parameterize multi-branched
        architecture used at training time to obtain a plain CNN-like structure
        for inference.

        Calling this on an already fused module is a no-op. The fusion uses the
        BatchNorm running statistics, so the fused module reproduces the
        eval-mode output of the multi-branch module.
        """
        if self.infer_mode:
            return

        primary_kernel, primary_bias = self._get_kernel_bias_primary()
        self.primary_conv = nn.Sequential(
            self._fused_conv(self.primary_rpr_conv[0].conv, primary_kernel, primary_bias),
            self.primary_activation if self.primary_activation is not None else nn.Sequential(),
        )

        cheap_kernel, cheap_bias = self._get_kernel_bias_cheap()
        self.cheap_operation = nn.Sequential(
            self._fused_conv(self.cheap_rpr_conv[0].conv, cheap_kernel, cheap_bias),
            self.cheap_activation if self.cheap_activation is not None else nn.Sequential(),
        )

        # Delete un-used branches
        for para in self.parameters():
            para.detach_()
        for name in self._RPR_ATTRS:
            if hasattr(self, name):
                self.__delattr__(name)

        self.infer_mode = True

    def _get_kernel_bias(self, conv_branches, scale_branch, skip_branch) -> Tuple[torch.Tensor, torch.Tensor]:
        """Fuse all branches of one stage into a single (kernel, bias) pair.

        The geometry (kernel size, channels, groups) is taken from the stage's
        own conv branches, so the primary and cheap stages may differ.
        """
        ref_conv = conv_branches[0].conv

        # get weights and bias of scale branch
        kernel_scale = 0
        bias_scale = 0
        # Mirror forward(): the scale branch only contributes when dconv_scale is set.
        if scale_branch is not None and self.dconv_scale:
            kernel_scale, bias_scale = self._fuse_bn_tensor(scale_branch)
            # Pad scale branch kernel to match conv branch kernel size.
            pad_h = ref_conv.kernel_size[0] // 2
            pad_w = ref_conv.kernel_size[1] // 2
            kernel_scale = F.pad(kernel_scale, [pad_w, pad_w, pad_h, pad_h])

        # get weights and bias of skip branch
        kernel_identity = 0
        bias_identity = 0
        if skip_branch is not None:
            kernel_identity, bias_identity = self._fuse_bn_tensor(skip_branch, ref_conv)

        # get weights and bias of conv branches
        kernel_conv = 0
        bias_conv = 0
        for branch in conv_branches:
            _kernel, _bias = self._fuse_bn_tensor(branch)
            kernel_conv = kernel_conv + _kernel
            bias_conv = bias_conv + _bias

        kernel_final = kernel_conv + kernel_scale + kernel_identity
        bias_final = bias_conv + bias_scale + bias_identity
        return kernel_final, bias_final

    def _get_kernel_bias_primary(self) -> Tuple[torch.Tensor, torch.Tensor]:
        """ Method to obtain re-parameterized kernel and bias of the primary stage.
        Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L83

        :return: Tuple of (kernel, bias) after fusing branches.
        """
        return self._get_kernel_bias(self.primary_rpr_conv, self.primary_rpr_scale, self.primary_rpr_skip)

    def _get_kernel_bias_cheap(self) -> Tuple[torch.Tensor, torch.Tensor]:
        """ Method to obtain re-parameterized kernel and bias of the cheap stage.
        Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L83

        :return: Tuple of (kernel, bias) after fusing branches.
        """
        return self._get_kernel_bias(self.cheap_rpr_conv, self.cheap_rpr_scale, self.cheap_rpr_skip)

    def _fuse_bn_tensor(self, branch, ref_conv=None) -> Tuple[torch.Tensor, torch.Tensor]:
        """ Method to fuse batchnorm layer with preceeding conv layer.
        Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95

        :param branch: Either a conv+BN ``nn.Sequential`` built by ``_conv_bn``
            or a bare ``nn.BatchNorm2d`` skip branch.
        :param ref_conv: For a bare BatchNorm branch, the convolution whose
            geometry the equivalent identity kernel must match. When omitted,
            the cheap-stage geometry stored on the module is used.
        :return: Tuple of (kernel, bias) after fusing batchnorm.
        """
        if isinstance(branch, nn.Sequential):
            kernel = branch.conv.weight
            bn = branch.bn
        else:
            assert isinstance(branch, nn.BatchNorm2d)
            bn = branch
            if ref_conv is None:
                in_channels, groups = self.in_channels, self.groups
                k_h = k_w = self.kernel_size
            else:
                in_channels, groups = ref_conv.in_channels, ref_conv.groups
                k_h, k_w = ref_conv.kernel_size
            # Identity expressed as a (grouped) convolution: a single 1 at the
            # kernel centre connecting each output channel to its own input.
            input_dim = in_channels // groups
            kernel = torch.zeros((in_channels, input_dim, k_h, k_w),
                                 dtype=bn.weight.dtype,
                                 device=bn.weight.device)
            rows = torch.arange(in_channels, device=kernel.device)
            kernel[rows, rows % input_dim, k_h // 2, k_w // 2] = 1

        std = (bn.running_var + bn.eps).sqrt()
        t = (bn.weight / std).reshape(-1, 1, 1, 1)
        return kernel * t, bn.bias - bn.running_mean * bn.weight / std

    def _conv_bn(self, in_channels, out_channels, kernel_size, stride, padding, groups=1, bias=False):
        """ Helper method to construct conv-batchnorm layers.

        :param in_channels: Number of input channels.
        :param out_channels: Number of output channels.
        :param kernel_size: Size of the convolution kernel.
        :param stride: Convolution stride.
        :param padding: Zero-padding size.
        :param groups: Number of convolution groups.
        :param bias: Whether the convolution has a bias term.
        :return: Conv-BN module with sub-modules named ``conv`` and ``bn``.
        """
        mod_list = nn.Sequential()
        mod_list.add_module('conv', nn.Conv2d(in_channels=in_channels,
                                              out_channels=out_channels,
                                              kernel_size=kernel_size,
                                              stride=stride,
                                              padding=padding,
                                              groups=groups,
                                              bias=bias))
        mod_list.add_module('bn', nn.BatchNorm2d(out_channels))
        return mod_list
2.3 更改tasks.py文件
打开ultralytics->nn->tasks.py,在脚本开头的导入区域添加如下导入语句。
from ultralytics.nn.blocks import *
之后找到模型解析函数parse_model(约在tasks.py脚本中940行左右位置,可能因代码版本不同变动),在该函数的最后一个else分支上面增加相关解析代码。
elif m is GhostModuleV3:c2 = args[0]args = [ch[f], *args]
2.4 更改yaml文件
yaml文件解读:YOLO系列 “.yaml“文件解读_yolo yaml文件-CSDN博客
打开更改ultralytics/cfg/models/rt-detr路径下的rtdetr-l.yaml文件,替换原有模块。(放在该位置仅能插入该模块,具体效果未知。博主精力有限,仅完成与其他模块二次创新融合的测试,结构图见文末,代码见群文件更新。)
# Ultralytics YOLO 🚀, AGPL-3.0 license
# RT-DETR-l object detection model with P3-P5 outputs. For details see https://docs.ultralytics.com/models/rtdetr

# Parameters
nc: 80 # number of classes
scales: # model compound scaling constants, i.e. 'model=yolov8n-cls.yaml' will call yolov8-cls.yaml with scale 'n'
  # [depth, width, max_channels]
  l: [1.00, 1.00, 1024]

backbone:
  # [from, repeats, module, args]
  - [-1, 1, HGStem, [32, 48]] # 0-P2/4
  - [-1, 6, HGBlock, [48, 128, 3]] # stage 1

  - [-1, 1, DWConv, [128, 3, 2, 1, False]] # 2-P3/8
  - [-1, 6, HGBlock, [96, 512, 3]] # stage 2

  - [-1, 1, DWConv, [512, 3, 2, 1, False]] # 4-P4/16
  - [-1, 2, GhostModuleV3, [512, 3]] # 5, args: c2 (output channels), kernel_size; inserted in place of the original module
  - [-1, 6, HGBlock, [192, 1024, 5, True, True]] # HGBlock args: cm, c2, k, light, shortcut
  - [-1, 6, HGBlock, [192, 1024, 5, True, True]] # stage 3

  - [-1, 1, DWConv, [1024, 3, 2, 1, False]] # 8-P5/32
  - [-1, 6, HGBlock, [384, 2048, 5, True, False]] # stage 4

head:
  - [-1, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 10 input_proj.2
  - [-1, 1, AIFI, [1024, 8]]
  - [-1, 1, Conv, [256, 1, 1]] # 12, Y5, lateral_convs.0

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [7, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 14 input_proj.1
  - [[-2, -1], 1, Concat, [1]]
  - [-1, 3, RepC3, [256]] # 16, fpn_blocks.0
  - [-1, 1, Conv, [256, 1, 1]] # 17, Y4, lateral_convs.1

  - [-1, 1, nn.Upsample, [None, 2, "nearest"]]
  - [3, 1, Conv, [256, 1, 1, None, 1, 1, False]] # 19 input_proj.0
  - [[-2, -1], 1, Concat, [1]] # cat backbone P3 (layer 3)
  - [-1, 3, RepC3, [256]] # X3 (21), fpn_blocks.1

  - [-1, 1, Conv, [256, 3, 2]] # 22, downsample_convs.0
  - [[-1, 17], 1, Concat, [1]] # cat Y4
  - [-1, 3, RepC3, [256]] # F4 (24), pan_blocks.0

  - [-1, 1, Conv, [256, 3, 2]] # 25, downsample_convs.1
  - [[-1, 12], 1, Concat, [1]] # cat Y5
  - [-1, 3, RepC3, [256]] # F5 (27), pan_blocks.1

  - [[21, 24, 27], 1, RTDETRDecoder, [nc]] # Detect(P3, P4, P5)
2.5 修改train.py文件
创建Train_RT脚本用于训练。
from ultralytics.models import RTDETR
import os
# Allow duplicate OpenMP runtime libraries to be loaded in this process.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

if __name__ == '__main__':
    # Build the model from the modified YAML so the new module is instantiated.
    model = RTDETR(model='ultralytics/cfg/models/rt-detr/rtdetr-l.yaml')
    # model.load('yolov8n.pt')
    # epochs=2 / batch=1 is a smoke-test configuration; raise both for real training.
    model.train(data='./data.yaml', epochs=2, batch=1, device='0', imgsz=640, workers=2, cache=False,
                amp=True, mosaic=False, project='runs/train', name='exp')
在训练脚本中填入修改好的yaml路径,运行即可开始训练。
三、相关改进思路(2024/11/23日群文件)
该模块可如图加入到HGBlock、RepNCSPELAN4、RepC3等模块中,代码见群文件,结构如图。自研模块与该模块融合代码及yaml文件见群文件。
⭐另外,融合上百种改进模块的YOLO项目仅79.9(含百种改进的v9),RTDETR79.9,含高性能自研模型,更易发论文,代码每周更新,欢迎点击下方小卡片加我了解。⭐
⭐⭐平均每个文章对应4-6个二创及自研融合模块⭐⭐