计算机视觉方面的一些模块-EW帮帮网

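# __all__ is an optional list defined at module level. When a module is imported with
# `from ... import *` and it defines __all__, only the names listed in __all__ are imported.
# This is how a module author controls which public API names a star-import exposes.
# Behaviour of `import *`:
# - If the module defines __all__: only the names listed in __all__ are imported.
# - If the module does not define __all__: every public name defined in the module is
#   imported, i.e. every name that does not begin with an underscore (_). Names with a
#   leading single or double underscore are therefore not imported.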
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Return the padding to use for a convolution with kernel size ``k``.

    Args:
        k: Kernel size, either an int or an iterable of ints.
        p: Explicit padding. When it is not ``None`` it is returned unchanged.
        d: Dilation. When greater than 1, the kernel size is first replaced by
            its dilated extent ``d * (k - 1) + 1``.

    Returns:
        ``p`` if it was supplied; otherwise the (dilated) kernel size
        floor-divided by 2, as an int for an int ``k`` and as a list for an
        iterable ``k``.
    """
    scalar_kernel = isinstance(k, int)

    # Effective kernel extent once dilation is taken into account.
    if d > 1:
        if scalar_kernel:
            k = d * (k - 1) + 1
        else:
            k = [d * (side - 1) + 1 for side in k]

    # An explicit padding always wins over the automatic one.
    if p is not None:
        return p

    if scalar_kernel:
        return k // 2
    return [side // 2 for side in k]
class Conv(nn.Module):
    """Convolution block: ``nn.Conv2d`` -> ``nn.BatchNorm2d`` -> activation.

    Args:
        c1: Input channels passed to ``nn.Conv2d``.
        c2: Output channels passed to ``nn.Conv2d`` and ``nn.BatchNorm2d``.
        k: Kernel size.
        s: Stride.
        p: Padding; ``None`` lets ``autopad`` derive it from ``k`` and ``d``.
        g: Number of groups.
        d: Dilation.
        act: ``True`` selects ``default_act``, an ``nn.Module`` instance is
            used as given, anything else disables the activation.
    """

    default_act = nn.SiLU()  # activation used when ``act`` is True

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        super().__init__()
        padding = autopad(k, p, d)
        # The convolution carries no bias of its own; BatchNorm follows it.
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):  # (b, c1, ...)
        """Apply convolution, batch normalisation and activation: (b, c1, ...) -> (b, c2, ...)."""
        features = self.conv(x)
        normalised = self.bn(features)
        return self.act(normalised)

    def forward_fuse(self, x):
        """Apply convolution and activation only, skipping ``self.bn``.

        NOTE(review): presumably meant for use after BatchNorm has been folded
        into the convolution weights elsewhere - confirm against callers.
        """
        features = self.conv(x)
        return self.act(features)
class Conv2(Conv):
    """``Conv`` block with an extra parallel 1x1 convolution branch.

    Before fusion the outputs of the main convolution and of the 1x1
    convolution are summed, then batch-normalised and activated.
    ``fuse_convs`` folds the 1x1 branch into the main kernel so that a single
    convolution is enough afterwards.

    Args:
        c1: Input channels.
        c2: Output channels.
        k: Kernel size of the main convolution.
        s: Stride, shared by both branches.
        p: Padding; ``None`` lets ``autopad`` pick it per branch.
        g: Number of groups, shared by both branches.
        d: Dilation, shared by both branches.
        act: Activation selector, handled by ``Conv.__init__``.
    """

    def __init__(self, c1, c2, k=3, s=1, p=None, g=1, d=1, act=True):
        super().__init__(c1, c2, k, s, p, g=g, d=d, act=act)
        pointwise_pad = autopad(1, p, d)
        # Additional 1x1 convolution running in parallel with ``self.conv``.
        self.cv2 = nn.Conv2d(c1, c2, 1, s, pointwise_pad, groups=g, dilation=d, bias=False)

    def forward(self, x):  # (b, c1, ...)
        """Sum both convolution branches, then apply batch norm and activation.

        Both ``self.conv(x)`` and ``self.cv2(x)`` map (b, c1, ...) to (b, c2, ...).
        """
        main_branch = self.conv(x)
        pointwise_branch = self.cv2(x)
        return self.act(self.bn(main_branch + pointwise_branch))

    def forward_fuse(self, x):  # (b, c1, ...)
        """Apply the main convolution, batch norm and activation (no 1x1 branch)."""
        normalised = self.bn(self.conv(x))
        return self.act(normalised)

    def fuse_convs(self):
        """Fold the 1x1 branch into the main kernel and switch to the fused forward."""
        # All-zero tensor shaped like the main kernel.
        padded = torch.zeros_like(self.conv.weight.data)
        kernel_h, kernel_w = padded.shape[2:]
        row, col = kernel_h // 2, kernel_w // 2  # centre tap, e.g. (1, 1) for a 3x3 kernel
        # Place the 1x1 weights at the centre tap; every other tap stays zero.
        padded[:, :, row : row + 1, col : col + 1] = self.cv2.weight.data.clone()
        # Adding the padded kernel makes ``self.conv`` produce conv(x) + cv2(x) by itself.
        self.conv.weight.data += padded
        del self.cv2  # the 1x1 branch is no longer needed
        self.forward = self.forward_fuse  # rebind forward on this instance
class DWConv(Conv):
    """``Conv`` block whose group count is ``math.gcd(c1, c2)``.

    Example: with ``c1 = 6`` input channels and ``c2 = 8`` output channels the
    group count is ``math.gcd(6, 8) = 2``, so each group covers 3 input
    channels and every one of the 8 output channels only sees the 3 input
    channels of its own group.

    Args:
        c1: Input channels.
        c2: Output channels.
        k: Kernel size.
        s: Stride.
        d: Dilation.
        act: Activation selector, handled by ``Conv.__init__``.
    """

    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
        groups = math.gcd(c1, c2)
        super().__init__(c1, c2, k, s, p=None, g=groups, d=d, act=act)
class LightConv(nn.Module):
    """A 1x1 ``Conv`` without activation followed by a ``DWConv``.

    Args:
        c1: Input channels.
        c2: Output channels.
        k: Kernel size of the ``DWConv`` stage.
        act: Activation passed to the ``DWConv`` stage.
    """

    def __init__(self, c1, c2, k=1, act=nn.ReLU()):
        super().__init__()
        # Pointwise stage: 1x1 convolution that switches c1 -> c2 channels, no activation.
        self.conv1 = Conv(c1, c2, 1, act=False)
        # Depthwise stage: DWConv(c2, c2) uses gcd(c2, c2) == c2 groups, one channel per group.
        self.conv2 = DWConv(c2, c2, k, act=act)

    def forward(self, x):  # (b, c1, ...)
        """Run the pointwise stage, then the depthwise stage: (b, c1, ...) -> (b, c2, ...).

        The weights are reachable as e.g. ``light_conv.conv1.conv.weight``.
        With ``c2 = 18`` and ``k = 3`` the depthwise weight has shape
        ``[18, 1, 3, 3]``: 18 groups, each with one input and one output
        channel and a 3x3 kernel.
        """
        switched = self.conv1(x)
        return self.conv2(switched)
class DWConvTranspose2d(nn.ConvTranspose2d):
    """``nn.ConvTranspose2d`` whose group count is ``math.gcd(c1, c2)``.

    Args:
        c1: Input channels.
        c2: Output channels.
        k: Kernel size.
        s: Stride.
        p1: Padding.
        p2: Output padding.
    """

    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
        shared_groups = math.gcd(c1, c2)
        super().__init__(
            c1,
            c2,
            kernel_size=k,
            stride=s,
            padding=p1,
            output_padding=p2,
            groups=shared_groups,
        )
class ConvTranspose(nn.Module):
    """Transposed-convolution block: ``nn.ConvTranspose2d`` -> optional BatchNorm -> activation.

    Args:
        c1: Input channels.
        c2: Output channels.
        k: Kernel size.
        s: Stride.
        p: Padding.
        bn: When truthy a ``nn.BatchNorm2d`` follows the convolution and the
            convolution has no bias; otherwise the convolution keeps its bias
            and ``self.bn`` is an ``nn.Identity``.
        act: ``True`` selects ``default_act``, an ``nn.Module`` instance is
            used as given, anything else disables the activation.
    """

    default_act = nn.SiLU()  # activation used when ``act`` is True

    def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
        super().__init__()
        use_bias = not bn  # the bias is only kept when no BatchNorm follows
        self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=use_bias)
        if bn:
            self.bn = nn.BatchNorm2d(c2)
        else:
            self.bn = nn.Identity()
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Apply transposed convolution, ``self.bn`` and the activation."""
        upsampled = self.conv_transpose(x)
        normalised = self.bn(upsampled)
        return self.act(normalised)

    def forward_fuse(self, x):
        """Apply transposed convolution and the activation, skipping ``self.bn``."""
        upsampled = self.conv_transpose(x)
        return self.act(upsampled)
class Focus(nn.Module):
    """Move 2x2 spatial neighbourhoods into the channel dimension, then convolve.

    Args:
        c1: Input channels; the inner ``Conv`` receives ``c1 * 4`` channels.
        c2: Output channels.
        k: Kernel size of the inner ``Conv``.
        s: Stride of the inner ``Conv``.
        p: Padding of the inner ``Conv``.
        g: Number of groups of the inner ``Conv``.
        act: Activation selector of the inner ``Conv``.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        super().__init__()
        stacked_channels = c1 * 4  # four spatial sub-grids are concatenated along dim 1
        self.conv = Conv(stacked_channels, c2, k, s, p, g, act=act)

    def forward(self, x):
        """Slice ``x`` into four interleaved sub-grids, concatenate on dim 1 and convolve.

        With 0-based (row, column) start offsets over the last two dimensions
        the sub-grids are taken in the order (0, 0), (1, 0), (0, 1), (1, 1),
        each with step 2. Every position is sampled exactly once: nothing is
        repeated and nothing is dropped.
        NOTE(review): the original author's intuition is that this sampling
        helps the model pick up row-wise and column-wise gradients of the
        image - not verified here.
        """
        offsets = ((0, 0), (1, 0), (0, 1), (1, 1))
        sub_grids = tuple(x[..., row::2, col::2] for row, col in offsets)
        return self.conv(torch.cat(sub_grids, 1))
class GhostConv(nn.Module):
    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
        super().__init__()
        c_ = c2 // 2 # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act=act)<

计算机视觉方面的一些模块

网站公告

今日签到

热门文章

最新发布