There are three ways to build a model with PyTorch:
- subclass the nn.Module base class;
- use nn.Sequential to assemble the model layer by layer;
- subclass the nn.Module base class and wrap parts of the model in container modules (such as nn.Sequential, nn.ModuleList, or nn.ModuleDict).
1. Subclassing the nn.Module base class
With this approach, you define a class that inherits from nn.Module, create the layers the model needs in the constructor __init__(), and implement the forward pass in the forward() method.
Example:
import torch
from torch import nn
import torch.nn.functional as F

# Build the model
class myNet(nn.Module):
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(myNet, self).__init__()
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(in_dim, n_hidden_1)
        self.bn1 = nn.BatchNorm1d(n_hidden_1)
        self.linear2 = nn.Linear(n_hidden_1, n_hidden_2)
        self.bn2 = nn.BatchNorm1d(n_hidden_2)
        self.out = nn.Linear(n_hidden_2, out_dim)

    def forward(self, input):
        x = self.flatten(input)
        x = self.linear1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.linear2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.out(x)
        x = F.softmax(x, dim=1)
        return x

if __name__ == '__main__':
    in_dim, n_hidden_1, n_hidden_2, out_dim = 28 * 28, 300, 100, 10
    my_net = myNet(in_dim, n_hidden_1, n_hidden_2, out_dim)
    print(my_net)
Output:
myNet(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear1): Linear(in_features=784, out_features=300, bias=True)
  (bn1): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linear2): Linear(in_features=300, out_features=100, bias=True)
  (bn2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (out): Linear(in_features=100, out_features=10, bias=True)
)
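As a quick sanity check (a minimal sketch; the random input below is an illustration, not part of the original example), you can append a dummy batch to the __main__ block above and push it through the network. Note that BatchNorm1d requires a batch size greater than 1 in training mode:
x = torch.randn(2, 28 * 28)   # dummy batch: 2 flattened 28x28 images
print(my_net(x).shape)        # torch.Size([2, 10])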
2. Using nn.Sequential to build the model layer by layer
nn.Sequential already implements a forward function internally, so you do not need to write one yourself. The modules inside nn.Sequential are executed in the order in which they are listed, so you must make sure that the output size of each module matches the input size of the next.
2.1 Passing the layers as variadic arguments
import torch
from torch import nn

in_dim, n_hidden_1, n_hidden_2, out_dim = 28 * 28, 300, 100, 10

# Build the model
myNet = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_dim, n_hidden_1),
    nn.BatchNorm1d(n_hidden_1),
    nn.ReLU(),
    nn.Linear(n_hidden_1, n_hidden_2),
    nn.BatchNorm1d(n_hidden_2),
    nn.ReLU(),
    nn.Linear(n_hidden_2, out_dim),
    nn.Softmax(dim=1)
)

if __name__ == '__main__':
    print(myNet)
Output:
Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=300, bias=True)
  (2): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): ReLU()
  (4): Linear(in_features=300, out_features=100, bias=True)
  (5): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
  (7): Linear(in_features=100, out_features=10, bias=True)
  (8): Softmax(dim=1)
)
With this construction method you cannot give each layer a name; layers are addressed by their integer index, as the snippet below illustrates. If you need named layers, use the add_module method or pass an OrderedDict.
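For illustration (continuing from the snippet in 2.1; this line is an addition, not part of the original example), the variadic version can only be indexed numerically:
print(myNet[1])  # Linear(in_features=784, out_features=300, bias=True)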
2.2 Using the add_module method
from torch import nn

in_dim, n_hidden_1, n_hidden_2, out_dim = 28 * 28, 300, 100, 10

myNet = nn.Sequential()
myNet.add_module("flatten", nn.Flatten())
myNet.add_module("linear1", nn.Linear(in_dim, n_hidden_1))
myNet.add_module("bn1", nn.BatchNorm1d(n_hidden_1))
myNet.add_module("relu1", nn.ReLU())
myNet.add_module("linear2", nn.Linear(n_hidden_1, n_hidden_2))
myNet.add_module("bn2", nn.BatchNorm1d(n_hidden_2))
myNet.add_module("relu2", nn.ReLU())
myNet.add_module("out", nn.Linear(n_hidden_2, out_dim))
myNet.add_module("softmax", nn.Softmax(dim=1))

if __name__ == '__main__':
    print(myNet)
Output:
Sequential(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear1): Linear(in_features=784, out_features=300, bias=True)
  (bn1): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (linear2): Linear(in_features=300, out_features=100, bias=True)
  (bn2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): ReLU()
  (out): Linear(in_features=100, out_features=10, bias=True)
  (softmax): Softmax(dim=1)
)
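One benefit of named submodules (a small illustration added here, assuming the model above): each layer registered via add_module also becomes accessible as an attribute:
print(myNet.linear1)  # Linear(in_features=784, out_features=300, bias=True)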
2.3 Using an OrderedDict
from torch import nn
from collections import OrderedDict

in_dim, n_hidden_1, n_hidden_2, out_dim = 28 * 28, 300, 100, 10

myNet = nn.Sequential(OrderedDict([
    ("flatten", nn.Flatten()),
    ("linear1", nn.Linear(in_dim, n_hidden_1)),
    ("bn1", nn.BatchNorm1d(n_hidden_1)),
    ("relu1", nn.ReLU()),
    ("linear2", nn.Linear(n_hidden_1, n_hidden_2)),
    ("bn2", nn.BatchNorm1d(n_hidden_2)),
    ("relu2", nn.ReLU()),
    ("out", nn.Linear(n_hidden_2, out_dim)),
    ("softmax", nn.Softmax(dim=1))
]))

if __name__ == '__main__':
    print(myNet)
Output:
Sequential(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear1): Linear(in_features=784, out_features=300, bias=True)
  (bn1): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (linear2): Linear(in_features=300, out_features=100, bias=True)
  (bn2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): ReLU()
  (out): Linear(in_features=100, out_features=10, bias=True)
  (softmax): Softmax(dim=1)
)
3. Subclassing nn.Module and using container modules
When the model structure is complex, you can wrap parts of it in container modules (such as nn.Sequential, nn.ModuleList, or nn.ModuleDict). This makes the model more readable and reduces the amount of code.
3.1 Using the nn.Sequential container
from torch import nn
import torch.nn.functional as F

in_dim, n_hidden_1, n_hidden_2, out_dim = 28 * 28, 300, 100, 10

class myNet(nn.Module):
    # Use nn.Sequential to group related layers together
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(myNet, self).__init__()
        self.flatten = nn.Flatten()
        self.layer1 = nn.Sequential(nn.Linear(in_dim, n_hidden_1), nn.BatchNorm1d(n_hidden_1))
        self.layer2 = nn.Sequential(nn.Linear(n_hidden_1, n_hidden_2), nn.BatchNorm1d(n_hidden_2))
        self.out = nn.Sequential(nn.Linear(n_hidden_2, out_dim))

    def forward(self, input):
        x = self.flatten(input)
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.softmax(self.out(x), dim=1)
        return x

if __name__ == '__main__':
    my_net = myNet(in_dim, n_hidden_1, n_hidden_2, out_dim)
    print(my_net)
Output:
myNet(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (layer1): Sequential(
    (0): Linear(in_features=784, out_features=300, bias=True)
    (1): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (layer2): Sequential(
    (0): Linear(in_features=300, out_features=100, bias=True)
    (1): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (out): Sequential(
    (0): Linear(in_features=100, out_features=10, bias=True)
  )
)
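Note that ReLU and Softmax do not show up in the printed structure, because they are applied as functions (F.relu, F.softmax) inside forward() rather than registered as submodules. The nested containers can also be indexed (a small illustration added here, assuming the model above):
print(my_net.layer1[0])  # Linear(in_features=784, out_features=300, bias=True)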
3.2 Using the nn.ModuleList container
from torch import nn

in_dim, n_hidden_1, n_hidden_2, out_dim = 28 * 28, 300, 100, 10

class myNet(nn.Module):
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(myNet, self).__init__()
        self.layers = nn.ModuleList([
            nn.Flatten(),
            nn.Linear(in_dim, n_hidden_1),
            nn.BatchNorm1d(n_hidden_1),
            nn.ReLU(),
            nn.Linear(n_hidden_1, n_hidden_2),
            nn.BatchNorm1d(n_hidden_2),
            nn.ReLU(),
            nn.Linear(n_hidden_2, out_dim),
            nn.Softmax(dim=1)])

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

if __name__ == '__main__':
    my_net = myNet(in_dim, n_hidden_1, n_hidden_2, out_dim)
    print(my_net)
Output:
myNet(
  (layers): ModuleList(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=300, bias=True)
    (2): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): Linear(in_features=300, out_features=100, bias=True)
    (5): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Linear(in_features=100, out_features=10, bias=True)
    (8): Softmax(dim=1)
  )
)
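The reason for nn.ModuleList rather than a plain Python list is parameter registration: modules held in an ordinary list are invisible to parameters() and therefore to the optimizer. A minimal sketch (the class names GoodNet and BadNet are hypothetical, for illustration only):
from torch import nn

class GoodNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(4, 4)])  # registered as a submodule

class BadNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = [nn.Linear(4, 4)]  # plain list: NOT registered

print(len(list(GoodNet().parameters())))  # 2 (weight and bias)
print(len(list(BadNet().parameters())))   # 0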
3.3 Using the nn.ModuleDict container
from torch import nn

in_dim, n_hidden_1, n_hidden_2, out_dim = 28 * 28, 300, 100, 10

class myNet(nn.Module):
    def __init__(self, in_dim, n_hidden_1, n_hidden_2, out_dim):
        super(myNet, self).__init__()
        self.layers_dict = nn.ModuleDict({
            "flatten": nn.Flatten(),
            "linear1": nn.Linear(in_dim, n_hidden_1),
            "bn1": nn.BatchNorm1d(n_hidden_1),
            "relu": nn.ReLU(),
            "linear2": nn.Linear(n_hidden_1, n_hidden_2),
            "bn2": nn.BatchNorm1d(n_hidden_2),
            "out": nn.Linear(n_hidden_2, out_dim),
            "softmax": nn.Softmax(dim=1)})

    def forward(self, x):
        layers = ["flatten", "linear1", "bn1", "relu",
                  "linear2", "bn2", "relu", "out", "softmax"]
        for layer in layers:
            x = self.layers_dict[layer](x)
        return x

if __name__ == '__main__':
    my_net = myNet(in_dim, n_hidden_1, n_hidden_2, out_dim)
    print(my_net)
In the network above, the ReLU activation is used twice in the forward pass, but it only needs to be defined once in the dictionary; it then appears twice in the list of layer names inside forward(). This is safe because nn.ReLU is stateless, so the same instance can be reused; a stateful layer such as BatchNorm1d would need a separate entry for each use.
Output:
myNet(
  (layers_dict): ModuleDict(
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear1): Linear(in_features=784, out_features=300, bias=True)
    (bn1): BatchNorm1d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
    (linear2): Linear(in_features=300, out_features=100, bias=True)
    (bn2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (out): Linear(in_features=100, out_features=10, bias=True)
    (softmax): Softmax(dim=1)
  )
)
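As with the earlier variants, a quick sanity check with a dummy batch (an illustration added here, not part of the original example; the batch size of 2 keeps BatchNorm1d happy in training mode):
import torch

x = torch.randn(2, 28 * 28)
print(my_net(x).shape)  # torch.Size([2, 10])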