詳解PyTorch中的ModuleList和Sequential

在使用PyTorch的時候，經常遇到nn.Sequential和nn.ModuleList，今天將這兩個模塊認真區分了一下，總結如下。PyTorch版本為1.0.0。隨著本人對PyTorch的逐漸深入、有了新的體會時，本文也會持續更新。

本人才疏學淺，希望各位看官不吝賜教。

一、官方文檔

首先看官方文檔的解釋，僅列出了容器(Containers)中幾個比較常用的CLASS。

CLASS torch.nn.Module

Base class for all neural network modules.

Your models should also subclass this class.

import torch.nn as nn
import torch.nn.functional as F


class Model(nn.Module):
    """Minimal two-convolution network used to illustrate nn.Module.

    Assigning a module to an attribute in ``__init__`` registers it as a
    submodule of this model.
    """

    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU to ``x`` and return the result."""
        # The layers are instance attributes, so they must be reached via self.
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

CLASS torch.nn.Sequential(*args)

A sequential container. Modules will be added to it in the order they are passed in the constructor. Alternatively, an ordered dict of modules can also be passed in.

# OrderedDict is needed for the named variant below.
from collections import OrderedDict

# Example of using Sequential
model = nn.Sequential(
    nn.Conv2d(1, 20, 5),
    nn.ReLU(),
    nn.Conv2d(20, 64, 5),
    nn.ReLU(),
)

# Example of using Sequential with OrderedDict.
# The keys are the submodule names and therefore have to be strings.
model = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(1, 20, 5)),
    ('ReLU1', nn.ReLU()),
    ('conv2', nn.Conv2d(20, 64, 5)),
    ('ReLU2', nn.ReLU()),
]))

CLASS torch.nn.ModuleList(modules=None)

Holds submodules in a list.

ModuleList can be indexed like a regular Python list, but modules it contains are properly registered, and will be visible by all Module methods.

class MyModel(nn.Module):
    """Ten 10 -> 10 linear layers held in an nn.ModuleList."""

    def __init__(self):
        super(MyModel, self).__init__()
        # Each entry must be an instantiated module, not the class itself.
        self.linears = nn.ModuleList([nn.Linear(10, 10) for i in range(10)])

    def forward(self, x):
        """Combine an indexed layer and the iterated layer at every step."""
        # ModuleList can act as an iterable, or be indexed using ints
        for i, l in enumerate(self.linears):
            x = self.linears[i // 2](x) + l(x)
        return x

二、nn.Sequential與nn.ModuleList簡介

nn.Sequential

nn.Sequential裡面的模塊按照順序進行排列的，所以必須確保前一個模塊的輸出大小和下一個模塊的輸入大小是一致的。如下面的例子所示：

# First import the torch packages
import torch
import torch.nn as nn
import torch.nn.functional as F


class net_seq(nn.Module):
    """Wrap a fixed conv/ReLU stack in a single nn.Sequential."""

    def __init__(self):
        # super() has to name this class.
        super(net_seq, self).__init__()
        self.seq = nn.Sequential(
            nn.Conv2d(1, 20, 5),
            nn.ReLU(),
            nn.Conv2d(20, 64, 5),
            nn.ReLU(),
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack."""
        return self.seq(x)


# Note: this rebinds the name from the class to an instance of it.
net_seq = net_seq()
print(net_seq)
# net_seq(
#   (seq): Sequential(
#     (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
#     (1): ReLU()
#     (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
#     (3): ReLU()
#   )
# )

nn.Sequential中可以使用OrderedDict來指定每個module的名字，而不是採用默認的命名方式(按序號 0,1,2,3...)。例子如下：

from collections import OrderedDict

class net_seq(nn.Module):
    """Same conv/ReLU stack, with every layer named through an OrderedDict."""

    def __init__(self):
        super(net_seq, self).__init__()
        # The OrderedDict keys become the submodule names, so they must be
        # string literals.
        self.seq = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(1, 20, 5)),
            ('relu1', nn.ReLU()),
            ('conv2', nn.Conv2d(20, 64, 5)),
            ('relu2', nn.ReLU()),
        ]))

    def forward(self, x):
        """Run ``x`` through the named sequential stack."""
        return self.seq(x)


# Note: this rebinds the name from the class to an instance of it.
net_seq = net_seq()
print(net_seq)
# net_seq(
#   (seq): Sequential(
#     (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
#     (relu1): ReLU()
#     (conv2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
#     (relu2): ReLU()
#   )
# )

nn.ModuleList

nn.ModuleList，它是一個儲存不同 module，並自動將每個 module 的 parameters 添加到網路之中的容器。你可以把任意 nn.Module 的子類 (比如 nn.Conv2d, nn.Linear 之類的) 加到這個 list 裡面，方法和 Python 自帶的 list 一樣，無非是 extend，append 等操作。但不同於一般的 list，加入到 nn.ModuleList 裡面的 module 是會自動註冊到整個網路上的，同時 module 的 parameters 也會自動添加到整個網路中。若使用python的list，則會出問題。下面看一個例子：

class net_modlist(nn.Module):
    """Conv/ReLU stack stored in an nn.ModuleList."""

    def __init__(self):
        super(net_modlist, self).__init__()
        self.modlist = nn.ModuleList([
            nn.Conv2d(1, 20, 5),
            nn.ReLU(),
            nn.Conv2d(20, 64, 5),
            nn.ReLU(),
        ])

    def forward(self, x):
        """Apply the stored modules one after another."""
        for m in self.modlist:
            x = m(x)
        return x


# Note: this rebinds the name from the class to an instance of it.
net_modlist = net_modlist()
print(net_modlist)
# net_modlist(
#   (modlist): ModuleList(
#     (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
#     (1): ReLU()
#     (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
#     (3): ReLU()
#   )
# )

# The parameters of both convolutions are visible through the model.
for param in net_modlist.parameters():
    print(type(param.data), param.size())
# <class 'torch.Tensor'> torch.Size([20, 1, 5, 5])
# <class 'torch.Tensor'> torch.Size([20])
# <class 'torch.Tensor'> torch.Size([64, 20, 5, 5])
# <class 'torch.Tensor'> torch.Size([64])

可以看到，這個網路的權重 (weights) 和偏置 (bias) 都在這個網路之內。接下來看看另一個作為對比的網路，它使用 Python 自帶的 list：

class net_modlist(nn.Module):
    """Counter-example: the same layers kept in a plain Python list."""

    def __init__(self):
        super(net_modlist, self).__init__()
        # A plain list is used on purpose here to show the contrast with
        # nn.ModuleList.
        self.modlist = [
            nn.Conv2d(1, 20, 5),
            nn.ReLU(),
            nn.Conv2d(20, 64, 5),
            nn.ReLU(),
        ]

    def forward(self, x):
        """Apply the stored modules one after another."""
        for m in self.modlist:
            x = m(x)
        return x


# Note: this rebinds the name from the class to an instance of it.
net_modlist = net_modlist()
print(net_modlist)
# net_modlist()
for param in net_modlist.parameters():
    print(type(param.data), param.size())
# (prints nothing: parameters() yields no entries for this model)

顯然，使用 Python 的 list 添加的卷積層和它們的 parameters 並沒有自動註冊到我們的網路中。當然，我們還是可以使用 forward 來計算輸出結果。但是如果用其實例化的網路進行訓練的時候，因為這些層的parameters不在整個網路之中，所以其網路參數也不會被更新，也就是無法訓練。

三、nn.Sequential與nn.ModuleList的區別

不同點1：

nn.Sequential內部實現了forward函數，因此可以不用寫forward函數。而nn.ModuleList則沒有實現內部forward函數。

對於nn.Sequential：

# Example 1: this is the example from the official documentation
seq = nn.Sequential(
    nn.Conv2d(1, 20, 5),
    nn.ReLU(),
    nn.Conv2d(20, 64, 5),
    nn.ReLU(),
)
print(seq)
# Sequential(
#   (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
#   (1): ReLU()
#   (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
#   (3): ReLU()
# )

# Feed an input through seq; print the shape so the output matches the
# torch.Size shown below.
input = torch.randn(16, 1, 20, 20)
print(seq(input).shape)
# torch.Size([16, 64, 12, 12])

# Example 2: when subclassing nn.Module, forward has to be written out
class net1(nn.Module):
    """nn.Module subclass that delegates its forward pass to nn.Sequential."""

    def __init__(self):
        super(net1, self).__init__()
        self.seq = nn.Sequential(
            nn.Conv2d(1, 20, 5),
            nn.ReLU(),
            nn.Conv2d(20, 64, 5),
            nn.ReLU(),
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack."""
        return self.seq(x)

    # Note: iterating over the Sequential with a for loop gives the same
    # result:
    # def forward(self, x):
    #     for s in self.seq:
    #         x = s(x)
    #     return x


# Feed an input through net1.
# Note: the assignment rebinds the name from the class to an instance of it.
input = torch.randn(16, 1, 20, 20)
net1 = net1()
print(net1(input).shape)
# torch.Size([16, 64, 12, 12])

而對於nn.ModuleList：

# Example 1: calling a ModuleList directly fails
modlist = nn.ModuleList([
    nn.Conv2d(1, 20, 5),
    nn.ReLU(),
    nn.Conv2d(20, 64, 5),
    nn.ReLU(),
])
print(modlist)
# ModuleList(
#   (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
#   (1): ReLU()
#   (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
#   (3): ReLU()
# )

input = torch.randn(16, 1, 20, 20)
try:
    print(modlist(input))
except NotImplementedError as err:
    # ModuleList implements no forward(); catching the error lets the rest
    # of the script keep running while still showing the failure.
    print("NotImplementedError:", err)
# raises NotImplementedError

# Example 2: write the forward function explicitly
class net2(nn.Module):
    """nn.Module subclass that applies the entries of a ModuleList in order."""

    def __init__(self):
        super(net2, self).__init__()
        self.modlist = nn.ModuleList([
            nn.Conv2d(1, 20, 5),
            nn.ReLU(),
            nn.Conv2d(20, 64, 5),
            nn.ReLU(),
        ])

    # Writing forward like this raises NotImplementedError:
    # def forward(self, x):
    #     return self.modlist(x)

    # Note: the modules have to be applied one by one in a for loop
    def forward(self, x):
        """Apply the stored modules one after another."""
        for m in self.modlist:
            x = m(x)
        return x


# Note: the assignment rebinds the name from the class to an instance of it.
input = torch.randn(16, 1, 20, 20)
net2 = net2()
print(net2(input).shape)
# torch.Size([16, 64, 12, 12])

如果完全直接用 nn.Sequential，確實是可以的，但這麼做的代價就是失去了部分靈活性，不能自己去定製 forward 函數裡面的內容了。

一般情況下 nn.Sequential 的用法是來組成卷積塊 (block)，然後像拼積木一樣把不同的 block 拼成整個網路，讓代碼更簡潔，更加結構化。

不同點2：

nn.Sequential可以使用OrderedDict對每層進行命名，上面已經闡述過了；

不同點3：

nn.Sequential裡面的模塊按照順序進行排列的，所以必須確保前一個模塊的輸出大小和下一個模塊的輸入大小是一致的。而nn.ModuleList 並沒有定義一個網路，它只是將不同的模塊儲存在一起，這些模塊之間並沒有什麼先後順序可言。見下面代碼：

class net3(nn.Module):
    """ModuleList whose storage order differs from its execution order."""

    def __init__(self):
        super(net3, self).__init__()
        self.linears = nn.ModuleList([
            nn.Linear(10, 20),
            nn.Linear(20, 30),
            nn.Linear(5, 10),
        ])

    def forward(self, x):
        """Chain the layers as 5 -> 10 -> 20 -> 30 features."""
        # Execution order is decided here, not by the position in the list.
        x = self.linears[2](x)
        x = self.linears[0](x)
        x = self.linears[1](x)
        return x


# Note: this rebinds the name from the class to an instance of it.
net3 = net3()
print(net3)
# net3(
#   (linears): ModuleList(
#     (0): Linear(in_features=10, out_features=20, bias=True)
#     (1): Linear(in_features=20, out_features=30, bias=True)
#     (2): Linear(in_features=5, out_features=10, bias=True)
#   )
# )

input = torch.randn(32, 5)
print(net3(input).shape)
# torch.Size([32, 30])

根據 net3 的結果，可以看出來這個 ModuleList 裡面的順序不能決定什麼，網路的執行順序是根據 forward 函數來決定的。若將forward函數中幾行代碼互換，使輸入輸出之間的大小不一致，則程序會報錯。此外，為了使代碼具有更高的可讀性，最好把ModuleList和forward中的順序保持一致。

不同點4：

有的時候網路中有很多相似或者重複的層，我們一般會考慮用 for 循環來創建它們，而不是一行一行地寫，比如：

# Five independent 10 -> 10 linear layers; the loop variable is unused.
layers = [nn.Linear(10, 10) for _ in range(5)]

那麼這裡我們使用ModuleList：

class net4(nn.Module):
    """Stack of five identical linear layers built with a comprehension."""

    def __init__(self):
        super(net4, self).__init__()
        layers = [nn.Linear(10, 10) for i in range(5)]
        # Wrapping the list in nn.ModuleList registers every layer.
        self.linears = nn.ModuleList(layers)

    def forward(self, x):
        """Apply the five layers in order."""
        for layer in self.linears:
            x = layer(x)
        return x


net = net4()
print(net)
# net4(
#   (linears): ModuleList(
#     (0): Linear(in_features=10, out_features=10, bias=True)
#     (1): Linear(in_features=10, out_features=10, bias=True)
#     (2): Linear(in_features=10, out_features=10, bias=True)
#     (3): Linear(in_features=10, out_features=10, bias=True)
#     (4): Linear(in_features=10, out_features=10, bias=True)
#   )
# )

參考：

官方文檔: Container
PyTorch 中的 ModuleList 和 Sequential: 區別和使用場景

詳解PyTorch中的ModuleList和Sequential

一、官方文檔

CLASS torch.nn.Module

CLASS torch.nn.Sequential(*args)

CLASS torch.nn.ModuleList(modules=None)