1. GRU Implementation from Scratch
# 9.1.2 GRU implementation from scratch
import torch
from torch import nn
from d2l import torch as d2l
# First, load the time machine dataset used in Section 8.5
batch_size, num_steps = 32, 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)
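# Quick sanity check (an optional snippet, not part of the book's code): each
# minibatch is a pair of integer token-index tensors of shape
# (batch_size, num_steps), where Y is X shifted one token to the left
X, Y = next(iter(train_iter))
print(X.shape, Y.shape)  # torch.Size([32, 35]) torch.Size([32, 35])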
# Initialize the model parameters
def get_params(vocab_size, num_hiddens, device):
    num_inputs = num_outputs = vocab_size

    def normal(shape):
        return torch.randn(size=shape, device=device) * 0.01

    def three():
        return (normal((num_inputs, num_hiddens)),
                normal((num_hiddens, num_hiddens)),
                torch.zeros(num_hiddens, device=device))

    W_xz, W_hz, b_z = three()  # Update gate parameters
    W_xr, W_hr, b_r = three()  # Reset gate parameters
    W_xh, W_hh, b_h = three()  # Candidate hidden state parameters
    # Output layer parameters
    W_hq = normal((num_hiddens, num_outputs))
    b_q = torch.zeros(num_outputs, device=device)
    # Attach gradients
    params = [W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hq, b_q]
    for param in params:
        param.requires_grad_(True)
    return params
# Define the hidden state initialization function init_gru_state
def init_gru_state(batch_size, num_hiddens, device):
    return (torch.zeros((batch_size, num_hiddens), device=device),)
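# Optional shape check (illustration only; params_demo and state_demo are ad-hoc
# names, not from the original): get_params returns 11 tensors, and
# init_gru_state returns a 1-tuple holding a (batch_size, num_hiddens) zero state
params_demo = get_params(len(vocab), 256, d2l.try_gpu())
print(len(params_demo))  # 11
state_demo = init_gru_state(batch_size, 256, d2l.try_gpu())
print(state_demo[0].shape)  # torch.Size([32, 256])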
# The gated recurrent unit (GRU) model
def gru(inputs, state, params):
    W_xz, W_hz, b_z, W_xr, W_hr, b_r, W_xh, W_hh, b_h, W_hq, b_q = params
    H, = state
    outputs = []
    for X in inputs:
        Z = torch.sigmoid((X @ W_xz) + (H @ W_hz) + b_z)  # Update gate
        R = torch.sigmoid((X @ W_xr) + (H @ W_hr) + b_r)  # Reset gate
        H_tilda = torch.tanh((X @ W_xh) + ((R * H) @ W_hh) + b_h)  # Candidate hidden state
        H = Z * H + (1 - Z) * H_tilda  # New hidden state
        Y = H @ W_hq + b_q  # Output layer
        outputs.append(Y)
    return torch.cat(outputs, dim=0), (H,)
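# The loop above is a direct transcription of the GRU equations from Section 9.1
# (written in LaTeX notation; \odot denotes elementwise multiplication):
#   Z_t = \sigma(X_t W_{xz} + H_{t-1} W_{hz} + b_z)
#   R_t = \sigma(X_t W_{xr} + H_{t-1} W_{hr} + b_r)
#   \tilde{H}_t = \tanh(X_t W_{xh} + (R_t \odot H_{t-1}) W_{hh} + b_h)
#   H_t = Z_t \odot H_{t-1} + (1 - Z_t) \odot \tilde{H}_t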
# Training and prediction: print the perplexity on the training set along with
# the sequences predicted from the prefixes "time traveller" and "traveller"
vocab_size, num_hiddens, device = len(vocab), 256, d2l.try_gpu()
num_epochs, lr = 500, 1
model = d2l.RNNModelScratch(len(vocab), num_hiddens, device, get_params,
                            init_gru_state, gru)
d2l.train_ch8(model, train_iter, vocab, lr, num_epochs, device)
d2l.plt.show()
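# After training, text can also be generated directly with d2l.predict_ch8; a
# minimal sketch (the prefix and the choice of 50 generated characters are
# arbitrary, for illustration only):
print(d2l.predict_ch8('time traveller', 50, model, vocab, device))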


2. Concise Implementation of GRU


# 9.1.3 Concise implementation
import torch
from torch import nn
from d2l import torch as d2l
# First, load the time machine dataset used in Section 8.5
batch_size, num_steps = 32, 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)
vocab_size, num_hiddens, device = len(vocab), 256, d2l.try_gpu()
num_epochs, lr = 500, 1
num_inputs = vocab_size
gru_layer = nn.GRU(num_inputs, num_hiddens)
model = d2l.RNNModel(gru_layer, len(vocab))
model = model.to(device)
d2l.train_ch8(model, train_iter, vocab, lr, num_epochs, device)
d2l.plt.show()
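# For reference, a self-contained sketch of nn.GRU's tensor conventions (the
# *_demo names are ours, not d2l's): input is (num_steps, batch_size, num_inputs),
# output is (num_steps, batch_size, num_hiddens), and the final hidden state is
# (num_layers, batch_size, num_hiddens)
X_demo = torch.rand(num_steps, batch_size, num_inputs, device=device)
state0_demo = torch.zeros(1, batch_size, num_hiddens, device=device)
Y_demo, H_demo = gru_layer(X_demo, state0_demo)
print(Y_demo.shape, H_demo.shape)  # torch.Size([35, 32, 256]) torch.Size([1, 32, 256])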