Overview
I have recently been studying text similarity, so I put together an experiment along the way; it took quite a while of trial and error.
- The network is Bi-LSTM (hidden=128) + Attention + FC
- Import the required packages
- Define the network structure
- Training code
- Training
- Results
The network is Bi-LSTM (hidden=128) + Attention + FC: each sentence in a pair is embedded, encoded by a shared Bi-LSTM, pooled with attention into a fixed-size vector, and the element-wise absolute difference of the two sentence vectors goes through a fully connected layer to predict similar / not similar.
Import the required packages
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from word2vec_util import Vocab_size, Embedding_matrix, get_dataloader  # custom helper module (not shown): vocab size, pretrained word2vec matrix, DataLoader
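The word2vec_util module is not included in the post. Based on how its exports are used below (and on the log output at the end: 5956 vocabulary entries, sequences padded to length 12, 100000 samples), a minimal sketch of the interface the model expects might look like the following; the random data, padding length, and loading details are assumptions, not the author's code.

# word2vec_util.py -- hypothetical sketch of the expected interface
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader

Vocab_size = 5956        # number of words in the vocabulary (matches the log output)
Embedding_dim = 300      # must match Embedding_dim in the model
# Embedding_matrix: one 300-d word2vec vector per vocabulary index, shape [Vocab_size, 300]
Embedding_matrix = np.random.uniform(-0.1, 0.1, (Vocab_size, Embedding_dim))

def get_dataloader(batch_size=500, max_len=12):
    # Each sample is a pair of questions padded/truncated to max_len word indices,
    # plus a 0/1 label (not similar / similar). Random data stands in for the real corpus.
    n = 100000
    left = torch.randint(0, Vocab_size, (n, max_len))
    right = torch.randint(0, Vocab_size, (n, max_len))
    labels = torch.randint(0, 2, (n,))
    dataset = TensorDataset(left, right, labels)
    # 100000 divides evenly by 500; the model's fixed self.batch_size = 500
    # means every batch must contain exactly 500 samples.
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)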
Define the network structure
class test_model(nn.Module):
    def __init__(self):
        super(test_model, self).__init__()
        self.Vocab_size = Vocab_size              # vocabulary size
        self.batch_size = 500                     # every batch must contain exactly this many samples (see forward)
        self.input_size = 300                     # same role as embedding_size
        self.n_hidden1 = 128                      # hidden size of the Bi-LSTM
        self.Embedding_dim = 300                  # word-embedding dimension; pretrained word2vec vectors are used here
        self.n_class = 2                          # two classes: similar / not similar
        self.dropout = nn.Dropout(0.5)            # dropout of 0.5; unclear how much it actually helps later on
        self.Embedding_matrix = Embedding_matrix  # embedding matrix, size [Vocab_size, embedding_size]; you can also train your own
        self.word_embeds = nn.Embedding(self.Vocab_size, self.Embedding_dim)         # embedding layer
        pretrained_weight = np.array(self.Embedding_matrix)                          # convert to a numpy array
        self.word_embeds.weight.data.copy_(torch.from_numpy(pretrained_weight))      # load the pretrained matrix into the embedding layer
        self.Bi_Lstm1 = nn.LSTM(self.Embedding_dim, hidden_size=self.n_hidden1, bidirectional=True)  # Bi-LSTM, hidden_size = 128
        self.fc = nn.Linear(self.n_hidden1 * 2, self.n_class, bias=False)  # the attention output has size 256, so the FC layer is (256, 2)
        self.b = nn.Parameter(torch.rand([self.n_class]))                  # bias b
    def attention_weight1(self, outputs1, final_state1):           # step through with a debugger to see how the shapes change
        outputs1 = outputs1.permute(1, 0, 2)                        # [batch, seq_len, 2*hidden]
        hidden = final_state1.view(-1, self.n_hidden1 * 2, 1)       # [batch, 2*hidden, 1]
        attention_weights = torch.bmm(outputs1, hidden).squeeze(2)  # torch.bmm multiplies batches of matrices: x=[a,b,c] @ y=[a,c,d] -> z=[a,b,d]
        soft_attention_weights1 = F.softmax(attention_weights, 1)   # [batch, seq_len]
        context1 = torch.bmm(outputs1.transpose(1, 2), soft_attention_weights1.unsqueeze(2)).squeeze(2)  # weighted sum -> [batch, 2*hidden]
        return context1, soft_attention_weights1
    def forward(self, train_left, train_right):
        train_left = self.word_embeds(train_left).to(device)    # map word indices to embeddings: [batch, 12, 300]
        train_right = self.word_embeds(train_right).to(device)  # same for the right sentence
        train_left = train_left.transpose(0, 1)                 # swap dims to [seq_len, batch_size, embedding_dim]
        train_right = train_right.transpose(0, 1)
        # hidden/cell state initialization; these shapes hard-code self.batch_size, so every batch must contain exactly 500 samples
        hidden_state1 = torch.rand(2, self.batch_size, self.n_hidden1).to(device)
        cell_state1 = torch.rand(2, self.batch_size, self.n_hidden1).to(device)
        outputs1_L, (final_state1_L, _) = self.Bi_Lstm1(train_left, (hidden_state1, cell_state1))  # the two sides share the Bi-LSTM weights
        outputs1_L = self.dropout(outputs1_L)  # left-side outputs
        attn_outputs1_L, attention1_L = self.attention_weight1(outputs1_L, final_state1_L)         # the attention is shared as well
        outputs1_R, (final_state1_R, _) = self.Bi_Lstm1(train_right, (hidden_state1, cell_state1))
        outputs1_R = self.dropout(outputs1_R)
        attn_outputs1_R, attention1_R = self.attention_weight1(outputs1_R, final_state1_R)
        outputs1 = attn_outputs1_L               # attention-pooled sentence vectors
        outputs2 = attn_outputs1_R
        output = torch.abs(outputs1 - outputs2)  # Manhattan-style distance (element-wise absolute difference); other distances work too
        output = self.fc(output) + self.b        # fully connected layer -> two class scores
        # Note: nn.CrossEntropyLoss already applies log-softmax internally, so this extra softmax is redundant;
        # it keeps the per-sample loss from dropping much below ~0.31 even at perfect accuracy.
        output = F.softmax(output, dim=1)
        return output
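To make the shapes in attention_weight1 concrete, here is a small self-contained check; the batch size of 4 and sequence length of 12 are chosen purely for illustration, while the hidden size of 128 matches the model.

# Stand-alone shape check for the attention step (illustrative values only)
import torch
import torch.nn.functional as F

seq_len, batch, n_hidden = 12, 4, 128
outputs = torch.rand(seq_len, batch, n_hidden * 2)   # Bi-LSTM outputs: [seq_len, batch, 2*hidden]
final_state = torch.rand(2, batch, n_hidden)         # h_n: [num_directions, batch, hidden]

outputs_b = outputs.permute(1, 0, 2)                 # [batch, seq_len, 256]
hidden = final_state.view(-1, n_hidden * 2, 1)       # [batch, 256, 1]
scores = torch.bmm(outputs_b, hidden).squeeze(2)     # [batch, seq_len]
weights = F.softmax(scores, dim=1)                   # attention weights over the time steps
context = torch.bmm(outputs_b.transpose(1, 2), weights.unsqueeze(2)).squeeze(2)  # [batch, 256]

print(scores.shape, weights.shape, context.shape)    # torch.Size([4, 12]) torch.Size([4, 12]) torch.Size([4, 256])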
Training code
def train(model, device, train_loader, optimizer, criterion, epoch):
    print('Training on {} samples...'.format(len(train_loader.dataset)))
    model.train()
    train_loss = 0
    num_correct = 0
    for batch_idx, (train_left, train_right, labels) in enumerate(train_loader):  # the loader yields tuples, so unpack with parentheses
        train_left = train_left.to(device)
        train_right = train_right.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        output = model(train_left, train_right)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        train_loss += float(loss.item())
        true = labels.data.cpu()
        predict = torch.max(output, dim=1)[1].cpu()                  # predicted class = argmax over the two scores
        num_correct += torch.eq(predict, true).sum().float().item()
    train_acc = num_correct / len(train_loader.dataset)
    train_loss = train_loss / len(train_loader)
    msg = 'Epoch: {0:>5}, Train Loss: {1:>5.5}, Train Acc: {2:>6.4%}'
    print(msg.format(epoch, train_loss, train_acc))
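The script only reports training accuracy. A minimal evaluation sketch, assuming a validation DataLoader that yields the same (left, right, labels) batches of exactly 500 samples (required by the hard-coded batch size in forward), might look like this; it is not part of the original code.

def evaluate(model, device, data_loader, criterion):
    # Hypothetical evaluation loop mirroring train(); disables dropout and gradient tracking.
    model.eval()
    total_loss = 0
    num_correct = 0
    with torch.no_grad():
        for data_left, data_right, labels in data_loader:
            data_left, data_right, labels = data_left.to(device), data_right.to(device), labels.to(device)
            output = model(data_left, data_right)
            total_loss += float(criterion(output, labels).item())
            predict = torch.max(output, dim=1)[1]
            num_correct += torch.eq(predict, labels).sum().float().item()
    return total_loss / len(data_loader), num_correct / len(data_loader.dataset)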
Training
if __name__ == '__main__':
    Epochs = 60
    Learn_rate = 0.001
    device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')  # forward() reads this module-level device variable
    dataloader = get_dataloader()
    Bi_LstmModel = test_model().to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    # criterion = ContrastiveLoss().to(device)
    optimizer = optim.Adam(Bi_LstmModel.parameters(), lr=Learn_rate)
    for epoch in range(Epochs):
        train(Bi_LstmModel, device, dataloader, optimizer, criterion, epoch + 1)
Results
length of vocabs:5956
5956
Found 5956 word vectors.
average_length: 6.520725
max_length: 12
Training on 100000 samples...
Epoch: 1, Train Loss: 0.64095, Train Acc: 62.9720%
Training on 100000 samples...
Epoch: 2, Train Loss: 0.58864, Train Acc: 70.8260%
Training on 100000 samples...
Epoch: 3, Train Loss: 0.55469, Train Acc: 74.9770%
Training on 100000 samples...
Epoch: 4, Train Loss: 0.52636, Train Acc: 78.2610%
Training on 100000 samples...
Epoch: 5, Train Loss: 0.50418, Train Acc: 80.7080%
Training on 100000 samples...
Epoch: 6, Train Loss: 0.48491, Train Acc: 82.8620%
Training on 100000 samples...
Epoch: 7, Train Loss: 0.46984, Train Acc: 84.5690%
Training on 100000 samples...
Epoch: 8, Train Loss: 0.456, Train Acc: 85.9410%
Training on 100000 samples...
Epoch: 9, Train Loss: 0.44567, Train Acc: 87.0060%
Training on 100000 samples...
Epoch: 10, Train Loss: 0.43607, Train Acc: 88.0130%
Training on 100000 samples...
Epoch: 11, Train Loss: 0.42797, Train Acc: 88.8480%
Training on 100000 samples...
Epoch: 12, Train Loss: 0.42026, Train Acc: 89.5970%
Training on 100000 samples...
Epoch: 13, Train Loss: 0.41379, Train Acc: 90.2630%
Training on 100000 samples...
Epoch: 14, Train Loss: 0.40766, Train Acc: 90.8920%
Training on 100000 samples...
Epoch: 15, Train Loss: 0.40276, Train Acc: 91.3650%
Training on 100000 samples...
Epoch: 16, Train Loss: 0.39678, Train Acc: 91.9920%
Training on 100000 samples...
Epoch: 17, Train Loss: 0.39264, Train Acc: 92.3740%
Training on 100000 samples...
Epoch: 18, Train Loss: 0.38861, Train Acc: 92.7450%
Training on 100000 samples...
Epoch: 19, Train Loss: 0.38506, Train Acc: 93.0790%
Training on 100000 samples...
Epoch: 20, Train Loss: 0.3821, Train Acc: 93.4100%
Training on 100000 samples...
Epoch: 21, Train Loss: 0.37889, Train Acc: 93.7120%
Training on 100000 samples...
Epoch: 22, Train Loss: 0.37757, Train Acc: 93.8200%
Training on 100000 samples...
Epoch: 23, Train Loss: 0.37417, Train Acc: 94.0930%
Training on 100000 samples...
Epoch: 24, Train Loss: 0.3715, Train Acc: 94.4140%
Training on 100000 samples...
Epoch: 25, Train Loss: 0.36956, Train Acc: 94.5810%
Training on 100000 samples...
Epoch: 26, Train Loss: 0.36763, Train Acc: 94.7730%
Training on 100000 samples...
Epoch: 27, Train Loss: 0.36562, Train Acc: 94.9750%
Training on 100000 samples...
Epoch: 28, Train Loss: 0.36378, Train Acc: 95.1200%
Training on 100000 samples...
Epoch: 29, Train Loss: 0.36189, Train Acc: 95.3260%
Training on 100000 samples...
Epoch: 30, Train Loss: 0.36093, Train Acc: 95.4330%
Training on 100000 samples...
Epoch: 31, Train Loss: 0.35879, Train Acc: 95.6250%
Training on 100000 samples...
Epoch: 32, Train Loss: 0.35819, Train Acc: 95.6460%
Training on 100000 samples...
Epoch: 33, Train Loss: 0.35632, Train Acc: 95.8660%
Training on 100000 samples...
Epoch: 34, Train Loss: 0.35541, Train Acc: 95.9820%
Training on 100000 samples...
Epoch: 35, Train Loss: 0.35451, Train Acc: 96.0200%
Training on 100000 samples...
Epoch: 36, Train Loss: 0.35287, Train Acc: 96.1810%
Training on 100000 samples...
Epoch: 37, Train Loss: 0.35191, Train Acc: 96.2920%
Training on 100000 samples...
Epoch: 38, Train Loss: 0.35084, Train Acc: 96.4330%
Training on 100000 samples...
Epoch: 39, Train Loss: 0.35073, Train Acc: 96.3850%
Training on 100000 samples...
Epoch: 40, Train Loss: 0.34976, Train Acc: 96.5080%
Training on 100000 samples...
Epoch: 41, Train Loss: 0.34839, Train Acc: 96.6140%
Training on 100000 samples...
Epoch: 42, Train Loss: 0.34836, Train Acc: 96.6310%
Training on 100000 samples...
Epoch: 43, Train Loss: 0.34741, Train Acc: 96.6780%
Training on 100000 samples...
Epoch: 44, Train Loss: 0.34707, Train Acc: 96.6900%
Training on 100000 samples...
Epoch: 45, Train Loss: 0.34599, Train Acc: 96.8830%
Training on 100000 samples...
Epoch: 46, Train Loss: 0.34547, Train Acc: 96.9310%
Training on 100000 samples...
Epoch: 47, Train Loss: 0.34464, Train Acc: 96.9570%
Training on 100000 samples...
Epoch: 48, Train Loss: 0.34371, Train Acc: 97.0530%
Training on 100000 samples...
Epoch: 49, Train Loss: 0.3439, Train Acc: 97.0470%
Training on 100000 samples...
Epoch: 50, Train Loss: 0.34344, Train Acc: 97.0800%
Training on 100000 samples...
Epoch: 51, Train Loss: 0.34239, Train Acc: 97.1990%
Training on 100000 samples...
Epoch: 52, Train Loss: 0.34175, Train Acc: 97.2850%
Training on 100000 samples...
Epoch: 53, Train Loss: 0.34119, Train Acc: 97.3100%
Training on 100000 samples...
Epoch: 54, Train Loss: 0.34126, Train Acc: 97.2960%
Training on 100000 samples...
Epoch: 55, Train Loss: 0.34068, Train Acc: 97.3470%
Training on 100000 samples...
Epoch: 56, Train Loss: 0.34012, Train Acc: 97.3980%
Training on 100000 samples...
Epoch: 57, Train Loss: 0.33972, Train Acc: 97.4420%
Training on 100000 samples...
Epoch: 58, Train Loss: 0.3399, Train Acc: 97.4150%
Training on 100000 samples...
Epoch: 59, Train Loss: 0.33941, Train Acc: 97.4970%
Training on 100000 samples...
Epoch: 60, Train Loss: 0.33927, Train Acc: 97.4890%
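One thing worth noting about the loss curve: because forward() already applies softmax and nn.CrossEntropyLoss applies log-softmax again, the per-sample loss cannot fall below about 0.31 even when every prediction is correct, which is consistent with the plateau around 0.34 above while accuracy keeps climbing. A quick check:

import math
# With the extra softmax, a perfectly confident prediction feeds [1.0, 0.0] into CrossEntropyLoss,
# whose internal log-softmax then yields this minimum achievable per-sample loss:
floor = -math.log(math.exp(1.0) / (math.exp(1.0) + math.exp(0.0)))
print(round(floor, 4))  # ~0.3133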