BiLSTM feature extraction: embedding → dropout → mask → BiLSTM → LayerNorm → classifier
The code for this part is in model.py, and it is used in run_lstm_crf.py as:

```python
model = NERModel(vocab_size=len(processor.vocab), embedding_size=args.embedding_size,
                 hidden_size=args.hidden_size, device=args.device, label2id=args.label2id)
```

model.py defines the class as follows (note that the constructor shown below takes class_num and drop_p directly, while the call above passes device and label2id):

```python
import torch.nn as nn


class NERModel(nn.Module):

    def __init__(self, vocab_size, embedding_size, hidden_size, class_num, drop_p=0.1):
        super(NERModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_size)    # embedding layer
        self.dropout = nn.Dropout2d(p=drop_p)                        # Dropout2d, used to drop whole embedding dimensions
        self.bilstm = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size,
                              batch_first=True, num_layers=2, dropout=drop_p,
                              bidirectional=True)                    # BiLSTM; dropout is applied to every layer except the last
        self.layer_norm = nn.LayerNorm(hidden_size * 2)              # LayerNorm over the hidden_size * 2 dimension
        self.classifier = nn.Linear(hidden_size * 2, class_num)      # linear layer mapping hidden_size * 2 to class_num

    def forward(self, input_ids, input_mask):           # input_ids = [batch_size, seq_length]
        embs = self.embedding(input_ids)                # embs = [batch_size, seq_length, embedding_size]
        # The dropout steps below could be factored into their own module (see the MyDropout sketch after this block).
        embs = embs.unsqueeze(2)                        # embs = [batch_size, seq_length, 1, embedding_size]
        embs = embs.permute(0, 3, 2, 1)                 # embs = [batch_size, embedding_size, 1, seq_length]
        embs = self.dropout(embs)                       # Dropout2d randomly zeroes whole embedding dimensions
        embs = embs.permute(0, 3, 2, 1)                 # embs = [batch_size, seq_length, 1, embedding_size]
        embs = embs.squeeze(2)                          # embs = [batch_size, seq_length, embedding_size]

        embs = embs * input_mask.float().unsqueeze(2)   # sequence mask: zero out the padded positions
        seq_output, _ = self.bilstm(embs)               # seq_output = [batch_size, seq_length, hidden_size * 2]
        seq_output = self.layer_norm(seq_output)        # layer normalization over the hidden_size * 2 dimension
        features = self.classifier(seq_output)          # features = [batch_size, seq_length, class_num]
        return features
```
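
As the comment in forward points out, the unsqueeze/permute/dropout/permute/squeeze sequence can be pulled out into its own module. Below is a minimal sketch of such a module; the name MyDropout matches point 5 of the notes further down, but the module is not part of model.py and only illustrates the idea:

```python
import torch.nn as nn


class MyDropout(nn.Module):
    """Drops the same embedding dimensions at every position of the sequence."""

    def __init__(self, drop_p=0.1):
        super(MyDropout, self).__init__()
        self.dropout = nn.Dropout2d(p=drop_p)   # Dropout2d zeroes entire channels

    def forward(self, embs):                    # embs = [batch_size, seq_length, embedding_size]
        embs = embs.unsqueeze(2)                # [batch_size, seq_length, 1, embedding_size]
        embs = embs.permute(0, 3, 2, 1)         # [batch_size, embedding_size, 1, seq_length]: embedding dims become channels
        embs = self.dropout(embs)               # zero out whole channels, i.e. whole embedding dimensions
        embs = embs.permute(0, 3, 2, 1)         # [batch_size, seq_length, 1, embedding_size]
        return embs.squeeze(2)                  # [batch_size, seq_length, embedding_size]
```

With this in place, the five reshaping lines in forward reduce to a single self.dropout(embs) call, and the behaviour is easy to verify in isolation: applying the module to a random [batch_size, seq_length, embedding_size] tensor zeroes the same entries along the embedding dimension at every sequence position.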

**A small test case gives a deeper understanding of the model's parameters**

```python
import numpy as np
import torch

inputs = torch.LongTensor(np.arange(0, 6)).view(2, 3)
inputs_mask = torch.Tensor([[1, 1, 1], [1, 1, 0]])
net = NERModel(vocab_size=6, embedding_size=5, hidden_size=10, class_num=7, drop_p=0.1)
outputs = net(inputs, inputs_mask)
print(outputs)   # shape [2, 3, 7] = [batch_size, seq_length, class_num]
```

Key parameters explained:

1. vocab_size is the vocabulary size. Although nn.Embedding(vocab_size, embedding_size) looks like any other (in_size, out_size) layer constructor, do not mistake vocab_size for some initial hidden size: it is the size of the deduplicated vocabulary, i.e. the number of distinct tokens. Here it is 6 because the input holds the token ids 0-5 (arange(0, 6) reshaped by view(2, 3), 2 * 3 = 6).
2. inputs = [batch_size, seq_length], where batch_size is the number of sentences and seq_length the (already unified) sentence length. Do not think only in terms of tensor shapes; keep this physical meaning in mind. inputs must be an integer tensor, which nn.Embedding requires.
3. inputs_mask is a mask over the sentence length, i.e. a sequence mask: sentences shorter than the unified length have their padding tokens multiplied by 0, removing their influence (a shape check for points 1-3 is sketched right after this list).
4. In nn.LSTM, dropout is never applied to the last layer, so setting the dropout parameter is pointless when num_layers=1 (see the warning demo after this list).
5. The MyDropout sketch above is based on nn.Dropout2d because it drops the same embedding dimensions at every seq_length position; physically, this discards part of the word vectors' semantic space.
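
To make points 1-3 concrete, here is a minimal shape check using the same toy tensors as the test case above (the standalone embedding layer is created purely for illustration):

```python
import numpy as np
import torch
import torch.nn as nn

inputs = torch.LongTensor(np.arange(0, 6)).view(2, 3)   # [batch_size=2, seq_length=3], integer token ids 0-5
inputs_mask = torch.Tensor([[1, 1, 1], [1, 1, 0]])      # last position of the second sentence is padding

embedding = nn.Embedding(6, 5)                           # vocab_size=6 distinct tokens, embedding_size=5
embs = embedding(inputs)                                 # one 5-dim vector per token
print(embs.shape)                                        # torch.Size([2, 3, 5])

masked = embs * inputs_mask.float().unsqueeze(2)         # broadcast the sequence mask over the embedding dimension
print(masked[1, 2])                                      # all zeros: the padded token no longer contributes
```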

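For point 4, PyTorch itself flags this: building an nn.LSTM with num_layers=1 and a non-zero dropout emits a UserWarning (the exact wording depends on the PyTorch version, so the sketch below simply captures and prints whatever warning is raised):

```python
import warnings

import torch.nn as nn

# With a single layer there is nothing "between layers" to drop out,
# so the dropout argument has no effect and PyTorch warns about it.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    nn.LSTM(input_size=5, hidden_size=10, num_layers=1,
            dropout=0.1, batch_first=True, bidirectional=True)

for w in caught:
    print(w.category.__name__, w.message)   # expect a UserWarning mentioning dropout and num_layers

# With num_layers=2, as in NERModel, dropout is applied between the two layers and no warning appears.
```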