Commit f53e2f2 (0 parents): 13 changed files with 1,784 additions and 0 deletions.
@@ -0,0 +1,49 @@
Code for the ACL 2019 paper:

## Improving Question Answering over Incomplete KBs with Knowledge-Aware Reader

Paper link: [https://arxiv.org/abs/1905.07098](https://arxiv.org/abs/1905.07098)

Model Overview:
<p align="center"><img width="90%" src="assets/model.png" /></p>

### Requirements
* ``PyTorch 1.0.1``
* ``tensorboardX``
* ``tqdm``
* ``gluonnlp``

### Prepare data
```
mkdir datasets && cd datasets && wget http://nlp.cs.ucsb.edu/data/webqsp.tar.gz && tar -xzvf webqsp.tar.gz
```

### Full KB setting
**Training**
```
CUDA_VISIBLE_DEVICES=0 python train.py --model_id KAReader_full_kb --num_layer 1 --max_num_neighbors 50 --label_smooth 0.1 --data_folder datasets/webqsp/full/
```
**Testing**
```
CUDA_VISIBLE_DEVICES=0 python train.py --model_id KAReader_full_kb --num_layer 1 --max_num_neighbors 50 --label_smooth 0.1 --data_folder datasets/webqsp/full/ --mode test
```

### Incomplete KB setting (50%)
**Training**
```
CUDA_VISIBLE_DEVICES=0 python train.py --model_id KAReader_kb_05 --num_layer 1 --max_num_neighbors 100 --use_doc --label_smooth 0.1 --data_folder datasets/webqsp/kb_05/
```
**Testing**
```
CUDA_VISIBLE_DEVICES=0 python train.py --model_id KAReader_kb_05 --num_layer 1 --max_num_neighbors 100 --use_doc --label_smooth 0.1 --data_folder datasets/webqsp/kb_05/ --mode test --eps 0.12
```

### Bibtex
```
@article{xiong2019improving,
  title={Improving Question Answering over Incomplete KBs with Knowledge-Aware Reader},
  author={Xiong, Wenhan and Yu, Mo and Chang, Shiyu and Guo, Xiaoxiao and Wang, William Yang},
  journal={arXiv preprint arXiv:1905.07098},
  year={2019}
}
```
@@ -0,0 +1,169 @@
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.autograd import Variable
import copy
import math


def attention(query, key, value, mask=None, dropout=None):
    "Compute 'Scaled Dot Product Attention'"
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) \
             / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn
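# Illustrative shape check (not part of the original file): with query/key/value
# of shape (batch, heads, seq_len, d_k), e.g.
#   q = k = v = torch.randn(2, 4, 5, 16)
#   out, p_attn = attention(q, k, v)
# `out` has shape (2, 4, 5, 16) and `p_attn` (the attention weights) (2, 4, 5, 5).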

def clones(module, N):
    "Produce N identical layers."
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])


class MultiHeadedAttention(nn.Module):
    def __init__(self, h, d_model, dropout=0.1):
        "Take in model size and number of heads."
        super(MultiHeadedAttention, self).__init__()
        assert d_model % h == 0
        # We assume d_v always equals d_k
        self.d_k = d_model // h
        self.h = h
        self.linears = clones(nn.Linear(d_model, d_model), 4)
        self.attn = None
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, query, key, value, mask=None):
        "Implements Figure 2"
        if mask is not None:
            # Same mask applied to all h heads.
            mask = mask.unsqueeze(1)
        nbatches = query.size(0)

        # 1) Do all the linear projections in batch from d_model => h x d_k
        query, key, value = \
            [l(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2)
             for l, x in zip(self.linears, (query, key, value))]

        # 2) Apply attention on all the projected vectors in batch.
        x, self.attn = attention(query, key, value, mask=mask,
                                 dropout=self.dropout)

        # 3) "Concat" using a view and apply a final linear.
        x = x.transpose(1, 2).contiguous() \
             .view(nbatches, -1, self.h * self.d_k)
        return self.linears[-1](x)
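# Shape walkthrough (illustrative): with h=4 and d_model=64, inputs of shape
# (batch, seq_len, 64) are projected and reshaped to (batch, 4, seq_len, 16),
# attention is applied per head, and the heads are concatenated back to
# (batch, seq_len, 64) before the final linear layer.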

class PositionwiseFeedForward(nn.Module):
    "Implements FFN equation."
    def __init__(self, d_model, d_ff, dropout=0.1):
        super(PositionwiseFeedForward, self).__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        return self.w_2(self.dropout(F.relu(self.w_1(x))))


class PositionalEncoding(nn.Module):
    "Implement the PE function."
    def __init__(self, d_model, dropout, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() *
                             -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + Variable(self.pe[:, :x.size(1)],
                         requires_grad=False)
        return self.dropout(x)
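# The `pe` buffer built in PositionalEncoding.__init__ implements, in log space,
#   PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
#   PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))
# so each dimension is a sinusoid whose wavelength grows geometrically with i.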

class LayerNorm(nn.Module):
    "Construct a layernorm module (See citation for details)."
    def __init__(self, features, eps=1e-6):
        super(LayerNorm, self).__init__()
        self.a_2 = nn.Parameter(torch.ones(features))
        self.b_2 = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)
        std = x.std(-1, keepdim=True)
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2


class SublayerConnection(nn.Module):
    """
    A residual connection followed by a layer norm.
    Note for code simplicity the norm is first as opposed to last.
    """
    def __init__(self, size, dropout):
        super(SublayerConnection, self).__init__()
        self.norm = LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        "Apply residual connection to any sublayer with the same size."
        return x + self.dropout(sublayer(self.norm(x)))


class EncoderLayer(nn.Module):
    "Encoder is made up of self-attn and feed forward (defined below)"
    def __init__(self, size, self_attn, feed_forward, dropout):
        super(EncoderLayer, self).__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.sublayer = clones(SublayerConnection(size, dropout), 2)
        self.size = size

    def forward(self, x, mask):
        "Follow Figure 1 (left) for connections."
        x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, mask))
        return self.sublayer[1](x, self.feed_forward)


class Encoder(nn.Module):
    "Core encoder is a stack of N layers"
    def __init__(self, layer, N):
        super(Encoder, self).__init__()
        self.layers = clones(layer, N)
        self.norm = LayerNorm(layer.size)

    def forward(self, x, mask):
        "Pass the input (and mask) through each layer in turn."
        for layer in self.layers:
            x = layer(x, mask)
        return self.norm(x)

class SimpleEncoder(nn.Module):
    """
    Takes inputs of shape (batch_size, seq_len, embed_dim) together with a
    (batch_size, seq_len) padding mask, adds positional encodings, and runs a
    stack of Transformer encoder layers.
    """
    def __init__(self, embed_dim, head=4, layer=1, dropout=0.1):
        super(SimpleEncoder, self).__init__()
        d_ff = 2 * embed_dim

        self.position = PositionalEncoding(embed_dim, dropout)
        attn = MultiHeadedAttention(head, embed_dim)
        ff = PositionwiseFeedForward(embed_dim, d_ff)
        self.encoder = Encoder(EncoderLayer(embed_dim, attn, ff, dropout), layer)

    def forward(self, x, mask):
        mask = mask.unsqueeze(-2)
        x = self.position(x)
        x = self.encoder(x, mask)
        return x

if __name__ == '__main__':
    # Quick smoke test. The encoder expects a boolean/byte mask of shape
    # (batch, seq_len), not a list of lengths, so build one from `lens` first.
    encoder = SimpleEncoder(350, 2, 1)
    inputs = torch.zeros(1000, 50, 350)
    lens = [10] * 1000
    mask = torch.arange(50).unsqueeze(0) < torch.tensor(lens).unsqueeze(1)
    encoder(inputs, mask)
@@ -0,0 +1,73 @@
import gluonnlp as nlp
import numpy as np
from tqdm import tqdm

dataset = 'datasets/webqsp/kb_05'
rel_path = dataset + '/relations.txt'

word_counter = []

# load original vocab
with open(dataset + '/vocab.txt') as f:
    for line in f.readlines():
        word_counter.append(line.strip())

rel_words = []
max_num_words = 0
all_relations = []

# how to split the relation
if 'webqsp' in dataset:
    with open(rel_path) as f:
        first_line = True
        for line in tqdm(f.readlines()):
            if first_line:
                first_line = False
                continue
            line = line.strip()
            all_relations.append(line)
            line = line[1:-1]
            fields = line.split('.')
            words = fields[-2].split('_') + fields[-1].split('_')
            max_num_words = max(len(words), max_num_words)
            rel_words.append(words)
            word_counter += words
elif 'wikimovie' in dataset:
    with open(rel_path) as f:
        for line in tqdm(f.readlines()):
            line = line.strip()
            all_relations.append(line)
            words = line.split('_')
            max_num_words = max(len(words), max_num_words)
            rel_words.append(words)
            word_counter += words
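# Illustrative example of the WebQSP branch above (hypothetical relation string):
#   line = "<fb:people.person.place_of_birth>"
#   line[1:-1].split('.')  ->  ['fb:people', 'person', 'place_of_birth']
#   words = ['person'] + ['place', 'of', 'birth']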

print('max_num_words: ', max_num_words)

word_counter = nlp.data.count_tokens(word_counter)
glove_emb = nlp.embedding.create('glove', source='glove.6B.100d')
vocab = nlp.Vocab(word_counter)
vocab.set_embedding(glove_emb)

emb_mat = vocab.embedding.idx_to_vec.asnumpy()
np.save(dataset + '/glove_word_emb_100d', emb_mat)

with open(dataset + '/glove_vocab.txt', 'w') as g:
    g.write('\n'.join(vocab.idx_to_token))
# NOTE: this assert stops the script here (left as in the original commit);
# remove it to also run the relation word-index construction below.
assert False
rel_word_ids = np.ones((len(rel_words) + 1, max_num_words), dtype=int)  # leave the first 1 for padding relation
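# Note (relies on an assumption about gluonnlp defaults): nlp.Vocab reserves
# index 1 for the '<pad>' token, so row 0 (the padding relation) and any unused
# word slots stay mapped to padding.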
rel_emb_mat = []
for rel_idx, words in enumerate(rel_words):
    for i, word in enumerate(words):
        rel_word_ids[rel_idx + 1, i] = vocab.token_to_idx[word]

np.save(dataset + '/rel_word_idx', rel_word_ids)

all_relations = ['pad_rel'] + all_relations
with open(rel_path, 'w') as g:
    g.write('\n'.join(all_relations))