Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
d95a352
merge with kaldi_52
hainan-xv May 11, 2017
fab3faa
scripts to train RNNLM with tensorflow
hainan-xv May 22, 2017
cae43c6
successfully did python training plus c++ eval
hainan-xv May 23, 2017
1302854
bigger graph idea is working
hainan-xv Jun 3, 2017
e148dbb
an initial working version of lstm LM that is accessible from C++
hainan-xv Jun 5, 2017
56d9c89
supports multilayer LSTM now
hainan-xv Jun 6, 2017
1df10a8
add script to install bazel
hainan-xv Jun 6, 2017
bfb4ad2
add script to compile tensorflow with simple RNN c++example
hainan-xv Jun 6, 2017
0c4b2b4
more files added
hainan-xv Jun 7, 2017
066cc74
change for spoken machines
Jun 7, 2017
4d27c7d
add makefile
hainan-xv Jun 7, 2017
8bb2437
Merge branch 'tensorflow' of https://github.com/hainan-xv/kaldi into …
hainan-xv Jun 7, 2017
96e5a2b
tf compiles with kaldi
hainan-xv Jun 9, 2017
85fd7b2
starting to write the tensorflow wrappers
hainan-xv Jun 12, 2017
5c19b09
include the h and cc files; delete some of the unuseful files
hainan-xv Jun 12, 2017
6fa3f3f
add binary; undebugged
hainan-xv Jun 12, 2017
3396413
starting to debug the tensorflow code
hainan-xv Jun 13, 2017
c076159
add more text processing
hainan-xv Jun 13, 2017
e4a0aeb
more changes
hainan-xv Jun 14, 2017
37050ba
tf rnnlm rescoring working
Jun 15, 2017
f435a3a
draft of a much more efficient way of rescoring
Jun 16, 2017
e4ed676
going to change the BOS/EOS implementations
Jun 19, 2017
a33d4ef
use only one sentence-boundary symbol
Jun 19, 2017
063fe10
use only one sentence-boundary symbol
Jun 19, 2017
7ef2de4
recipe draft finished
Jun 20, 2017
8787364
add new objf; still debugging
Jun 20, 2017
f83c006
new objf working
Jun 21, 2017
beeb56c
fix small issue
Jun 22, 2017
023f2ba
add better handling of OOS words
Jun 23, 2017
9053aa1
small change in install_tensorflow script
Jun 23, 2017
77f5d71
add install python tf script
Jul 5, 2017
70f349f
make install script chmox+x
Jul 5, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions egs/ami/s5/cmd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

export train_cmd="queue.pl --mem 1G"
export decode_cmd="queue.pl --mem 2G"
# Command wrapper for the TensorFlow steps.
# NOTE(review): "-l hostname=b*" looks like a site-specific host restriction
# passed through queue.pl -- confirm it is valid on your grid before use.
export tensorflow_cmd="queue.pl -l hostname=b*"
# the use of cuda_cmd is deprecated but it is sometimes still used in nnet1
# scripts.
export cuda_cmd="queue.pl --gpu 1 --mem 20G"
Expand Down
384 changes: 384 additions & 0 deletions egs/ami/s5/local/tensorflow/lstm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,384 @@
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
# Modified by Hainan Xu to be used in Kaldi for lattice rescoring 2017
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys

# NOTE(review): this prepends a hard-coded, user-specific Python 2.7
# site-packages directory to the module search path, so it takes priority over
# the system packages for the imports below. Presumably it is there to pick up
# a locally installed TensorFlow -- confirm, and consider making it
# configurable, since the directory will not exist on other machines.
sys.path.insert(0,"/home/hxu/.local/lib/python2.7/site-packages/")

import inspect
import time

import numpy as np
import tensorflow as tf

import reader

# Short aliases for the TensorFlow flag and logging modules.
flags = tf.flags
logging = tf.logging

# Command-line options. The value of --model is resolved to a configuration
# class by get_config(), which also accepts "test" (a tiny configuration), so
# the help text lists it as well.
flags.DEFINE_string(
    "model", "small",
    "A type of model. Possible options are: small, medium, large, test.")
flags.DEFINE_string("data_path", None,
                    "Where the training/test data is stored.")
flags.DEFINE_string("vocab_path", None,
                    "Where the wordlist file is stored.")
flags.DEFINE_string("save_path", None,
                    "Model output directory.")
flags.DEFINE_bool("use_fp16", False,
                  "Train using 16-bit floats instead of 32bit floats")

# Parsed flag values, read by data_type(), get_config() and main().
FLAGS = flags.FLAGS


def data_type():
    """Return the float dtype selected by the --use_fp16 flag.

    Returns:
      tf.float16 when FLAGS.use_fp16 is set, tf.float32 otherwise.
    """
    if FLAGS.use_fp16:
        return tf.float16
    return tf.float32


class RNNLMInput(object):
    """Input side of the model: batching parameters plus data/target tensors."""

    def __init__(self, config, data, name=None):
        """Set up the batched input for one data set.

        Args:
          config: configuration object providing `batch_size` and `num_steps`.
          data: sized sequence of training data; it is handed unchanged to
            reader.rnnlm_producer.
          name: optional name forwarded to reader.rnnlm_producer.
        """
        per_batch = config.batch_size
        unroll = config.num_steps

        self.batch_size = per_batch
        self.num_steps = unroll

        # Number of `num_steps`-long chunks available once the data has been
        # divided across `batch_size` parallel streams.
        stream_len = len(data) // per_batch
        self.epoch_size = (stream_len - 1) // unroll

        produced = reader.rnnlm_producer(data, per_batch, unroll, name=name)
        self.input_data, self.targets = produced


class RNNLMModel(object):
    """The RNNLM model.

    Besides the unrolled training/validation graph, the constructor adds a set
    of explicitly named single-step ops ("test_initial_state", "test_word_in",
    "test_state_in", "test_state_out", "test_cell_out", "test_word_out",
    "test_cell_in", "test_out") that evaluate the network one word at a time.
    NOTE(review): these names are presumably looked up by the external
    rescoring code -- confirm before renaming any of them.
    """

    def __init__(self, is_training, config, input_):
        """Build the graph in the current default graph / variable scope.

        Args:
          is_training: when true, dropout is applied (if config.keep_prob < 1)
            and the learning-rate variable and training op are created.
          config: configuration object; reads hidden_size, vocab_size,
            num_layers, keep_prob and, when training, max_grad_norm.
          input_: object providing batch_size, num_steps, input_data and
            targets (see RNNLMInput).
        """
        self._input = input_

        batch_size = input_.batch_size
        num_steps = input_.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        # Slightly better results can be obtained with forget gate biases
        # initialized to 1 but the hyperparameters of the model would need to
        # be different than reported in the paper.
        def lstm_cell():
            # With the latest TensorFlow source code (as of Mar 27, 2017),
            # the BasicLSTMCell will need a reuse parameter which is
            # unfortunately not defined in TensorFlow 1.0. To maintain
            # backwards compatibility, we add an argument check here:
            if 'reuse' in inspect.getargspec(
                    tf.contrib.rnn.BasicLSTMCell.__init__).args:
                return tf.contrib.rnn.BasicLSTMCell(
                    size, forget_bias=0.0, state_is_tuple=True,
                    reuse=tf.get_variable_scope().reuse)
            else:
                return tf.contrib.rnn.BasicLSTMCell(
                    size, forget_bias=0.0, state_is_tuple=True)

        attn_cell = lstm_cell
        if is_training and config.keep_prob < 1:
            # Wrap every layer's output in dropout during training.
            def attn_cell():
                return tf.contrib.rnn.DropoutWrapper(
                    lstm_cell(), output_keep_prob=config.keep_prob)
        self.cell = tf.contrib.rnn.MultiRNNCell(
            [attn_cell() for _ in range(config.num_layers)],
            state_is_tuple=True)

        self._initial_state = self.cell.zero_state(batch_size, data_type())
        self._initial_state_single = self.cell.zero_state(1, data_type())

        # Zero state for a single sequence, packed into one tensor of shape
        # [num_layers, 2, 1, size].
        self.initial = tf.reshape(
            tf.stack(axis=0, values=self._initial_state_single),
            [config.num_layers, 2, 1, size], name="test_initial_state")

        # first implement the less efficient version
        test_word_in = tf.placeholder(tf.int32, [1, 1], name="test_word_in")

        # The packed state uses data_type() like every other tensor of the
        # model; a hard-coded float32 here would clash with float16 variables
        # when --use_fp16 is set.
        state_placeholder = tf.placeholder(
            data_type(), [config.num_layers, 2, 1, size], name="test_state_in")
        # unpacking the input state context: one LSTMStateTuple per layer,
        # built from index 0 and index 1 of that layer's slice.
        layer_states = tf.unstack(state_placeholder, axis=0)
        test_input_state = tuple(
            [tf.contrib.rnn.LSTMStateTuple(layer[0], layer[1])
             for layer in layer_states]
        )

        with tf.device("/cpu:0"):
            self.embedding = tf.get_variable(
                "embedding", [vocab_size, size], dtype=data_type())

            inputs = tf.nn.embedding_lookup(self.embedding, input_.input_data)
            test_inputs = tf.nn.embedding_lookup(self.embedding, test_word_in)

        # test time
        with tf.variable_scope("RNN"):
            (test_cell_output, test_output_state) = self.cell(
                test_inputs[:, 0, :], test_input_state)

        test_state_out = tf.reshape(
            tf.stack(axis=0, values=test_output_state),
            [config.num_layers, 2, 1, size], name="test_state_out")
        test_cell_out = tf.reshape(
            test_cell_output, [1, size], name="test_cell_out")
        # above is the first part of the graph for test
        #   test-word-in
        #                 > ---- > test-state-out
        #   test-state-in        > test-cell-out

        # below is the 2nd part of the graph for test
        #   test-word-out
        #                 > log-prob(test-word-out | test-cell-in)
        #   test-cell-in

        test_word_out = tf.placeholder(tf.int32, [1, 1], name="test_word_out")
        # Same dtype as softmax_w so that the matmul below is well-typed under
        # --use_fp16 as well.
        cellout_placeholder = tf.placeholder(
            data_type(), [1, size], name="test_cell_in")

        softmax_w = tf.get_variable(
            "softmax_w", [size, vocab_size], dtype=data_type())
        softmax_b = tf.get_variable(
            "softmax_b", [vocab_size], dtype=data_type())

        test_logits = tf.matmul(cellout_placeholder, softmax_w) + softmax_b
        test_softmaxed = tf.nn.log_softmax(test_logits)

        # Log-softmax entry of the requested word; exposed under a fixed name.
        p_word = test_softmaxed[0, test_word_out[0, 0]]
        test_out = tf.identity(p_word, name="test_out")

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Simplified version of models/tutorials/rnn/rnn.py's rnn().
        # This builds an unrolled LSTM for tutorial purposes only.
        # In general, use the rnn() or state_saving_rnn() from rnn.py.
        #
        # The alternative version of the code below is:
        #
        # inputs = tf.unstack(inputs, num=num_steps, axis=1)
        # outputs, state = tf.contrib.rnn.static_rnn(
        #     cell, inputs, initial_state=self._initial_state)
        outputs = []
        state = self._initial_state
        with tf.variable_scope("RNN"):
            # The cell variables already exist: they were created by the
            # single-step call in the "RNN" scope above, so every unrolled
            # step (including the first) has to reuse them.
            tf.get_variable_scope().reuse_variables()
            for time_step in range(num_steps):
                (cell_output, state) = self.cell(
                    inputs[:, time_step, :], state)
                outputs.append(cell_output)

        output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])
        logits = tf.matmul(output, softmax_w) + softmax_b
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [logits],
            [tf.reshape(input_.targets, [-1])],
            [tf.ones([batch_size * num_steps], dtype=data_type())])
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = state

        # Evaluation-only models stop here: `lr` and `train_op` are only
        # available on models built with is_training=True.
        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=tf.contrib.framework.get_or_create_global_step())

        self._new_lr = tf.placeholder(
            tf.float32, shape=[], name="new_learning_rate")
        self._lr_update = tf.assign(self._lr, self._new_lr)

    def assign_lr(self, session, lr_value):
        """Set the learning-rate variable to `lr_value` using `session`."""
        session.run(self._lr_update, feed_dict={self._new_lr: lr_value})

    @property
    def input(self):
        """The input object this model was built with."""
        return self._input

    @property
    def initial_state(self):
        """Zero state of the multi-layer cell for `batch_size` sequences."""
        return self._initial_state

    @property
    def cost(self):
        """Summed sequence loss divided by the batch size."""
        return self._cost

    @property
    def final_state(self):
        """Cell state after the last unrolled time step."""
        return self._final_state

    @property
    def lr(self):
        """Learning-rate variable (training models only)."""
        return self._lr

    @property
    def train_op(self):
        """Gradient-descent update op (training models only)."""
        return self._train_op

class TestConfig(object):
    """Minimal hyper-parameter set, meant for quick test runs."""
    # Network shape.
    num_layers = 1
    hidden_size = 2
    keep_prob = 1.0       # dropout keep probability (1.0 disables dropout)

    # Batching.
    batch_size = 20
    num_steps = 2         # unrolled time steps per training example

    # Optimisation.
    init_scale = 0.1      # half-width of the uniform weight initializer
    max_grad_norm = 1     # global-norm clipping threshold for the gradients
    learning_rate = 1.0
    lr_decay = 0.5        # per-epoch decay factor once max_epoch has passed
    max_epoch = 1         # epochs trained at the initial learning rate
    max_max_epoch = 1     # total number of training epochs

class SmallConfig(object):
    """Hyper-parameters of the small model (the default --model value)."""
    # Network shape.
    num_layers = 2
    hidden_size = 200
    keep_prob = 1.0       # dropout keep probability (1.0 disables dropout)

    # Batching.
    batch_size = 64
    num_steps = 20        # unrolled time steps per training example

    # Optimisation.
    init_scale = 0.1      # half-width of the uniform weight initializer
    max_grad_norm = 5     # global-norm clipping threshold for the gradients
    learning_rate = 1.0
    lr_decay = 0.5        # per-epoch decay factor once max_epoch has passed
    max_epoch = 4         # epochs trained at the initial learning rate
    max_max_epoch = 13    # total number of training epochs


class MediumConfig(object):
    """Hyper-parameters of the medium model."""
    # Network shape.
    num_layers = 2
    hidden_size = 650
    keep_prob = 0.5       # dropout keep probability used while training

    # Batching.
    batch_size = 20
    num_steps = 35        # unrolled time steps per training example

    # Optimisation.
    init_scale = 0.05     # half-width of the uniform weight initializer
    max_grad_norm = 5     # global-norm clipping threshold for the gradients
    learning_rate = 1.0
    lr_decay = 0.8        # per-epoch decay factor once max_epoch has passed
    max_epoch = 6         # epochs trained at the initial learning rate
    max_max_epoch = 39    # total number of training epochs


class LargeConfig(object):
    """Hyper-parameters of the large model."""
    # Network shape.
    num_layers = 2
    hidden_size = 1500
    keep_prob = 0.35      # dropout keep probability used while training

    # Batching.
    batch_size = 20
    num_steps = 35        # unrolled time steps per training example

    # Optimisation.
    init_scale = 0.04     # half-width of the uniform weight initializer
    max_grad_norm = 10    # global-norm clipping threshold for the gradients
    learning_rate = 1.0
    lr_decay = 1 / 1.15   # per-epoch decay factor once max_epoch has passed
    max_epoch = 14        # epochs trained at the initial learning rate
    max_max_epoch = 55    # total number of training epochs



def run_epoch(session, model, eval_op=None, verbose=False):
    """Run the model over one epoch of its input and return the perplexity.

    Args:
      session: object whose `run(fetches, feed_dict)` evaluates the graph.
      model: model exposing `initial_state`, `cost`, `final_state` and
        `input` (with `epoch_size`, `num_steps` and `batch_size`).
      eval_op: optional extra op fetched on every step (e.g. a training op).
      verbose: when true, print progress, running perplexity and words per
        second roughly ten times per epoch.

    Returns:
      exp(total cost / total number of time steps) over the epoch.

    Raises:
      ZeroDivisionError: if `model.input.epoch_size` is 0, because no step is
        run and there is nothing to average.
    """
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(model.initial_state)

    fetches = {
        "cost": model.cost,
        "final_state": model.final_state,
    }
    if eval_op is not None:
        fetches["eval_op"] = eval_op

    epoch_size = model.input.epoch_size
    # Report about ten times per epoch. The interval is clamped to 1 so that
    # epochs shorter than 10 steps no longer divide by zero, and the phase is
    # reduced modulo the interval so that short epochs still report at all.
    # For epoch_size >= 110 this is exactly `step % (epoch_size // 10) == 10`.
    report_every = max(epoch_size // 10, 1)
    report_phase = 10 % report_every

    for step in range(epoch_size):
        # Carry the recurrent state over from the previous step.
        feed_dict = {}
        for i, (c, h) in enumerate(model.initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h

        vals = session.run(fetches, feed_dict)
        cost = vals["cost"]
        state = vals["final_state"]

        costs += cost
        iters += model.input.num_steps

        if verbose and step % report_every == report_phase:
            # Guard against a zero reading on coarse clocks.
            elapsed = max(time.time() - start_time, 1e-9)
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * model.input.batch_size / elapsed))

    return np.exp(costs / iters)


def get_config():
    """Return a fresh configuration object for the --model flag.

    Returns:
      A new instance of SmallConfig, MediumConfig, LargeConfig or TestConfig
      for the values "small", "medium", "large" and "test" respectively.

    Raises:
      ValueError: if FLAGS.model is none of the values above.
    """
    config_classes = {
        "small": SmallConfig,
        "medium": MediumConfig,
        "large": LargeConfig,
        "test": TestConfig,
    }
    if FLAGS.model not in config_classes:
        # Format the message here: passing the value as a second argument to
        # ValueError would leave the "%s" placeholder unformatted.
        raise ValueError("Invalid model: %s" % FLAGS.model)
    return config_classes[FLAGS.model]()


def main(_):
    """Train the RNNLM selected by the flags and optionally save it.

    Reads the data with reader.rnnlm_raw_data, builds a training model and a
    validation model that share variables, runs config.max_max_epoch epochs
    while printing the train and validation perplexity of each, and finally
    saves the model when --save_path is given.

    Args:
      _: unused positional argument supplied by tf.app.run().

    Raises:
      ValueError: if --data_path is not set.
    """
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to RNNLM data directory")

    raw_data = reader.rnnlm_raw_data(FLAGS.data_path, FLAGS.vocab_path)
    train_data, valid_data, _, word_map = raw_data

    # The vocabulary size is not part of the static configuration classes; it
    # is taken from the word map that was just loaded. (The unused second
    # "eval" configuration that used to be built here has been removed: both
    # models below are built from this one configuration.)
    config = get_config()
    config.vocab_size = len(word_map)

    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.name_scope("Train"):
            train_input = RNNLMInput(
                config=config, data=train_data, name="TrainInput")
            with tf.variable_scope(
                    "Model", reuse=None, initializer=initializer):
                m = RNNLMModel(
                    is_training=True, config=config, input_=train_input)
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        with tf.name_scope("Valid"):
            valid_input = RNNLMInput(
                config=config, data=valid_data, name="ValidInput")
            # reuse=True: the validation model shares the training variables.
            with tf.variable_scope(
                    "Model", reuse=True, initializer=initializer):
                mvalid = RNNLMModel(
                    is_training=False, config=config, input_=valid_input)
            tf.summary.scalar("Validation Loss", mvalid.cost)

        sv = tf.train.Supervisor(logdir=FLAGS.save_path)
        with sv.managed_session() as session:
            for i in range(config.max_max_epoch):
                # Keep the initial learning rate for the first max_epoch
                # epochs, then decay it geometrically.
                lr_decay = config.lr_decay ** max(
                    i + 1 - config.max_epoch, 0.0)
                m.assign_lr(session, config.learning_rate * lr_decay)

                print("Epoch: %d Learning rate: %.3f" %
                      (i + 1, session.run(m.lr)))
                train_perplexity = run_epoch(session, m, eval_op=m.train_op,
                                             verbose=True)

                print("Epoch: %d Train Perplexity: %.3f" %
                      (i + 1, train_perplexity))
                valid_perplexity = run_epoch(session, mvalid)
                print("Epoch: %d Valid Perplexity: %.3f" %
                      (i + 1, valid_perplexity))

            if FLAGS.save_path:
                print("Saving model to %s." % FLAGS.save_path)
                sv.saver.save(session, FLAGS.save_path)

# Script entry point: hand control to TensorFlow's application runner, which
# is expected to parse the command-line flags defined above and then call
# main().
if __name__ == "__main__":
    tf.app.run()
Loading