From fab3faaa515b11a07460a9a3144cf2d37f9872a0 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Mon, 22 May 2017 13:54:05 -0400
Subject: [PATCH 01/30] scripts to train RNNLM with tensorflow

---
 egs/ami/s5/local/tensorflow/ptb_word_lm.py | 389 +++++++++++++++++++++
 egs/ami/s5/local/tensorflow/reader.py      | 128 +++++++
 egs/ami/s5/local/tensorflow/run.sh         |  15 +
 3 files changed, 532 insertions(+)
 create mode 100644 egs/ami/s5/local/tensorflow/ptb_word_lm.py
 create mode 100644 egs/ami/s5/local/tensorflow/reader.py
 create mode 100755 egs/ami/s5/local/tensorflow/run.sh

diff --git a/egs/ami/s5/local/tensorflow/ptb_word_lm.py b/egs/ami/s5/local/tensorflow/ptb_word_lm.py
new file mode 100644
index 00000000000..e1e9673fea4
--- /dev/null
+++ b/egs/ami/s5/local/tensorflow/ptb_word_lm.py
@@ -0,0 +1,389 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Example / benchmark for building a PTB LSTM model.
+
+Trains the model described in:
+(Zaremba, et. al.) Recurrent Neural Network Regularization
+http://arxiv.org/abs/1409.2329
+
+There are 3 supported model configurations:
+===========================================
+| config | epochs | train | valid  | test
+===========================================
+| small  | 13     | 37.99 | 121.39 | 115.91
+| medium | 39     | 48.45 |  86.16 |  82.07
+| large  | 55     | 37.87 |  82.62 |  78.29
+The exact results may vary depending on the random initialization.
+
+The hyperparameters used in the model:
+- init_scale - the initial scale of the weights
+- learning_rate - the initial value of the learning rate
+- max_grad_norm - the maximum permissible norm of the gradient
+- num_layers - the number of LSTM layers
+- num_steps - the number of unrolled steps of LSTM
+- hidden_size - the number of LSTM units
+- max_epoch - the number of epochs trained with the initial learning rate
+- max_max_epoch - the total number of epochs for training
+- keep_prob - the probability of keeping weights in the dropout layer
+- lr_decay - the decay of the learning rate for each epoch after "max_epoch"
+- batch_size - the batch size
+
+The data required for this example is in the data/ dir of the
+PTB dataset from Tomas Mikolov's webpage:
+
+$ wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
+$ tar xvf simple-examples.tgz
+
+To run:
+
+$ python ptb_word_lm.py --data_path=simple-examples/data/
+
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import inspect
+import time
+
+import sys
+
+sys.path.insert(0,"/home/hxu/.local/lib/python2.7/site-packages/")
+
+import numpy as np
+import tensorflow as tf
+
+import reader
+
+flags = tf.flags
+logging = tf.logging
+
+flags.DEFINE_string(
+    "model", "small",
+    "A type of model. Possible options are: small, medium, large.")
+flags.DEFINE_string("data_path", None,
+                    "Where the training/test data is stored.")
+flags.DEFINE_string("save_path", None,
+                    "Model output directory.")
+flags.DEFINE_bool("use_fp16", False,
+                  "Train using 16-bit floats instead of 32bit floats")
+
+FLAGS = flags.FLAGS
+
+
+def data_type():
+  return tf.float16 if FLAGS.use_fp16 else tf.float32
+
+
+class PTBInput(object):
+  """The input data."""
+
+  def __init__(self, config, data, name=None):
+    self.batch_size = batch_size = config.batch_size
+    self.num_steps = num_steps = config.num_steps
+    self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
+    self.input_data, self.targets = reader.ptb_producer(
+        data, batch_size, num_steps, name=name)
+
+
+class PTBModel(object):
+  """The PTB model."""
+
+  def __init__(self, is_training, config, input_):
+    self._input = input_
+
+    batch_size = input_.batch_size
+    num_steps = input_.num_steps
+    size = config.hidden_size
+    vocab_size = config.vocab_size
+
+    # Slightly better results can be obtained with forget gate biases
+    # initialized to 1 but the hyperparameters of the model would need to be
+    # different than reported in the paper.
+    def lstm_cell():
+      # With the latest TensorFlow source code (as of Mar 27, 2017),
+      # the BasicLSTMCell will need a reuse parameter which is unfortunately not
+      # defined in TensorFlow 1.0. To maintain backwards compatibility, we add
+      # an argument check here:
+      if 'reuse' in inspect.getargspec(
+          tf.contrib.rnn.BasicLSTMCell.__init__).args:
+        return tf.contrib.rnn.BasicLSTMCell(
+            size, forget_bias=0.0, state_is_tuple=True,
+            reuse=tf.get_variable_scope().reuse)
+      else:
+        return tf.contrib.rnn.BasicLSTMCell(
+            size, forget_bias=0.0, state_is_tuple=True)
+    attn_cell = lstm_cell
+    if is_training and config.keep_prob < 1:
+      def attn_cell():
+        return tf.contrib.rnn.DropoutWrapper(
+            lstm_cell(), output_keep_prob=config.keep_prob)
+    cell = tf.contrib.rnn.MultiRNNCell(
+        [attn_cell() for _ in range(config.num_layers)], state_is_tuple=True)
+
+    self._initial_state = cell.zero_state(batch_size, data_type())
+
+    with tf.device("/cpu:0"):
+      embedding = tf.get_variable(
+          "embedding", [vocab_size, size], dtype=data_type())
+      inputs = tf.nn.embedding_lookup(embedding, input_.input_data)
+
+    if is_training and config.keep_prob < 1:
+      inputs = tf.nn.dropout(inputs, config.keep_prob)
+
+    # Simplified version of models/tutorials/rnn/rnn.py's rnn().
+    # This builds an unrolled LSTM for tutorial purposes only.
+    # In general, use the rnn() or state_saving_rnn() from rnn.py.
+    #
+    # The alternative version of the code below is:
+    #
+    # inputs = tf.unstack(inputs, num=num_steps, axis=1)
+    # outputs, state = tf.contrib.rnn.static_rnn(
+    #     cell, inputs, initial_state=self._initial_state)
+    outputs = []
+    state = self._initial_state
+    with tf.variable_scope("RNN"):
+      for time_step in range(num_steps):
+        if time_step > 0: tf.get_variable_scope().reuse_variables()
+        (cell_output, state) = cell(inputs[:, time_step, :], state)
+        outputs.append(cell_output)
+
+    output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])
+    softmax_w = tf.get_variable(
+        "softmax_w", [size, vocab_size], dtype=data_type())
+    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
+    logits = tf.matmul(output, softmax_w) + softmax_b
+    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
+        [logits],
+        [tf.reshape(input_.targets, [-1])],
+        [tf.ones([batch_size * num_steps], dtype=data_type())])
+    self._cost = cost = tf.reduce_sum(loss) / batch_size
+    self._final_state = state
+
+    if not is_training:
+      return
+
+    self._lr = tf.Variable(0.0, trainable=False)
+    tvars = tf.trainable_variables()
+    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
+                                      config.max_grad_norm)
+    optimizer = tf.train.GradientDescentOptimizer(self._lr)
+    self._train_op = optimizer.apply_gradients(
+        zip(grads, tvars),
+        global_step=tf.contrib.framework.get_or_create_global_step())
+
+    self._new_lr = tf.placeholder(
+        tf.float32, shape=[], name="new_learning_rate")
+    self._lr_update = tf.assign(self._lr, self._new_lr)
+
+  def assign_lr(self, session, lr_value):
+    session.run(self._lr_update, feed_dict={self._new_lr: lr_value})
+
+  @property
+  def input(self):
+    return self._input
+
+  @property
+  def initial_state(self):
+    return self._initial_state
+
+  @property
+  def cost(self):
+    return self._cost
+
+  @property
+  def final_state(self):
+    return self._final_state
+
+  @property
+  def lr(self):
+    return self._lr
+
+  @property
+  def train_op(self):
+    return self._train_op
+
+
+class SmallConfig(object):
+  """Small config."""
+  init_scale = 0.1
+  learning_rate = 1.0
+  max_grad_norm = 5
+  num_layers = 2
+  num_steps = 20
+  hidden_size = 200
+  max_epoch = 4
+  max_max_epoch = 13
+  keep_prob = 1.0
+  lr_decay = 0.5
+  batch_size = 20
+  vocab_size = 10000
+
+
+class MediumConfig(object):
+  """Medium config."""
+  init_scale = 0.05
+  learning_rate = 1.0
+  max_grad_norm = 5
+  num_layers = 2
+  num_steps = 35
+  hidden_size = 650
+  max_epoch = 6
+  max_max_epoch = 39
+  keep_prob = 0.5
+  lr_decay = 0.8
+  batch_size = 20
+  vocab_size = 10000
+
+
+class LargeConfig(object):
+  """Large config."""
+  init_scale = 0.04
+  learning_rate = 1.0
+  max_grad_norm = 10
+  num_layers = 2
+  num_steps = 35
+  hidden_size = 1500
+  max_epoch = 14
+  max_max_epoch = 55
+  keep_prob = 0.35
+  lr_decay = 1 / 1.15
+  batch_size = 20
+  vocab_size = 10000
+
+
+class TestConfig(object):
+  """Tiny config, for testing."""
+  init_scale = 0.1
+  learning_rate = 1.0
+  max_grad_norm = 1
+  num_layers = 1
+  num_steps = 2
+  hidden_size = 2
+  max_epoch = 1
+  max_max_epoch = 1
+  keep_prob = 1.0
+  lr_decay = 0.5
+  batch_size = 20
+  vocab_size = 10000
+
+
+def run_epoch(session, model, eval_op=None, verbose=False):
+  """Runs the model for one epoch and returns exp(total cost / total steps).
+
+  Args:
+    session: session object used to run the model's tensors.
+    model: object exposing initial_state, final_state, cost and input.
+    eval_op: optional extra op fetched at every step (e.g. the train op).
+    verbose: if True, periodically print progress, perplexity and speed.
+  """
+  start_time = time.time()
+  costs, iters = 0.0, 0
+  state = session.run(model.initial_state)
+  fetches = {"cost": model.cost, "final_state": model.final_state}
+  if eval_op is not None:
+    fetches["eval_op"] = eval_op
+  # epoch_size // 10 is 0 for epochs shorter than 10 steps; clamp it to 1 so
+  # the modulo in the logging test below cannot raise ZeroDivisionError.
+  log_every = max(model.input.epoch_size // 10, 1)
+  for step in range(model.input.epoch_size):
+    # Feed the state returned by the previous step back in as initial state.
+    feed_dict = {}
+    for i, (c, h) in enumerate(model.initial_state):
+      feed_dict[c] = state[i].c
+      feed_dict[h] = state[i].h
+    vals = session.run(fetches, feed_dict)
+    state = vals["final_state"]
+    costs += vals["cost"]
+    iters += model.input.num_steps
+    if verbose and step % log_every == 10:
+      print("%.3f perplexity: %.3f speed: %.0f wps" %
+            (step * 1.0 / model.input.epoch_size, np.exp(costs / iters),
+             iters * model.input.batch_size / (time.time() - start_time)))
+  return np.exp(costs / iters)
+
+
+def get_config():
+  """Returns a new config for --model; raises ValueError if it is unknown."""
+  configs = {
+      "small": SmallConfig,
+      "medium": MediumConfig,
+      "large": LargeConfig,
+      "test": TestConfig,
+  }
+  if FLAGS.model not in configs:
+    raise ValueError("Invalid model: %s" % FLAGS.model)
+  return configs[FLAGS.model]()
+
+
+def main(_):
+  if not FLAGS.data_path:
+    raise ValueError("Must set --data_path to PTB data directory")
+
+  raw_data = reader.ptb_raw_data(FLAGS.data_path)
+  train_data, valid_data, test_data, _ = raw_data
+
+  config = get_config()
+  eval_config = get_config()
+  eval_config.batch_size = 1
+  eval_config.num_steps = 1
+
+  with tf.Graph().as_default():
+    initializer = tf.random_uniform_initializer(-config.init_scale,
+                                                config.init_scale)
+
+    with tf.name_scope("Train"):
+      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
+      with tf.variable_scope("Model", reuse=None, initializer=initializer):
+        m = PTBModel(is_training=True, config=config, input_=train_input)
+      tf.summary.scalar("Training Loss", m.cost)
+      tf.summary.scalar("Learning Rate", m.lr)
+
+    with tf.name_scope("Valid"):
+      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
+      with tf.variable_scope("Model", reuse=True, initializer=initializer):
+        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
+      tf.summary.scalar("Validation Loss", mvalid.cost)
+
+    with tf.name_scope("Test"):
+      test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
+      with tf.variable_scope("Model", reuse=True, initializer=initializer):
+        mtest = PTBModel(is_training=False, config=eval_config,
+                         input_=test_input)
+
+    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
+    with sv.managed_session() as session:
+      for i in range(config.max_max_epoch):
+        lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
+        m.assign_lr(session, config.learning_rate * lr_decay)
+
+        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
+        train_perplexity = run_epoch(session, m, eval_op=m.train_op,
+                                     verbose=True)
+        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
+        valid_perplexity = run_epoch(session, mvalid)
+        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
+
+      test_perplexity = run_epoch(session, mtest)
+      print("Test Perplexity: %.3f" % test_perplexity)
+
+      if FLAGS.save_path:
+        print("Saving model to %s." % FLAGS.save_path)
+        sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
+
+
+if __name__ == "__main__":
+  tf.app.run()
diff --git a/egs/ami/s5/local/tensorflow/reader.py b/egs/ami/s5/local/tensorflow/reader.py
new file mode 100644
index 00000000000..f60bb0d636b
--- /dev/null
+++ b/egs/ami/s5/local/tensorflow/reader.py
@@ -0,0 +1,128 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+
+"""Utilities for parsing PTB text files."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import os
+
+import tensorflow as tf
+
+
+
+def _read_words(filename):  # -> list of tokens; each "\n" becomes "<eos>"
+  with tf.gfile.GFile(filename, "rb") as f:  # bytes: decode works on py2 and py3
+    return f.read().decode("utf-8").replace("\n", "<eos>").split()
+
+
+def _build_vocab(filename):
+  data = _read_words(filename)
+
+  counter = collections.Counter(data)
+  count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
+
+  words, _ = list(zip(*count_pairs))
+  word_to_id = dict(zip(words, range(len(words))))
+
+#
+#  print(word_to_id)
+#
+#  print("")
+
+  return word_to_id
+
+
+def _file_to_word_ids(filename, word_to_id):
+  data = _read_words(filename)
+  return [word_to_id[word] for word in data if word in word_to_id]
+
+
+def ptb_raw_data(data_path=None):
+  """Load raw word-id data from data directory "data_path".
+
+  Reads train.txt, dev.txt and eval.txt and converts words to integer ids;
+  no mini-batching is done here. The vocabulary is built from train.txt only,
+  and dev/eval words missing from it are dropped, not mapped to an unknown id.
+
+  NOTE(review): appears adapted from the TensorFlow PTB reader; PTB is at
+  http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
+
+  Args:
+    data_path: string path to the directory holding the three text files.
+      Despite the None default it must be given (os.path.join needs a path).
+
+  Returns:
+    tuple (train_data, valid_data, test_data, vocabulary): three lists of
+    word ids usable as raw_data for ptb_producer, and the vocabulary size.
+  """
+
+  train_path = os.path.join(data_path, "train.txt")
+  valid_path = os.path.join(data_path, "dev.txt")
+  test_path = os.path.join(data_path, "eval.txt")
+
+  word_to_id = _build_vocab(train_path)
+  train_data = _file_to_word_ids(train_path, word_to_id)
+  valid_data = _file_to_word_ids(valid_path, word_to_id)
+  test_data = _file_to_word_ids(test_path, word_to_id)
+  vocabulary = len(word_to_id)
+  return train_data, valid_data, test_data, vocabulary
+
+
+def ptb_producer(raw_data, batch_size, num_steps, name=None):
+  """Iterate on the raw PTB data.
+
+  This chunks up raw_data into batches of examples and returns Tensors that
+  are drawn from these batches.
+
+  Args:
+    raw_data: one of the raw data outputs from ptb_raw_data.
+    batch_size: int, the batch size.
+    num_steps: int, the number of unrolls.
+    name: the name of this operation (optional).
+
+  Returns:
+    A pair of Tensors, each shaped [batch_size, num_steps]. The second element
+    of the tuple is the same data time-shifted to the right by one.
+
+  Raises:
+    tf.errors.InvalidArgumentError: if batch_size or num_steps are too high.
+  """
+  with tf.name_scope(name, "PTBProducer", [raw_data, batch_size, num_steps]):
+    raw_data = tf.convert_to_tensor(raw_data, name="raw_data", dtype=tf.int32)
+
+    data_len = tf.size(raw_data)
+    batch_len = data_len // batch_size
+    data = tf.reshape(raw_data[0 : batch_size * batch_len],
+                      [batch_size, batch_len])
+
+    epoch_size = (batch_len - 1) // num_steps
+    assertion = tf.assert_positive(
+        epoch_size,
+        message="epoch_size == 0, decrease batch_size or num_steps")
+    with tf.control_dependencies([assertion]):
+      epoch_size = tf.identity(epoch_size, name="epoch_size")
+
+    i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue()
+    x = tf.strided_slice(data, [0, i * num_steps],
+                         [batch_size, (i + 1) * num_steps])
+    x.set_shape([batch_size, num_steps])
+    y = tf.strided_slice(data, [0, i * num_steps + 1],
+                         [batch_size, (i + 1) * num_steps + 1])
+    y.set_shape([batch_size, num_steps])
+    return x, y
diff --git a/egs/ami/s5/local/tensorflow/run.sh b/egs/ami/s5/local/tensorflow/run.sh
new file mode 100755
index 00000000000..7e868452989
--- /dev/null
+++ b/egs/ami/s5/local/tensorflow/run.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+data_type=sdm1
+model_type=small
+
+dir=data/tensorflow/
+mkdir -p $dir
+
+cat data/$data_type/train/text | awk '{for(i=2;i<=NF;i++)print $i}' | sort | uniq -c | sort -k1nr | head -n 9998 | awk '{print $2}' > $dir/wordlist
+
+for i in train dev eval; do
+  cat data/$data_type/$i/text | awk -v w=$dir/wordlist 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=2;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' | sed "s=^= =g" > $dir/$i.txt
+done
+
+python local/tensorflow/ptb_word_lm.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm.mdl

From cae43c6b57e9582844ff3121f2fdbd20cabc86f5 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Tue, 23 May 2017 18:31:54 -0400
Subject: [PATCH 02/30] successfully did python training plus c++ eval

---
 egs/ami/s5/local/tensorflow/load.py   | 22 +++++++
 egs/ami/s5/local/tensorflow/loader.cc | 88 +++++++++++++++++++++++++++
 egs/ami/s5/local/tensorflow/reader.py |  5 --
 3 files changed, 110 insertions(+), 5 deletions(-)
 create mode 100644 egs/ami/s5/local/tensorflow/load.py
 create mode 100644 egs/ami/s5/local/tensorflow/loader.cc

diff --git a/egs/ami/s5/local/tensorflow/load.py b/egs/ami/s5/local/tensorflow/load.py
new file mode 100644
index 00000000000..0d0959aa746
--- /dev/null
+++ b/egs/ami/s5/local/tensorflow/load.py
@@ -0,0 +1,22 @@
+import sys
+
+sys.path.insert(0,"/home/hxu/.local/lib/python2.7/site-packages/")
+
+import tensorflow as tf
+import numpy as np
+# Toy graph c = a * b; its GraphDef is written to models/graph.pb (binary).
+# To force CPU-only execution, pass this config to tf.Session below:
+#   config = tf.ConfigProto(device_count={'GPU': 0})
+with tf.Session() as sess:
+    a = tf.Variable(5.5, name='a')
+    b = tf.Variable(6.6, name='b')
+    c = tf.multiply(a, b, name="c")
+
+    sess.run(tf.global_variables_initializer())
+
+    print(a.eval())  # 5.5
+    print(b.eval())  # 6.6
+    print(c.eval())  # ~36.3 (5.5 * 6.6 in float32)
+
+    tf.train.write_graph(sess.graph_def, 'models/', 'graph.pb', as_text=False)
+
diff --git a/egs/ami/s5/local/tensorflow/loader.cc b/egs/ami/s5/local/tensorflow/loader.cc
new file mode 100644
index 00000000000..b02b1f4b853
--- /dev/null
+++ b/egs/ami/s5/local/tensorflow/loader.cc
@@ -0,0 +1,88 @@
+#include "tensorflow/core/public/session.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/protobuf/meta_graph.pb.h"
+
+
+using namespace tensorflow;
+
+int main(int argc, char* argv[]) {
+  // Create a TensorFlow session; every failing step below exits with status 1.
+  Session* session;
+  Status status = NewSession(SessionOptions(), &session);
+  if (!status.ok()) {
+    std::cout << status.ToString() << "\n";
+    return 1;
+  }
+
+  const string pathToGraph = "/export/b02/hxu/TensorFlow/save_load/models/m.meta";
+  const string checkpointPath = "/export/b02/hxu/TensorFlow/save_load/models/m";
+
+  // Read the serialized MetaGraphDef from pathToGraph.
+  // (The path used here is absolute. If it is changed to a relative one,
+  // keep in mind that `bazel run` executes from inside a temp folder, not
+  // from the directory where `bazel run` was invoked.)
+  MetaGraphDef graph_def;
+  status = ReadBinaryProto(Env::Default(), pathToGraph, &graph_def);
+  if (!status.ok()) {
+    std::cout << status.ToString() << "\n";
+    return 1;
+  }
+
+  // Add the graph to the session
+  status = session->Create(graph_def.graph_def());
+  if (!status.ok()) {
+    std::cout << status.ToString() << "\n";
+    return 1;
+  }
+
+  Tensor checkpointPathTensor(DT_STRING, TensorShape());
+  checkpointPathTensor.scalar<std::string>()() = checkpointPath;
+  // Run the saver's restore op, feeding the checkpoint path as its filename.
+  status = session->Run(
+            {{ graph_def.saver_def().filename_tensor_name(), checkpointPathTensor },},
+            {},
+            {graph_def.saver_def().restore_op_name()},
+            nullptr);
+  if (!status.ok()) {
+    std::cout << status.ToString() << "\n";
+    return 1;
+  }
+
+  // Set up the inputs: scalar float tensors fed under the names "a" and "b".
+
+  Tensor a(DT_FLOAT, TensorShape());
+  a.scalar<float>()() = 5.5;
+
+  Tensor b(DT_FLOAT, TensorShape());
+  b.scalar<float>()() = 6.6;
+
+  std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
+    { "a", a },
+    { "b", b },
+  };
+
+  // The session will initialize the outputs
+  std::vector<tensorflow::Tensor> outputs;
+
+  // Run the session, fetching the node named "output" from the graph
+  status = session->Run(inputs, {"output"}, {}, &outputs);
+  if (!status.ok()) {
+    std::cout << status.ToString() << "\n";
+    return 1;
+  }
+
+  // Grab the first output (we only fetched one graph node: "output")
+  // and convert the node to a scalar representation.
+  auto output_c = outputs[0].scalar<float>();
+
+  // (There are similar methods for vectors and matrices here:
+  // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/tensor.h)
+
+  // Print the results
+  std::cout << outputs[0].DebugString() << "\n"; // e.g. Tensor<type: float shape: [] values: 30>
+  std::cout << output_c() << "\n"; // e.g. 30
+
+  // Close the session. NOTE(review): the Session object is never deleted.
+  session->Close();
+  return 0;
+}
diff --git a/egs/ami/s5/local/tensorflow/reader.py b/egs/ami/s5/local/tensorflow/reader.py
index f60bb0d636b..5ec03b19b51 100644
--- a/egs/ami/s5/local/tensorflow/reader.py
+++ b/egs/ami/s5/local/tensorflow/reader.py
@@ -40,11 +40,6 @@ def _build_vocab(filename):
   words, _ = list(zip(*count_pairs))
   word_to_id = dict(zip(words, range(len(words))))
 
-#
-#  print(word_to_id)
-#
-#  print("")
-
   return word_to_id
 
 

From 1302854e86859f6cf19189d32dfe99fe51d6eb03 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Fri, 2 Jun 2017 20:03:33 -0400
Subject: [PATCH 03/30] bigger graph idea is working

---
 egs/ami/s5/local/tensorflow/ptb_word_lm.py | 51 +++++++++++++++-------
 egs/ami/s5/local/tensorflow/run.sh         | 14 +++---
 2 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/egs/ami/s5/local/tensorflow/ptb_word_lm.py b/egs/ami/s5/local/tensorflow/ptb_word_lm.py
index e1e9673fea4..1c48632f8d1 100644
--- a/egs/ami/s5/local/tensorflow/ptb_word_lm.py
+++ b/egs/ami/s5/local/tensorflow/ptb_word_lm.py
@@ -14,11 +14,9 @@
 # ==============================================================================
 
 """Example / benchmark for building a PTB LSTM model.
-
 Trains the model described in:
 (Zaremba, et. al.) Recurrent Neural Network Regularization
 http://arxiv.org/abs/1409.2329
-
 There are 3 supported model configurations:
 ===========================================
 | config | epochs | train | valid  | test
@@ -27,7 +25,6 @@
 | medium | 39     | 48.45 |  86.16 |  82.07
 | large  | 55     | 37.87 |  82.62 |  78.29
 The exact results may vary depending on the random initialization.
-
 The hyperparameters used in the model:
 - init_scale - the initial scale of the weights
 - learning_rate - the initial value of the learning rate
@@ -40,29 +37,24 @@
 - keep_prob - the probability of keeping weights in the dropout layer
 - lr_decay - the decay of the learning rate for each epoch after "max_epoch"
 - batch_size - the batch size
-
 The data required for this example is in the data/ dir of the
 PTB dataset from Tomas Mikolov's webpage:
-
 $ wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
 $ tar xvf simple-examples.tgz
-
 To run:
-
 $ python ptb_word_lm.py --data_path=simple-examples/data/
-
 """
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import inspect
-import time
-
 import sys
 
 sys.path.insert(0,"/home/hxu/.local/lib/python2.7/site-packages/")
 
+import inspect
+import time
+
 import numpy as np
 import tensorflow as tf
 
@@ -136,10 +128,42 @@ def attn_cell():
 
     self._initial_state = cell.zero_state(batch_size, data_type())
 
+
+    # first implement the less efficient version
+    test_word_in = tf.placeholder(tf.int32, [1, 1])
+    test_word_out = tf.placeholder(tf.int32, [1, 1])
+    test_input_state_c = tf.placeholder(tf.float32, [1, size])
+    test_input_state_h = tf.placeholder(tf.float32, [1, size])
+    test_input_state = tf.contrib.rnn.LSTMStateTuple(test_input_state_c, test_input_state_h)
+
+#    print ("want to be", self._initial_state)
+#    print ("it actually is ", input_state)
     with tf.device("/cpu:0"):
       embedding = tf.get_variable(
           "embedding", [vocab_size, size], dtype=data_type())
+
+#      print("should be ", input_.input_data)
+#      print("is ", test_word)
+
       inputs = tf.nn.embedding_lookup(embedding, input_.input_data)
+      test_inputs = tf.nn.embedding_lookup(embedding, test_word_in)
+#      print("should be ", inputs)
+#      print("is ", test_inputs)
+
+    # test time
+    with tf.variable_scope("RNN"):
+#      tf.get_variable_scope().reuse_variables()
+      (test_cell_output, test_output_state) = cell(test_inputs[:, 0, :], [test_input_state])
+
+    softmax_w = tf.get_variable(
+        "softmax_w", [size, vocab_size], dtype=data_type())
+    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
+
+    test_logits = tf.matmul(test_cell_output, softmax_w) + softmax_b
+    test_softmaxed = tf.nn.softmax(test_logits)
+    print("test softmaxed is ", test_softmaxed)
+    p_word = test_softmaxed[0, test_word_out[0,0]]
+#    p_word = tf.float32(test_softmaxed[:, test_word_out], name="p_out")
 
     if is_training and config.keep_prob < 1:
       inputs = tf.nn.dropout(inputs, config.keep_prob)
@@ -157,14 +181,11 @@ def attn_cell():
     state = self._initial_state
     with tf.variable_scope("RNN"):
       for time_step in range(num_steps):
-        if time_step > 0: tf.get_variable_scope().reuse_variables()
+        if time_step > -1: tf.get_variable_scope().reuse_variables()
         (cell_output, state) = cell(inputs[:, time_step, :], state)
         outputs.append(cell_output)
 
     output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])
-    softmax_w = tf.get_variable(
-        "softmax_w", [size, vocab_size], dtype=data_type())
-    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
     logits = tf.matmul(output, softmax_w) + softmax_b
     loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
         [logits],
diff --git a/egs/ami/s5/local/tensorflow/run.sh b/egs/ami/s5/local/tensorflow/run.sh
index 7e868452989..e869f68873d 100755
--- a/egs/ami/s5/local/tensorflow/run.sh
+++ b/egs/ami/s5/local/tensorflow/run.sh
@@ -1,15 +1,15 @@
 #!/bin/bash
 
 data_type=sdm1
-model_type=small
+model_type=test
 
 dir=data/tensorflow/
 mkdir -p $dir
 
-cat data/$data_type/train/text | awk '{for(i=2;i<=NF;i++)print $i}' | sort | uniq -c | sort -k1nr | head -n 9998 | awk '{print $2}' > $dir/wordlist
+#cat data/$data_type/train/text | awk '{for(i=2;i<=NF;i++)print $i}' | sort | uniq -c | sort -k1nr | head -n 9998 | awk '{print $2}' > $dir/wordlist
+#
+#for i in train dev eval; do
+#  cat data/$data_type/$i/text | awk -v w=$dir/wordlist 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=2;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' | sed "s=^= =g" > $dir/$i.txt
+#done
 
-for i in train dev eval; do
-  cat data/$data_type/$i/text | awk -v w=$dir/wordlist 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=2;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' | sed "s=^= =g" > $dir/$i.txt
-done
-
-python local/tensorflow/ptb_word_lm.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm.mdl
+python local/tensorflow/ptb_word_lm.py --data_path=$dir --model=$model_type --save_path=$dir/model

From e148dbbc3edfb32711d13d0e24b653942624b8e1 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Mon, 5 Jun 2017 18:02:05 -0400
Subject: [PATCH 04/30] an initial working version of lstm LM that is
 accessible from C++

---
 egs/ami/s5/local/tensorflow/rnnlm.py | 417 +++++++++++++++++++++++++++
 egs/ami/s5/local/tensorflow/run.sh   |   3 +-
 2 files changed, 419 insertions(+), 1 deletion(-)
 create mode 100644 egs/ami/s5/local/tensorflow/rnnlm.py

diff --git a/egs/ami/s5/local/tensorflow/rnnlm.py b/egs/ami/s5/local/tensorflow/rnnlm.py
new file mode 100644
index 00000000000..b3870cc0919
--- /dev/null
+++ b/egs/ami/s5/local/tensorflow/rnnlm.py
@@ -0,0 +1,417 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Example / benchmark for building a PTB LSTM model.
+Trains the model described in:
+(Zaremba, et. al.) Recurrent Neural Network Regularization
+http://arxiv.org/abs/1409.2329
+There are 3 supported model configurations:
+===========================================
+| config | epochs | train | valid  | test
+===========================================
+| small  | 13     | 37.99 | 121.39 | 115.91
+| medium | 39     | 48.45 |  86.16 |  82.07
+| large  | 55     | 37.87 |  82.62 |  78.29
+The exact results may vary depending on the random initialization.
+The hyperparameters used in the model:
+- init_scale - the initial scale of the weights
+- learning_rate - the initial value of the learning rate
+- max_grad_norm - the maximum permissible norm of the gradient
+- num_layers - the number of LSTM layers
+- num_steps - the number of unrolled steps of LSTM
+- hidden_size - the number of LSTM units
+- max_epoch - the number of epochs trained with the initial learning rate
+- max_max_epoch - the total number of epochs for training
+- keep_prob - the probability of keeping weights in the dropout layer
+- lr_decay - the decay of the learning rate for each epoch after "max_epoch"
+- batch_size - the batch size
+The data required for this example is in the data/ dir of the
+PTB dataset from Tomas Mikolov's webpage:
+$ wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
+$ tar xvf simple-examples.tgz
+To run:
+$ python rnnlm.py --data_path=simple-examples/data/
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import sys
+
+sys.path.insert(0,"/home/hxu/.local/lib/python2.7/site-packages/")
+
+import inspect
+import time
+
+import numpy as np
+import tensorflow as tf
+
+import reader
+
+flags = tf.flags
+logging = tf.logging
+# Command-line flags. --model must be one of the names accepted by get_config().
+flags.DEFINE_string(
+    "model", "small",
+    "A type of model. Possible options are: small, medium, large, test.")
+flags.DEFINE_string("data_path", None,
+                    "Where the training/test data is stored.")
+flags.DEFINE_string("save_path", None,
+                    "Model output directory.")
+flags.DEFINE_bool("use_fp16", False,
+                  "Train using 16-bit floats instead of 32bit floats")
+
+FLAGS = flags.FLAGS
+
+
+def data_type():  # float dtype used for model variables/state: fp16 iff --use_fp16
+  return tf.float16 if FLAGS.use_fp16 else tf.float32
+
+
+class PTBInput(object):
+  """The input data: batch/step sizes plus the input and target tensors from reader.ptb_producer."""
+
+  def __init__(self, config, data, name=None):
+    self.batch_size = batch_size = config.batch_size
+    self.num_steps = num_steps = config.num_steps
+    self.epoch_size = ((len(data) // batch_size) - 1) // num_steps  # steps run_epoch iterates per epoch
+    self.input_data, self.targets = reader.ptb_producer(
+        data, batch_size, num_steps, name=name)
+
+
+class PTBModel(object):
+  """The PTB model: an unrolled LSTM training graph plus a one-word test_* graph with named tensors."""
+
+  def __init__(self, is_training, config, input_):
+    self._input = input_
+
+    batch_size = input_.batch_size
+    num_steps = input_.num_steps
+    size = config.hidden_size
+    vocab_size = config.vocab_size
+
+    # Slightly better results can be obtained with forget gate biases
+    # initialized to 1 but the hyperparameters of the model would need to be
+    # different than reported in the paper.
+    def lstm_cell():
+      # With the latest TensorFlow source code (as of Mar 27, 2017),
+      # the BasicLSTMCell will need a reuse parameter which is unfortunately not
+      # defined in TensorFlow 1.0. To maintain backwards compatibility, we add
+      # an argument check here:
+      if 'reuse' in inspect.getargspec(
+          tf.contrib.rnn.BasicLSTMCell.__init__).args:
+        return tf.contrib.rnn.BasicLSTMCell(
+            size, forget_bias=0.0, state_is_tuple=True,
+            reuse=tf.get_variable_scope().reuse)
+      else:
+        return tf.contrib.rnn.BasicLSTMCell(
+            size, forget_bias=0.0, state_is_tuple=True)
+    attn_cell = lstm_cell  # per-layer cell factory; replaced by a dropout-wrapped one when training
+    if is_training and config.keep_prob < 1:
+      def attn_cell():
+        return tf.contrib.rnn.DropoutWrapper(
+            lstm_cell(), output_keep_prob=config.keep_prob)
+    self.cell = tf.contrib.rnn.MultiRNNCell(
+        [attn_cell() for _ in range(config.num_layers)], state_is_tuple=True)
+
+    self._initial_state = self.cell.zero_state(batch_size, data_type())
+
+
+    # first implement the less efficient version
+    test_word_in = tf.placeholder(tf.int32, [1, 1], name="test_word_in")
+    test_word_out = tf.placeholder(tf.int32, [1, 1], name="test_word_out")
+    test_input_state_c = tf.placeholder(tf.float32, [1, size], name="test_state_c")
+    test_input_state_h = tf.placeholder(tf.float32, [1, size], name="test_state_h")
+    test_input_state = tf.contrib.rnn.LSTMStateTuple(test_input_state_c, test_input_state_h)
+
+#    print ("want to be", self._initial_state)
+#    print ("it actually is ", input_state)
+    with tf.device("/cpu:0"):
+      self.embedding = tf.get_variable(
+          "embedding", [vocab_size, size], dtype=data_type())
+
+#      print("should be ", input_.input_data)
+#      print("is ", test_word)
+
+      inputs = tf.nn.embedding_lookup(self.embedding, input_.input_data)  # embeddings for the batched input
+      test_inputs = tf.nn.embedding_lookup(self.embedding, test_word_in)  # embedding of the single test word
+#      print("should be ", inputs)
+#      print("is ", test_inputs)
+
+    # test time
+    with tf.variable_scope("RNN"):
+#      tf.get_variable_scope().reuse_variables()
+      (test_cell_output, test_output_state) = self.cell(test_inputs[:, 0, :], [test_input_state])
+
+    softmax_w = tf.get_variable(
+        "softmax_w", [size, vocab_size], dtype=data_type())
+    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
+
+    test_logits = tf.matmul(test_cell_output, softmax_w) + softmax_b
+    test_softmaxed = tf.nn.softmax(test_logits)
+    print("test softmaxed is ", test_softmaxed)
+    p_word = test_softmaxed[0, test_word_out[0,0]]
+    test_out = tf.identity(p_word, name="test_out")
+#    p_word = tf.float32(test_softmaxed[:, test_word_out], name="p_out")
+
+    if is_training and config.keep_prob < 1:
+      inputs = tf.nn.dropout(inputs, config.keep_prob)
+
+    # Simplified version of models/tutorials/rnn/rnn.py's rnn().
+    # This builds an unrolled LSTM for tutorial purposes only.
+    # In general, use the rnn() or state_saving_rnn() from rnn.py.
+    #
+    # The alternative version of the code below is:
+    #
+    # inputs = tf.unstack(inputs, num=num_steps, axis=1)
+    # outputs, state = tf.contrib.rnn.static_rnn(
+    #     cell, inputs, initial_state=self._initial_state)
+    outputs = []
+    state = self._initial_state
+    with tf.variable_scope("RNN"):
+      for time_step in range(num_steps):
+        if time_step > -1: tf.get_variable_scope().reuse_variables()  # always true: the test-time call above already used self.cell in scope "RNN"
+        (cell_output, state) = self.cell(inputs[:, time_step, :], state)
+        outputs.append(cell_output)
+
+    output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])
+    logits = tf.matmul(output, softmax_w) + softmax_b
+    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
+        [logits],
+        [tf.reshape(input_.targets, [-1])],
+        [tf.ones([batch_size * num_steps], dtype=data_type())])
+    self._cost = cost = tf.reduce_sum(loss) / batch_size  # total loss divided by the batch size
+    self._final_state = state
+
+    if not is_training:
+      return
+
+    self._lr = tf.Variable(0.0, trainable=False)
+    tvars = tf.trainable_variables()
+    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
+                                      config.max_grad_norm)
+    optimizer = tf.train.GradientDescentOptimizer(self._lr)
+    self._train_op = optimizer.apply_gradients(
+        zip(grads, tvars),
+        global_step=tf.contrib.framework.get_or_create_global_step())
+
+    self._new_lr = tf.placeholder(
+        tf.float32, shape=[], name="new_learning_rate")
+    self._lr_update = tf.assign(self._lr, self._new_lr)
+
+  def assign_lr(self, session, lr_value):  # sets the learning-rate variable to lr_value
+    session.run(self._lr_update, feed_dict={self._new_lr: lr_value})
+
+  @property
+  def input(self):
+    return self._input
+
+  @property
+  def initial_state(self):
+    return self._initial_state
+
+  @property
+  def cost(self):
+    return self._cost
+
+  @property
+  def final_state(self):
+    return self._final_state
+
+  @property
+  def lr(self):  # only defined when the model was built with is_training=True
+    return self._lr
+
+  @property
+  def train_op(self):  # only defined when the model was built with is_training=True
+    return self._train_op
+
+
+class SmallConfig(object):
+  """Small config: 2 LSTM layers of 200 units, no dropout (keep_prob = 1.0)."""
+  init_scale = 0.1
+  learning_rate = 1.0
+  max_grad_norm = 5
+  num_layers = 2
+  num_steps = 20
+  hidden_size = 200
+  max_epoch = 4
+  max_max_epoch = 13
+  keep_prob = 1.0
+  lr_decay = 0.5
+  batch_size = 20
+  vocab_size = 10000
+
+
+class MediumConfig(object):
+  """Medium config: 2 LSTM layers of 650 units, keep_prob = 0.5."""
+  init_scale = 0.05
+  learning_rate = 1.0
+  max_grad_norm = 5
+  num_layers = 2
+  num_steps = 35
+  hidden_size = 650
+  max_epoch = 6
+  max_max_epoch = 39
+  keep_prob = 0.5
+  lr_decay = 0.8
+  batch_size = 20
+  vocab_size = 10000
+
+
+class LargeConfig(object):
+  """Large config: 2 LSTM layers of 1500 units, keep_prob = 0.35."""
+  init_scale = 0.04
+  learning_rate = 1.0
+  max_grad_norm = 10
+  num_layers = 2
+  num_steps = 35
+  hidden_size = 1500
+  max_epoch = 14
+  max_max_epoch = 55
+  keep_prob = 0.35
+  lr_decay = 1 / 1.15
+  batch_size = 20
+  vocab_size = 10000
+
+
+class TestConfig(object):
+  """Tiny config, for testing: 1 LSTM layer of 2 units, trained for a single epoch."""
+  init_scale = 0.1
+  learning_rate = 1.0
+  max_grad_norm = 1
+  num_layers = 1
+  num_steps = 2
+  hidden_size = 2
+  max_epoch = 1
+  max_max_epoch = 1
+  keep_prob = 1.0
+  lr_decay = 0.5
+  batch_size = 20
+  vocab_size = 10000
+
+
+def run_epoch(session, model, eval_op=None, verbose=False):
+  """Runs `model` for one epoch and returns np.exp(total cost / total steps).
+
+  `eval_op` (e.g. the train op), if given, is run along with the cost at every
+  step. If `verbose`, prints perplexity and speed when step % (epoch_size // 10) == 10.
+  """
+  start_time = time.time()
+  costs, iters = 0.0, 0
+  state = session.run(model.initial_state)
+  fetches = {"cost": model.cost, "final_state": model.final_state}
+  if eval_op is not None:
+    fetches["eval_op"] = eval_op
+  # Clamp to 1 so an epoch shorter than 10 steps cannot cause a modulo by zero.
+  log_every = max(model.input.epoch_size // 10, 1)
+
+  for step in range(model.input.epoch_size):
+    # Feed the previous step's final LSTM state in as this step's initial state.
+    feed_dict = {}
+    for i, (c, h) in enumerate(model.initial_state):
+      feed_dict[c] = state[i].c
+      feed_dict[h] = state[i].h
+
+    vals = session.run(fetches, feed_dict)
+    state = vals["final_state"]
+    costs += vals["cost"]
+    iters += model.input.num_steps
+
+    if verbose and step % log_every == 10:
+      print("%.3f perplexity: %.3f speed: %.0f wps" %
+            (step * 1.0 / model.input.epoch_size, np.exp(costs / iters),
+             iters * model.input.batch_size / (time.time() - start_time)))
+
+  return np.exp(costs / iters)
+
+
+def get_config():
+  """Returns a new config object for --model; raises ValueError if unknown."""
+  config_classes = {
+      "small": SmallConfig,
+      "medium": MediumConfig,
+      "large": LargeConfig,
+      "test": TestConfig,
+  }
+  if FLAGS.model not in config_classes:
+    raise ValueError("Invalid model: %s" % FLAGS.model)
+  return config_classes[FLAGS.model]()
+
+
+def main(_):  # entry point for tf.app.run(); the positional argument is unused
+  if not FLAGS.data_path:
+    raise ValueError("Must set --data_path to PTB data directory")
+
+  raw_data = reader.ptb_raw_data(FLAGS.data_path)
+  train_data, valid_data, test_data, _ = raw_data
+
+  config = get_config()
+  eval_config = get_config()
+  eval_config.batch_size = 1  # eval config: a single sequence ...
+  eval_config.num_steps = 1  # ... advanced one word per step
+
+  with tf.Graph().as_default():
+    initializer = tf.random_uniform_initializer(-config.init_scale,
+                                                config.init_scale)
+
+    with tf.name_scope("Train"):  # with the "Model" scope below, ops are named "Train/Model/..."
+      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
+      with tf.variable_scope("Model", reuse=None, initializer=initializer):
+        m = PTBModel(is_training=True, config=config, input_=train_input)
+      tf.summary.scalar("Training Loss", m.cost)
+      tf.summary.scalar("Learning Rate", m.lr)
+
+#    with tf.name_scope("Valid"):
+#      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
+#      with tf.variable_scope("Model", reuse=True, initializer=initializer):
+#        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
+#      tf.summary.scalar("Validation Loss", mvalid.cost)
+#
+#    with tf.name_scope("Test"):
+#      test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
+#      with tf.variable_scope("Model", reuse=True, initializer=initializer):
+#        mtest = PTBModel(is_training=False, config=eval_config,
+#                         input_=test_input)
+
+#    saver = tf.train.Saver({"embedding": m.embedding})
+#    saver = tf.train.Saver({"embedding": m.embedding, "lstm": m.cell})
+    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
+    with sv.managed_session() as session:
+      for i in range(config.max_max_epoch):
+        lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)  # exponent is 0 (no decay) for the first max_epoch epochs
+        m.assign_lr(session, config.learning_rate * lr_decay)
+
+        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
+        train_perplexity = run_epoch(session, m, eval_op=m.train_op,
+                                     verbose=True)
+
+#        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
+#        valid_perplexity = run_epoch(session, mvalid)
+#        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
+
+#      test_perplexity = run_epoch(session, mtest)
+#      print("Test Perplexity: %.3f" % test_perplexity)
+
+      if FLAGS.save_path:
+#        saver = tf.train.Saver()
+        print("Saving model to %s." % FLAGS.save_path)
+#        saver.save(session, FLAGS.save_path, global_step=sv.global_step)
+        sv.saver.save(session, FLAGS.save_path)  # save_path doubles as the Supervisor logdir above
+#        sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
+
+
+if __name__ == "__main__":
+  tf.app.run()
diff --git a/egs/ami/s5/local/tensorflow/run.sh b/egs/ami/s5/local/tensorflow/run.sh
index e869f68873d..22947616967 100755
--- a/egs/ami/s5/local/tensorflow/run.sh
+++ b/egs/ami/s5/local/tensorflow/run.sh
@@ -12,4 +12,5 @@ mkdir -p $dir
 #  cat data/$data_type/$i/text | awk -v w=$dir/wordlist 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=2;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' | sed "s=^= =g" > $dir/$i.txt
 #done
 
-python local/tensorflow/ptb_word_lm.py --data_path=$dir --model=$model_type --save_path=$dir/model
+#python local/tensorflow/ptb_word_lm.py --data_path=$dir --model=$model_type --save_path=$dir/model
+python local/tensorflow/rnnlm.py --data_path=$dir --model=$model_type --save_path=$dir/model

From 56d9c890e7540eae4b9abf59eeec2c5657bf83b8 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Mon, 5 Jun 2017 20:16:14 -0400
Subject: [PATCH 05/30] supports multilayer LSTM now

---
 egs/ami/s5/local/tensorflow/rnnlm.py | 58 ++++++++++++++++------------
 egs/ami/s5/local/tensorflow/run.sh   | 13 +++----
 2 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/egs/ami/s5/local/tensorflow/rnnlm.py b/egs/ami/s5/local/tensorflow/rnnlm.py
index b3870cc0919..7fff2c7d2b4 100644
--- a/egs/ami/s5/local/tensorflow/rnnlm.py
+++ b/egs/ami/s5/local/tensorflow/rnnlm.py
@@ -127,14 +127,24 @@ def attn_cell():
         [attn_cell() for _ in range(config.num_layers)], state_is_tuple=True)
 
     self._initial_state = self.cell.zero_state(batch_size, data_type())
+    self._initial_state_single = self.cell.zero_state(1, data_type())
+
+    self.initial = tf.reshape(tf.stack(axis=0, values=self._initial_state_single), [config.num_layers, 2, 1, size], name="test_initial_state")
 
 
     # first implement the less efficient version
     test_word_in = tf.placeholder(tf.int32, [1, 1], name="test_word_in")
     test_word_out = tf.placeholder(tf.int32, [1, 1], name="test_word_out")
-    test_input_state_c = tf.placeholder(tf.float32, [1, size], name="test_state_c")
-    test_input_state_h = tf.placeholder(tf.float32, [1, size], name="test_state_h")
-    test_input_state = tf.contrib.rnn.LSTMStateTuple(test_input_state_c, test_input_state_h)
+#    test_input_state_c = tf.placeholder(tf.float32, [1, size], name="test_state_c")
+#    test_input_state_h = tf.placeholder(tf.float32, [1, size], name="test_state_h")
+    state_placeholder = tf.placeholder(tf.float32, [config.num_layers, 2, 1, size], name="test_state")
+    l = tf.unstack(state_placeholder, axis=0)
+    test_input_state = tuple(
+               [tf.contrib.rnn.LSTMStateTuple(l[idx][0],l[idx][1])
+                 for idx in range(config.num_layers)]
+    )
+
+#    test_input_state = tf.contrib.rnn.LSTMStateTuple(test_input_state_c, test_input_state_h)
 
 #    print ("want to be", self._initial_state)
 #    print ("it actually is ", input_state)
@@ -153,15 +163,16 @@ def attn_cell():
     # test time
     with tf.variable_scope("RNN"):
 #      tf.get_variable_scope().reuse_variables()
-      (test_cell_output, test_output_state) = self.cell(test_inputs[:, 0, :], [test_input_state])
+      (test_cell_output, test_output_state) = self.cell(test_inputs[:, 0, :], test_input_state)
 
+    test_out_state = tf.reshape(tf.stack(axis=1, values=test_output_state), [config.num_layers, 2, 1, size], name="test_state_out")
     softmax_w = tf.get_variable(
         "softmax_w", [size, vocab_size], dtype=data_type())
     softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
 
     test_logits = tf.matmul(test_cell_output, softmax_w) + softmax_b
     test_softmaxed = tf.nn.softmax(test_logits)
-    print("test softmaxed is ", test_softmaxed)
+
     p_word = test_softmaxed[0, test_word_out[0,0]]
     test_out = tf.identity(p_word, name="test_out")
 #    p_word = tf.float32(test_softmaxed[:, test_word_out], name="p_out")
@@ -247,8 +258,8 @@ class SmallConfig(object):
   num_layers = 2
   num_steps = 20
   hidden_size = 200
-  max_epoch = 4
-  max_max_epoch = 13
+  max_epoch = 1 #4
+  max_max_epoch = 1 #13
   keep_prob = 1.0
   lr_decay = 0.5
   batch_size = 20
@@ -374,20 +385,19 @@ def main(_):
       tf.summary.scalar("Training Loss", m.cost)
       tf.summary.scalar("Learning Rate", m.lr)
 
-#    with tf.name_scope("Valid"):
-#      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
-#      with tf.variable_scope("Model", reuse=True, initializer=initializer):
-#        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
-#      tf.summary.scalar("Validation Loss", mvalid.cost)
-#
-#    with tf.name_scope("Test"):
-#      test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
-#      with tf.variable_scope("Model", reuse=True, initializer=initializer):
-#        mtest = PTBModel(is_training=False, config=eval_config,
-#                         input_=test_input)
+    with tf.name_scope("Valid"):
+      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
+      with tf.variable_scope("Model", reuse=True, initializer=initializer):
+        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
+      tf.summary.scalar("Validation Loss", mvalid.cost)
+
+    with tf.name_scope("Test"):
+      test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
+      with tf.variable_scope("Model", reuse=True, initializer=initializer):
+        mtest = PTBModel(is_training=False, config=eval_config,
+                         input_=test_input)
 
 #    saver = tf.train.Saver({"embedding": m.embedding})
-#    saver = tf.train.Saver({"embedding": m.embedding, "lstm": m.cell})
     sv = tf.train.Supervisor(logdir=FLAGS.save_path)
     with sv.managed_session() as session:
       for i in range(config.max_max_epoch):
@@ -398,12 +408,12 @@ def main(_):
         train_perplexity = run_epoch(session, m, eval_op=m.train_op,
                                      verbose=True)
 
-#        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
-#        valid_perplexity = run_epoch(session, mvalid)
-#        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
+        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
+        valid_perplexity = run_epoch(session, mvalid)
+        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
 
-#      test_perplexity = run_epoch(session, mtest)
-#      print("Test Perplexity: %.3f" % test_perplexity)
+      test_perplexity = run_epoch(session, mtest)
+      print("Test Perplexity: %.3f" % test_perplexity)
 
       if FLAGS.save_path:
 #        saver = tf.train.Saver()
diff --git a/egs/ami/s5/local/tensorflow/run.sh b/egs/ami/s5/local/tensorflow/run.sh
index 22947616967..5baa2337741 100755
--- a/egs/ami/s5/local/tensorflow/run.sh
+++ b/egs/ami/s5/local/tensorflow/run.sh
@@ -1,16 +1,15 @@
 #!/bin/bash
 
 data_type=sdm1
-model_type=test
+model_type=small
 
 dir=data/tensorflow/
 mkdir -p $dir
 
-#cat data/$data_type/train/text | awk '{for(i=2;i<=NF;i++)print $i}' | sort | uniq -c | sort -k1nr | head -n 9998 | awk '{print $2}' > $dir/wordlist
-#
-#for i in train dev eval; do
-#  cat data/$data_type/$i/text | awk -v w=$dir/wordlist 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=2;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' | sed "s=^= =g" > $dir/$i.txt
-#done
+cat data/$data_type/train/text | awk '{for(i=2;i<=NF;i++)print $i}' | sort | uniq -c | sort -k1nr | head -n 9998 | awk '{print $2}' > $dir/wordlist
+
+for i in train dev eval; do
+  cat data/$data_type/$i/text | awk -v w=$dir/wordlist 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=2;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' | sed "s=^= =g" > $dir/$i.txt
+done
 
-#python local/tensorflow/ptb_word_lm.py --data_path=$dir --model=$model_type --save_path=$dir/model
 python local/tensorflow/rnnlm.py --data_path=$dir --model=$model_type --save_path=$dir/model

From 1df10a84c3bcc47c76609ca636ed24dec758002b Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Tue, 6 Jun 2017 14:03:58 -0400
Subject: [PATCH 06/30] add script to install bazel

---
 tools/install_tensorflow.sh | 40 +++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100755 tools/install_tensorflow.sh

diff --git a/tools/install_tensorflow.sh b/tools/install_tensorflow.sh
new file mode 100755
index 00000000000..e5a7513063f
--- /dev/null
+++ b/tools/install_tensorflow.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+export JAVA_HOME=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121
+export PATH=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121/bin/:$PATH
+
+#git clone https://github.com/tensorflow/tensorflow
+
+#cd tensorflow
+#
+#git checkout r1.0
+#
+#cd ../
+#
+##git clone https://github.com/google/bazel/
+
+[ ! -f bazel-0.4.5-dist.zip ] && wget https://github.com/bazelbuild/bazel/releases/download/0.4.5/bazel-0.4.5-dist.zip
+mkdir -p bazel
+cd bazel
+
+unzip ../bazel-0.4.5-dist.zip
+
+./compile.sh
+
+#mkdir build
+#./compile.sh compile build/
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

From bfb4ad2c9cd8bb5b861dc66e80917bd2573ee59e Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Tue, 6 Jun 2017 15:26:57 -0400
Subject: [PATCH 07/30] add script to compile tensorflow with simple RNN
 C++ example

---
 egs/ami/s5/local/tensorflow/rnnlm.py |   4 +-
 egs/ami/s5/local/tensorflow/run.sh   |  13 ++--
 src/tensorflow/loader_rnn.cc         | 100 +++++++++++++++++++++++++++
 tools/install_tensorflow.sh          |  37 ++++++----
 4 files changed, 135 insertions(+), 19 deletions(-)
 create mode 100644 src/tensorflow/loader_rnn.cc

diff --git a/egs/ami/s5/local/tensorflow/rnnlm.py b/egs/ami/s5/local/tensorflow/rnnlm.py
index 7fff2c7d2b4..dfc058e309d 100644
--- a/egs/ami/s5/local/tensorflow/rnnlm.py
+++ b/egs/ami/s5/local/tensorflow/rnnlm.py
@@ -258,8 +258,8 @@ class SmallConfig(object):
   num_layers = 2
   num_steps = 20
   hidden_size = 200
-  max_epoch = 1 #4
-  max_max_epoch = 1 #13
+  max_epoch = 4
+  max_max_epoch = 13
   keep_prob = 1.0
   lr_decay = 0.5
   batch_size = 20
diff --git a/egs/ami/s5/local/tensorflow/run.sh b/egs/ami/s5/local/tensorflow/run.sh
index 5baa2337741..dfc5a0749ba 100755
--- a/egs/ami/s5/local/tensorflow/run.sh
+++ b/egs/ami/s5/local/tensorflow/run.sh
@@ -6,10 +6,13 @@ model_type=small
 dir=data/tensorflow/
 mkdir -p $dir
 
-cat data/$data_type/train/text | awk '{for(i=2;i<=NF;i++)print $i}' | sort | uniq -c | sort -k1nr | head -n 9998 | awk '{print $2}' > $dir/wordlist
+#cat data/$data_type/train/text | awk '{for(i=2;i<=NF;i++)print $i}' | sort | uniq -c | sort -k1nr | head -n 9998 | awk '{print $2}' > $dir/wordlist
+#
+#for i in train dev eval; do
+#  cat data/$data_type/$i/text | awk -v w=$dir/wordlist 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=2;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' | sed "s=^= =g" > $dir/$i.txt
+#done
 
-for i in train dev eval; do
-  cat data/$data_type/$i/text | awk -v w=$dir/wordlist 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=2;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' | sed "s=^= =g" > $dir/$i.txt
-done
 
-python local/tensorflow/rnnlm.py --data_path=$dir --model=$model_type --save_path=$dir/model
+#python local/tensorflow/rnnlm.py --data_path=$dir --model=small --save_path=$dir/model.small
+python local/tensorflow/rnnlm.py --data_path=$dir --model=medium --save_path=$dir/model.medium
+#python local/tensorflow/rnnlm.py --data_path=$dir --model=large --save_path=$dir/model.large
diff --git a/src/tensorflow/loader_rnn.cc b/src/tensorflow/loader_rnn.cc
new file mode 100644
index 00000000000..33b6fcb3c5d
--- /dev/null
+++ b/src/tensorflow/loader_rnn.cc
@@ -0,0 +1,100 @@
+#include "tensorflow/core/public/session.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/protobuf/meta_graph.pb.h"
+
+using namespace tensorflow;
+
+int main(int argc, char* argv[]) {
+  // Initialize a tensorflow session
+  Session* session;
+  Status status = NewSession(SessionOptions(), &session);
+  if (!status.ok()) {
+    std::cout << status.ToString() << "\n";
+    return 1;
+  }
+
+  const string pathToGraph = "/export/b02/hxu/TensorFlow/kaldi/egs/ami/s5/data/tensorflow/model.small.meta";
+  const string checkpointPath = "/export/b02/hxu/TensorFlow/kaldi/egs/ami/s5/data/tensorflow/model.small";
+
+  // Read in the MetaGraphDef that was saved alongside the checkpoint.
+  // NOTE: both paths above are hard-coded absolute paths. (Relative paths seem
+  // to be resolved against the cwd, which under `bazel run` is a temp folder
+  // rather than the directory `bazel run` was called from.)
+  MetaGraphDef graph_def;
+  status = ReadBinaryProto(Env::Default(), pathToGraph, &graph_def);
+  if (!status.ok()) {
+    std::cout << status.ToString() << "\n";
+    return 1;
+  }
+
+  // Add the graph to the session
+  status = session->Create(graph_def.graph_def());
+  if (!status.ok()) {
+    std::cout << status.ToString() << "\n";
+    return 1;
+  }
+
+  Tensor checkpointPathTensor(DT_STRING, TensorShape());
+  checkpointPathTensor.scalar<std::string>()() = checkpointPath;
+  // Restore the variables by feeding the checkpoint path to the saver's restore op.
+  status = session->Run(
+            {{ graph_def.saver_def().filename_tensor_name(), checkpointPathTensor },},
+            {},
+            {graph_def.saver_def().restore_op_name()},
+            nullptr);
+  if (!status.ok()) {
+    std::cout << status.ToString() << "\n";
+    return 1;
+  }
+
+  // Fetch the graph's initial LSTM state; it seeds the loop below.
+  std::vector<Tensor> state;
+  status = session->Run(std::vector<std::pair<string, tensorflow::Tensor>>(), {"Train/Model/test_initial_state"}, {}, &state);
+  if (!status.ok() || state.empty()) {
+    // Without this check a failed fetch would make state[0] below index an empty vector.
+    std::cout << status.ToString() << "\n";
+    return 1;
+  }
+
+  for (int32 word_out = 0; word_out < 10000; word_out++) {
+    Tensor in_word(DT_INT32, {1, 1});
+    in_word.scalar<int32>()() = (word_out + 9999) % 10000;
+
+    Tensor out_word(DT_INT32, {1, 1});
+    out_word.scalar<int32>()() = word_out;
+
+    // The state tensor fed below has dimensions
+    //   num-layers x 2 (c and h) x 1 (batchsize) x hidden-size.
+    // It starts as the initial state fetched above and is replaced by the
+    // "test_state_out" output after every word.
+
+    std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
+      {"Train/Model/test_word_in", in_word},
+      {"Train/Model/test_word_out", out_word},
+      {"Train/Model/test_state", state[0]},
+    };
+
+    // The session will initialize the outputs
+    std::vector<tensorflow::Tensor> outputs;
+
+    // Run one step, fetching the "test_out" and "test_state_out" tensors
+    status = session->Run(inputs, {"Train/Model/test_out", "Train/Model/test_state_out"}, {}, &outputs);
+
+    if (!status.ok()) {
+      std::cout << status.ToString() << "\n";
+      return 1;
+    }
+
+    // outputs[0] is "test_out" (the model's output for out_word given in_word
+    // and the current state); outputs[1] is "test_state_out" (the new state).
+
+    // Print the results
+    std::cout << word_out << ": " << outputs[0].DebugString() << "\n";
+    std::cout << word_out << ": " << outputs[1].DebugString() << "\n";
+    state[0] = outputs[1];  // carry the updated state over to the next word
+  }
+
+  // Free any resources used by the session
+  session->Close();
+  return 0;
+}
diff --git a/tools/install_tensorflow.sh b/tools/install_tensorflow.sh
index e5a7513063f..f2bc7e9a9e1 100755
--- a/tools/install_tensorflow.sh
+++ b/tools/install_tensorflow.sh
@@ -1,30 +1,43 @@
 #!/bin/bash
 
+set -e
+
+export HOME=/export/b02/hxu
 export JAVA_HOME=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121
 export PATH=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121/bin/:$PATH
 
-#git clone https://github.com/tensorflow/tensorflow
+git clone https://github.com/tensorflow/tensorflow
 
-#cd tensorflow
-#
-#git checkout r1.0
-#
-#cd ../
-#
-##git clone https://github.com/google/bazel/
 
 [ ! -f bazel-0.4.5-dist.zip ] && wget https://github.com/bazelbuild/bazel/releases/download/0.4.5/bazel-0.4.5-dist.zip
 mkdir -p bazel
 cd bazel
-
 unzip ../bazel-0.4.5-dist.zip
-
 ./compile.sh
+cd ../
+
+# now bazel is built
+
+export PATH=$PWD/bazel/output/:$PATH
+
+cd tensorflow
+
+./configure
+
+cd ../
+
+cd tensorflow/tensorflow
+mkdir -p rnnlm
+cd rnnlm
 
-#mkdir build
-#./compile.sh compile build/
+[ ! -f BUILD ] && ln -s ../../../../src/tensorflow/BUILD
+[ ! -f loader_rnn.cc ] && ln -s ../../../../src/tensorflow/loader_rnn.cc
 
+TEST_TMPDIR=tensorflow/build
 
+echo bazel build :loader_rnn
+bazel build --test_tmpdir=$TEST_TMPDIR :loader_rnn
+bazel run -c opt :loader_rnn
 
 
 

From 0c4b2b4e957b876df3b0e1d38e8bef0b5a2fa389 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Wed, 7 Jun 2017 18:23:30 -0400
Subject: [PATCH 08/30] more files added

---
 src/tensorflow/BUILD         |  9 +++++++
 src/tensorflow/loader_rnn.cc | 10 ++++++++
 tools/install_tensorflow.sh  | 49 ++++++++++++++++--------------------
 3 files changed, 41 insertions(+), 27 deletions(-)
 create mode 100644 src/tensorflow/BUILD

diff --git a/src/tensorflow/BUILD b/src/tensorflow/BUILD
new file mode 100644
index 00000000000..a60fdbbc3ec
--- /dev/null
+++ b/src/tensorflow/BUILD
@@ -0,0 +1,9 @@
+cc_binary(
+    name = "loader_rnn",
+    srcs = ["loader_rnn.cc"],
+    deps = [
+        "//tensorflow/core:tensorflow",
+#        "//kaldi/base/libkaldi-base.so",
+    ]
+)
+
diff --git a/src/tensorflow/loader_rnn.cc b/src/tensorflow/loader_rnn.cc
index 33b6fcb3c5d..6f80fa17994 100644
--- a/src/tensorflow/loader_rnn.cc
+++ b/src/tensorflow/loader_rnn.cc
@@ -2,9 +2,18 @@
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/protobuf/meta_graph.pb.h"
 
+#include "base/kaldi-common.h"
+//#include "fstext/fstext-lib.h"
+//#include "lat/kaldi-lattice.h"
+//#include "lat/lattice-functions.h"
+//#include "lm/kaldi-rnnlm.h"
+//#include "lm/mikolov-rnnlm-lib.h"
+//#include "util/common-utils.h"
+
 using namespace tensorflow;
 
 int main(int argc, char* argv[]) {
+  /*
   // Initialize a tensorflow session
   Session* session;
   Status status = NewSession(SessionOptions(), &session);
@@ -96,5 +105,6 @@ int main(int argc, char* argv[]) {
 
   // Free any resources used by the session
   session->Close();
+  // */
   return 0;
 }
diff --git a/tools/install_tensorflow.sh b/tools/install_tensorflow.sh
index f2bc7e9a9e1..b07a636e3c2 100755
--- a/tools/install_tensorflow.sh
+++ b/tools/install_tensorflow.sh
@@ -5,49 +5,44 @@ set -e
 export HOME=/export/b02/hxu
 export JAVA_HOME=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121
 export PATH=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121/bin/:$PATH
+export PATH=$PWD/bazel/output/:$PATH
+#export PATH=$PWD/tensorflow/bazel-out/host/bin/external/protobuf/:$PATH
+export PATH=$PWD:$PATH
 
-git clone https://github.com/tensorflow/tensorflow
-
+echo which protoc
+which protoc
 
-[ ! -f bazel-0.4.5-dist.zip ] && wget https://github.com/bazelbuild/bazel/releases/download/0.4.5/bazel-0.4.5-dist.zip
-mkdir -p bazel
-cd bazel
-unzip ../bazel-0.4.5-dist.zip
-./compile.sh
-cd ../
+#git clone https://github.com/tensorflow/tensorflow
+[ ! -f bazel.zip ] && wget https://github.com/bazelbuild/bazel/releases/download/0.5.1/bazel-0.5.1-dist.zip -O bazel.zip
+#mkdir -p bazel
+#cd bazel
+#unzip ../bazel.zip
+#./compile.sh
+#cd ../
 
 # now bazel is built
-
-export PATH=$PWD/bazel/output/:$PATH
+git clone https://github.com/tensorflow/tensorflow
 
 cd tensorflow
 
 ./configure
 
-cd ../
+#bazel build //tensorflow/core:framework_headers_lib
+#
+#bazel build //tensorflow:libtensorflow.so
+bazel build //tensorflow:libtensorflow_cc.so
+
+exit
 
 cd tensorflow/tensorflow
 mkdir -p rnnlm
 cd rnnlm
 
 [ ! -f BUILD ] && ln -s ../../../../src/tensorflow/BUILD
+[ ! -f WORKSPACE ] && ln -s ../../../../src/tensorflow/WORKSPACE
 [ ! -f loader_rnn.cc ] && ln -s ../../../../src/tensorflow/loader_rnn.cc
+[ ! -d kaldi_src ] && ln -s ../../../../src/ kaldi_src
 
-TEST_TMPDIR=tensorflow/build
-
-echo bazel build :loader_rnn
 bazel build --test_tmpdir=$TEST_TMPDIR :loader_rnn
-bazel run -c opt :loader_rnn
-
-
-
-
-
-
-
-
-
-
-
-
+#bazel run -c opt :loader_rnn
 

From 066cc74dc6c68ffa6e7b717874c6d227cb463c7c Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainanx@cltdell01gpu.clt.spoken.com>
Date: Wed, 7 Jun 2017 18:38:31 -0400
Subject: [PATCH 09/30] change for spoken machines

---
 tools/install_tensorflow.sh | 43 +++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 23 deletions(-)

diff --git a/tools/install_tensorflow.sh b/tools/install_tensorflow.sh
index b07a636e3c2..ba933225162 100755
--- a/tools/install_tensorflow.sh
+++ b/tools/install_tensorflow.sh
@@ -2,18 +2,15 @@
 
 set -e
 
-export HOME=/export/b02/hxu
-export JAVA_HOME=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121
-export PATH=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121/bin/:$PATH
+export HOME=/home/hainanx/work
+#export JAVA_HOME=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121
+#export PATH=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121/bin/:$PATH
 export PATH=$PWD/bazel/output/:$PATH
 #export PATH=$PWD/tensorflow/bazel-out/host/bin/external/protobuf/:$PATH
 export PATH=$PWD:$PATH
 
-echo which protoc
-which protoc
-
 #git clone https://github.com/tensorflow/tensorflow
-[ ! -f bazel.zip ] && wget https://github.com/bazelbuild/bazel/releases/download/0.5.1/bazel-0.5.1-dist.zip -O bazel.zip
+#[ ! -f bazel.zip ] && wget https://github.com/bazelbuild/bazel/releases/download/0.5.1/bazel-0.5.1-dist.zip -O bazel.zip
 #mkdir -p bazel
 #cd bazel
 #unzip ../bazel.zip
@@ -21,28 +18,28 @@ which protoc
 #cd ../
 
 # now bazel is built
-git clone https://github.com/tensorflow/tensorflow
+#git clone https://github.com/tensorflow/tensorflow
 
 cd tensorflow
 
-./configure
+#./configure
 
 #bazel build //tensorflow/core:framework_headers_lib
 #
 #bazel build //tensorflow:libtensorflow.so
 bazel build //tensorflow:libtensorflow_cc.so
 
-exit
-
-cd tensorflow/tensorflow
-mkdir -p rnnlm
-cd rnnlm
-
-[ ! -f BUILD ] && ln -s ../../../../src/tensorflow/BUILD
-[ ! -f WORKSPACE ] && ln -s ../../../../src/tensorflow/WORKSPACE
-[ ! -f loader_rnn.cc ] && ln -s ../../../../src/tensorflow/loader_rnn.cc
-[ ! -d kaldi_src ] && ln -s ../../../../src/ kaldi_src
-
-bazel build --test_tmpdir=$TEST_TMPDIR :loader_rnn
-#bazel run -c opt :loader_rnn
-
+#exit
+#
+#cd tensorflow/tensorflow
+#mkdir -p rnnlm
+#cd rnnlm
+#
+#[ ! -f BUILD ] && ln -s ../../../../src/tensorflow/BUILD
+#[ ! -f WORKSPACE ] && ln -s ../../../../src/tensorflow/WORKSPACE
+#[ ! -f loader_rnn.cc ] && ln -s ../../../../src/tensorflow/loader_rnn.cc
+#[ ! -d kaldi_src ] && ln -s ../../../../src/ kaldi_src
+#
+#bazel build --test_tmpdir=$TEST_TMPDIR :loader_rnn
+##bazel run -c opt :loader_rnn
+#

From 4d27c7d1a2df9a3763ff17403619a9e229f28ab0 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Wed, 7 Jun 2017 19:10:08 -0400
Subject: [PATCH 10/30] add makefile

---
 src/tensorflow/Makefile | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 src/tensorflow/Makefile

diff --git a/src/tensorflow/Makefile b/src/tensorflow/Makefile
new file mode 100644
index 00000000000..fd0e02458dd
--- /dev/null
+++ b/src/tensorflow/Makefile
@@ -0,0 +1,28 @@
+include ../kaldi.mk
+
+TENSORFLOW = ../../tools/tensorflow
+BAZEL = ../../tools/bazel
+KALDI_ROOT = ../../
+all:
+
+#EXTRA_CXXFLAGS = -Wno-sign-compare -fPIC -I ../ -I $(KALDI_ROOT)/src -DKALDI_NO_PORTAUDIO -I $(TENSORFLOW)/third_party/eigen3 -I $(BAZEL)/third_party/protobuf/3.0.0/src/ \
+#                 -I$(TENSORFLOW)/bazel-genfiles -I $(TENSORFLOW) \
+#  -I $(TENSORFLOW)/tensorflow/contrib/makefile/gen/protobuf/include/ -I $(TENSORFLOW)/tensorflow/contrib/makefile/downloads/eigen \
+
+EXTRA_CXXFLAGS = -Wno-sign-compare -fPIC -I ../ -I $(KALDI_ROOT)/src -DKALDI_NO_PORTAUDIO -I $(BAZEL)/third_party/protobuf/3.0.0/src \
+                 -I$(TENSORFLOW)/bazel-genfiles -I $(TENSORFLOW) \
+  -I $(TENSORFLOW)/tensorflow/contrib/makefile/gen/protobuf/include/ -I $(TENSORFLOW)/tensorflow/contrib/makefile/downloads/eigen \
+  -I $(TENSORFLOW)/bazel-out/host/bin/external/protobuf/
+BINFILES = loader_rnn
+
+OBJFILES =
+
+TESTFILES =
+
+ADDLIBS = ../lm/kaldi-lm.a ../util/kaldi-util.a ../thread/kaldi-thread.a \
+          ../matrix/kaldi-matrix.a ../base/kaldi-base.a 
+
+LDLIBS +=  -lz -ldl -fPIC -lrt
+LDLIBS += $(OTHERLIBS) -L$(TENSORFLOW)/bazel-bin/tensorflow -ltensorflow
+
+include ../makefiles/default_rules.mk

From 96e5a2b9f56f3f8dab9e647ced7dad7366bc5af1 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Fri, 9 Jun 2017 18:38:02 -0400
Subject: [PATCH 11/30] tf compiles with kaldi

---
 src/tensorflow/BUILD         |  9 -------
 src/tensorflow/Makefile      | 12 ++++-----
 src/tensorflow/loader_rnn.cc | 14 +++++-----
 tools/install_tensorflow.sh  | 51 +++++++++++-------------------------
 4 files changed, 27 insertions(+), 59 deletions(-)
 delete mode 100644 src/tensorflow/BUILD

diff --git a/src/tensorflow/BUILD b/src/tensorflow/BUILD
deleted file mode 100644
index a60fdbbc3ec..00000000000
--- a/src/tensorflow/BUILD
+++ /dev/null
@@ -1,9 +0,0 @@
-cc_binary(
-    name = "loader_rnn",
-    srcs = ["loader_rnn.cc"],
-    deps = [
-        "//tensorflow/core:tensorflow",
-#        "//kaldi/base/libkaldi-base.so",
-    ]
-)
-
diff --git a/src/tensorflow/Makefile b/src/tensorflow/Makefile
index fd0e02458dd..f214a66f88a 100644
--- a/src/tensorflow/Makefile
+++ b/src/tensorflow/Makefile
@@ -1,7 +1,6 @@
 include ../kaldi.mk
 
 TENSORFLOW = ../../tools/tensorflow
-BAZEL = ../../tools/bazel
 KALDI_ROOT = ../../
 all:
 
@@ -9,10 +8,9 @@ all:
 #                 -I$(TENSORFLOW)/bazel-genfiles -I $(TENSORFLOW) \
 #  -I $(TENSORFLOW)/tensorflow/contrib/makefile/gen/protobuf/include/ -I $(TENSORFLOW)/tensorflow/contrib/makefile/downloads/eigen \
 
-EXTRA_CXXFLAGS = -Wno-sign-compare -fPIC -I ../ -I $(KALDI_ROOT)/src -DKALDI_NO_PORTAUDIO -I $(BAZEL)/third_party/protobuf/3.0.0/src \
-                 -I$(TENSORFLOW)/bazel-genfiles -I $(TENSORFLOW) \
-  -I $(TENSORFLOW)/tensorflow/contrib/makefile/gen/protobuf/include/ -I $(TENSORFLOW)/tensorflow/contrib/makefile/downloads/eigen \
-  -I $(TENSORFLOW)/bazel-out/host/bin/external/protobuf/
+EXTRA_CXXFLAGS = -Wno-sign-compare -fPIC -I$(TENSORFLOW)/bazel-tensorflow/external/protobuf/src -I$(TENSORFLOW)/bazel-genfiles -I$(TENSORFLOW) -I$(TENSORFLOW)/tensorflow/contrib/makefile/downloads/eigen/
+#EXTRA_CXXFLAGS = -Wno-sign-compare -fPIC -I$(TENSORFLOW)/bazel-tensorflow/external/protobuf/src -I$(TENSORFLOW)/bazel-genfiles -I$(TENSORFLOW) -I$(TENSORFLOW)/third_party/eigen3
+
 BINFILES = loader_rnn
 
 OBJFILES =
@@ -20,9 +18,9 @@ OBJFILES =
 TESTFILES =
 
 ADDLIBS = ../lm/kaldi-lm.a ../util/kaldi-util.a ../thread/kaldi-thread.a \
-          ../matrix/kaldi-matrix.a ../base/kaldi-base.a 
+          ../matrix/kaldi-matrix.a ../base/kaldi-base.a ../../tools/tensorflow/bazel-bin/tensorflow/tensorflow_cc.so
 
 LDLIBS +=  -lz -ldl -fPIC -lrt
-LDLIBS += $(OTHERLIBS) -L$(TENSORFLOW)/bazel-bin/tensorflow -ltensorflow
+LDLIBS += $(OTHERLIBS) -L$(TENSORFLOW)/bazel-bin/tensorflow -ltensorflow_cc
 
 include ../makefiles/default_rules.mk
diff --git a/src/tensorflow/loader_rnn.cc b/src/tensorflow/loader_rnn.cc
index 6f80fa17994..7034b920060 100644
--- a/src/tensorflow/loader_rnn.cc
+++ b/src/tensorflow/loader_rnn.cc
@@ -3,17 +3,17 @@
 #include "tensorflow/core/protobuf/meta_graph.pb.h"
 
 #include "base/kaldi-common.h"
-//#include "fstext/fstext-lib.h"
-//#include "lat/kaldi-lattice.h"
-//#include "lat/lattice-functions.h"
-//#include "lm/kaldi-rnnlm.h"
-//#include "lm/mikolov-rnnlm-lib.h"
-//#include "util/common-utils.h"
+#include "fstext/fstext-lib.h"
+#include "lat/kaldi-lattice.h"
+#include "lat/lattice-functions.h"
+#include "lm/kaldi-rnnlm.h"
+#include "lm/mikolov-rnnlm-lib.h"
+#include "util/common-utils.h"
 
 using namespace tensorflow;
 
 int main(int argc, char* argv[]) {
-  /*
+//*
   // Initialize a tensorflow session
   Session* session;
   Status status = NewSession(SessionOptions(), &session);
diff --git a/tools/install_tensorflow.sh b/tools/install_tensorflow.sh
index ba933225162..7593d486fa4 100755
--- a/tools/install_tensorflow.sh
+++ b/tools/install_tensorflow.sh
@@ -2,44 +2,23 @@
 
 set -e
 
-export HOME=/home/hainanx/work
-#export JAVA_HOME=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121
-#export PATH=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121/bin/:$PATH
+export HOME=/export/b02/hxu
+export JAVA_HOME=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121
+export PATH=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121/bin/:$PATH
 export PATH=$PWD/bazel/output/:$PATH
-#export PATH=$PWD/tensorflow/bazel-out/host/bin/external/protobuf/:$PATH
-export PATH=$PWD:$PATH
 
-#git clone https://github.com/tensorflow/tensorflow
-#[ ! -f bazel.zip ] && wget https://github.com/bazelbuild/bazel/releases/download/0.5.1/bazel-0.5.1-dist.zip -O bazel.zip
-#mkdir -p bazel
-#cd bazel
-#unzip ../bazel.zip
-#./compile.sh
-#cd ../
-
-# now bazel is built
-#git clone https://github.com/tensorflow/tensorflow
+[ ! -f bazel.zip ] && wget https://github.com/bazelbuild/bazel/releases/download/0.5.1/bazel-0.5.1-dist.zip -O bazel.zip
+mkdir -p bazel
+cd bazel
+unzip ../bazel.zip
+./compile.sh
+cd ../
 
+## now bazel is built
+git clone https://github.com/tensorflow/tensorflow
 cd tensorflow
+./configure
 
-#./configure
-
-#bazel build //tensorflow/core:framework_headers_lib
-#
-#bazel build //tensorflow:libtensorflow.so
-bazel build //tensorflow:libtensorflow_cc.so
-
-#exit
-#
-#cd tensorflow/tensorflow
-#mkdir -p rnnlm
-#cd rnnlm
-#
-#[ ! -f BUILD ] && ln -s ../../../../src/tensorflow/BUILD
-#[ ! -f WORKSPACE ] && ln -s ../../../../src/tensorflow/WORKSPACE
-#[ ! -f loader_rnn.cc ] && ln -s ../../../../src/tensorflow/loader_rnn.cc
-#[ ! -d kaldi_src ] && ln -s ../../../../src/ kaldi_src
-#
-#bazel build --test_tmpdir=$TEST_TMPDIR :loader_rnn
-##bazel run -c opt :loader_rnn
-#
+tensorflow/contrib/makefile/download_dependencies.sh 
+bazel build //tensorflow:libtensorflow.so
+#bazel build //tensorflow:libtensorflow_cc.so

From 85fd7b2287ec9576ed6eb7668e22c679d4f1b0ce Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Mon, 12 Jun 2017 15:50:17 -0400
Subject: [PATCH 12/30] starting to write the tensorflow wrappers

---
 egs/ami/s5/local/tensorflow/ptb_word_lm.py |  1 -
 egs/ami/s5/local/tensorflow/reader.py      |  5 +++--
 egs/ami/s5/local/tensorflow/rnnlm.py       |  9 ++++++++-
 egs/ami/s5/local/tensorflow/run.sh         | 19 ++++++++++++-------
 src/tensorflow/Makefile                    | 16 ++++++----------
 src/{tensorflow => tfbin}/loader_rnn.cc    |  4 ++--
 6 files changed, 31 insertions(+), 23 deletions(-)
 rename src/{tensorflow => tfbin}/loader_rnn.cc (97%)

diff --git a/egs/ami/s5/local/tensorflow/ptb_word_lm.py b/egs/ami/s5/local/tensorflow/ptb_word_lm.py
index 1c48632f8d1..15040fd30ea 100644
--- a/egs/ami/s5/local/tensorflow/ptb_word_lm.py
+++ b/egs/ami/s5/local/tensorflow/ptb_word_lm.py
@@ -49,7 +49,6 @@
 from __future__ import print_function
 
 import sys
-
 sys.path.insert(0,"/home/hxu/.local/lib/python2.7/site-packages/")
 
 import inspect
diff --git a/egs/ami/s5/local/tensorflow/reader.py b/egs/ami/s5/local/tensorflow/reader.py
index 5ec03b19b51..6e02fcc8be4 100644
--- a/egs/ami/s5/local/tensorflow/reader.py
+++ b/egs/ami/s5/local/tensorflow/reader.py
@@ -28,7 +28,8 @@
 
 def _read_words(filename):
   with tf.gfile.GFile(filename, "r") as f:
-    return f.read().decode("utf-8").replace("\n", "<eos>").split()
+    return f.read().decode("utf-8").split()
+#    return f.read().decode("utf-8").replace("\n", "<eos>").split()
 
 
 def _build_vocab(filename):
@@ -76,7 +77,7 @@ def ptb_raw_data(data_path=None):
   valid_data = _file_to_word_ids(valid_path, word_to_id)
   test_data = _file_to_word_ids(test_path, word_to_id)
   vocabulary = len(word_to_id)
-  return train_data, valid_data, test_data, vocabulary
+  return train_data, valid_data, test_data, vocabulary, word_to_id
 
 
 def ptb_producer(raw_data, batch_size, num_steps, name=None):
diff --git a/egs/ami/s5/local/tensorflow/rnnlm.py b/egs/ami/s5/local/tensorflow/rnnlm.py
index dfc058e309d..6707755c214 100644
--- a/egs/ami/s5/local/tensorflow/rnnlm.py
+++ b/egs/ami/s5/local/tensorflow/rnnlm.py
@@ -70,6 +70,8 @@
                     "Where the training/test data is stored.")
 flags.DEFINE_string("save_path", None,
                     "Model output directory.")
+flags.DEFINE_string("wordlist_save_path", None,
+                    "wordmap output directory.")
 flags.DEFINE_bool("use_fp16", False,
                   "Train using 16-bit floats instead of 32bit floats")
 
@@ -367,7 +369,12 @@ def main(_):
     raise ValueError("Must set --data_path to PTB data directory")
 
   raw_data = reader.ptb_raw_data(FLAGS.data_path)
-  train_data, valid_data, test_data, _ = raw_data
+  train_data, valid_data, test_data, _, word_map = raw_data
+
+  with open(FLAGS.wordlist_save_path, "w") as wmap_file:
+    count_pairs = sorted(word_map.items(), key=lambda x: (x[1], x[0]))
+    for k, v in count_pairs: 
+      wmap_file.write(str(k) + " " + str(v) + "\n")
 
   config = get_config()
   eval_config = get_config()
diff --git a/egs/ami/s5/local/tensorflow/run.sh b/egs/ami/s5/local/tensorflow/run.sh
index dfc5a0749ba..0e576796338 100755
--- a/egs/ami/s5/local/tensorflow/run.sh
+++ b/egs/ami/s5/local/tensorflow/run.sh
@@ -6,13 +6,18 @@ model_type=small
 dir=data/tensorflow/
 mkdir -p $dir
 
-#cat data/$data_type/train/text | awk '{for(i=2;i<=NF;i++)print $i}' | sort | uniq -c | sort -k1nr | head -n 9998 | awk '{print $2}' > $dir/wordlist
-#
-#for i in train dev eval; do
-#  cat data/$data_type/$i/text | awk -v w=$dir/wordlist 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=2;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' | sed "s=^= =g" > $dir/$i.txt
-#done
+#echo "<s>" > $dir/wordlist
+#echo "</s>" >> $dir/wordlist
 
+# num-words is 10000 - 3 (bos, eos and <oos>)
 
-#python local/tensorflow/rnnlm.py --data_path=$dir --model=small --save_path=$dir/model.small
-python local/tensorflow/rnnlm.py --data_path=$dir --model=medium --save_path=$dir/model.medium
+cat data/$data_type/train/text | awk '{for(i=2;i<=NF;i++)print $i}' | sort | uniq -c | sort -k1nr | head -n 9997 | awk '{print $2}' > $dir/wordlist
+
+for i in train dev eval; do
+  cat data/$data_type/$i/text | awk -v w=$dir/wordlist 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=2;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' | sed "s=^= <s> =g" | sed "s=$= </s>=" > $dir/$i.txt
+done
+
+
+python local/tensorflow/rnnlm.py --data_path=$dir --model=small --save_path=$dir/model.small --wordlist_save_path=$dir/wordlist.rnn
+#python local/tensorflow/rnnlm.py --data_path=$dir --model=medium --save_path=$dir/model.medium
 #python local/tensorflow/rnnlm.py --data_path=$dir --model=large --save_path=$dir/model.large
diff --git a/src/tensorflow/Makefile b/src/tensorflow/Makefile
index f214a66f88a..c9ae405d8f2 100644
--- a/src/tensorflow/Makefile
+++ b/src/tensorflow/Makefile
@@ -1,22 +1,18 @@
 include ../kaldi.mk
 
 TENSORFLOW = ../../tools/tensorflow
-KALDI_ROOT = ../../
-all:
-
-#EXTRA_CXXFLAGS = -Wno-sign-compare -fPIC -I ../ -I $(KALDI_ROOT)/src -DKALDI_NO_PORTAUDIO -I $(TENSORFLOW)/third_party/eigen3 -I $(BAZEL)/third_party/protobuf/3.0.0/src/ \
-#                 -I$(TENSORFLOW)/bazel-genfiles -I $(TENSORFLOW) \
-#  -I $(TENSORFLOW)/tensorflow/contrib/makefile/gen/protobuf/include/ -I $(TENSORFLOW)/tensorflow/contrib/makefile/downloads/eigen \
 
-EXTRA_CXXFLAGS = -Wno-sign-compare -fPIC -I$(TENSORFLOW)/bazel-tensorflow/external/protobuf/src -I$(TENSORFLOW)/bazel-genfiles -I$(TENSORFLOW) -I$(TENSORFLOW)/tensorflow/contrib/makefile/downloads/eigen/
-#EXTRA_CXXFLAGS = -Wno-sign-compare -fPIC -I$(TENSORFLOW)/bazel-tensorflow/external/protobuf/src -I$(TENSORFLOW)/bazel-genfiles -I$(TENSORFLOW) -I$(TENSORFLOW)/third_party/eigen3
+all:
 
-BINFILES = loader_rnn
+EXTRA_CXXFLAGS = -Wno-sign-compare -I$(TENSORFLOW)/bazel-tensorflow/external/protobuf/src -I$(TENSORFLOW)/bazel-genfiles -I$(TENSORFLOW) -I$(TENSORFLOW)/tensorflow/contrib/makefile/downloads/eigen/
+#EXTRA_CXXFLAGS = -Wno-sign-compare -fPIC -I$(TENSORFLOW)/bazel-tensorflow/external/protobuf/src -I$(TENSORFLOW)/bazel-genfiles -I$(TENSORFLOW) -I$(TENSORFLOW)/tensorflow/contrib/makefile/downloads/eigen/
 
-OBJFILES =
+OBJFILES = tensorflow-rnnlm-lib.o
 
 TESTFILES =
 
+LIBNAME = kaldi-tensorflow-rnnlm
+
 ADDLIBS = ../lm/kaldi-lm.a ../util/kaldi-util.a ../thread/kaldi-thread.a \
           ../matrix/kaldi-matrix.a ../base/kaldi-base.a ../../tools/tensorflow/bazel-bin/tensorflow/tensorflow_cc.so
 
diff --git a/src/tensorflow/loader_rnn.cc b/src/tfbin/loader_rnn.cc
similarity index 97%
rename from src/tensorflow/loader_rnn.cc
rename to src/tfbin/loader_rnn.cc
index 7034b920060..45ddc89fc64 100644
--- a/src/tensorflow/loader_rnn.cc
+++ b/src/tfbin/loader_rnn.cc
@@ -62,10 +62,10 @@ int main(int argc, char* argv[]) {
   status = session->Run(std::vector<std::pair<string, tensorflow::Tensor>>(), {"Train/Model/test_initial_state"}, {}, &state);
 
   for (int32 word_out = 0; word_out < 10000; word_out++) {
-    Tensor in_word(DT_INT32, {1, 1});
+    Tensor in_word(tensorflow::DT_INT32, {1, 1});
     in_word.scalar<int32>()() = (word_out + 9999) % 10000; 
 
-    Tensor out_word(DT_INT32, {1, 1});
+    Tensor out_word(tensorflow::DT_INT32, {1, 1});
     out_word.scalar<int32>()() = word_out; 
 
     // num-layers

From 5c19b09f45313df82e6de1f8ee90e2fd5f323691 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Mon, 12 Jun 2017 15:53:46 -0400
Subject: [PATCH 13/30] include the h and cc files; delete some of the unneeded
 files

---
 egs/ami/s5/local/tensorflow/load.py        |  22 --
 egs/ami/s5/local/tensorflow/loader.cc      |  88 -----
 egs/ami/s5/local/tensorflow/ptb_word_lm.py | 409 ---------------------
 src/tensorflow/tensorflow-rnnlm-lib.cc     | 195 ++++++++++
 src/tensorflow/tensorflow-rnnlm-lib.h      | 103 ++++++
 5 files changed, 298 insertions(+), 519 deletions(-)
 delete mode 100644 egs/ami/s5/local/tensorflow/load.py
 delete mode 100644 egs/ami/s5/local/tensorflow/loader.cc
 delete mode 100644 egs/ami/s5/local/tensorflow/ptb_word_lm.py
 create mode 100644 src/tensorflow/tensorflow-rnnlm-lib.cc
 create mode 100644 src/tensorflow/tensorflow-rnnlm-lib.h

diff --git a/egs/ami/s5/local/tensorflow/load.py b/egs/ami/s5/local/tensorflow/load.py
deleted file mode 100644
index 0d0959aa746..00000000000
--- a/egs/ami/s5/local/tensorflow/load.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import sys
-
-sys.path.insert(0,"/home/hxu/.local/lib/python2.7/site-packages/")
-
-import tensorflow as tf
-import numpy as np
-#config = tf.ConfigProto(device_count = {'GPU': 0} )
-
-#with tf.Session(config=config) as sess:
-with tf.Session() as sess:
-    a = tf.Variable(5.5, name='a')
-    b = tf.Variable(6.6, name='b')
-    c = tf.multiply(a, b, name="c")
-
-    sess.run(tf.global_variables_initializer())
-
-    print a.eval() # 5.0
-    print b.eval() # 6.0
-    print c.eval() # 30.0
-    
-    tf.train.write_graph(sess.graph_def, 'models/', 'graph.pb', as_text=False)
-
diff --git a/egs/ami/s5/local/tensorflow/loader.cc b/egs/ami/s5/local/tensorflow/loader.cc
deleted file mode 100644
index b02b1f4b853..00000000000
--- a/egs/ami/s5/local/tensorflow/loader.cc
+++ /dev/null
@@ -1,88 +0,0 @@
-#include "tensorflow/core/public/session.h"
-#include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/protobuf/meta_graph.pb.h"
-
-
-using namespace tensorflow;
-
-int main(int argc, char* argv[]) {
-  // Initialize a tensorflow session
-  Session* session;
-  Status status = NewSession(SessionOptions(), &session);
-  if (!status.ok()) {
-    std::cout << status.ToString() << "\n";
-    return 1;
-  }
-
-  const string pathToGraph = "/export/b02/hxu/TensorFlow/save_load/models/m.meta";
-  const string checkpointPath = "/export/b02/hxu/TensorFlow/save_load/models/m";
-
-  // Read in the protobuf graph we exported
-  // (The path seems to be relative to the cwd. Keep this in mind
-  // when using `bazel run` since the cwd isn't where you call
-  // `bazel run` but from inside a temp folder.)
-  MetaGraphDef graph_def;
-  status = ReadBinaryProto(Env::Default(), pathToGraph, &graph_def);
-  if (!status.ok()) {
-    std::cout << status.ToString() << "\n";
-    return 1;
-  }
-
-  // Add the graph to the session
-  status = session->Create(graph_def.graph_def());
-  if (!status.ok()) {
-    std::cout << status.ToString() << "\n";
-    return 1;
-  }
-
-  Tensor checkpointPathTensor(DT_STRING, TensorShape());
-  checkpointPathTensor.scalar<std::string>()() = checkpointPath;
-  
-  status = session->Run(
-            {{ graph_def.saver_def().filename_tensor_name(), checkpointPathTensor },},
-            {},
-            {graph_def.saver_def().restore_op_name()},
-            nullptr);
-  if (!status.ok()) {
-    std::cout << status.ToString() << "\n";
-    return 1;
-  }
-
-  // Setup inputs and outputs:
-
-  Tensor a(DT_FLOAT, TensorShape());
-  a.scalar<float>()() = 5.5;
-
-  Tensor b(DT_FLOAT, TensorShape());
-  b.scalar<float>()() = 6.6;
-
-  std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
-    { "a", a },
-    { "b", b },
-  };
-
-  // The session will initialize the outputs
-  std::vector<tensorflow::Tensor> outputs;
-
-  // Run the session, evaluating our "c" operation from the graph
-  status = session->Run(inputs, {"output"}, {}, &outputs);
-  if (!status.ok()) {
-    std::cout << status.ToString() << "\n";
-    return 1;
-  }
-
-  // Grab the first output (we only evaluated one graph node: "c")
-  // and convert the node to a scalar representation.
-  auto output_c = outputs[0].scalar<float>();
-
-  // (There are similar methods for vectors and matrices here:
-  // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/tensor.h)
-
-  // Print the results
-  std::cout << outputs[0].DebugString() << "\n"; // Tensor<type: float shape: [] values: 30>
-  std::cout << output_c() << "\n"; // 30
-
-  // Free any resources used by the session
-  session->Close();
-  return 0;
-}
diff --git a/egs/ami/s5/local/tensorflow/ptb_word_lm.py b/egs/ami/s5/local/tensorflow/ptb_word_lm.py
deleted file mode 100644
index 15040fd30ea..00000000000
--- a/egs/ami/s5/local/tensorflow/ptb_word_lm.py
+++ /dev/null
@@ -1,409 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Example / benchmark for building a PTB LSTM model.
-Trains the model described in:
-(Zaremba, et. al.) Recurrent Neural Network Regularization
-http://arxiv.org/abs/1409.2329
-There are 3 supported model configurations:
-===========================================
-| config | epochs | train | valid  | test
-===========================================
-| small  | 13     | 37.99 | 121.39 | 115.91
-| medium | 39     | 48.45 |  86.16 |  82.07
-| large  | 55     | 37.87 |  82.62 |  78.29
-The exact results may vary depending on the random initialization.
-The hyperparameters used in the model:
-- init_scale - the initial scale of the weights
-- learning_rate - the initial value of the learning rate
-- max_grad_norm - the maximum permissible norm of the gradient
-- num_layers - the number of LSTM layers
-- num_steps - the number of unrolled steps of LSTM
-- hidden_size - the number of LSTM units
-- max_epoch - the number of epochs trained with the initial learning rate
-- max_max_epoch - the total number of epochs for training
-- keep_prob - the probability of keeping weights in the dropout layer
-- lr_decay - the decay of the learning rate for each epoch after "max_epoch"
-- batch_size - the batch size
-The data required for this example is in the data/ dir of the
-PTB dataset from Tomas Mikolov's webpage:
-$ wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
-$ tar xvf simple-examples.tgz
-To run:
-$ python ptb_word_lm.py --data_path=simple-examples/data/
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import sys
-sys.path.insert(0,"/home/hxu/.local/lib/python2.7/site-packages/")
-
-import inspect
-import time
-
-import numpy as np
-import tensorflow as tf
-
-import reader
-
-flags = tf.flags
-logging = tf.logging
-
-flags.DEFINE_string(
-    "model", "small",
-    "A type of model. Possible options are: small, medium, large.")
-flags.DEFINE_string("data_path", None,
-                    "Where the training/test data is stored.")
-flags.DEFINE_string("save_path", None,
-                    "Model output directory.")
-flags.DEFINE_bool("use_fp16", False,
-                  "Train using 16-bit floats instead of 32bit floats")
-
-FLAGS = flags.FLAGS
-
-
-def data_type():
-  return tf.float16 if FLAGS.use_fp16 else tf.float32
-
-
-class PTBInput(object):
-  """The input data."""
-
-  def __init__(self, config, data, name=None):
-    self.batch_size = batch_size = config.batch_size
-    self.num_steps = num_steps = config.num_steps
-    self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
-    self.input_data, self.targets = reader.ptb_producer(
-        data, batch_size, num_steps, name=name)
-
-
-class PTBModel(object):
-  """The PTB model."""
-
-  def __init__(self, is_training, config, input_):
-    self._input = input_
-
-    batch_size = input_.batch_size
-    num_steps = input_.num_steps
-    size = config.hidden_size
-    vocab_size = config.vocab_size
-
-    # Slightly better results can be obtained with forget gate biases
-    # initialized to 1 but the hyperparameters of the model would need to be
-    # different than reported in the paper.
-    def lstm_cell():
-      # With the latest TensorFlow source code (as of Mar 27, 2017),
-      # the BasicLSTMCell will need a reuse parameter which is unfortunately not
-      # defined in TensorFlow 1.0. To maintain backwards compatibility, we add
-      # an argument check here:
-      if 'reuse' in inspect.getargspec(
-          tf.contrib.rnn.BasicLSTMCell.__init__).args:
-        return tf.contrib.rnn.BasicLSTMCell(
-            size, forget_bias=0.0, state_is_tuple=True,
-            reuse=tf.get_variable_scope().reuse)
-      else:
-        return tf.contrib.rnn.BasicLSTMCell(
-            size, forget_bias=0.0, state_is_tuple=True)
-    attn_cell = lstm_cell
-    if is_training and config.keep_prob < 1:
-      def attn_cell():
-        return tf.contrib.rnn.DropoutWrapper(
-            lstm_cell(), output_keep_prob=config.keep_prob)
-    cell = tf.contrib.rnn.MultiRNNCell(
-        [attn_cell() for _ in range(config.num_layers)], state_is_tuple=True)
-
-    self._initial_state = cell.zero_state(batch_size, data_type())
-
-
-    # first implement the less efficient version
-    test_word_in = tf.placeholder(tf.int32, [1, 1])
-    test_word_out = tf.placeholder(tf.int32, [1, 1])
-    test_input_state_c = tf.placeholder(tf.float32, [1, size])
-    test_input_state_h = tf.placeholder(tf.float32, [1, size])
-    test_input_state = tf.contrib.rnn.LSTMStateTuple(test_input_state_c, test_input_state_h)
-
-#    print ("want to be", self._initial_state)
-#    print ("it actually is ", input_state)
-    with tf.device("/cpu:0"):
-      embedding = tf.get_variable(
-          "embedding", [vocab_size, size], dtype=data_type())
-
-#      print("should be ", input_.input_data)
-#      print("is ", test_word)
-
-      inputs = tf.nn.embedding_lookup(embedding, input_.input_data)
-      test_inputs = tf.nn.embedding_lookup(embedding, test_word_in)
-#      print("should be ", inputs)
-#      print("is ", test_inputs)
-
-    # test time
-    with tf.variable_scope("RNN"):
-#      tf.get_variable_scope().reuse_variables()
-      (test_cell_output, test_output_state) = cell(test_inputs[:, 0, :], [test_input_state])
-
-    softmax_w = tf.get_variable(
-        "softmax_w", [size, vocab_size], dtype=data_type())
-    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
-
-    test_logits = tf.matmul(test_cell_output, softmax_w) + softmax_b
-    test_softmaxed = tf.nn.softmax(test_logits)
-    print("test softmaxed is ", test_softmaxed)
-    p_word = test_softmaxed[0, test_word_out[0,0]]
-#    p_word = tf.float32(test_softmaxed[:, test_word_out], name="p_out")
-
-    if is_training and config.keep_prob < 1:
-      inputs = tf.nn.dropout(inputs, config.keep_prob)
-
-    # Simplified version of models/tutorials/rnn/rnn.py's rnn().
-    # This builds an unrolled LSTM for tutorial purposes only.
-    # In general, use the rnn() or state_saving_rnn() from rnn.py.
-    #
-    # The alternative version of the code below is:
-    #
-    # inputs = tf.unstack(inputs, num=num_steps, axis=1)
-    # outputs, state = tf.contrib.rnn.static_rnn(
-    #     cell, inputs, initial_state=self._initial_state)
-    outputs = []
-    state = self._initial_state
-    with tf.variable_scope("RNN"):
-      for time_step in range(num_steps):
-        if time_step > -1: tf.get_variable_scope().reuse_variables()
-        (cell_output, state) = cell(inputs[:, time_step, :], state)
-        outputs.append(cell_output)
-
-    output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])
-    logits = tf.matmul(output, softmax_w) + softmax_b
-    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
-        [logits],
-        [tf.reshape(input_.targets, [-1])],
-        [tf.ones([batch_size * num_steps], dtype=data_type())])
-    self._cost = cost = tf.reduce_sum(loss) / batch_size
-    self._final_state = state
-
-    if not is_training:
-      return
-
-    self._lr = tf.Variable(0.0, trainable=False)
-    tvars = tf.trainable_variables()
-    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
-                                      config.max_grad_norm)
-    optimizer = tf.train.GradientDescentOptimizer(self._lr)
-    self._train_op = optimizer.apply_gradients(
-        zip(grads, tvars),
-        global_step=tf.contrib.framework.get_or_create_global_step())
-
-    self._new_lr = tf.placeholder(
-        tf.float32, shape=[], name="new_learning_rate")
-    self._lr_update = tf.assign(self._lr, self._new_lr)
-
-  def assign_lr(self, session, lr_value):
-    session.run(self._lr_update, feed_dict={self._new_lr: lr_value})
-
-  @property
-  def input(self):
-    return self._input
-
-  @property
-  def initial_state(self):
-    return self._initial_state
-
-  @property
-  def cost(self):
-    return self._cost
-
-  @property
-  def final_state(self):
-    return self._final_state
-
-  @property
-  def lr(self):
-    return self._lr
-
-  @property
-  def train_op(self):
-    return self._train_op
-
-
-class SmallConfig(object):
-  """Small config."""
-  init_scale = 0.1
-  learning_rate = 1.0
-  max_grad_norm = 5
-  num_layers = 2
-  num_steps = 20
-  hidden_size = 200
-  max_epoch = 4
-  max_max_epoch = 13
-  keep_prob = 1.0
-  lr_decay = 0.5
-  batch_size = 20
-  vocab_size = 10000
-
-
-class MediumConfig(object):
-  """Medium config."""
-  init_scale = 0.05
-  learning_rate = 1.0
-  max_grad_norm = 5
-  num_layers = 2
-  num_steps = 35
-  hidden_size = 650
-  max_epoch = 6
-  max_max_epoch = 39
-  keep_prob = 0.5
-  lr_decay = 0.8
-  batch_size = 20
-  vocab_size = 10000
-
-
-class LargeConfig(object):
-  """Large config."""
-  init_scale = 0.04
-  learning_rate = 1.0
-  max_grad_norm = 10
-  num_layers = 2
-  num_steps = 35
-  hidden_size = 1500
-  max_epoch = 14
-  max_max_epoch = 55
-  keep_prob = 0.35
-  lr_decay = 1 / 1.15
-  batch_size = 20
-  vocab_size = 10000
-
-
-class TestConfig(object):
-  """Tiny config, for testing."""
-  init_scale = 0.1
-  learning_rate = 1.0
-  max_grad_norm = 1
-  num_layers = 1
-  num_steps = 2
-  hidden_size = 2
-  max_epoch = 1
-  max_max_epoch = 1
-  keep_prob = 1.0
-  lr_decay = 0.5
-  batch_size = 20
-  vocab_size = 10000
-
-
-def run_epoch(session, model, eval_op=None, verbose=False):
-  """Runs the model on the given data."""
-  start_time = time.time()
-  costs = 0.0
-  iters = 0
-  state = session.run(model.initial_state)
-
-  fetches = {
-      "cost": model.cost,
-      "final_state": model.final_state,
-  }
-  if eval_op is not None:
-    fetches["eval_op"] = eval_op
-
-  for step in range(model.input.epoch_size):
-    feed_dict = {}
-    for i, (c, h) in enumerate(model.initial_state):
-      feed_dict[c] = state[i].c
-      feed_dict[h] = state[i].h
-
-    vals = session.run(fetches, feed_dict)
-    cost = vals["cost"]
-    state = vals["final_state"]
-
-    costs += cost
-    iters += model.input.num_steps
-
-    if verbose and step % (model.input.epoch_size // 10) == 10:
-      print("%.3f perplexity: %.3f speed: %.0f wps" %
-            (step * 1.0 / model.input.epoch_size, np.exp(costs / iters),
-             iters * model.input.batch_size / (time.time() - start_time)))
-
-  return np.exp(costs / iters)
-
-
-def get_config():
-  if FLAGS.model == "small":
-    return SmallConfig()
-  elif FLAGS.model == "medium":
-    return MediumConfig()
-  elif FLAGS.model == "large":
-    return LargeConfig()
-  elif FLAGS.model == "test":
-    return TestConfig()
-  else:
-    raise ValueError("Invalid model: %s", FLAGS.model)
-
-
-def main(_):
-  if not FLAGS.data_path:
-    raise ValueError("Must set --data_path to PTB data directory")
-
-  raw_data = reader.ptb_raw_data(FLAGS.data_path)
-  train_data, valid_data, test_data, _ = raw_data
-
-  config = get_config()
-  eval_config = get_config()
-  eval_config.batch_size = 1
-  eval_config.num_steps = 1
-
-  with tf.Graph().as_default():
-    initializer = tf.random_uniform_initializer(-config.init_scale,
-                                                config.init_scale)
-
-    with tf.name_scope("Train"):
-      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
-      with tf.variable_scope("Model", reuse=None, initializer=initializer):
-        m = PTBModel(is_training=True, config=config, input_=train_input)
-      tf.summary.scalar("Training Loss", m.cost)
-      tf.summary.scalar("Learning Rate", m.lr)
-
-    with tf.name_scope("Valid"):
-      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
-      with tf.variable_scope("Model", reuse=True, initializer=initializer):
-        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
-      tf.summary.scalar("Validation Loss", mvalid.cost)
-
-    with tf.name_scope("Test"):
-      test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
-      with tf.variable_scope("Model", reuse=True, initializer=initializer):
-        mtest = PTBModel(is_training=False, config=eval_config,
-                         input_=test_input)
-
-    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
-    with sv.managed_session() as session:
-      for i in range(config.max_max_epoch):
-        lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
-        m.assign_lr(session, config.learning_rate * lr_decay)
-
-        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
-        train_perplexity = run_epoch(session, m, eval_op=m.train_op,
-                                     verbose=True)
-        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
-        valid_perplexity = run_epoch(session, mvalid)
-        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
-
-      test_perplexity = run_epoch(session, mtest)
-      print("Test Perplexity: %.3f" % test_perplexity)
-
-      if FLAGS.save_path:
-        print("Saving model to %s." % FLAGS.save_path)
-        sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
-
-
-if __name__ == "__main__":
-  tf.app.run()
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.cc b/src/tensorflow/tensorflow-rnnlm-lib.cc
new file mode 100644
index 00000000000..9db6645c3e3
--- /dev/null
+++ b/src/tensorflow/tensorflow-rnnlm-lib.cc
@@ -0,0 +1,195 @@
+// tensorflow/tensorflow-rnnlm-lib.cc
+
+#include <utility>
+#include <fstream>
+
+#include "tensorflow/tensorflow-rnnlm-lib.h"
+#include "util/stl-utils.h"
+#include "util/text-utils.h"
+
+using tensorflow::Status;
+
+namespace kaldi {
+using tf_rnnlm::KaldiTfRnnlmWrapper;
+using tf_rnnlm::TfRnnlmDeterministicFst;
+using std::ifstream;
+
+KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
+    const KaldiTfRnnlmWrapperOpts &opts,
+    const std::string &rnn_wordlist,
+    const std::string &word_symbol_table_rxfilename, // TODO(hxu) will do this later
+    const std::string &unk_prob_rspecifier,
+    Session* session) {
+  session_ = session;
+
+  fst::SymbolTable *fst_word_symbols = NULL;
+  if (!(fst_word_symbols =
+        fst::SymbolTable::ReadText(word_symbol_table_rxfilename))) {
+    KALDI_ERR << "Could not read symbol table from file "
+        << word_symbol_table_rxfilename;
+  }
+
+  fst_label_to_word_.resize(fst_word_symbols->NumSymbols());
+
+  for (int32 i = 0; i < fst_label_to_word_.size(); ++i) {
+    fst_label_to_word_[i] = fst_word_symbols->Find(i);
+    if (fst_label_to_word_[i] == "") {
+      KALDI_ERR << "Could not find word for integer " << i << "in the word "
+          << "symbol table, mismatched symbol table or you have discoutinuous "
+          << "integers in your symbol table?";
+    }
+  }
+
+  fst_label_to_rnn_label_.resize(fst_word_symbols->NumSymbols(), -1);
+
+  { // input.
+    ifstream ifile(rnn_wordlist.c_str());
+    int id;
+    string word;
+    int i = 0;
+    while (ifile >> id >> word) { // TODO(hxu) ugly fix for cued-rnnlm's bug
+                                  // will implement a better fix later
+      if (word == "[UNK]") {
+        word = "<unk>";
+      } else if (word == "<OOS>") {
+        continue;
+      }
+      i++;
+      assert(i == id + 1);
+      rnn_label_to_word_.push_back(word);
+
+      int fst_label = fst_word_symbols->Find(rnn_label_to_word_[i]);
+      KALDI_ASSERT(fst::SymbolTable::kNoSymbol != fst_label);
+      fst_label_to_rnn_label_[fst_label] = i;
+    }
+    bos_ = 1;
+    eos_ = 0; // TODO(hxu)
+  }
+  rnn_label_to_word_.push_back("<OOS>");
+  
+  for (int i = 0; i < fst_label_to_rnn_label_.size(); i++) {
+    if (fst_label_to_rnn_label_[i] == -1) {
+      fst_label_to_rnn_label_[i] = rnn_label_to_word_.size() - 1;
+    }
+  }
+
+
+}
+
+BaseFloat KaldiTfRnnlmWrapper::GetLogProb(
+    int32 word, const std::vector<int32> &wseq,
+    const Tensor &context_in,
+    tensorflow::Tensor *context_out) {
+
+  std::vector<std::string> wseq_symbols(wseq.size());
+  for (int32 i = 0; i < wseq_symbols.size(); ++i) {
+    KALDI_ASSERT(wseq[i] < label_to_word_.size());
+    wseq_symbols[i] = label_to_word_[wseq[i]];
+  }
+
+  std::vector<std::pair<string, Tensor>> inputs;
+
+  Tensor lastword(tensorflow::DT_INT32, {1, 1});
+  Tensor thisword(tensorflow::DT_INT32, {1, 1});
+
+  lastword.scalar<int32>()() = (wseq.size() == 0? bos_: wseq.back());
+  thisword.scalar<int32>()() = word;
+
+  inputs = {
+    {"Train/Model/test_word_in", lastword},
+    {"Train/Model/test_word_out", thisword},
+    {"Train/Model/test_state", context_in},
+  };
+
+  // The session will initialize the outputs
+  std::vector<tensorflow::Tensor> outputs;
+
+  // Run the session, evaluating our "c" operation from the graph
+  Status status = session_->Run(inputs, {"Train/Model/test_out", "Train/Model/test_state_out"}, {}, &outputs);
+
+//  return rnnlm_.computeConditionalLogprob(label_to_word_[word], wseq_symbols,
+//                                          context_in, context_out);
+  if (context_out != NULL)
+    *context_out = outputs[1];
+  return outputs[0].scalar<float>()();
+}
+
+void KaldiTfRnnlmWrapper::GetInitialContext(Tensor *c) const {
+  std::vector<Tensor> state;
+  Status status = session_->Run(std::vector<std::pair<string, tensorflow::Tensor>>(), {"Train/Model/test_initial_state"}, {}, &state);
+  *c = state[0];
+}
+
+TfRnnlmDeterministicFst::TfRnnlmDeterministicFst(int32 max_ngram_order,
+                                             KaldiTfRnnlmWrapper *rnnlm) {
+  KALDI_ASSERT(rnnlm != NULL);
+  max_ngram_order_ = max_ngram_order;
+  rnnlm_ = rnnlm;
+
+  // Uses empty history for <s>.
+  std::vector<Label> bos;
+//  std::vector<float> bos_context(rnnlm->GetHiddenLayerSize(), 1.0);
+
+  Tensor initial_context;
+  rnnlm_->GetInitialContext(&initial_context);
+
+  state_to_wseq_.push_back(bos);
+  state_to_context_.push_back(initial_context);
+  wseq_to_state_[bos] = 0;
+  start_state_ = 0;
+}
+
+fst::StdArc::Weight TfRnnlmDeterministicFst::Final(StateId s) {
+  // At this point, we should have created the state.
+  KALDI_ASSERT(static_cast<size_t>(s) < state_to_wseq_.size());
+
+  std::vector<Label> wseq = state_to_wseq_[s];
+  BaseFloat logprob = rnnlm_->GetLogProb(rnnlm_->GetEos(), wseq,
+                                         state_to_context_[s], NULL);
+  return Weight(-logprob);
+}
+
+bool TfRnnlmDeterministicFst::GetArc(StateId s, Label ilabel, fst::StdArc *oarc) {
+  // At this point, we should have created the state.
+  KALDI_ASSERT(static_cast<size_t>(s) < state_to_wseq_.size());
+
+  std::vector<Label> wseq = state_to_wseq_[s];
+  tensorflow::Tensor new_context;
+
+  int32 rnn_word = rnnlm_->fst_label_to_rnn_label_[ilabel];
+  BaseFloat logprob = rnnlm_->GetLogProb(rnn_word, wseq,
+                                         state_to_context_[s], &new_context);
+
+  wseq.push_back(rnn_word);
+  if (max_ngram_order_ > 0) {
+    while (wseq.size() >= max_ngram_order_) {
+      // History state has at most <max_ngram_order_> - 1 words in the state.
+      wseq.erase(wseq.begin(), wseq.begin() + 1);
+    }
+  }
+
+  std::pair<const std::vector<Label>, StateId> wseq_state_pair(
+      wseq, static_cast<Label>(state_to_wseq_.size()));
+
+  // Attemps to insert the current <lseq_state_pair>. If the pair already exists
+  // then it returns false.
+  typedef MapType::iterator IterType;
+  std::pair<IterType, bool> result = wseq_to_state_.insert(wseq_state_pair);
+
+  // If the pair was just inserted, then also add it to <state_to_wseq_> and
+  // <state_to_context_>.
+  if (result.second == true) {
+    state_to_wseq_.push_back(wseq);
+    state_to_context_.push_back(new_context);
+  }
+
+  // Creates the arc.
+  oarc->ilabel = ilabel;
+  oarc->olabel = ilabel;
+  oarc->nextstate = result.first->second;
+  oarc->weight = Weight(-logprob);
+
+  return true;
+}
+
+}  // namespace kaldi
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.h b/src/tensorflow/tensorflow-rnnlm-lib.h
new file mode 100644
index 00000000000..b260ca5a3a2
--- /dev/null
+++ b/src/tensorflow/tensorflow-rnnlm-lib.h
@@ -0,0 +1,103 @@
+// Copyright 2017 Hainan Xu
+// wrapper for tensorflow rnnlm
+
+#ifndef KALDI_LM_TENSORFLOW_LIB_H_
+#define KALDI_LM_TENSORFLOW_LIB_H_
+
+#include <string>
+#include <vector>
+#include "util/stl-utils.h"
+#include "base/kaldi-common.h"
+#include "fstext/deterministic-fst.h"
+#include "util/common-utils.h"
+
+#include "tensorflow/core/public/session.h"
+
+using tensorflow::Session;
+using tensorflow::Tensor;
+
+namespace kaldi {
+namespace tf_rnnlm {
+
+struct KaldiTfRnnlmWrapperOpts {  // Names of the RNNLM's special symbols.
+  std::string unk_symbol;  // Symbol for out-of-vocabulary words.
+  std::string bos_symbol;  // Beginning-of-sentence symbol.
+  std::string eos_symbol;  // End-of-sentence symbol.
+
+  KaldiTfRnnlmWrapperOpts() : unk_symbol("<RNN_UNK>"), bos_symbol("<s>"), eos_symbol("</s>") {}
+
+  void Register(OptionsItf *opts) {  // Each option binds to its own member.
+    opts->Register("unk-symbol", &unk_symbol, "Symbol for out-of-vocabulary "
+                   "words in rnnlm.");
+    opts->Register("bos-symbol", &bos_symbol, "Beginning of sentence symbol in "
+                   "rnnlm.");
+    opts->Register("eos-symbol", &eos_symbol, "End of sentence symbol in "
+                   "rnnlm.");
+  }
+};
+
+class KaldiTfRnnlmWrapper {
+ public:
+  KaldiTfRnnlmWrapper(const KaldiTfRnnlmWrapperOpts &opts,
+                    const std::string &rnn_wordlist,
+                    const std::string &word_symbol_table_rxfilename,
+                    const std::string &unk_prob_rspecifier,
+                    Session* session);
+
+  int32 GetEos() const { return eos_; }
+  int32 GetBos() const { return bos_; }
+  void GetInitialContext(Tensor* context) const;
+
+  BaseFloat GetLogProb(int32 word, const std::vector<int32> &wseq,
+                       const Tensor &context_in,
+                       Tensor *context_out);
+
+  std::vector<int> fst_label_to_rnn_label_;
+  std::vector<std::string> rnn_label_to_word_;
+  std::vector<std::string> fst_label_to_word_;
+ private:
+
+  Session* session_;  // ptf not owned here
+  std::vector<std::string> label_to_word_;
+  int32 eos_;
+  int32 bos_;
+
+  KALDI_DISALLOW_COPY_AND_ASSIGN(KaldiTfRnnlmWrapper);
+};
+
+class TfRnnlmDeterministicFst
+    : public fst::DeterministicOnDemandFst<fst::StdArc> {
+ public:
+  typedef fst::StdArc::Weight Weight;
+  typedef fst::StdArc::StateId StateId;
+  typedef fst::StdArc::Label Label;
+
+  // Does not take ownership.
+  TfRnnlmDeterministicFst(int32 max_ngram_order, KaldiTfRnnlmWrapper *rnnlm);
+
+  // We cannot use "const" because the pure virtual function in the interface is
+  // not const.
+  virtual StateId Start() { return start_state_; }
+
+  // We cannot use "const" because the pure virtual function in the interface is
+  // not const.
+  virtual Weight Final(StateId s);
+
+  virtual bool GetArc(StateId s, Label ilabel, fst::StdArc* oarc);
+
+ private:
+  typedef unordered_map<std::vector<Label>,
+                        StateId, VectorHasher<Label> > MapType;
+  StateId start_state_;
+  MapType wseq_to_state_;
+  std::vector<std::vector<Label> > state_to_wseq_;
+
+  KaldiTfRnnlmWrapper *rnnlm_;
+  int32 max_ngram_order_;
+  std::vector<tensorflow::Tensor> state_to_context_;
+};
+
+}  // namespace tf_rnnlm
+}  // namespace kaldi
+
+#endif  // KALDI_LM_TENSORFLOW_LIB_H_

From 6fa3f3fc143a54cc4a6392a43922db99595c0c18 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Mon, 12 Jun 2017 17:31:50 -0400
Subject: [PATCH 14/30] add binary; undebugged

---
 src/tensorflow/tensorflow-rnnlm-lib.cc  |  42 +++++++-
 src/tensorflow/tensorflow-rnnlm-lib.h   |   9 +-
 src/tfbin/Makefile                      |  26 +++++
 src/tfbin/lattice-lmrescore-tf-rnnlm.cc | 136 ++++++++++++++++++++++++
 src/tfbin/loader_rnn.cc                 | 110 -------------------
 5 files changed, 209 insertions(+), 114 deletions(-)
 create mode 100644 src/tfbin/Makefile
 create mode 100644 src/tfbin/lattice-lmrescore-tf-rnnlm.cc
 delete mode 100644 src/tfbin/loader_rnn.cc

diff --git a/src/tensorflow/tensorflow-rnnlm-lib.cc b/src/tensorflow/tensorflow-rnnlm-lib.cc
index 9db6645c3e3..7121b66d31f 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.cc
+++ b/src/tensorflow/tensorflow-rnnlm-lib.cc
@@ -3,6 +3,10 @@
 #include <utility>
 #include <fstream>
 
+#include "tensorflow/core/public/session.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/protobuf/meta_graph.pb.h"
+
 #include "tensorflow/tensorflow-rnnlm-lib.h"
 #include "util/stl-utils.h"
 #include "util/text-utils.h"
@@ -19,8 +23,42 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
     const std::string &rnn_wordlist,
     const std::string &word_symbol_table_rxfilename, // TODO(hxu) will do this later
     const std::string &unk_prob_rspecifier,
-    Session* session) {
-  session_ = session;
+//    Session* session) {
+    const std::string &tf_model_path) {
+//  session_ = session;
+  {
+    string graph_path = tf_model_path + "/meta";
+
+    Status status = tensorflow::NewSession(tensorflow::SessionOptions(), &session_);
+    if (!status.ok()) {
+      KALDI_ERR << status.ToString();
+    }
+
+    tensorflow::MetaGraphDef graph_def;
+    status = tensorflow::ReadBinaryProto(tensorflow::Env::Default(), graph_path, &graph_def);
+    if (!status.ok()) {
+      KALDI_ERR << status.ToString();
+    }
+
+    // Add the graph to the session
+    status = session_->Create(graph_def.graph_def());
+    if (!status.ok()) {
+      KALDI_ERR << status.ToString();
+    }
+
+    Tensor checkpointPathTensor(tensorflow::DT_STRING, tensorflow::TensorShape());
+    checkpointPathTensor.scalar<std::string>()() = tf_model_path;
+    
+    status = session_->Run(
+              {{ graph_def.saver_def().filename_tensor_name(), checkpointPathTensor },},
+              {},
+              {graph_def.saver_def().restore_op_name()},
+              nullptr);
+    if (!status.ok()) {
+      KALDI_ERR << status.ToString();
+    }
+
+  }
 
   fst::SymbolTable *fst_word_symbols = NULL;
   if (!(fst_word_symbols =
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.h b/src/tensorflow/tensorflow-rnnlm-lib.h
index b260ca5a3a2..3e0ec14f1a2 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.h
+++ b/src/tensorflow/tensorflow-rnnlm-lib.h
@@ -42,7 +42,12 @@ class KaldiTfRnnlmWrapper {
                     const std::string &rnn_wordlist,
                     const std::string &word_symbol_table_rxfilename,
                     const std::string &unk_prob_rspecifier,
-                    Session* session);
+                    const std::string &tf_model_path);
+//                    Session* session);
+
+  ~KaldiTfRnnlmWrapper() {
+    delete session_;
+  }
 
   int32 GetEos() const { return eos_; }
   int32 GetBos() const { return bos_; }
@@ -57,7 +62,7 @@ class KaldiTfRnnlmWrapper {
   std::vector<std::string> fst_label_to_word_;
  private:
 
-  Session* session_;  // ptf not owned here
+  Session* session_;  // ptf owned here
   std::vector<std::string> label_to_word_;
   int32 eos_;
   int32 bos_;
diff --git a/src/tfbin/Makefile b/src/tfbin/Makefile
new file mode 100644
index 00000000000..35def55cd7d
--- /dev/null
+++ b/src/tfbin/Makefile
@@ -0,0 +1,26 @@
+
+TENSORFLOW = ../../tools/tensorflow
+
+all:
+
+EXTRA_CXXFLAGS = -Wno-sign-compare -I$(TENSORFLOW)/bazel-tensorflow/external/protobuf/src -I$(TENSORFLOW)/bazel-genfiles -I$(TENSORFLOW) -I$(TENSORFLOW)/tensorflow/contrib/makefile/downloads/eigen/
+include ../kaldi.mk
+
+BINFILES = lattice-lmrescore-tf-rnnlm
+
+OBJFILES =
+
+
+
+TESTFILES =
+
+ADDLIBS = ../lat/kaldi-lat.a ../lm/kaldi-lm.a ../fstext/kaldi-fstext.a \
+          ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a ../util/kaldi-util.a \
+          ../thread/kaldi-thread.a ../matrix/kaldi-matrix.a \
+          ../base/kaldi-base.a ../tensorflow/kaldi-tensorflow-rnnlm.a \
+          ../../tools/tensorflow/bazel-bin/tensorflow/tensorflow_cc.so
+
+LDLIBS +=  -lz -ldl -fPIC -lrt
+LDLIBS += $(OTHERLIBS) -L$(TENSORFLOW)/bazel-bin/tensorflow -ltensorflow_cc
+
+include ../makefiles/default_rules.mk
diff --git a/src/tfbin/lattice-lmrescore-tf-rnnlm.cc b/src/tfbin/lattice-lmrescore-tf-rnnlm.cc
new file mode 100644
index 00000000000..0278759151f
--- /dev/null
+++ b/src/tfbin/lattice-lmrescore-tf-rnnlm.cc
@@ -0,0 +1,136 @@
+// tfbin/lattice-lmrescore-tf-rnnlm.cc
+
+// Copyright 2016  Hainan Xu
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "base/kaldi-common.h"
+#include "fstext/fstext-lib.h"
+#include "lat/kaldi-lattice.h"
+#include "lat/lattice-functions.h"
+#include "tensorflow/tensorflow-rnnlm-lib.h"
+#include "util/common-utils.h"
+
+int main(int argc, char *argv[]) {  // Returns 0 iff at least one lattice was written.
+  try {
+    using namespace kaldi;
+    using namespace kaldi::tf_rnnlm;
+    typedef kaldi::int32 int32;
+    typedef kaldi::int64 int64;
+
+    const char *usage =
+        "Rescores lattice with rnnlm. The LM will be wrapped into the\n"
+        "DeterministicOnDemandFst interface and the rescoring is done by\n"
+        "composing with the wrapped LM using a special type of composition\n"
+        "algorithm. Determinization will be applied on the composed lattice.\n"
+        "\n"
+        "Usage: lattice-lmrescore-tf-rnnlm [options] <rnnlm-wordlist> \\\n"
+        "             <word-symbol-table-rxfilename> <lattice-rspecifier> \\\n"
+        "             <rnnlm-rxfilename> <lattice-wspecifier>\n"
+        " e.g.: lattice-lmrescore-tf-rnnlm --lm-scale=-1.0 wordlist.rnn \\\n"
+        "                     words.txt ark:in.lats rnnlm ark:out.lats\n";
+
+    ParseOptions po(usage);
+    int32 max_ngram_order = 3;
+    BaseFloat lm_scale = 1.0;
+
+    po.Register("lm-scale", &lm_scale, "Scaling factor for language model "
+                "costs; frequently 1.0 or -1.0");
+    po.Register("max-ngram-order", &max_ngram_order, "If positive, limit the "
+                "rnnlm context to the given number, -1 means we are not going "
+                "to limit it.");
+
+    KaldiTfRnnlmWrapperOpts opts;
+    opts.Register(&po);
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 5) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string lats_rspecifier, rnn_word_list,
+        word_symbols_rxfilename, rnnlm_rxfilename, lats_wspecifier;
+    // Positional arguments; all five are required (checked above).
+
+    rnn_word_list = po.GetArg(1);
+    word_symbols_rxfilename = po.GetArg(2);
+    lats_rspecifier = po.GetArg(3);
+    rnnlm_rxfilename = po.GetArg(4);
+    lats_wspecifier = po.GetArg(5);
+
+    // Reads the language model.
+    KaldiTfRnnlmWrapper rnnlm(opts, rnn_word_list, word_symbols_rxfilename,
+                                "", rnnlm_rxfilename);
+
+    // Reads and writes as compact lattice.
+    SequentialCompactLatticeReader compact_lattice_reader(lats_rspecifier);
+    CompactLatticeWriter compact_lattice_writer(lats_wspecifier);
+
+    int32 n_done = 0, n_fail = 0;
+    for (; !compact_lattice_reader.Done(); compact_lattice_reader.Next()) {
+      std::string key = compact_lattice_reader.Key();
+      CompactLattice clat = compact_lattice_reader.Value();
+      compact_lattice_reader.FreeCurrent();
+
+      if (lm_scale != 0.0) {
+        // Before composing with the LM FST, we scale the lattice weights
+        // by the inverse of "lm_scale".  We'll later scale by "lm_scale".
+        // We do it this way so we can determinize and it will give the
+        // right effect (taking the "best path" through the LM) regardless
+        // of the sign of lm_scale.
+        fst::ScaleLattice(fst::GraphLatticeScale(1.0 / lm_scale), &clat);
+        ArcSort(&clat, fst::OLabelCompare<CompactLatticeArc>());
+
+        // Wraps the rnnlm into FST. We re-create it for each lattice to prevent
+        // memory usage increasing with time.
+        TfRnnlmDeterministicFst rnnlm_fst(max_ngram_order, &rnnlm);
+
+        // Composes lattice with language model.
+        CompactLattice composed_clat;
+        ComposeCompactLatticeDeterministic(clat, &rnnlm_fst, &composed_clat);
+
+        // Determinizes the composed lattice.
+        Lattice composed_lat;
+        ConvertLattice(composed_clat, &composed_lat);
+        Invert(&composed_lat);
+        CompactLattice determinized_clat;
+        DeterminizeLattice(composed_lat, &determinized_clat);
+        fst::ScaleLattice(fst::GraphLatticeScale(lm_scale), &determinized_clat);
+        if (determinized_clat.Start() == fst::kNoStateId) {
+          KALDI_WARN << "Empty lattice for utterance " << key
+              << " (incompatible LM?)";
+          n_fail++;
+        } else {
+          compact_lattice_writer.Write(key, determinized_clat);
+          n_done++;
+        }
+      } else {
+        // Zero scale so nothing to do.
+        n_done++;
+        compact_lattice_writer.Write(key, clat);
+      }
+    }
+
+    KALDI_LOG << "Done " << n_done << " lattices, failed for " << n_fail;
+    return (n_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
diff --git a/src/tfbin/loader_rnn.cc b/src/tfbin/loader_rnn.cc
deleted file mode 100644
index 45ddc89fc64..00000000000
--- a/src/tfbin/loader_rnn.cc
+++ /dev/null
@@ -1,110 +0,0 @@
-#include "tensorflow/core/public/session.h"
-#include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/protobuf/meta_graph.pb.h"
-
-#include "base/kaldi-common.h"
-#include "fstext/fstext-lib.h"
-#include "lat/kaldi-lattice.h"
-#include "lat/lattice-functions.h"
-#include "lm/kaldi-rnnlm.h"
-#include "lm/mikolov-rnnlm-lib.h"
-#include "util/common-utils.h"
-
-using namespace tensorflow;
-
-int main(int argc, char* argv[]) {
-//*
-  // Initialize a tensorflow session
-  Session* session;
-  Status status = NewSession(SessionOptions(), &session);
-  if (!status.ok()) {
-    std::cout << status.ToString() << "\n";
-    return 1;
-  }
-
-  const string pathToGraph = "/export/b02/hxu/TensorFlow/kaldi/egs/ami/s5/data/tensorflow/model.small.meta";
-  const string checkpointPath = "/export/b02/hxu/TensorFlow/kaldi/egs/ami/s5/data/tensorflow/model.small";
-
-  // Read in the protobuf graph we exported
-  // (The path seems to be relative to the cwd. Keep this in mind
-  // when using `bazel run` since the cwd isn't where you call
-  // `bazel run` but from inside a temp folder.)
-  MetaGraphDef graph_def;
-  status = ReadBinaryProto(Env::Default(), pathToGraph, &graph_def);
-  if (!status.ok()) {
-    std::cout << status.ToString() << "\n";
-    return 1;
-  }
-
-  // Add the graph to the session
-  status = session->Create(graph_def.graph_def());
-  if (!status.ok()) {
-    std::cout << status.ToString() << "\n";
-    return 1;
-  }
-
-  Tensor checkpointPathTensor(DT_STRING, TensorShape());
-  checkpointPathTensor.scalar<std::string>()() = checkpointPath;
-  
-  status = session->Run(
-            {{ graph_def.saver_def().filename_tensor_name(), checkpointPathTensor },},
-            {},
-            {graph_def.saver_def().restore_op_name()},
-            nullptr);
-  if (!status.ok()) {
-    std::cout << status.ToString() << "\n";
-    return 1;
-  }
-
-  // Setup inputs and outputs:
-  std::vector<Tensor> state;
-//  std::vector<Tensor> state(DT_FLOAT, {2, 2, 1, 200});
-  status = session->Run(std::vector<std::pair<string, tensorflow::Tensor>>(), {"Train/Model/test_initial_state"}, {}, &state);
-
-  for (int32 word_out = 0; word_out < 10000; word_out++) {
-    Tensor in_word(tensorflow::DT_INT32, {1, 1});
-    in_word.scalar<int32>()() = (word_out + 9999) % 10000; 
-
-    Tensor out_word(tensorflow::DT_INT32, {1, 1});
-    out_word.scalar<int32>()() = word_out; 
-
-    // num-layers
-    // 2 (c and h)
-    // 1 (batchsize)
-    // hidden-size
-
-    std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
-      {"Train/Model/test_word_in", in_word},
-      {"Train/Model/test_word_out", out_word},
-      {"Train/Model/test_state", state[0]},
-    };
-
-    // The session will initialize the outputs
-    std::vector<tensorflow::Tensor> outputs;
-
-    // Run the session, evaluating our "c" operation from the graph
-    status = session->Run(inputs, {"Train/Model/test_out", "Train/Model/test_state_out"}, {}, &outputs);
-
-    if (!status.ok()) {
-      std::cout << status.ToString() << "\n";
-      return 1;
-    }
-
-    // Grab the first output (we only evaluated one graph node: "c")
-    // and convert the node to a scalar representation.
-
-    // (There are similar methods for vectors and matrices here:
-    // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/tensor.h)
-
-    // Print the results
-    std::cout << word_out << ": " << outputs[0].DebugString() << "\n"; // Tensor<type: float shape: [] values: 30>
-    std::cout << word_out << ": " << outputs[1].DebugString() << "\n"; // Tensor<type: float shape: [] values: 30>
-    state[0] = outputs[1];
-//    std::cout << output_c() << "\n"; // 30
-  }
-
-  // Free any resources used by the session
-  session->Close();
-  // */
-  return 0;
-}

From 33964139f4ee24bec57f9d5ede1896909ac20bf0 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Mon, 12 Jun 2017 20:06:40 -0400
Subject: [PATCH 15/30] starting to debug the tensorflow code

---
 egs/ami/s5/local/tensorflow/run.sh      | 53 +++++++++++++++++++------
 egs/ami/s5/path.sh                      |  1 +
 egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh | 39 +++++++++++++-----
 src/tensorflow/Makefile                 |  6 ++-
 src/tensorflow/tensorflow-rnnlm-lib.cc  | 38 +++++++++++-------
 src/tensorflow/tensorflow-rnnlm-lib.h   |  3 ++
 src/tfbin/Makefile                      |  7 ++--
 tools/config/common_path.sh             |  1 +
 8 files changed, 107 insertions(+), 41 deletions(-)

diff --git a/egs/ami/s5/local/tensorflow/run.sh b/egs/ami/s5/local/tensorflow/run.sh
index 0e576796338..427f74f8dcf 100755
--- a/egs/ami/s5/local/tensorflow/run.sh
+++ b/egs/ami/s5/local/tensorflow/run.sh
@@ -1,23 +1,50 @@
 #!/bin/bash
-
-data_type=sdm1
+mic=ihm
+ngram_order=4
 model_type=small
+dir=$PWD/data/tensorflow
+stage=3
 
-dir=data/tensorflow/
-mkdir -p $dir
-
-#echo "<s>" > $dir/wordlist
-#echo "</s>" >> $dir/wordlist
+. ./utils/parse_options.sh
+. ./cmd.sh
+. ./path.sh
 
-# num-words is 10000 - 3 (bos, eos and <oos>)
+set -e
 
-cat data/$data_type/train/text | awk '{for(i=2;i<=NF;i++)print $i}' | sort | uniq -c | sort -k1nr | head -n 9997 | awk '{print $2}' > $dir/wordlist
+mkdir -p $dir
 
-for i in train dev eval; do
-  cat data/$data_type/$i/text | awk -v w=$dir/wordlist 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=2;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' | sed "s=^= <s> =g" | sed "s=$= </s>=" > $dir/$i.txt
-done
+if [ $stage -le 1 ]; then
+# num-words is 10000 - 3 (bos, eos and <oos>)
+  cat data/$mic/train/text | awk '{for(i=2;i<=NF;i++)print $i}' | sort | uniq -c | sort -k1nr | head -n 9997 | awk '{print $2}' > $dir/wordlist
 
+  for i in train dev eval; do
+    cat data/$mic/$i/text | awk -v w=$dir/wordlist 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=2;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' | sed "s=^= <s> =g" | sed "s=$= </s>=" > $dir/$i.txt
+  done
+fi
 
-python local/tensorflow/rnnlm.py --data_path=$dir --model=small --save_path=$dir/model.small --wordlist_save_path=$dir/wordlist.rnn
+if [ $stage -le 2 ]; then
+  python local/tensorflow/rnnlm.py --data_path=$dir --model=small --save_path=$dir/rnnlm --wordlist_save_path=$dir/wordlist.rnn
 #python local/tensorflow/rnnlm.py --data_path=$dir --model=medium --save_path=$dir/model.medium
 #python local/tensorflow/rnnlm.py --data_path=$dir --model=large --save_path=$dir/model.large
+fi
+
+touch $dir/unk.probs
+
+final_lm=ami_fsh.o3g.kn
+LM=$final_lm.pr1-7
+
+if [ $stage -le 3 ]; then
+  for decode_set in dev eval; do
+    basedir=exp/$mic/nnet3/tdnn_sp/
+    decode_dir=${basedir}/decode_${decode_set}
+
+    # Lattice rescoring
+    steps/lmrescore_rnnlm_lat.sh \
+      --cmd "$decode_cmd --mem 16G" \
+      --rnnlm-ver tensorflow  --weight 0.5 --max-ngram-order $ngram_order \
+      data/lang_$LM $dir \
+      data/$mic/${decode_set}_hires ${decode_dir} \
+      ${decode_dir}.tfrnnlm.lat.${ngram_order}gram
+
+  done
+fi
diff --git a/egs/ami/s5/path.sh b/egs/ami/s5/path.sh
index ad2c93b309b..4f627ff81ff 100644
--- a/egs/ami/s5/path.sh
+++ b/egs/ami/s5/path.sh
@@ -4,6 +4,7 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
 [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
 . $KALDI_ROOT/tools/config/common_path.sh
 export LC_ALL=C
+export LD_LIBRARY_PATH=$KALDI_ROOT/tools/tensorflow/bazel-bin/tensorflow/${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}  # prepend the TensorFlow lib dir; keep library paths the environment already set
 
 LMBIN=$KALDI_ROOT/tools/irstlm/bin
 SRILM=$KALDI_ROOT/tools/srilm/bin/i686-m64
diff --git a/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh b/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
index 75b08bc4779..c3070183619 100755
--- a/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
+++ b/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
 # Copyright 2015  Guoguo Chen
+#           2017  Hainan Xu
 # Apache 2.0
 
 # This script rescores lattices with RNNLM.  See also rnnlmrescore.sh which is
@@ -14,6 +15,8 @@ N=10
 inv_acwt=12
 weight=1.0  # Interpolation weight for RNNLM.
 # End configuration section.
+rnnlm_ver=
+#layer_string=
 
 echo "$0 $@"  # Print the command line for logging
 
@@ -39,6 +42,25 @@ data=$3
 indir=$4
 outdir=$5
 
+rescoring_binary=lattice-lmrescore-rnnlm
+
+first_arg=ark:$rnnlm_dir/unk.probs # this is for mikolov's rnnlm
+extra_arg=
+
+if [ "$rnnlm_ver" == "cuedrnnlm" ]; then
+  layer_string=`cat $rnnlm_dir/layer_string | sed "s=:= =g"`
+  total_size=`wc -l $rnnlm_dir/unigram.counts | awk '{print $1}'`
+  rescoring_binary="lattice-lmrescore-cuedrnnlm"
+  cat $rnnlm_dir/rnnlm.input.wlist.index | tail -n +2 | awk '{print $1-1,$2}' > $rnnlm_dir/rnn.wlist
+  extra_arg="--full-voc-size=$total_size --layer-sizes=\"$layer_string\""
+  first_arg=$rnnlm_dir/rnn.wlist
+fi
+
+if [ "$rnnlm_ver" == "tensorflow" ]; then
+  rescoring_binary="lattice-lmrescore-tf-rnnlm"
+  first_arg=$rnnlm_dir/wordlist.rnn
+fi
+
 oldlm=$oldlang/G.fst
 if [ -f $oldlang/G.carpa ]; then
   oldlm=$oldlang/G.carpa
@@ -48,7 +70,7 @@ elif [ ! -f $oldlm ]; then
 fi
 
 [ ! -f $oldlm ] && echo "$0: Missing file $oldlm" && exit 1;
-[ ! -f $rnnlm_dir/rnnlm ] && echo "$0: Missing file $rnnlm_dir/rnnlm" && exit 1;
+[ ! -f $rnnlm_dir/rnnlm ] && [ ! -d $rnnlm_dir/rnnlm ] && [ !  && echo "$0: Missing file $rnnlm_dir/rnnlm" && exit 1;
 [ ! -f $rnnlm_dir/unk.probs ] &&\
   echo "$0: Missing file $rnnlm_dir/unk.probs" && exit 1;
 [ ! -f $oldlang/words.txt ] &&\
@@ -72,20 +94,19 @@ if [ "$oldlm" == "$oldlang/G.fst" ]; then
   $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
     lattice-lmrescore --lm-scale=$oldlm_weight \
     "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm_command" ark:-  \| \
-    lattice-lmrescore-rnnlm --lm-scale=$weight \
-    --max-ngram-order=$max_ngram_order ark:$rnnlm_dir/unk.probs \
-    $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \
+    $rescoring_binary $extra_arg --lm-scale=$weight \
+    --max-ngram-order=$max_ngram_order \
+    $first_arg $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \
     "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;
 else
   $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \
     lattice-lmrescore-const-arpa --lm-scale=$oldlm_weight \
-    "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm" ark:-  \| \
-    lattice-lmrescore-rnnlm --lm-scale=$weight \
-    --max-ngram-order=$max_ngram_order ark:$rnnlm_dir/unk.probs \
-    $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \
+    "ark:gunzip -c $indir/lat.JOB.gz|" "$oldlm_command" ark:-  \| \
+    $rescoring_binary $extra_arg --lm-scale=$weight \
+    --max-ngram-order=$max_ngram_order \
+    $first_arg $oldlang/words.txt ark:- "$rnnlm_dir/rnnlm" \
     "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1;
 fi
-
 if ! $skip_scoring ; then
   err_msg="Not scoring because local/score.sh does not exist or not executable."
   [ ! -x local/score.sh ] && echo $err_msg && exit 1;
diff --git a/src/tensorflow/Makefile b/src/tensorflow/Makefile
index c9ae405d8f2..083d22becb0 100644
--- a/src/tensorflow/Makefile
+++ b/src/tensorflow/Makefile
@@ -1,6 +1,7 @@
 include ../kaldi.mk
 
-TENSORFLOW = ../../tools/tensorflow
+CURDIR = $(shell pwd)
+TENSORFLOW = $(CURDIR)/../../tools/tensorflow
 
 all:
 
@@ -14,7 +15,8 @@ TESTFILES =
 LIBNAME = kaldi-tensorflow-rnnlm
 
 ADDLIBS = ../lm/kaldi-lm.a ../util/kaldi-util.a ../thread/kaldi-thread.a \
-          ../matrix/kaldi-matrix.a ../base/kaldi-base.a ../../tools/tensorflow/bazel-bin/tensorflow/tensorflow_cc.so
+          ../matrix/kaldi-matrix.a ../base/kaldi-base.a \
+          $(CURDIR)/../../tools/tensorflow/bazel-bin/tensorflow/tensorflow_cc.so
 
 LDLIBS +=  -lz -ldl -fPIC -lrt
 LDLIBS += $(OTHERLIBS) -L$(TENSORFLOW)/bazel-bin/tensorflow -ltensorflow_cc
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.cc b/src/tensorflow/tensorflow-rnnlm-lib.cc
index 7121b66d31f..529cf260fff 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.cc
+++ b/src/tensorflow/tensorflow-rnnlm-lib.cc
@@ -27,7 +27,7 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
     const std::string &tf_model_path) {
 //  session_ = session;
   {
-    string graph_path = tf_model_path + "/meta";
+    string graph_path = tf_model_path + ".meta";
 
     Status status = tensorflow::NewSession(tensorflow::SessionOptions(), &session_);
     if (!status.ok()) {
@@ -80,38 +80,44 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
 
   fst_label_to_rnn_label_.resize(fst_word_symbols->NumSymbols(), -1);
 
+  num_total_words = fst_word_symbols->NumSymbols();
+
   { // input.
     ifstream ifile(rnn_wordlist.c_str());
     int id;
     string word;
-    int i = 0;
-    while (ifile >> id >> word) { // TODO(hxu) ugly fix for cued-rnnlm's bug
+    int i = -1;
+    while (ifile >> word >> id) { // TODO(hxu) ugly fix for cued-rnnlm's bug
                                   // will implement a better fix later
-      if (word == "[UNK]") {
-        word = "<unk>";
-      } else if (word == "<OOS>") {
-        continue;
-      }
+//      if (word == "<oos>") {
+//        continue;
+//      }
       i++;
-      assert(i == id + 1);
+      assert(i == id);
       rnn_label_to_word_.push_back(word);
 
       int fst_label = fst_word_symbols->Find(rnn_label_to_word_[i]);
-      KALDI_ASSERT(fst::SymbolTable::kNoSymbol != fst_label);
+      if (fst::SymbolTable::kNoSymbol == fst_label) {
+        if (i < 2) continue;
+
+        KALDI_ASSERT(word == "<oos>");
+        oos_ = i;
+        continue;
+      }
+      KALDI_ASSERT(fst_label >= 0);
       fst_label_to_rnn_label_[fst_label] = i;
     }
     bos_ = 1;
     eos_ = 0; // TODO(hxu)
   }
-  rnn_label_to_word_.push_back("<OOS>");
+//  rnn_label_to_word_.push_back("<OOS>");
+  num_rnn_words = rnn_label_to_word_.size();
   
   for (int i = 0; i < fst_label_to_rnn_label_.size(); i++) {
     if (fst_label_to_rnn_label_[i] == -1) {
       fst_label_to_rnn_label_[i] = rnn_label_to_word_.size() - 1;
     }
   }
-
-
 }
 
 BaseFloat KaldiTfRnnlmWrapper::GetLogProb(
@@ -149,7 +155,11 @@ BaseFloat KaldiTfRnnlmWrapper::GetLogProb(
 //                                          context_in, context_out);
   if (context_out != NULL)
     *context_out = outputs[1];
-  return outputs[0].scalar<float>()();
+  if (word != oos_) {
+    return outputs[0].scalar<float>()();
+  } else {
+    return outputs[0].scalar<float>()() / (num_total_words - num_rnn_words);
+  }
 }
 
 void KaldiTfRnnlmWrapper::GetInitialContext(Tensor *c) const {
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.h b/src/tensorflow/tensorflow-rnnlm-lib.h
index 3e0ec14f1a2..3171f36152a 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.h
+++ b/src/tensorflow/tensorflow-rnnlm-lib.h
@@ -61,11 +61,14 @@ class KaldiTfRnnlmWrapper {
   std::vector<std::string> rnn_label_to_word_;
   std::vector<std::string> fst_label_to_word_;
  private:
+  int32 num_total_words;
+  int32 num_rnn_words;
 
   Session* session_;  // ptf owned here
   std::vector<std::string> label_to_word_;
   int32 eos_;
   int32 bos_;
+  int32 oos_;
 
   KALDI_DISALLOW_COPY_AND_ASSIGN(KaldiTfRnnlmWrapper);
 };
diff --git a/src/tfbin/Makefile b/src/tfbin/Makefile
index 35def55cd7d..a105f9830e3 100644
--- a/src/tfbin/Makefile
+++ b/src/tfbin/Makefile
@@ -1,5 +1,6 @@
 
-TENSORFLOW = ../../tools/tensorflow
+CURDIR = $(shell pwd)
+TENSORFLOW = $(CURDIR)/../../tools/tensorflow
 
 all:
 
@@ -10,7 +11,7 @@ BINFILES = lattice-lmrescore-tf-rnnlm
 
 OBJFILES =
 
-
+CURDIR = $(shell pwd)
 
 TESTFILES =
 
@@ -18,7 +19,7 @@ ADDLIBS = ../lat/kaldi-lat.a ../lm/kaldi-lm.a ../fstext/kaldi-fstext.a \
           ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a ../util/kaldi-util.a \
           ../thread/kaldi-thread.a ../matrix/kaldi-matrix.a \
           ../base/kaldi-base.a ../tensorflow/kaldi-tensorflow-rnnlm.a \
-          ../../tools/tensorflow/bazel-bin/tensorflow/tensorflow_cc.so
+          $(CURDIR)/../../tools/tensorflow/bazel-bin/tensorflow/tensorflow_cc.so
 
 LDLIBS +=  -lz -ldl -fPIC -lrt
 LDLIBS += $(OTHERLIBS) -L$(TENSORFLOW)/bazel-bin/tensorflow -ltensorflow_cc
diff --git a/tools/config/common_path.sh b/tools/config/common_path.sh
index fbc4b674474..49882a06ed4 100644
--- a/tools/config/common_path.sh
+++ b/tools/config/common_path.sh
@@ -20,4 +20,5 @@ ${KALDI_ROOT}/src/online2bin:\
 ${KALDI_ROOT}/src/onlinebin:\
 ${KALDI_ROOT}/src/sgmm2bin:\
 ${KALDI_ROOT}/src/sgmmbin:\
+${KALDI_ROOT}/src/tfbin:\
 $PATH

From c07615947625c37dbf83f26f3c4f642c77b81deb Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Tue, 13 Jun 2017 19:31:32 -0400
Subject: [PATCH 16/30] add more text processing

---
 egs/ami/s5/local/tensorflow/reader.py      | 11 ++--
 egs/ami/s5/local/tensorflow/rnnlm.py       | 18 +++---
 egs/ami/s5/local/tensorflow/run.sh         | 19 +++----
 egs/ami/s5/local/tensorflow/train_rnnlm.sh | 64 ++++++++++++++++++++++
 egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh    |  4 +-
 src/tensorflow/tensorflow-rnnlm-lib.cc     | 48 ++++++++--------
 src/tensorflow/tensorflow-rnnlm-lib.h      |  7 ++-
 7 files changed, 114 insertions(+), 57 deletions(-)
 create mode 100755 egs/ami/s5/local/tensorflow/train_rnnlm.sh

diff --git a/egs/ami/s5/local/tensorflow/reader.py b/egs/ami/s5/local/tensorflow/reader.py
index 6e02fcc8be4..964a7b5e949 100644
--- a/egs/ami/s5/local/tensorflow/reader.py
+++ b/egs/ami/s5/local/tensorflow/reader.py
@@ -68,16 +68,17 @@ def ptb_raw_data(data_path=None):
     where each of the data objects can be passed to PTBIterator.
   """
 
-  train_path = os.path.join(data_path, "train.txt")
-  valid_path = os.path.join(data_path, "dev.txt")
-  test_path = os.path.join(data_path, "eval.txt")
+  train_path = os.path.join(data_path, "train")
+  valid_path = os.path.join(data_path, "valid")
+#  test_path = os.path.join(data_path, "eval.txt")
 
   word_to_id = _build_vocab(train_path)
   train_data = _file_to_word_ids(train_path, word_to_id)
   valid_data = _file_to_word_ids(valid_path, word_to_id)
-  test_data = _file_to_word_ids(test_path, word_to_id)
+#  test_data = _file_to_word_ids(test_path, word_to_id)
   vocabulary = len(word_to_id)
-  return train_data, valid_data, test_data, vocabulary, word_to_id
+  return train_data, valid_data, vocabulary, word_to_id
+#  return train_data, valid_data, test_data, vocabulary, word_to_id
 
 
 def ptb_producer(raw_data, batch_size, num_steps, name=None):
diff --git a/egs/ami/s5/local/tensorflow/rnnlm.py b/egs/ami/s5/local/tensorflow/rnnlm.py
index 6707755c214..345d3a4b0bd 100644
--- a/egs/ami/s5/local/tensorflow/rnnlm.py
+++ b/egs/ami/s5/local/tensorflow/rnnlm.py
@@ -369,7 +369,8 @@ def main(_):
     raise ValueError("Must set --data_path to PTB data directory")
 
   raw_data = reader.ptb_raw_data(FLAGS.data_path)
-  train_data, valid_data, test_data, _, word_map = raw_data
+  train_data, valid_data, _, word_map = raw_data
+#  train_data, valid_data, test_data, _, word_map = raw_data
 
   with open(FLAGS.wordlist_save_path, "w") as wmap_file:
     count_pairs = sorted(word_map.items(), key=lambda x: (x[1], x[0]))
@@ -398,11 +399,11 @@ def main(_):
         mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
       tf.summary.scalar("Validation Loss", mvalid.cost)
 
-    with tf.name_scope("Test"):
-      test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
-      with tf.variable_scope("Model", reuse=True, initializer=initializer):
-        mtest = PTBModel(is_training=False, config=eval_config,
-                         input_=test_input)
+#    with tf.name_scope("Test"):
+#      test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
+#      with tf.variable_scope("Model", reuse=True, initializer=initializer):
+#        mtest = PTBModel(is_training=False, config=eval_config,
+#                         input_=test_input)
 
 #    saver = tf.train.Saver({"embedding": m.embedding})
     sv = tf.train.Supervisor(logdir=FLAGS.save_path)
@@ -419,8 +420,8 @@ def main(_):
         valid_perplexity = run_epoch(session, mvalid)
         print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
 
-      test_perplexity = run_epoch(session, mtest)
-      print("Test Perplexity: %.3f" % test_perplexity)
+#      test_perplexity = run_epoch(session, mtest)
+#      print("Test Perplexity: %.3f" % test_perplexity)
 
       if FLAGS.save_path:
 #        saver = tf.train.Saver()
@@ -429,6 +430,5 @@ def main(_):
         sv.saver.save(session, FLAGS.save_path)
 #        sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
 
-
 if __name__ == "__main__":
   tf.app.run()
diff --git a/egs/ami/s5/local/tensorflow/run.sh b/egs/ami/s5/local/tensorflow/run.sh
index 427f74f8dcf..d7ff361a1ab 100755
--- a/egs/ami/s5/local/tensorflow/run.sh
+++ b/egs/ami/s5/local/tensorflow/run.sh
@@ -3,7 +3,7 @@ mic=ihm
 ngram_order=4
 model_type=small
 dir=$PWD/data/tensorflow
-stage=3
+stage=1
 
 . ./utils/parse_options.sh
 . ./cmd.sh
@@ -14,27 +14,22 @@ set -e
 mkdir -p $dir
 
 if [ $stage -le 1 ]; then
-# num-words is 10000 - 3 (bos, eos and <oos>)
-  cat data/$mic/train/text | awk '{for(i=2;i<=NF;i++)print $i}' | sort | uniq -c | sort -k1nr | head -n 9997 | awk '{print $2}' > $dir/wordlist
-
-  for i in train dev eval; do
-    cat data/$mic/$i/text | awk -v w=$dir/wordlist 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=2;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' | sed "s=^= <s> =g" | sed "s=$= </s>=" > $dir/$i.txt
-  done
+  local/tensorflow/train_rnnlm.sh $dir
 fi
 
 if [ $stage -le 2 ]; then
-  python local/tensorflow/rnnlm.py --data_path=$dir --model=small --save_path=$dir/rnnlm --wordlist_save_path=$dir/wordlist.rnn
+  python local/tensorflow/rnnlm.py --data_path=$dir --model=test --save_path=$dir/rnnlm --wordlist_save_path=$dir/wordlist.rnn.final
+#  python local/tensorflow/rnnlm.py --data_path=$dir --model=small --save_path=$dir/rnnlm --wordlist_save_path=$dir/wordlist.rnn.final
 #python local/tensorflow/rnnlm.py --data_path=$dir --model=medium --save_path=$dir/model.medium
 #python local/tensorflow/rnnlm.py --data_path=$dir --model=large --save_path=$dir/model.large
 fi
 
-touch $dir/unk.probs
-
 final_lm=ami_fsh.o3g.kn
 LM=$final_lm.pr1-7
 
 if [ $stage -le 3 ]; then
-  for decode_set in dev eval; do
+  for decode_set in dev; do
+#  for decode_set in dev eval; do
     basedir=exp/$mic/nnet3/tdnn_sp/
     decode_dir=${basedir}/decode_${decode_set}
 
@@ -44,7 +39,7 @@ if [ $stage -le 3 ]; then
       --rnnlm-ver tensorflow  --weight 0.5 --max-ngram-order $ngram_order \
       data/lang_$LM $dir \
       data/$mic/${decode_set}_hires ${decode_dir} \
-      ${decode_dir}.tfrnnlm.lat.${ngram_order}gram
+      ${decode_dir}.tfrnnlm.lat.${ngram_order}gram &
 
   done
 fi
diff --git a/egs/ami/s5/local/tensorflow/train_rnnlm.sh b/egs/ami/s5/local/tensorflow/train_rnnlm.sh
new file mode 100755
index 00000000000..ed32ee4d153
--- /dev/null
+++ b/egs/ami/s5/local/tensorflow/train_rnnlm.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+#set -v
+set -e
+
+train_text=data/ihm/train/text
+nwords=9997
+
+. path.sh
+. cmd.sh
+
+. utils/parse_options.sh
+
+if [ $# != 1 ]; then
+   echo "Usage: $0 [options] <dest-dir>"
+   echo "For options, see top of script file"
+   exit 1;
+fi
+
+dir=$1
+srcdir=data/local/dict
+
+mkdir -p $dir
+
+cat $srcdir/lexicon.txt | awk '{print $1}' | grep -v -w '!SIL' > $dir/wordlist.all
+
+# Get training data with OOV words (w.r.t. our current vocab) replaced with <unk>.
+cat $train_text | awk -v w=$dir/wordlist.all \
+  'BEGIN{while((getline<w)>0) v[$1]=1;}
+  {for (i=2;i<=NF;i++) if ($i in v) printf $i" ";else printf "<unk> ";print ""}'|sed 's/ $//g' \
+  | perl -e ' use List::Util qw(shuffle); @A=<>; print join("", shuffle(@A)); ' \
+  | gzip -c > $dir/all.gz
+
+echo "Splitting data into train and validation sets."
+heldout_sent=10000
+gunzip -c $dir/all.gz | head -n $heldout_sent > $dir/valid.in # validation data: first $heldout_sent lines of the shuffled text
+gunzip -c $dir/all.gz | tail -n +$((heldout_sent + 1)) > $dir/train.in # training data: the rest ("tail -n +K" starts AT line K, hence the +1)
+
+
+cat $dir/train.in $dir/wordlist.all | grep -v '</s>' | grep -v '<s>' | \
+  awk '{ for(x=1;x<=NF;x++) count[$x]++; } END{for(w in count){print count[w], w;}}' | \
+  sort -nr > $dir/unigram.counts
+
+total_nwords=`wc -l $dir/unigram.counts | awk '{print $1}'`
+
+head -$nwords $dir/unigram.counts | awk '{print $2}' | tee $dir/wordlist.rnn | awk '{print NR-1, $1}' > $dir/wordlist.rnn.id
+
+tail -n +$nwords $dir/unigram.counts > $dir/unk_class.counts
+
+for type in train valid; do
+  mv $dir/$type.in $dir/$type
+done
+
+# Now randomize the order of the training data.
+cat $dir/train | awk -v rand_seed=$rand_seed 'BEGIN{srand(rand_seed);} {printf("%f\t%s\n", rand(), $0);}' | \
+ sort | cut -f 2 > $dir/foo
+mv $dir/foo $dir/train
+
+# OK we'll train the RNNLM on this data.
+
+touch $dir/unk.probs  # dummy file, not used for cued-rnnlm
+
+echo "data preparation finished"
+
diff --git a/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh b/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
index c3070183619..ceac46b5eb9 100755
--- a/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
+++ b/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
@@ -58,7 +58,7 @@ fi
 
 if [ "$rnnlm_ver" == "tensorflow" ]; then
   rescoring_binary="lattice-lmrescore-tf-rnnlm"
-  first_arg=$rnnlm_dir/wordlist.rnn
+  first_arg=$rnnlm_dir/wordlist.rnn.final
 fi
 
 oldlm=$oldlang/G.fst
@@ -70,7 +70,7 @@ elif [ ! -f $oldlm ]; then
 fi
 
 [ ! -f $oldlm ] && echo "$0: Missing file $oldlm" && exit 1;
-[ ! -f $rnnlm_dir/rnnlm ] && [ ! -d $rnnlm_dir/rnnlm ] && [ !  && echo "$0: Missing file $rnnlm_dir/rnnlm" && exit 1;
+[ ! -f $rnnlm_dir/rnnlm ] && [ ! -d $rnnlm_dir/rnnlm ] && echo "$0: Missing file $rnnlm_dir/rnnlm" && exit 1;
 [ ! -f $rnnlm_dir/unk.probs ] &&\
   echo "$0: Missing file $rnnlm_dir/unk.probs" && exit 1;
 [ ! -f $oldlang/words.txt ] &&\
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.cc b/src/tensorflow/tensorflow-rnnlm-lib.cc
index 529cf260fff..9fe46df339f 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.cc
+++ b/src/tensorflow/tensorflow-rnnlm-lib.cc
@@ -23,9 +23,8 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
     const std::string &rnn_wordlist,
     const std::string &word_symbol_table_rxfilename, // TODO(hxu) will do this later
     const std::string &unk_prob_rspecifier,
-//    Session* session) {
     const std::string &tf_model_path) {
-//  session_ = session;
+  // read the tf model
   {
     string graph_path = tf_model_path + ".meta";
 
@@ -58,8 +57,14 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
       KALDI_ERR << status.ToString();
     }
 
+    // get the initial context
+    std::vector<Tensor> state;
+    session_->Run(std::vector<std::pair<string, tensorflow::Tensor>>(), {"Train/Model/test_initial_state"}, {}, &state);
+    initial_context_ = state[0];
   }
 
+//  GetInitialContext(&initial_context_);
+
   fst::SymbolTable *fst_word_symbols = NULL;
   if (!(fst_word_symbols =
         fst::SymbolTable::ReadText(word_symbol_table_rxfilename))) {
@@ -82,25 +87,22 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
 
   num_total_words = fst_word_symbols->NumSymbols();
 
+  oos_ = -1;
   { // input.
     ifstream ifile(rnn_wordlist.c_str());
     int id;
     string word;
     int i = -1;
-    while (ifile >> word >> id) { // TODO(hxu) ugly fix for cued-rnnlm's bug
-                                  // will implement a better fix later
-//      if (word == "<oos>") {
-//        continue;
-//      }
+    while (ifile >> word >> id) {
       i++;
       assert(i == id);
       rnn_label_to_word_.push_back(word);
 
       int fst_label = fst_word_symbols->Find(rnn_label_to_word_[i]);
       if (fst::SymbolTable::kNoSymbol == fst_label) {
-        if (i < 2) continue;
+        if (i < 2) continue; // <s> and </s>
 
-        KALDI_ASSERT(word == "<oos>");
+        KALDI_ASSERT(word == "<oos>" && oos_ == -1);
         oos_ = i;
         continue;
       }
@@ -108,14 +110,15 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
       fst_label_to_rnn_label_[fst_label] = i;
     }
     bos_ = 1;
-    eos_ = 0; // TODO(hxu)
+    eos_ = 0; // TODO(hxu) need to think carefully about these..
   }
+  KALDI_ASSERT(oos_ != -1);
 //  rnn_label_to_word_.push_back("<OOS>");
   num_rnn_words = rnn_label_to_word_.size();
   
   for (int i = 0; i < fst_label_to_rnn_label_.size(); i++) {
     if (fst_label_to_rnn_label_[i] == -1) {
-      fst_label_to_rnn_label_[i] = rnn_label_to_word_.size() - 1;
+      fst_label_to_rnn_label_[i] = oos_;
     }
   }
 }
@@ -123,14 +126,8 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
 BaseFloat KaldiTfRnnlmWrapper::GetLogProb(
     int32 word, const std::vector<int32> &wseq,
     const Tensor &context_in,
-    tensorflow::Tensor *context_out) {
-
-  std::vector<std::string> wseq_symbols(wseq.size());
-  for (int32 i = 0; i < wseq_symbols.size(); ++i) {
-    KALDI_ASSERT(wseq[i] < label_to_word_.size());
-    wseq_symbols[i] = label_to_word_[wseq[i]];
-  }
-
+    Tensor *context_out) {
+  KALDI_ASSERT(word >= 0);
   std::vector<std::pair<string, Tensor>> inputs;
 
   Tensor lastword(tensorflow::DT_INT32, {1, 1});
@@ -153,8 +150,10 @@ BaseFloat KaldiTfRnnlmWrapper::GetLogProb(
 
 //  return rnnlm_.computeConditionalLogprob(label_to_word_[word], wseq_symbols,
 //                                          context_in, context_out);
-  if (context_out != NULL)
+  if (context_out != NULL) {
+    KALDI_ASSERT(outputs.size() == 2);
     *context_out = outputs[1];
+  }
   if (word != oos_) {
     return outputs[0].scalar<float>()();
   } else {
@@ -162,10 +161,8 @@ BaseFloat KaldiTfRnnlmWrapper::GetLogProb(
   }
 }
 
-void KaldiTfRnnlmWrapper::GetInitialContext(Tensor *c) const {
-  std::vector<Tensor> state;
-  Status status = session_->Run(std::vector<std::pair<string, tensorflow::Tensor>>(), {"Train/Model/test_initial_state"}, {}, &state);
-  *c = state[0];
+const Tensor& KaldiTfRnnlmWrapper::GetInitialContext() const {
+  return initial_context_;
 }
 
 TfRnnlmDeterministicFst::TfRnnlmDeterministicFst(int32 max_ngram_order,
@@ -178,8 +175,7 @@ TfRnnlmDeterministicFst::TfRnnlmDeterministicFst(int32 max_ngram_order,
   std::vector<Label> bos;
 //  std::vector<float> bos_context(rnnlm->GetHiddenLayerSize(), 1.0);
 
-  Tensor initial_context;
-  rnnlm_->GetInitialContext(&initial_context);
+  const Tensor& initial_context = rnnlm_->GetInitialContext();
 
   state_to_wseq_.push_back(bos);
   state_to_context_.push_back(initial_context);
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.h b/src/tensorflow/tensorflow-rnnlm-lib.h
index 3171f36152a..dbcaf7b0c0d 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.h
+++ b/src/tensorflow/tensorflow-rnnlm-lib.h
@@ -46,12 +46,13 @@ class KaldiTfRnnlmWrapper {
 //                    Session* session);
 
   ~KaldiTfRnnlmWrapper() {
-    delete session_;
+    session_->Close();
   }
 
   int32 GetEos() const { return eos_; }
   int32 GetBos() const { return bos_; }
-  void GetInitialContext(Tensor* context) const;
+
+  const Tensor& GetInitialContext() const;
 
   BaseFloat GetLogProb(int32 word, const std::vector<int32> &wseq,
                        const Tensor &context_in,
@@ -61,11 +62,11 @@ class KaldiTfRnnlmWrapper {
   std::vector<std::string> rnn_label_to_word_;
   std::vector<std::string> fst_label_to_word_;
  private:
+  Tensor initial_context_;
   int32 num_total_words;
   int32 num_rnn_words;
 
   Session* session_;  // ptf owned here
-  std::vector<std::string> label_to_word_;
   int32 eos_;
   int32 bos_;
   int32 oos_;

From e4a0aeb79d7528a6a861e912c26611288b9e70a0 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainan.xv@gmail.com>
Date: Tue, 13 Jun 2017 20:39:00 -0400
Subject: [PATCH 17/30] more changes

---
 egs/ami/s5/local/tensorflow/run.sh         | 10 +++++-----
 egs/ami/s5/local/tensorflow/train_rnnlm.sh |  9 ++-------
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/egs/ami/s5/local/tensorflow/run.sh b/egs/ami/s5/local/tensorflow/run.sh
index d7ff361a1ab..9b1c27b3097 100755
--- a/egs/ami/s5/local/tensorflow/run.sh
+++ b/egs/ami/s5/local/tensorflow/run.sh
@@ -2,7 +2,7 @@
 mic=ihm
 ngram_order=4
 model_type=small
-dir=$PWD/data/tensorflow
+dir=data/tensorflow
 stage=1
 
 . ./utils/parse_options.sh
@@ -18,9 +18,9 @@ if [ $stage -le 1 ]; then
 fi
 
 if [ $stage -le 2 ]; then
-  python local/tensorflow/rnnlm.py --data_path=$dir --model=test --save_path=$dir/rnnlm --wordlist_save_path=$dir/wordlist.rnn.final
+#  python local/tensorflow/rnnlm.py --data_path=$dir --model=test --save_path=$dir/rnnlm --wordlist_save_path=$dir/wordlist.rnn.final
 #  python local/tensorflow/rnnlm.py --data_path=$dir --model=small --save_path=$dir/rnnlm --wordlist_save_path=$dir/wordlist.rnn.final
-#python local/tensorflow/rnnlm.py --data_path=$dir --model=medium --save_path=$dir/model.medium
+python local/tensorflow/rnnlm.py --data_path=$dir --model=medium --save_path=$dir/rnnlm --wordlist_save_path=$dir/wordlist.rnn.final
 #python local/tensorflow/rnnlm.py --data_path=$dir --model=large --save_path=$dir/model.large
 fi
 
@@ -28,8 +28,8 @@ final_lm=ami_fsh.o3g.kn
 LM=$final_lm.pr1-7
 
 if [ $stage -le 3 ]; then
-  for decode_set in dev; do
-#  for decode_set in dev eval; do
+#  for decode_set in dev; do
+  for decode_set in dev eval; do
     basedir=exp/$mic/nnet3/tdnn_sp/
     decode_dir=${basedir}/decode_${decode_set}
 
diff --git a/egs/ami/s5/local/tensorflow/train_rnnlm.sh b/egs/ami/s5/local/tensorflow/train_rnnlm.sh
index ed32ee4d153..2668a1889b8 100755
--- a/egs/ami/s5/local/tensorflow/train_rnnlm.sh
+++ b/egs/ami/s5/local/tensorflow/train_rnnlm.sh
@@ -22,7 +22,7 @@ srcdir=data/local/dict
 
 mkdir -p $dir
 
-cat $srcdir/lexicon.txt | awk '{print $1}' | grep -v -w '!SIL' > $dir/wordlist.all
+cat $srcdir/lexicon.txt | awk '{print $1}' | sort -u | grep -v -w '!SIL' > $dir/wordlist.all
 
 # Get training data with OOV words (w.r.t. our current vocab) replaced with <unk>.
 cat $train_text | awk -v w=$dir/wordlist.all \
@@ -48,14 +48,9 @@ head -$nwords $dir/unigram.counts | awk '{print $2}' | tee $dir/wordlist.rnn | a
 tail -n +$nwords $dir/unigram.counts > $dir/unk_class.counts
 
 for type in train valid; do
-  mv $dir/$type.in $dir/$type
+  cat $dir/$type.in | awk -v w=$dir/wordlist.rnn 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=1;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' | sed "s=^= <s> =g" | sed "s=$= </s>=" > $dir/$type
 done
 
-# Now randomize the order of the training data.
-cat $dir/train | awk -v rand_seed=$rand_seed 'BEGIN{srand(rand_seed);} {printf("%f\t%s\n", rand(), $0);}' | \
- sort | cut -f 2 > $dir/foo
-mv $dir/foo $dir/train
-
 # OK we'll train the RNNLM on this data.
 
 touch $dir/unk.probs  # dummy file, not used for cued-rnnlm

From 37050ba46c377d649cefad13b067c6c1846b444b Mon Sep 17 00:00:00 2001
From: Hainan Xu <hxu@b02.clsp.jhu.edu>
Date: Thu, 15 Jun 2017 14:29:24 -0400
Subject: [PATCH 18/30] tf rnnlm rescoring working

---
 egs/ami/s5/cmd.sh                      |  1 +
 egs/ami/s5/local/tensorflow/rnnlm.py   | 31 +++---------------------
 egs/ami/s5/local/tensorflow/run.sh     | 25 ++++++++++++-------
 src/tensorflow/tensorflow-rnnlm-lib.cc | 33 ++++++++++++++++++--------
 tools/install_tensorflow.sh            |  2 +-
 5 files changed, 45 insertions(+), 47 deletions(-)

diff --git a/egs/ami/s5/cmd.sh b/egs/ami/s5/cmd.sh
index dd7145dff60..4d0e0fe0f6b 100644
--- a/egs/ami/s5/cmd.sh
+++ b/egs/ami/s5/cmd.sh
@@ -12,6 +12,7 @@
 
 export train_cmd="queue.pl --mem 1G"
 export decode_cmd="queue.pl --mem 2G"
+export tensorflow_cmd="queue.pl -l hostname=b*"
 # the use of cuda_cmd is deprecated but it is sometimes still used in nnet1
 # scripts.
 export cuda_cmd="queue.pl --gpu 1 --mem 20G"
diff --git a/egs/ami/s5/local/tensorflow/rnnlm.py b/egs/ami/s5/local/tensorflow/rnnlm.py
index 345d3a4b0bd..7c9425b36f9 100644
--- a/egs/ami/s5/local/tensorflow/rnnlm.py
+++ b/egs/ami/s5/local/tensorflow/rnnlm.py
@@ -137,37 +137,27 @@ def attn_cell():
     # first implement the less efficient version
     test_word_in = tf.placeholder(tf.int32, [1, 1], name="test_word_in")
     test_word_out = tf.placeholder(tf.int32, [1, 1], name="test_word_out")
-#    test_input_state_c = tf.placeholder(tf.float32, [1, size], name="test_state_c")
-#    test_input_state_h = tf.placeholder(tf.float32, [1, size], name="test_state_h")
+
     state_placeholder = tf.placeholder(tf.float32, [config.num_layers, 2, 1, size], name="test_state")
+    # unpacking the input state context 
     l = tf.unstack(state_placeholder, axis=0)
     test_input_state = tuple(
                [tf.contrib.rnn.LSTMStateTuple(l[idx][0],l[idx][1])
                  for idx in range(config.num_layers)]
     )
 
-#    test_input_state = tf.contrib.rnn.LSTMStateTuple(test_input_state_c, test_input_state_h)
-
-#    print ("want to be", self._initial_state)
-#    print ("it actually is ", input_state)
     with tf.device("/cpu:0"):
       self.embedding = tf.get_variable(
           "embedding", [vocab_size, size], dtype=data_type())
 
-#      print("should be ", input_.input_data)
-#      print("is ", test_word)
-
       inputs = tf.nn.embedding_lookup(self.embedding, input_.input_data)
       test_inputs = tf.nn.embedding_lookup(self.embedding, test_word_in)
-#      print("should be ", inputs)
-#      print("is ", test_inputs)
 
     # test time
     with tf.variable_scope("RNN"):
-#      tf.get_variable_scope().reuse_variables()
       (test_cell_output, test_output_state) = self.cell(test_inputs[:, 0, :], test_input_state)
 
-    test_out_state = tf.reshape(tf.stack(axis=1, values=test_output_state), [config.num_layers, 2, 1, size], name="test_state_out")
+    test_out_state = tf.reshape(tf.stack(axis=0, values=test_output_state), [config.num_layers, 2, 1, size], name="test_state_out")
     softmax_w = tf.get_variable(
         "softmax_w", [size, vocab_size], dtype=data_type())
     softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
@@ -177,7 +167,6 @@ def attn_cell():
 
     p_word = test_softmaxed[0, test_word_out[0,0]]
     test_out = tf.identity(p_word, name="test_out")
-#    p_word = tf.float32(test_softmaxed[:, test_word_out], name="p_out")
 
     if is_training and config.keep_prob < 1:
       inputs = tf.nn.dropout(inputs, config.keep_prob)
@@ -370,7 +359,6 @@ def main(_):
 
   raw_data = reader.ptb_raw_data(FLAGS.data_path)
   train_data, valid_data, _, word_map = raw_data
-#  train_data, valid_data, test_data, _, word_map = raw_data
 
   with open(FLAGS.wordlist_save_path, "w") as wmap_file:
     count_pairs = sorted(word_map.items(), key=lambda x: (x[1], x[0]))
@@ -399,13 +387,6 @@ def main(_):
         mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
       tf.summary.scalar("Validation Loss", mvalid.cost)
 
-#    with tf.name_scope("Test"):
-#      test_input = PTBInput(config=eval_config, data=test_data, name="TestInput")
-#      with tf.variable_scope("Model", reuse=True, initializer=initializer):
-#        mtest = PTBModel(is_training=False, config=eval_config,
-#                         input_=test_input)
-
-#    saver = tf.train.Saver({"embedding": m.embedding})
     sv = tf.train.Supervisor(logdir=FLAGS.save_path)
     with sv.managed_session() as session:
       for i in range(config.max_max_epoch):
@@ -420,15 +401,9 @@ def main(_):
         valid_perplexity = run_epoch(session, mvalid)
         print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
 
-#      test_perplexity = run_epoch(session, mtest)
-#      print("Test Perplexity: %.3f" % test_perplexity)
-
       if FLAGS.save_path:
-#        saver = tf.train.Saver()
         print("Saving model to %s." % FLAGS.save_path)
-#        saver.save(session, FLAGS.save_path, global_step=sv.global_step)
         sv.saver.save(session, FLAGS.save_path)
-#        sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
 
 if __name__ == "__main__":
   tf.app.run()
diff --git a/egs/ami/s5/local/tensorflow/run.sh b/egs/ami/s5/local/tensorflow/run.sh
index 9b1c27b3097..eb9ce8aab14 100755
--- a/egs/ami/s5/local/tensorflow/run.sh
+++ b/egs/ami/s5/local/tensorflow/run.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
 mic=ihm
-ngram_order=4
+ngram_order=3
 model_type=small
-dir=data/tensorflow
+dir=data/tensorflow/$model_type
 stage=1
 
 . ./utils/parse_options.sh
@@ -18,15 +18,21 @@ if [ $stage -le 1 ]; then
 fi
 
 if [ $stage -le 2 ]; then
-#  python local/tensorflow/rnnlm.py --data_path=$dir --model=test --save_path=$dir/rnnlm --wordlist_save_path=$dir/wordlist.rnn.final
-#  python local/tensorflow/rnnlm.py --data_path=$dir --model=small --save_path=$dir/rnnlm --wordlist_save_path=$dir/wordlist.rnn.final
-python local/tensorflow/rnnlm.py --data_path=$dir --model=medium --save_path=$dir/rnnlm --wordlist_save_path=$dir/wordlist.rnn.final
-#python local/tensorflow/rnnlm.py --data_path=$dir --model=large --save_path=$dir/model.large
+  mkdir -p $dir/
+  python local/tensorflow/rnnlm.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm --wordlist_save_path=$dir/wordlist.rnn.final
+fi
+
+has_oos=`grep "<oos>" $dir/wordlist.rnn.final | wc -l | awk '{print $1}'`
+if [ $has_oos == "0" ]; then
+  n=`wc -l $dir/wordlist.rnn.final | awk '{print $1}'`
+  echo n is $n
+  echo "<oos> $n" >> $dir/wordlist.rnn.final
 fi
 
 final_lm=ami_fsh.o3g.kn
 LM=$final_lm.pr1-7
 
+date
 if [ $stage -le 3 ]; then
 #  for decode_set in dev; do
   for decode_set in dev eval; do
@@ -35,11 +41,14 @@ if [ $stage -le 3 ]; then
 
     # Lattice rescoring
     steps/lmrescore_rnnlm_lat.sh \
-      --cmd "$decode_cmd --mem 16G" \
+      --cmd "$tensorflow_cmd --mem 16G" \
       --rnnlm-ver tensorflow  --weight 0.5 --max-ngram-order $ngram_order \
       data/lang_$LM $dir \
       data/$mic/${decode_set}_hires ${decode_dir} \
-      ${decode_dir}.tfrnnlm.lat.${ngram_order}gram &
+      ${decode_dir}.tfrnnlm.lat.${ngram_order}gram  &
 
   done
 fi
+
+wait
+date
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.cc b/src/tensorflow/tensorflow-rnnlm-lib.cc
index 9fe46df339f..47f9ccc69e8 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.cc
+++ b/src/tensorflow/tensorflow-rnnlm-lib.cc
@@ -21,7 +21,7 @@ using std::ifstream;
 KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
     const KaldiTfRnnlmWrapperOpts &opts,
     const std::string &rnn_wordlist,
-    const std::string &word_symbol_table_rxfilename, // TODO(hxu) will do this later
+    const std::string &word_symbol_table_rxfilename,
     const std::string &unk_prob_rspecifier,
     const std::string &tf_model_path) {
   // read the tf model
@@ -96,9 +96,9 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
     while (ifile >> word >> id) {
       i++;
       assert(i == id);
-      rnn_label_to_word_.push_back(word);
+      rnn_label_to_word_.push_back(word); // vector[i] = word
 
-      int fst_label = fst_word_symbols->Find(rnn_label_to_word_[i]);
+      int fst_label = fst_word_symbols->Find(word);
       if (fst::SymbolTable::kNoSymbol == fst_label) {
         if (i < 2) continue; // <s> and </s>
 
@@ -112,10 +112,15 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
     bos_ = 1;
     eos_ = 0; // TODO(hxu) need to think carefully about these..
   }
-  KALDI_ASSERT(oos_ != -1);
+  if (fst_label_to_word_.size() > rnn_label_to_word_.size()) {
+    KALDI_ASSERT(oos_ != -1);
+  }
 //  rnn_label_to_word_.push_back("<OOS>");
   num_rnn_words = rnn_label_to_word_.size();
   
+  if (oos_ == -1) {
+    return;
+  }
   for (int i = 0; i < fst_label_to_rnn_label_.size(); i++) {
     if (fst_label_to_rnn_label_[i] == -1) {
       fst_label_to_rnn_label_[i] = oos_;
@@ -127,7 +132,7 @@ BaseFloat KaldiTfRnnlmWrapper::GetLogProb(
     int32 word, const std::vector<int32> &wseq,
     const Tensor &context_in,
     Tensor *context_out) {
-  KALDI_ASSERT(word >= 0);
+
   std::vector<std::pair<string, Tensor>> inputs;
 
   Tensor lastword(tensorflow::DT_INT32, {1, 1});
@@ -148,17 +153,24 @@ BaseFloat KaldiTfRnnlmWrapper::GetLogProb(
   // Run the session, evaluating our "c" operation from the graph
   Status status = session_->Run(inputs, {"Train/Model/test_out", "Train/Model/test_state_out"}, {}, &outputs);
 
-//  return rnnlm_.computeConditionalLogprob(label_to_word_[word], wseq_symbols,
-//                                          context_in, context_out);
   if (context_out != NULL) {
-    KALDI_ASSERT(outputs.size() == 2);
     *context_out = outputs[1];
   }
+
+  float ans;
   if (word != oos_) {
-    return outputs[0].scalar<float>()();
+    ans = log(outputs[0].scalar<float>()());
   } else {
-    return outputs[0].scalar<float>()() / (num_total_words - num_rnn_words);
+    ans = log(outputs[0].scalar<float>()() / (num_total_words - num_rnn_words));
   }
+//  std::ostringstream his_str;
+//  for (int i = 0; i < wseq.size(); i++) {
+//    his_str << rnn_label_to_word_[wseq[i]] << "(" << wseq[i] << ") ";
+//  }
+
+//  KALDI_LOG << "Computing logprob of word " << rnn_label_to_word_[word] << "(" << word << ")"
+//            << " given history " << his_str.str() << " is " << exp(ans);
+  return ans;
 }
 
 const Tensor& KaldiTfRnnlmWrapper::GetInitialContext() const {
@@ -194,6 +206,7 @@ fst::StdArc::Weight TfRnnlmDeterministicFst::Final(StateId s) {
 }
 
 bool TfRnnlmDeterministicFst::GetArc(StateId s, Label ilabel, fst::StdArc *oarc) {
+//  std::cout << "computing label " << ilabel << " ";
   // At this point, we should have created the state.
   KALDI_ASSERT(static_cast<size_t>(s) < state_to_wseq_.size());
 
diff --git a/tools/install_tensorflow.sh b/tools/install_tensorflow.sh
index 7593d486fa4..28d97e65567 100755
--- a/tools/install_tensorflow.sh
+++ b/tools/install_tensorflow.sh
@@ -20,5 +20,5 @@ cd tensorflow
 ./configure
 
 tensorflow/contrib/makefile/download_dependencies.sh 
-bazel build //tensorflow:libtensorflow.so
+bazel build --copt=-msse4.2 //tensorflow:libtensorflow.so
 #bazel build //tensorflow:libtensorflow_cc.so

From f435a3a7f5a8cc90948611b0d80a1a0e79767ec8 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hxu@b02.clsp.jhu.edu>
Date: Thu, 15 Jun 2017 20:01:13 -0400
Subject: [PATCH 19/30] draft of a much more efficient way of rescoring

---
 egs/ami/s5/local/tensorflow/rnnlm.py   |  22 ++++-
 egs/ami/s5/local/tensorflow/run.sh     |   7 +-
 src/tensorflow/tensorflow-rnnlm-lib.cc | 107 +++++++++++++++++++------
 src/tensorflow/tensorflow-rnnlm-lib.h  |  17 +++-
 4 files changed, 119 insertions(+), 34 deletions(-)

diff --git a/egs/ami/s5/local/tensorflow/rnnlm.py b/egs/ami/s5/local/tensorflow/rnnlm.py
index 7c9425b36f9..9e7be82a962 100644
--- a/egs/ami/s5/local/tensorflow/rnnlm.py
+++ b/egs/ami/s5/local/tensorflow/rnnlm.py
@@ -136,9 +136,8 @@ def attn_cell():
 
     # first implement the less efficient version
     test_word_in = tf.placeholder(tf.int32, [1, 1], name="test_word_in")
-    test_word_out = tf.placeholder(tf.int32, [1, 1], name="test_word_out")
 
-    state_placeholder = tf.placeholder(tf.float32, [config.num_layers, 2, 1, size], name="test_state")
+    state_placeholder = tf.placeholder(tf.float32, [config.num_layers, 2, 1, size], name="test_state_in")
     # unpacking the input state context 
     l = tf.unstack(state_placeholder, axis=0)
     test_input_state = tuple(
@@ -157,12 +156,27 @@ def attn_cell():
     with tf.variable_scope("RNN"):
       (test_cell_output, test_output_state) = self.cell(test_inputs[:, 0, :], test_input_state)
 
-    test_out_state = tf.reshape(tf.stack(axis=0, values=test_output_state), [config.num_layers, 2, 1, size], name="test_state_out")
+    test_state_out = tf.reshape(tf.stack(axis=0, values=test_output_state), [config.num_layers, 2, 1, size], name="test_state_out")
+    test_cell_out = tf.reshape(test_cell_output, [1, size], name="test_cell_out")
+    # above is the first part of the graph for test
+    # test-word-in
+    #               > ---- > test-state-out
+    # test-state-in        > test-cell-out
+
+
+    # below is the 2nd part of the graph for test
+    # test-word-out
+    #               > prob(word | test-word-out)
+    # test-cell-in
+
+    test_word_out = tf.placeholder(tf.int32, [1, 1], name="test_word_out")
+    cellout_placeholder = tf.placeholder(tf.float32, [1, size], name="test_cell_in")
+
     softmax_w = tf.get_variable(
         "softmax_w", [size, vocab_size], dtype=data_type())
     softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
 
-    test_logits = tf.matmul(test_cell_output, softmax_w) + softmax_b
+    test_logits = tf.matmul(cellout_placeholder, softmax_w) + softmax_b
     test_softmaxed = tf.nn.softmax(test_logits)
 
     p_word = test_softmaxed[0, test_word_out[0,0]]
diff --git a/egs/ami/s5/local/tensorflow/run.sh b/egs/ami/s5/local/tensorflow/run.sh
index eb9ce8aab14..a5e3e3e3cfa 100755
--- a/egs/ami/s5/local/tensorflow/run.sh
+++ b/egs/ami/s5/local/tensorflow/run.sh
@@ -2,8 +2,9 @@
 mic=ihm
 ngram_order=3
 model_type=small
-dir=data/tensorflow/$model_type
+dir=data/new_tensorflow/$model_type
 stage=1
+weight=0.5
 
 . ./utils/parse_options.sh
 . ./cmd.sh
@@ -42,10 +43,10 @@ if [ $stage -le 3 ]; then
     # Lattice rescoring
     steps/lmrescore_rnnlm_lat.sh \
       --cmd "$tensorflow_cmd --mem 16G" \
-      --rnnlm-ver tensorflow  --weight 0.5 --max-ngram-order $ngram_order \
+      --rnnlm-ver tensorflow  --weight $weight --max-ngram-order $ngram_order \
       data/lang_$LM $dir \
       data/$mic/${decode_set}_hires ${decode_dir} \
-      ${decode_dir}.tfrnnlm.lat.${ngram_order}gram  &
+      ${decode_dir}.new.tfrnnlm.lat.${ngram_order}gram.$weight  &
 
   done
 fi
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.cc b/src/tensorflow/tensorflow-rnnlm-lib.cc
index 47f9ccc69e8..b82a3476c52 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.cc
+++ b/src/tensorflow/tensorflow-rnnlm-lib.cc
@@ -56,11 +56,6 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
     if (!status.ok()) {
       KALDI_ERR << status.ToString();
     }
-
-    // get the initial context
-    std::vector<Tensor> state;
-    session_->Run(std::vector<std::pair<string, tensorflow::Tensor>>(), {"Train/Model/test_initial_state"}, {}, &state);
-    initial_context_ = state[0];
   }
 
 //  GetInitialContext(&initial_context_);
@@ -126,35 +121,87 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
       fst_label_to_rnn_label_[i] = oos_;
     }
   }
+
+  {
+    Status status;
+    // get the initial context
+    {
+      std::vector<Tensor> state;
+      status = session_->Run(std::vector<std::pair<string, tensorflow::Tensor>>(), {"Train/Model/test_initial_state"}, {}, &state);
+      if (!status.ok()) {
+        KALDI_ERR << status.ToString();
+      }
+      initial_context_ = state[0];
+    }
+
+    {
+      std::vector<Tensor> state;
+      Tensor bosword(tensorflow::DT_INT32, {1, 1});
+      bosword.scalar<int32>()() = bos_;
+
+      std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
+        {"Train/Model/test_word_in", bosword},
+        {"Train/Model/test_state_in", initial_context_},
+      };
+
+      status = session_->Run(inputs, {"Train/Model/test_cell_out"}, {}, &state);
+      if (!status.ok()) {
+        KALDI_ERR << status.ToString();
+      }
+      initial_cell_ = state[0];
+    }
+  }
 }
 
 BaseFloat KaldiTfRnnlmWrapper::GetLogProb(
-    int32 word, const std::vector<int32> &wseq,
+    int32 word,
+//    const std::vector<int32> &wseq,
     const Tensor &context_in,
-    Tensor *context_out) {
+    const Tensor &cell_in,
+    Tensor *context_out,
+    Tensor *new_cell) {
 
   std::vector<std::pair<string, Tensor>> inputs;
 
-  Tensor lastword(tensorflow::DT_INT32, {1, 1});
   Tensor thisword(tensorflow::DT_INT32, {1, 1});
 
-  lastword.scalar<int32>()() = (wseq.size() == 0? bos_: wseq.back());
   thisword.scalar<int32>()() = word;
-
-  inputs = {
-    {"Train/Model/test_word_in", lastword},
-    {"Train/Model/test_word_out", thisword},
-    {"Train/Model/test_state", context_in},
-  };
-
-  // The session will initialize the outputs
   std::vector<tensorflow::Tensor> outputs;
 
-  // Run the session, evaluating our "c" operation from the graph
-  Status status = session_->Run(inputs, {"Train/Model/test_out", "Train/Model/test_state_out"}, {}, &outputs);
-
   if (context_out != NULL) {
+    inputs = {
+      {"Train/Model/test_word_in", thisword},
+      {"Train/Model/test_word_out", thisword},
+      {"Train/Model/test_state_in", context_in},
+      {"Train/Model/test_cell_in", cell_in},
+//      {"Train/Model/test_cell_in", cell_in},
+    };
+
+    // The session will initialize the outputs
+
+    // Run the session, evaluating our "c" operation from the graph
+    Status status = session_->Run(inputs,
+        {"Train/Model/test_out",
+         "Train/Model/test_state_out",
+         "Train/Model/test_cell_out"}, {}, &outputs);
+    if (!status.ok()) {
+      KALDI_ERR << status.ToString();
+    }
+
     *context_out = outputs[1];
+    *new_cell = outputs[2];
+  } else {
+    inputs = {
+      {"Train/Model/test_word_out", thisword},
+      {"Train/Model/test_cell_in", cell_in},
+    };
+
+    // Run the session, evaluating our "c" operation from the graph
+    Status status = session_->Run(inputs,
+        {"Train/Model/test_out"}, {}, &outputs);
+    if (!status.ok()) {
+      KALDI_ERR << status.ToString();
+    }
   }
 
   float ans;
@@ -177,6 +224,10 @@ const Tensor& KaldiTfRnnlmWrapper::GetInitialContext() const {
   return initial_context_;
 }
 
+const Tensor& KaldiTfRnnlmWrapper::GetInitialCell() const {
+  return initial_cell_;
+}
+
 TfRnnlmDeterministicFst::TfRnnlmDeterministicFst(int32 max_ngram_order,
                                              KaldiTfRnnlmWrapper *rnnlm) {
   KALDI_ASSERT(rnnlm != NULL);
@@ -188,9 +239,11 @@ TfRnnlmDeterministicFst::TfRnnlmDeterministicFst(int32 max_ngram_order,
 //  std::vector<float> bos_context(rnnlm->GetHiddenLayerSize(), 1.0);
 
   const Tensor& initial_context = rnnlm_->GetInitialContext();
+  const Tensor& initial_cell = rnnlm_->GetInitialCell();
 
   state_to_wseq_.push_back(bos);
   state_to_context_.push_back(initial_context);
+  state_to_cell_.push_back(initial_cell);
   wseq_to_state_[bos] = 0;
   start_state_ = 0;
 }
@@ -200,8 +253,9 @@ fst::StdArc::Weight TfRnnlmDeterministicFst::Final(StateId s) {
   KALDI_ASSERT(static_cast<size_t>(s) < state_to_wseq_.size());
 
   std::vector<Label> wseq = state_to_wseq_[s];
-  BaseFloat logprob = rnnlm_->GetLogProb(rnnlm_->GetEos(), wseq,
-                                         state_to_context_[s], NULL);
+  BaseFloat logprob = rnnlm_->GetLogProb(rnnlm_->GetEos(), // wseq,
+                                         state_to_context_[s], state_to_cell_[s],
+                                         NULL, NULL);
   return Weight(-logprob);
 }
 
@@ -212,10 +266,14 @@ bool TfRnnlmDeterministicFst::GetArc(StateId s, Label ilabel, fst::StdArc *oarc)
 
   std::vector<Label> wseq = state_to_wseq_[s];
   tensorflow::Tensor new_context;
+  tensorflow::Tensor new_cell;
 
   int32 rnn_word = rnnlm_->fst_label_to_rnn_label_[ilabel];
-  BaseFloat logprob = rnnlm_->GetLogProb(rnn_word, wseq,
-                                         state_to_context_[s], &new_context);
+  BaseFloat logprob = rnnlm_->GetLogProb(rnn_word, // wseq,
+                                         state_to_context_[s],
+                                         state_to_cell_[s],
+                                         &new_context,
+                                         &new_cell);
 
   wseq.push_back(rnn_word);
   if (max_ngram_order_ > 0) {
@@ -238,6 +296,7 @@ bool TfRnnlmDeterministicFst::GetArc(StateId s, Label ilabel, fst::StdArc *oarc)
   if (result.second == true) {
     state_to_wseq_.push_back(wseq);
     state_to_context_.push_back(new_context);
+    state_to_cell_.push_back(new_cell);
   }
 
   // Creates the arc.
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.h b/src/tensorflow/tensorflow-rnnlm-lib.h
index dbcaf7b0c0d..5011b0b138a 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.h
+++ b/src/tensorflow/tensorflow-rnnlm-lib.h
@@ -53,16 +53,26 @@ class KaldiTfRnnlmWrapper {
   int32 GetBos() const { return bos_; }
 
   const Tensor& GetInitialContext() const;
-
-  BaseFloat GetLogProb(int32 word, const std::vector<int32> &wseq,
+  const Tensor& GetInitialCell() const;
+
+  // compute p(word | wseq) and return the log of that
+  // the computation used the input cell,
+  // which is the 2nd-to-last layer of the RNNLM associated with history wseq;
+  //
+  // and we generate (context_out, new_cell) by passing (context_in, word) into the nnet
+  BaseFloat GetLogProb(int32 word,
+///                       const std::vector<int32> &wseq,
                        const Tensor &context_in,
-                       Tensor *context_out);
+                       const Tensor &cell_in,
+                       Tensor *context_out,
+                       Tensor *new_cell);
 
   std::vector<int> fst_label_to_rnn_label_;
   std::vector<std::string> rnn_label_to_word_;
   std::vector<std::string> fst_label_to_word_;
  private:
   Tensor initial_context_;
+  Tensor initial_cell_;
   int32 num_total_words;
   int32 num_rnn_words;
 
@@ -104,6 +114,7 @@ class TfRnnlmDeterministicFst
   KaldiTfRnnlmWrapper *rnnlm_;
   int32 max_ngram_order_;
   std::vector<tensorflow::Tensor> state_to_context_;
+  std::vector<tensorflow::Tensor> state_to_cell_;
 };
 
 }  // namespace tf_rnnlm

From e4ed676b6bea1191f7cc7f66a790516385d223fe Mon Sep 17 00:00:00 2001
From: Hainan Xu <hxu@b02.clsp.jhu.edu>
Date: Mon, 19 Jun 2017 13:03:54 -0400
Subject: [PATCH 20/30] going to change the BOS/EOS implementations

---
 egs/ami/s5/local/tensorflow/lstm.py  | 392 +++++++++++++++++++++++++
 egs/ami/s5/local/tensorflow/rnnlm.py | 424 +--------------------------
 egs/ami/s5/local/tensorflow/run.sh   |   2 +-
 3 files changed, 394 insertions(+), 424 deletions(-)
 create mode 100644 egs/ami/s5/local/tensorflow/lstm.py
 mode change 100644 => 120000 egs/ami/s5/local/tensorflow/rnnlm.py

diff --git a/egs/ami/s5/local/tensorflow/lstm.py b/egs/ami/s5/local/tensorflow/lstm.py
new file mode 100644
index 00000000000..555ec5b7494
--- /dev/null
+++ b/egs/ami/s5/local/tensorflow/lstm.py
@@ -0,0 +1,392 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import sys
+
+sys.path.insert(0,"/home/hxu/.local/lib/python2.7/site-packages/")
+
+import inspect
+import time
+
+import numpy as np
+import tensorflow as tf
+
+import reader
+
+flags = tf.flags
+logging = tf.logging
+
+flags.DEFINE_string(
+    "model", "small",
+    "A type of model. Possible options are: small, medium, large.")
+flags.DEFINE_string("data_path", None,
+                    "Where the training/test data is stored.")
+flags.DEFINE_string("save_path", None,
+                    "Model output directory.")
+flags.DEFINE_string("wordlist_save_path", None,
+                    "wordmap output directory.")
+flags.DEFINE_bool("use_fp16", False,
+                  "Train using 16-bit floats instead of 32bit floats")
+
+FLAGS = flags.FLAGS
+
+
+def data_type():
+  return tf.float16 if FLAGS.use_fp16 else tf.float32
+
+
+class PTBInput(object):
+  """The input data."""
+
+  def __init__(self, config, data, name=None):
+    self.batch_size = batch_size = config.batch_size
+    self.num_steps = num_steps = config.num_steps
+    self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
+    self.input_data, self.targets = reader.ptb_producer(
+        data, batch_size, num_steps, name=name)
+
+
+class PTBModel(object):
+  """The PTB model."""
+
+  def __init__(self, is_training, config, input_):
+    self._input = input_
+
+    batch_size = input_.batch_size
+    num_steps = input_.num_steps
+    size = config.hidden_size
+    vocab_size = config.vocab_size
+
+    # Slightly better results can be obtained with forget gate biases
+    # initialized to 1 but the hyperparameters of the model would need to be
+    # different than reported in the paper.
+    def lstm_cell():
+      # With the latest TensorFlow source code (as of Mar 27, 2017),
+      # the BasicLSTMCell will need a reuse parameter which is unfortunately not
+      # defined in TensorFlow 1.0. To maintain backwards compatibility, we add
+      # an argument check here:
+      if 'reuse' in inspect.getargspec(
+          tf.contrib.rnn.BasicLSTMCell.__init__).args:
+        return tf.contrib.rnn.BasicLSTMCell(
+            size, forget_bias=0.0, state_is_tuple=True,
+            reuse=tf.get_variable_scope().reuse)
+      else:
+        return tf.contrib.rnn.BasicLSTMCell(
+            size, forget_bias=0.0, state_is_tuple=True)
+    attn_cell = lstm_cell
+    if is_training and config.keep_prob < 1:
+      def attn_cell():
+        return tf.contrib.rnn.DropoutWrapper(
+            lstm_cell(), output_keep_prob=config.keep_prob)
+    self.cell = tf.contrib.rnn.MultiRNNCell(
+        [attn_cell() for _ in range(config.num_layers)], state_is_tuple=True)
+
+    self._initial_state = self.cell.zero_state(batch_size, data_type())
+    self._initial_state_single = self.cell.zero_state(1, data_type())
+
+    self.initial = tf.reshape(tf.stack(axis=0, values=self._initial_state_single), [config.num_layers, 2, 1, size], name="test_initial_state")
+
+
+    # first implement the less efficient version
+    test_word_in = tf.placeholder(tf.int32, [1, 1], name="test_word_in")
+
+    state_placeholder = tf.placeholder(tf.float32, [config.num_layers, 2, 1, size], name="test_state_in")
+    # unpacking the input state context 
+    l = tf.unstack(state_placeholder, axis=0)
+    test_input_state = tuple(
+               [tf.contrib.rnn.LSTMStateTuple(l[idx][0],l[idx][1])
+                 for idx in range(config.num_layers)]
+    )
+
+    with tf.device("/cpu:0"):
+      self.embedding = tf.get_variable(
+          "embedding", [vocab_size, size], dtype=data_type())
+
+      inputs = tf.nn.embedding_lookup(self.embedding, input_.input_data)
+      test_inputs = tf.nn.embedding_lookup(self.embedding, test_word_in)
+
+    # test time
+    with tf.variable_scope("RNN"):
+      (test_cell_output, test_output_state) = self.cell(test_inputs[:, 0, :], test_input_state)
+
+    test_state_out = tf.reshape(tf.stack(axis=0, values=test_output_state), [config.num_layers, 2, 1, size], name="test_state_out")
+    test_cell_out = tf.reshape(test_cell_output, [1, size], name="test_cell_out")
+    # above is the first part of the graph for test
+    # test-word-in
+    #               > ---- > test-state-out
+    # test-state-in        > test-cell-out
+
+
+    # below is the 2nd part of the graph for test
+    # test-word-out
+    #               > prob(test-word-out | test-cell-in)
+    # test-cell-in
+
+    test_word_out = tf.placeholder(tf.int32, [1, 1], name="test_word_out")
+    cellout_placeholder = tf.placeholder(tf.float32, [1, size], name="test_cell_in")
+
+    softmax_w = tf.get_variable(
+        "softmax_w", [size, vocab_size], dtype=data_type())
+    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
+
+    test_logits = tf.matmul(cellout_placeholder, softmax_w) + softmax_b
+    test_softmaxed = tf.nn.softmax(test_logits)
+
+    p_word = test_softmaxed[0, test_word_out[0,0]]
+    test_out = tf.identity(p_word, name="test_out")
+
+    if is_training and config.keep_prob < 1:
+      inputs = tf.nn.dropout(inputs, config.keep_prob)
+
+    # Simplified version of models/tutorials/rnn/rnn.py's rnn().
+    # This builds an unrolled LSTM for tutorial purposes only.
+    # In general, use the rnn() or state_saving_rnn() from rnn.py.
+    #
+    # The alternative version of the code below is:
+    #
+    # inputs = tf.unstack(inputs, num=num_steps, axis=1)
+    # outputs, state = tf.contrib.rnn.static_rnn(
+    #     cell, inputs, initial_state=self._initial_state)
+    outputs = []
+    state = self._initial_state
+    with tf.variable_scope("RNN"):
+      for time_step in range(num_steps):
+        if time_step > -1: tf.get_variable_scope().reuse_variables()
+        (cell_output, state) = self.cell(inputs[:, time_step, :], state)
+        outputs.append(cell_output)
+
+    output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])
+    logits = tf.matmul(output, softmax_w) + softmax_b
+    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
+        [logits],
+        [tf.reshape(input_.targets, [-1])],
+        [tf.ones([batch_size * num_steps], dtype=data_type())])
+    self._cost = cost = tf.reduce_sum(loss) / batch_size
+    self._final_state = state
+
+    if not is_training:
+      return
+
+    self._lr = tf.Variable(0.0, trainable=False)
+    tvars = tf.trainable_variables()
+    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
+                                      config.max_grad_norm)
+    optimizer = tf.train.GradientDescentOptimizer(self._lr)
+    self._train_op = optimizer.apply_gradients(
+        zip(grads, tvars),
+        global_step=tf.contrib.framework.get_or_create_global_step())
+
+    self._new_lr = tf.placeholder(
+        tf.float32, shape=[], name="new_learning_rate")
+    self._lr_update = tf.assign(self._lr, self._new_lr)
+
+  def assign_lr(self, session, lr_value):
+    session.run(self._lr_update, feed_dict={self._new_lr: lr_value})
+
+  @property
+  def input(self):
+    return self._input
+
+  @property
+  def initial_state(self):
+    return self._initial_state
+
+  @property
+  def cost(self):
+    return self._cost
+
+  @property
+  def final_state(self):
+    return self._final_state
+
+  @property
+  def lr(self):
+    return self._lr
+
+  @property
+  def train_op(self):
+    return self._train_op
+
+
+class SmallConfig(object):
+  """Small config."""
+  init_scale = 0.1
+  learning_rate = 1.0
+  max_grad_norm = 5
+  num_layers = 2
+  num_steps = 20
+  hidden_size = 200
+  max_epoch = 4
+  max_max_epoch = 13
+  keep_prob = 1.0
+  lr_decay = 0.5
+  batch_size = 20
+  vocab_size = 10000
+
+
+class MediumConfig(object):
+  """Medium config."""
+  init_scale = 0.05
+  learning_rate = 1.0
+  max_grad_norm = 5
+  num_layers = 2
+  num_steps = 35
+  hidden_size = 650
+  max_epoch = 6
+  max_max_epoch = 39
+  keep_prob = 0.5
+  lr_decay = 0.8
+  batch_size = 20
+  vocab_size = 10000
+
+
+class LargeConfig(object):
+  """Large config."""
+  init_scale = 0.04
+  learning_rate = 1.0
+  max_grad_norm = 10
+  num_layers = 2
+  num_steps = 35
+  hidden_size = 1500
+  max_epoch = 14
+  max_max_epoch = 55
+  keep_prob = 0.35
+  lr_decay = 1 / 1.15
+  batch_size = 20
+  vocab_size = 10000
+
+
+class TestConfig(object):
+  """Tiny config, for testing."""
+  init_scale = 0.1
+  learning_rate = 1.0
+  max_grad_norm = 1
+  num_layers = 1
+  num_steps = 2
+  hidden_size = 2
+  max_epoch = 1
+  max_max_epoch = 1
+  keep_prob = 1.0
+  lr_decay = 0.5
+  batch_size = 20
+  vocab_size = 10000
+
+
+def run_epoch(session, model, eval_op=None, verbose=False):
+  """Runs the model on the given data."""
+  start_time = time.time()
+  costs = 0.0
+  iters = 0
+  state = session.run(model.initial_state)
+
+  fetches = {
+      "cost": model.cost,
+      "final_state": model.final_state,
+  }
+  if eval_op is not None:
+    fetches["eval_op"] = eval_op
+
+  for step in range(model.input.epoch_size):
+    feed_dict = {}
+    for i, (c, h) in enumerate(model.initial_state):
+      feed_dict[c] = state[i].c
+      feed_dict[h] = state[i].h
+
+    vals = session.run(fetches, feed_dict)
+    cost = vals["cost"]
+    state = vals["final_state"]
+
+    costs += cost
+    iters += model.input.num_steps
+    # max(..., 1): an epoch_size below 10 would otherwise make the modulus zero.
+    if verbose and step % max(model.input.epoch_size // 10, 1) == 10:
+      print("%.3f perplexity: %.3f speed: %.0f wps" %
+            (step * 1.0 / model.input.epoch_size, np.exp(costs / iters),
+             iters * model.input.batch_size / (time.time() - start_time)))
+
+  return np.exp(costs / iters)
+
+
+def get_config():
+  if FLAGS.model == "small":
+    return SmallConfig()
+  elif FLAGS.model == "medium":
+    return MediumConfig()
+  elif FLAGS.model == "large":
+    return LargeConfig()
+  elif FLAGS.model == "test":
+    return TestConfig()
+  else:
+    raise ValueError("Invalid model: %s" % FLAGS.model)  # interpolate the name into the message
+
+
+def main(_):
+  if not FLAGS.data_path:
+    raise ValueError("Must set --data_path to PTB data directory")
+
+  raw_data = reader.ptb_raw_data(FLAGS.data_path)
+  train_data, valid_data, _, word_map = raw_data
+
+  with open(FLAGS.wordlist_save_path, "w") as wmap_file:
+    count_pairs = sorted(word_map.items(), key=lambda x: (x[1], x[0]))
+    for k, v in count_pairs: 
+      wmap_file.write(str(k) + " " + str(v) + "\n")
+
+  config = get_config()
+  eval_config = get_config()
+  eval_config.batch_size = 1
+  eval_config.num_steps = 1
+
+  with tf.Graph().as_default():
+    initializer = tf.random_uniform_initializer(-config.init_scale,
+                                                config.init_scale)
+
+    with tf.name_scope("Train"):
+      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
+      with tf.variable_scope("Model", reuse=None, initializer=initializer):
+        m = PTBModel(is_training=True, config=config, input_=train_input)
+      tf.summary.scalar("Training Loss", m.cost)
+      tf.summary.scalar("Learning Rate", m.lr)
+
+    with tf.name_scope("Valid"):
+      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
+      with tf.variable_scope("Model", reuse=True, initializer=initializer):
+        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
+      tf.summary.scalar("Validation Loss", mvalid.cost)
+
+    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
+    with sv.managed_session() as session:
+      for i in range(config.max_max_epoch):
+        lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
+        m.assign_lr(session, config.learning_rate * lr_decay)
+
+        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
+        train_perplexity = run_epoch(session, m, eval_op=m.train_op,
+                                     verbose=True)
+
+        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
+        valid_perplexity = run_epoch(session, mvalid)
+        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
+
+      if FLAGS.save_path:
+        print("Saving model to %s." % FLAGS.save_path)
+        sv.saver.save(session, FLAGS.save_path)
+
+if __name__ == "__main__":
+  tf.app.run()
diff --git a/egs/ami/s5/local/tensorflow/rnnlm.py b/egs/ami/s5/local/tensorflow/rnnlm.py
deleted file mode 100644
index 9e7be82a962..00000000000
--- a/egs/ami/s5/local/tensorflow/rnnlm.py
+++ /dev/null
@@ -1,423 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Example / benchmark for building a PTB LSTM model.
-Trains the model described in:
-(Zaremba, et. al.) Recurrent Neural Network Regularization
-http://arxiv.org/abs/1409.2329
-There are 3 supported model configurations:
-===========================================
-| config | epochs | train | valid  | test
-===========================================
-| small  | 13     | 37.99 | 121.39 | 115.91
-| medium | 39     | 48.45 |  86.16 |  82.07
-| large  | 55     | 37.87 |  82.62 |  78.29
-The exact results may vary depending on the random initialization.
-The hyperparameters used in the model:
-- init_scale - the initial scale of the weights
-- learning_rate - the initial value of the learning rate
-- max_grad_norm - the maximum permissible norm of the gradient
-- num_layers - the number of LSTM layers
-- num_steps - the number of unrolled steps of LSTM
-- hidden_size - the number of LSTM units
-- max_epoch - the number of epochs trained with the initial learning rate
-- max_max_epoch - the total number of epochs for training
-- keep_prob - the probability of keeping weights in the dropout layer
-- lr_decay - the decay of the learning rate for each epoch after "max_epoch"
-- batch_size - the batch size
-The data required for this example is in the data/ dir of the
-PTB dataset from Tomas Mikolov's webpage:
-$ wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
-$ tar xvf simple-examples.tgz
-To run:
-$ python ptb_word_lm.py --data_path=simple-examples/data/
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import sys
-
-sys.path.insert(0,"/home/hxu/.local/lib/python2.7/site-packages/")
-
-import inspect
-import time
-
-import numpy as np
-import tensorflow as tf
-
-import reader
-
-flags = tf.flags
-logging = tf.logging
-
-flags.DEFINE_string(
-    "model", "small",
-    "A type of model. Possible options are: small, medium, large.")
-flags.DEFINE_string("data_path", None,
-                    "Where the training/test data is stored.")
-flags.DEFINE_string("save_path", None,
-                    "Model output directory.")
-flags.DEFINE_string("wordlist_save_path", None,
-                    "wordmap output directory.")
-flags.DEFINE_bool("use_fp16", False,
-                  "Train using 16-bit floats instead of 32bit floats")
-
-FLAGS = flags.FLAGS
-
-
-def data_type():
-  return tf.float16 if FLAGS.use_fp16 else tf.float32
-
-
-class PTBInput(object):
-  """The input data."""
-
-  def __init__(self, config, data, name=None):
-    self.batch_size = batch_size = config.batch_size
-    self.num_steps = num_steps = config.num_steps
-    self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
-    self.input_data, self.targets = reader.ptb_producer(
-        data, batch_size, num_steps, name=name)
-
-
-class PTBModel(object):
-  """The PTB model."""
-
-  def __init__(self, is_training, config, input_):
-    self._input = input_
-
-    batch_size = input_.batch_size
-    num_steps = input_.num_steps
-    size = config.hidden_size
-    vocab_size = config.vocab_size
-
-    # Slightly better results can be obtained with forget gate biases
-    # initialized to 1 but the hyperparameters of the model would need to be
-    # different than reported in the paper.
-    def lstm_cell():
-      # With the latest TensorFlow source code (as of Mar 27, 2017),
-      # the BasicLSTMCell will need a reuse parameter which is unfortunately not
-      # defined in TensorFlow 1.0. To maintain backwards compatibility, we add
-      # an argument check here:
-      if 'reuse' in inspect.getargspec(
-          tf.contrib.rnn.BasicLSTMCell.__init__).args:
-        return tf.contrib.rnn.BasicLSTMCell(
-            size, forget_bias=0.0, state_is_tuple=True,
-            reuse=tf.get_variable_scope().reuse)
-      else:
-        return tf.contrib.rnn.BasicLSTMCell(
-            size, forget_bias=0.0, state_is_tuple=True)
-    attn_cell = lstm_cell
-    if is_training and config.keep_prob < 1:
-      def attn_cell():
-        return tf.contrib.rnn.DropoutWrapper(
-            lstm_cell(), output_keep_prob=config.keep_prob)
-    self.cell = tf.contrib.rnn.MultiRNNCell(
-        [attn_cell() for _ in range(config.num_layers)], state_is_tuple=True)
-
-    self._initial_state = self.cell.zero_state(batch_size, data_type())
-    self._initial_state_single = self.cell.zero_state(1, data_type())
-
-    self.initial = tf.reshape(tf.stack(axis=0, values=self._initial_state_single), [config.num_layers, 2, 1, size], name="test_initial_state")
-
-
-    # first implement the less efficient version
-    test_word_in = tf.placeholder(tf.int32, [1, 1], name="test_word_in")
-
-    state_placeholder = tf.placeholder(tf.float32, [config.num_layers, 2, 1, size], name="test_state_in")
-    # unpacking the input state context 
-    l = tf.unstack(state_placeholder, axis=0)
-    test_input_state = tuple(
-               [tf.contrib.rnn.LSTMStateTuple(l[idx][0],l[idx][1])
-                 for idx in range(config.num_layers)]
-    )
-
-    with tf.device("/cpu:0"):
-      self.embedding = tf.get_variable(
-          "embedding", [vocab_size, size], dtype=data_type())
-
-      inputs = tf.nn.embedding_lookup(self.embedding, input_.input_data)
-      test_inputs = tf.nn.embedding_lookup(self.embedding, test_word_in)
-
-    # test time
-    with tf.variable_scope("RNN"):
-      (test_cell_output, test_output_state) = self.cell(test_inputs[:, 0, :], test_input_state)
-
-    test_state_out = tf.reshape(tf.stack(axis=0, values=test_output_state), [config.num_layers, 2, 1, size], name="test_state_out")
-    test_cell_out = tf.reshape(test_cell_output, [1, size], name="test_cell_out")
-    # above is the first part of the graph for test
-    # test-word-in
-    #               > ---- > test-state-out
-    # test-state-in        > test-cell-out
-
-
-    # below is the 2nd part of the graph for test
-    # test-word-out
-    #               > prob(word | test-word-out)
-    # test-cell-in
-
-    test_word_out = tf.placeholder(tf.int32, [1, 1], name="test_word_out")
-    cellout_placeholder = tf.placeholder(tf.float32, [1, size], name="test_cell_in")
-
-    softmax_w = tf.get_variable(
-        "softmax_w", [size, vocab_size], dtype=data_type())
-    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
-
-    test_logits = tf.matmul(cellout_placeholder, softmax_w) + softmax_b
-    test_softmaxed = tf.nn.softmax(test_logits)
-
-    p_word = test_softmaxed[0, test_word_out[0,0]]
-    test_out = tf.identity(p_word, name="test_out")
-
-    if is_training and config.keep_prob < 1:
-      inputs = tf.nn.dropout(inputs, config.keep_prob)
-
-    # Simplified version of models/tutorials/rnn/rnn.py's rnn().
-    # This builds an unrolled LSTM for tutorial purposes only.
-    # In general, use the rnn() or state_saving_rnn() from rnn.py.
-    #
-    # The alternative version of the code below is:
-    #
-    # inputs = tf.unstack(inputs, num=num_steps, axis=1)
-    # outputs, state = tf.contrib.rnn.static_rnn(
-    #     cell, inputs, initial_state=self._initial_state)
-    outputs = []
-    state = self._initial_state
-    with tf.variable_scope("RNN"):
-      for time_step in range(num_steps):
-        if time_step > -1: tf.get_variable_scope().reuse_variables()
-        (cell_output, state) = self.cell(inputs[:, time_step, :], state)
-        outputs.append(cell_output)
-
-    output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])
-    logits = tf.matmul(output, softmax_w) + softmax_b
-    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
-        [logits],
-        [tf.reshape(input_.targets, [-1])],
-        [tf.ones([batch_size * num_steps], dtype=data_type())])
-    self._cost = cost = tf.reduce_sum(loss) / batch_size
-    self._final_state = state
-
-    if not is_training:
-      return
-
-    self._lr = tf.Variable(0.0, trainable=False)
-    tvars = tf.trainable_variables()
-    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
-                                      config.max_grad_norm)
-    optimizer = tf.train.GradientDescentOptimizer(self._lr)
-    self._train_op = optimizer.apply_gradients(
-        zip(grads, tvars),
-        global_step=tf.contrib.framework.get_or_create_global_step())
-
-    self._new_lr = tf.placeholder(
-        tf.float32, shape=[], name="new_learning_rate")
-    self._lr_update = tf.assign(self._lr, self._new_lr)
-
-  def assign_lr(self, session, lr_value):
-    session.run(self._lr_update, feed_dict={self._new_lr: lr_value})
-
-  @property
-  def input(self):
-    return self._input
-
-  @property
-  def initial_state(self):
-    return self._initial_state
-
-  @property
-  def cost(self):
-    return self._cost
-
-  @property
-  def final_state(self):
-    return self._final_state
-
-  @property
-  def lr(self):
-    return self._lr
-
-  @property
-  def train_op(self):
-    return self._train_op
-
-
-class SmallConfig(object):
-  """Small config."""
-  init_scale = 0.1
-  learning_rate = 1.0
-  max_grad_norm = 5
-  num_layers = 2
-  num_steps = 20
-  hidden_size = 200
-  max_epoch = 4
-  max_max_epoch = 13
-  keep_prob = 1.0
-  lr_decay = 0.5
-  batch_size = 20
-  vocab_size = 10000
-
-
-class MediumConfig(object):
-  """Medium config."""
-  init_scale = 0.05
-  learning_rate = 1.0
-  max_grad_norm = 5
-  num_layers = 2
-  num_steps = 35
-  hidden_size = 650
-  max_epoch = 6
-  max_max_epoch = 39
-  keep_prob = 0.5
-  lr_decay = 0.8
-  batch_size = 20
-  vocab_size = 10000
-
-
-class LargeConfig(object):
-  """Large config."""
-  init_scale = 0.04
-  learning_rate = 1.0
-  max_grad_norm = 10
-  num_layers = 2
-  num_steps = 35
-  hidden_size = 1500
-  max_epoch = 14
-  max_max_epoch = 55
-  keep_prob = 0.35
-  lr_decay = 1 / 1.15
-  batch_size = 20
-  vocab_size = 10000
-
-
-class TestConfig(object):
-  """Tiny config, for testing."""
-  init_scale = 0.1
-  learning_rate = 1.0
-  max_grad_norm = 1
-  num_layers = 1
-  num_steps = 2
-  hidden_size = 2
-  max_epoch = 1
-  max_max_epoch = 1
-  keep_prob = 1.0
-  lr_decay = 0.5
-  batch_size = 20
-  vocab_size = 10000
-
-
-def run_epoch(session, model, eval_op=None, verbose=False):
-  """Runs the model on the given data."""
-  start_time = time.time()
-  costs = 0.0
-  iters = 0
-  state = session.run(model.initial_state)
-
-  fetches = {
-      "cost": model.cost,
-      "final_state": model.final_state,
-  }
-  if eval_op is not None:
-    fetches["eval_op"] = eval_op
-
-  for step in range(model.input.epoch_size):
-    feed_dict = {}
-    for i, (c, h) in enumerate(model.initial_state):
-      feed_dict[c] = state[i].c
-      feed_dict[h] = state[i].h
-
-    vals = session.run(fetches, feed_dict)
-    cost = vals["cost"]
-    state = vals["final_state"]
-
-    costs += cost
-    iters += model.input.num_steps
-
-    if verbose and step % (model.input.epoch_size // 10) == 10:
-      print("%.3f perplexity: %.3f speed: %.0f wps" %
-            (step * 1.0 / model.input.epoch_size, np.exp(costs / iters),
-             iters * model.input.batch_size / (time.time() - start_time)))
-
-  return np.exp(costs / iters)
-
-
-def get_config():
-  if FLAGS.model == "small":
-    return SmallConfig()
-  elif FLAGS.model == "medium":
-    return MediumConfig()
-  elif FLAGS.model == "large":
-    return LargeConfig()
-  elif FLAGS.model == "test":
-    return TestConfig()
-  else:
-    raise ValueError("Invalid model: %s", FLAGS.model)
-
-
-def main(_):
-  if not FLAGS.data_path:
-    raise ValueError("Must set --data_path to PTB data directory")
-
-  raw_data = reader.ptb_raw_data(FLAGS.data_path)
-  train_data, valid_data, _, word_map = raw_data
-
-  with open(FLAGS.wordlist_save_path, "w") as wmap_file:
-    count_pairs = sorted(word_map.items(), key=lambda x: (x[1], x[0]))
-    for k, v in count_pairs: 
-      wmap_file.write(str(k) + " " + str(v) + "\n")
-
-  config = get_config()
-  eval_config = get_config()
-  eval_config.batch_size = 1
-  eval_config.num_steps = 1
-
-  with tf.Graph().as_default():
-    initializer = tf.random_uniform_initializer(-config.init_scale,
-                                                config.init_scale)
-
-    with tf.name_scope("Train"):
-      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
-      with tf.variable_scope("Model", reuse=None, initializer=initializer):
-        m = PTBModel(is_training=True, config=config, input_=train_input)
-      tf.summary.scalar("Training Loss", m.cost)
-      tf.summary.scalar("Learning Rate", m.lr)
-
-    with tf.name_scope("Valid"):
-      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
-      with tf.variable_scope("Model", reuse=True, initializer=initializer):
-        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
-      tf.summary.scalar("Validation Loss", mvalid.cost)
-
-    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
-    with sv.managed_session() as session:
-      for i in range(config.max_max_epoch):
-        lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
-        m.assign_lr(session, config.learning_rate * lr_decay)
-
-        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
-        train_perplexity = run_epoch(session, m, eval_op=m.train_op,
-                                     verbose=True)
-
-        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
-        valid_perplexity = run_epoch(session, mvalid)
-        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
-
-      if FLAGS.save_path:
-        print("Saving model to %s." % FLAGS.save_path)
-        sv.saver.save(session, FLAGS.save_path)
-
-if __name__ == "__main__":
-  tf.app.run()
diff --git a/egs/ami/s5/local/tensorflow/rnnlm.py b/egs/ami/s5/local/tensorflow/rnnlm.py
new file mode 120000
index 00000000000..86c615508a3
--- /dev/null
+++ b/egs/ami/s5/local/tensorflow/rnnlm.py
@@ -0,0 +1 @@
+lstm.py
\ No newline at end of file
diff --git a/egs/ami/s5/local/tensorflow/run.sh b/egs/ami/s5/local/tensorflow/run.sh
index a5e3e3e3cfa..ac016200524 100755
--- a/egs/ami/s5/local/tensorflow/run.sh
+++ b/egs/ami/s5/local/tensorflow/run.sh
@@ -2,7 +2,6 @@
 mic=ihm
 ngram_order=3
 model_type=small
-dir=data/new_tensorflow/$model_type
 stage=1
 weight=0.5
 
@@ -12,6 +11,7 @@ weight=0.5
 
 set -e
 
+dir=data/new_tensorflow/$model_type
 mkdir -p $dir
 
 if [ $stage -le 1 ]; then

From a33d4ef779de6d65affe44a4fcc601d3cf408829 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hxu@b02.clsp.jhu.edu>
Date: Mon, 19 Jun 2017 16:04:40 -0400
Subject: [PATCH 21/30] use only one sentence-boundary symbol

---
 .../{train_rnnlm.sh => prep_data.sh}          |   0
 egs/ami/s5/local/tensorflow/run_vannila.sh    |  46 +++
 egs/ami/s5/local/tensorflow/vanilla_rnnlm.py  | 380 ++++++++++++++++++
 3 files changed, 426 insertions(+)
 rename egs/ami/s5/local/tensorflow/{train_rnnlm.sh => prep_data.sh} (100%)
 create mode 100755 egs/ami/s5/local/tensorflow/run_vannila.sh
 create mode 100644 egs/ami/s5/local/tensorflow/vanilla_rnnlm.py

diff --git a/egs/ami/s5/local/tensorflow/train_rnnlm.sh b/egs/ami/s5/local/tensorflow/prep_data.sh
similarity index 100%
rename from egs/ami/s5/local/tensorflow/train_rnnlm.sh
rename to egs/ami/s5/local/tensorflow/prep_data.sh
diff --git a/egs/ami/s5/local/tensorflow/run_vannila.sh b/egs/ami/s5/local/tensorflow/run_vannila.sh
new file mode 100755
index 00000000000..bdef6f2c42d
--- /dev/null
+++ b/egs/ami/s5/local/tensorflow/run_vannila.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+mic=ihm
+ngram_order=3
+model_type=small
+stage=1
+weight=0.5
+
+. ./utils/parse_options.sh
+. ./cmd.sh
+. ./path.sh
+
+set -e
+
+dir=data/vannila_tensorflow/$model_type
+mkdir -p $dir
+
+if [ $stage -le 1 ]; then
+  local/tensorflow/prep_data.sh $dir
+fi
+
+if [ $stage -le 2 ]; then
+  mkdir -p $dir/
+  python local/tensorflow/vanilla_rnnlm.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm --vocab_path=$dir/wordlist.rnn.final
+fi
+
+final_lm=ami_fsh.o3g.kn
+LM=$final_lm.pr1-7
+
+if [ $stage -le 3 ]; then
+#  for decode_set in dev; do
+  for decode_set in dev eval; do
+    basedir=exp/$mic/nnet3/tdnn_sp/
+    decode_dir=${basedir}/decode_${decode_set}
+
+    # Lattice rescoring
+    steps/lmrescore_rnnlm_lat.sh \
+      --cmd "$tensorflow_cmd --mem 16G" \
+      --rnnlm-ver tensorflow  --weight $weight --max-ngram-order $ngram_order \
+      data/lang_$LM $dir \
+      data/$mic/${decode_set}_hires ${decode_dir} \
+      ${decode_dir}.vanilla.tfrnnlm.lat.${ngram_order}gram.$weight  &
+
+  done
+fi
+
+wait
diff --git a/egs/ami/s5/local/tensorflow/vanilla_rnnlm.py b/egs/ami/s5/local/tensorflow/vanilla_rnnlm.py
new file mode 100644
index 00000000000..642d7b01172
--- /dev/null
+++ b/egs/ami/s5/local/tensorflow/vanilla_rnnlm.py
@@ -0,0 +1,380 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#           Modified by Hainan Xu to be used in Kaldi for lattice rescoring 2017
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import sys
+
+sys.path.insert(0,"/home/hxu/.local/lib/python2.7/site-packages/")
+
+import inspect
+import time
+
+import numpy as np
+import tensorflow as tf
+
+import reader
+
+flags = tf.flags
+logging = tf.logging
+
+flags.DEFINE_string(
+    "model", "small",
+    "A type of model. Possible options are: small, medium, large.")
+flags.DEFINE_string("data_path", None,
+                    "Where the training/test data is stored.")
+flags.DEFINE_string("vocab_path", None,
+                    "Where the wordlist file is stored.")
+flags.DEFINE_string("save_path", None,
+                    "Model output directory.")
+flags.DEFINE_bool("use_fp16", False,
+                  "Train using 16-bit floats instead of 32bit floats")
+
+FLAGS = flags.FLAGS
+
+
+def data_type():
+  return tf.float16 if FLAGS.use_fp16 else tf.float32
+
+
+class RNNLMInput(object):
+  """The input data."""
+
+  def __init__(self, config, data, name=None):
+    self.batch_size = batch_size = config.batch_size
+    self.num_steps = num_steps = config.num_steps
+    self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
+    self.input_data, self.targets = reader.rnnlm_producer(
+        data, batch_size, num_steps, name=name)
+
+class RNNLMModel(object):
+  """The RNNLM model."""
+
+  def __init__(self, is_training, config, input_):
+    self._input = input_
+
+    batch_size = input_.batch_size
+    num_steps = input_.num_steps
+    size = config.hidden_size
+    vocab_size = config.vocab_size
+
+    def rnn_cell():
+      # With the latest TensorFlow source code (as of Mar 27, 2017),
+      # the BasicRNNCell will need a reuse parameter which is unfortunately not
+      # defined in TensorFlow 1.0. To maintain backwards compatibility, we add
+      # an argument check here:
+      if 'reuse' in inspect.getargspec(
+          tf.contrib.rnn.BasicRNNCell.__init__).args:
+        return tf.contrib.rnn.BasicRNNCell(size,
+                                           reuse=tf.get_variable_scope().reuse)
+      else:
+        return tf.contrib.rnn.BasicRNNCell(size)
+    attn_cell = rnn_cell
+
+    if is_training and config.keep_prob < 1:
+      def attn_cell():
+        return tf.contrib.rnn.DropoutWrapper(
+            rnn_cell(), output_keep_prob=config.keep_prob)
+
+    self.cell = tf.contrib.rnn.MultiRNNCell(
+        [attn_cell() for _ in range(config.num_layers)], state_is_tuple=True)
+
+    self._initial_state = self.cell.zero_state(batch_size, data_type())
+    self._initial_state_single = self.cell.zero_state(1, data_type())
+
+    self.initial = tf.reshape(tf.stack(axis=0, values=self._initial_state_single), [config.num_layers, 1, size], name="test_initial_state")
+
+    # first implement the less efficient version
+    test_word_in = tf.placeholder(tf.int32, [1, 1], name="test_word_in")
+
+    state_placeholder = tf.placeholder(tf.float32, [config.num_layers, 1, size], name="test_state_in")
+    # unpacking the input state context 
+    l = tf.unstack(state_placeholder, axis=0)
+    test_input_state = tuple(
+               [l[idx] for idx in range(config.num_layers)]
+    )
+
+    with tf.device("/cpu:0"):
+      self.embedding = tf.get_variable(
+          "embedding", [vocab_size, size], dtype=data_type())
+
+      inputs = tf.nn.embedding_lookup(self.embedding, input_.input_data)
+      test_inputs = tf.nn.embedding_lookup(self.embedding, test_word_in)
+
+    # test time
+    with tf.variable_scope("RNN"):
+      (test_cell_output, test_output_state) = self.cell(test_inputs[:, 0, :], test_input_state)
+
+    test_state_out = tf.reshape(tf.stack(axis=0, values=test_output_state), [config.num_layers, 1, size], name="test_state_out")
+    test_cell_out = tf.reshape(test_cell_output, [1, size], name="test_cell_out")
+    # above is the first part of the graph for test
+    # test-word-in
+    #               > ---- > test-state-out
+    # test-state-in        > test-cell-out
+
+
+    # below is the 2nd part of the graph for test
+    # test-word-out
+    #               > prob(word | test-word-out)
+    # test-cell-in
+
+    test_word_out = tf.placeholder(tf.int32, [1, 1], name="test_word_out")
+    cellout_placeholder = tf.placeholder(tf.float32, [1, size], name="test_cell_in")
+
+    softmax_w = tf.get_variable(
+        "softmax_w", [size, vocab_size], dtype=data_type())
+    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
+
+    test_logits = tf.matmul(cellout_placeholder, softmax_w) + softmax_b
+    test_softmaxed = tf.nn.softmax(test_logits)
+
+    p_word = test_softmaxed[0, test_word_out[0,0]]
+    test_out = tf.identity(p_word, name="test_out")
+
+    if is_training and config.keep_prob < 1:
+      inputs = tf.nn.dropout(inputs, config.keep_prob)
+
+    # Simplified version of models/tutorials/rnn/rnn.py's rnn().
+    # This builds an unrolled RNN for tutorial purposes only.
+    # In general, use the rnn() or state_saving_rnn() from rnn.py.
+    #
+    # The alternative version of the code below is:
+    #
+    # inputs = tf.unstack(inputs, num=num_steps, axis=1)
+    # outputs, state = tf.contrib.rnn.static_rnn(
+    #     cell, inputs, initial_state=self._initial_state)
+    outputs = []
+    state = self._initial_state
+    with tf.variable_scope("RNN"):
+      for time_step in range(num_steps):
+        if time_step > -1: tf.get_variable_scope().reuse_variables()
+        (cell_output, state) = self.cell(inputs[:, time_step, :], state)
+        outputs.append(cell_output)
+
+    output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])
+    logits = tf.matmul(output, softmax_w) + softmax_b
+    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
+        [logits],
+        [tf.reshape(input_.targets, [-1])],
+        [tf.ones([batch_size * num_steps], dtype=data_type())])
+    self._cost = cost = tf.reduce_sum(loss) / batch_size
+    self._final_state = state
+
+    if not is_training:
+      return
+
+    self._lr = tf.Variable(0.0, trainable=False)
+    tvars = tf.trainable_variables()
+    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
+                                      config.max_grad_norm)
+#    optimizer = tf.train.AdamOptimizer()  # TODO
+    optimizer = tf.train.MomentumOptimizer(self._lr, 0.9)  # TODO
+#    optimizer = tf.train.GradientDescentOptimizer(self._lr)  # TODO
+    self._train_op = optimizer.apply_gradients(
+        zip(grads, tvars),
+        global_step=tf.contrib.framework.get_or_create_global_step())
+
+    self._new_lr = tf.placeholder(
+        tf.float32, shape=[], name="new_learning_rate")
+    self._lr_update = tf.assign(self._lr, self._new_lr)
+
+  def assign_lr(self, session, lr_value):
+    session.run(self._lr_update, feed_dict={self._new_lr: lr_value})
+
+  @property
+  def input(self):
+    return self._input
+
+  @property
+  def initial_state(self):
+    return self._initial_state
+
+  @property
+  def cost(self):
+    return self._cost
+
+  @property
+  def final_state(self):
+    return self._final_state
+
+  @property
+  def lr(self):
+    return self._lr
+
+  @property
+  def train_op(self):
+    return self._train_op
+
+class TestConfig(object):
+  """Tiny config, for testing."""
+  init_scale = 0.1
+  learning_rate = 1.0
+  max_grad_norm = 1
+  num_layers = 1
+  num_steps = 2
+  hidden_size = 2
+  max_epoch = 1
+  max_max_epoch = 1
+  keep_prob = 1.0
+  lr_decay = 0.5
+  batch_size = 20
+
+class SmallConfig(object):
+  """Small config."""
+  init_scale = 0.1
+  learning_rate = 0.1
+  max_grad_norm = 0.5
+  num_layers = 2
+  num_steps = 20
+  hidden_size = 200
+  max_epoch = 10
+  max_max_epoch = 40
+  keep_prob = 1
+  lr_decay = 0.5
+  batch_size = 64
+
+class MediumConfig(object):
+  """Medium config."""
+  init_scale = 0.05
+  learning_rate = 1.0
+  max_grad_norm = 5
+  num_layers = 2
+  num_steps = 35
+  hidden_size = 650
+  max_epoch = 6
+  max_max_epoch = 39
+  keep_prob = 0.5
+  lr_decay = 0.8
+  batch_size = 20
+
+class LargeConfig(object):
+  """Large config."""
+  init_scale = 0.04
+  learning_rate = 1.0
+  max_grad_norm = 10
+  num_layers = 2
+  num_steps = 35
+  hidden_size = 1500
+  max_epoch = 14
+  max_max_epoch = 55
+  keep_prob = 0.35
+  lr_decay = 1 / 1.15
+  batch_size = 20
+
+def run_epoch(session, model, eval_op=None, verbose=False):
+  """Runs the model for one epoch and returns exp(total cost / total steps).
+
+  The state fetched after each batch is fed back as the next initial state.
+  eval_op, if given, is run together with the cost on every batch.
+  """
+  start_time = time.time()
+  costs = 0.0
+  iters = 0
+  state = session.run(model.initial_state)
+  # epoch_size // 10 is 0 for epochs under 10 batches; clamp to 1 so the
+  # progress check below cannot raise ZeroDivisionError.
+  log_interval = max(model.input.epoch_size // 10, 1)
+
+  fetches = {"cost": model.cost, "final_state": model.final_state}
+  if eval_op is not None:
+    fetches["eval_op"] = eval_op
+
+  for step in range(model.input.epoch_size):
+    feed_dict = {h: state[i] for i, h in enumerate(model.initial_state)}
+
+    vals = session.run(fetches, feed_dict)
+    state = vals["final_state"]
+    costs += vals["cost"]
+    iters += model.input.num_steps
+
+    if verbose and step % log_interval == 10:
+      print("%.3f perplexity: %.3f speed: %.0f wps" %
+            (step * 1.0 / model.input.epoch_size, np.exp(costs / iters),
+             iters * model.input.batch_size / (time.time() - start_time)))
+
+  return np.exp(costs / iters)
+
+
+def get_config():
+  """Returns a new config instance for the model selected by --model.
+
+  Raises:
+    ValueError: if FLAGS.model is not small, medium, large or test.
+  """
+  configs = {"small": SmallConfig, "medium": MediumConfig,
+             "large": LargeConfig, "test": TestConfig}
+  if FLAGS.model not in configs:
+    raise ValueError("Invalid model: %s" % FLAGS.model)
+  return configs[FLAGS.model]()
+
+
+def main(_):
+  if not FLAGS.data_path:
+    raise ValueError("Must set --data_path to RNNLM data directory")
+
+  raw_data = reader.rnnlm_raw_data(FLAGS.data_path, FLAGS.vocab_path)
+  train_data, valid_data, _, word_map = raw_data
+
+#  with open(FLAGS.wordlist_save_path, "w") as wmap_file:
+#    count_pairs = sorted(word_map.items(), key=lambda x: (x[1], x[0]))
+#    for k, v in count_pairs: 
+#      wmap_file.write(str(k) + " " + str(v) + "\n")
+
+  config = get_config()
+  config.vocab_size = len(word_map)
+  eval_config = get_config()
+  eval_config.batch_size = 1
+  eval_config.num_steps = 1
+
+  with tf.Graph().as_default():
+    initializer = tf.random_uniform_initializer(-config.init_scale,
+                                                config.init_scale)
+
+    with tf.name_scope("Train"):
+      train_input = RNNLMInput(config=config, data=train_data, name="TrainInput")
+      with tf.variable_scope("Model", reuse=None, initializer=initializer):
+        m = RNNLMModel(is_training=True, config=config, input_=train_input)
+      tf.summary.scalar("Training Loss", m.cost)
+      tf.summary.scalar("Learning Rate", m.lr)
+
+    with tf.name_scope("Valid"):
+      valid_input = RNNLMInput(config=config, data=valid_data, name="ValidInput")
+      with tf.variable_scope("Model", reuse=True, initializer=initializer):
+        mvalid = RNNLMModel(is_training=False, config=config, input_=valid_input)
+      tf.summary.scalar("Validation Loss", mvalid.cost)
+
+    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
+    with sv.managed_session() as session:
+      for i in range(config.max_max_epoch):
+        lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
+        m.assign_lr(session, config.learning_rate * lr_decay)
+
+        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
+        train_perplexity = run_epoch(session, m, eval_op=m.train_op,
+                                     verbose=True)
+
+        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
+        valid_perplexity = run_epoch(session, mvalid)
+        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
+
+      if FLAGS.save_path:
+        print("Saving model to %s." % FLAGS.save_path)
+        sv.saver.save(session, FLAGS.save_path)
+
+if __name__ == "__main__":
+  tf.app.run()

From 063fe10f4de3f47ce103c7dbf764a64068babb54 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hxu@b02.clsp.jhu.edu>
Date: Mon, 19 Jun 2017 16:05:51 -0400
Subject: [PATCH 22/30] use only one sentence-boundary symbol

---
 egs/ami/s5/local/tensorflow/lstm.py          | 67 ++++++++++----------
 egs/ami/s5/local/tensorflow/prep_data.sh     | 18 ++++--
 egs/ami/s5/local/tensorflow/reader.py        | 50 +++++++++------
 egs/ami/s5/local/tensorflow/run.sh           | 21 ++----
 egs/ami/s5/local/tensorflow/vanilla_rnnlm.py |  4 +-
 src/tensorflow/tensorflow-rnnlm-lib.cc       | 19 +++---
 src/tensorflow/tensorflow-rnnlm-lib.h        |  7 +-
 7 files changed, 92 insertions(+), 94 deletions(-)

diff --git a/egs/ami/s5/local/tensorflow/lstm.py b/egs/ami/s5/local/tensorflow/lstm.py
index 555ec5b7494..590744ed4fb 100644
--- a/egs/ami/s5/local/tensorflow/lstm.py
+++ b/egs/ami/s5/local/tensorflow/lstm.py
@@ -1,4 +1,5 @@
 # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#           Modified by Hainan Xu to be used in Kaldi for lattice rescoring 2017
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -37,10 +38,10 @@
     "A type of model. Possible options are: small, medium, large.")
 flags.DEFINE_string("data_path", None,
                     "Where the training/test data is stored.")
+flags.DEFINE_string("vocab_path", None,
+                    "Where the wordlist file is stored.")
 flags.DEFINE_string("save_path", None,
                     "Model output directory.")
-flags.DEFINE_string("wordlist_save_path", None,
-                    "wordmap output directory.")
 flags.DEFINE_bool("use_fp16", False,
                   "Train using 16-bit floats instead of 32bit floats")
 
@@ -51,19 +52,19 @@ def data_type():
   return tf.float16 if FLAGS.use_fp16 else tf.float32
 
 
-class PTBInput(object):
+class RNNLMInput(object):
   """The input data."""
 
   def __init__(self, config, data, name=None):
     self.batch_size = batch_size = config.batch_size
     self.num_steps = num_steps = config.num_steps
     self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
-    self.input_data, self.targets = reader.ptb_producer(
+    self.input_data, self.targets = reader.rnnlm_producer(
         data, batch_size, num_steps, name=name)
 
 
-class PTBModel(object):
-  """The PTB model."""
+class RNNLMModel(object):
+  """The RNNLM model."""
 
   def __init__(self, is_training, config, input_):
     self._input = input_
@@ -223,6 +224,19 @@ def lr(self):
   def train_op(self):
     return self._train_op
 
+class TestConfig(object):
+  """Tiny config, for testing."""
+  init_scale = 0.1
+  learning_rate = 1.0
+  max_grad_norm = 1
+  num_layers = 1
+  num_steps = 2
+  hidden_size = 2
+  max_epoch = 1
+  max_max_epoch = 1
+  keep_prob = 1.0
+  lr_decay = 0.5
+  batch_size = 20
 
 class SmallConfig(object):
   """Small config."""
@@ -236,8 +250,7 @@ class SmallConfig(object):
   max_max_epoch = 13
   keep_prob = 1.0
   lr_decay = 0.5
-  batch_size = 20
-  vocab_size = 10000
+  batch_size = 64
 
 
 class MediumConfig(object):
@@ -253,7 +266,6 @@ class MediumConfig(object):
   keep_prob = 0.5
   lr_decay = 0.8
   batch_size = 20
-  vocab_size = 10000
 
 
 class LargeConfig(object):
@@ -269,24 +281,8 @@ class LargeConfig(object):
   keep_prob = 0.35
   lr_decay = 1 / 1.15
   batch_size = 20
-  vocab_size = 10000
 
 
-class TestConfig(object):
-  """Tiny config, for testing."""
-  init_scale = 0.1
-  learning_rate = 1.0
-  max_grad_norm = 1
-  num_layers = 1
-  num_steps = 2
-  hidden_size = 2
-  max_epoch = 1
-  max_max_epoch = 1
-  keep_prob = 1.0
-  lr_decay = 0.5
-  batch_size = 20
-  vocab_size = 10000
-
 
 def run_epoch(session, model, eval_op=None, verbose=False):
   """Runs the model on the given data."""
@@ -338,17 +334,18 @@ def get_config():
 
 def main(_):
   if not FLAGS.data_path:
-    raise ValueError("Must set --data_path to PTB data directory")
+    raise ValueError("Must set --data_path to RNNLM data directory")
 
-  raw_data = reader.ptb_raw_data(FLAGS.data_path)
+  raw_data = reader.rnnlm_raw_data(FLAGS.data_path, FLAGS.vocab_path)
   train_data, valid_data, _, word_map = raw_data
 
-  with open(FLAGS.wordlist_save_path, "w") as wmap_file:
-    count_pairs = sorted(word_map.items(), key=lambda x: (x[1], x[0]))
-    for k, v in count_pairs: 
-      wmap_file.write(str(k) + " " + str(v) + "\n")
+#  with open(FLAGS.wordlist_save_path, "w") as wmap_file:
+#    count_pairs = sorted(word_map.items(), key=lambda x: (x[1], x[0]))
+#    for k, v in count_pairs: 
+#      wmap_file.write(str(k) + " " + str(v) + "\n")
 
   config = get_config()
+  config.vocab_size = len(word_map)
   eval_config = get_config()
   eval_config.batch_size = 1
   eval_config.num_steps = 1
@@ -358,16 +355,16 @@ def main(_):
                                                 config.init_scale)
 
     with tf.name_scope("Train"):
-      train_input = PTBInput(config=config, data=train_data, name="TrainInput")
+      train_input = RNNLMInput(config=config, data=train_data, name="TrainInput")
       with tf.variable_scope("Model", reuse=None, initializer=initializer):
-        m = PTBModel(is_training=True, config=config, input_=train_input)
+        m = RNNLMModel(is_training=True, config=config, input_=train_input)
       tf.summary.scalar("Training Loss", m.cost)
       tf.summary.scalar("Learning Rate", m.lr)
 
     with tf.name_scope("Valid"):
-      valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")
+      valid_input = RNNLMInput(config=config, data=valid_data, name="ValidInput")
       with tf.variable_scope("Model", reuse=True, initializer=initializer):
-        mvalid = PTBModel(is_training=False, config=config, input_=valid_input)
+        mvalid = RNNLMModel(is_training=False, config=config, input_=valid_input)
       tf.summary.scalar("Validation Loss", mvalid.cost)
 
     sv = tf.train.Supervisor(logdir=FLAGS.save_path)
diff --git a/egs/ami/s5/local/tensorflow/prep_data.sh b/egs/ami/s5/local/tensorflow/prep_data.sh
index 2668a1889b8..a763aaf15bd 100755
--- a/egs/ami/s5/local/tensorflow/prep_data.sh
+++ b/egs/ami/s5/local/tensorflow/prep_data.sh
@@ -4,7 +4,7 @@
 set -e
 
 train_text=data/ihm/train/text
-nwords=9997
+nwords=9999
 
 . path.sh
 . cmd.sh
@@ -27,7 +27,7 @@ cat $srcdir/lexicon.txt | awk '{print $1}' | sort -u | grep -v -w '!SIL' > $dir/
 # Get training data with OOV words (w.r.t. our current vocab) replaced with <unk>.
 cat $train_text | awk -v w=$dir/wordlist.all \
   'BEGIN{while((getline<w)>0) v[$1]=1;}
-  {for (i=2;i<=NF;i++) if ($i in v) printf $i" ";else printf "<unk> ";print ""}'|sed 's/ $//g' \
+  {for (i=2;i<=NF;i++) if ($i in v) printf $i" ";else printf "<unk> ";print ""}' | sed 's=$= </s>=g' \
   | perl -e ' use List::Util qw(shuffle); @A=<>; print join("", shuffle(@A)); ' \
   | gzip -c > $dir/all.gz
 
@@ -37,7 +37,7 @@ gunzip -c $dir/all.gz | head -n $heldout_sent > $dir/valid.in # validation data
 gunzip -c $dir/all.gz | tail -n +$heldout_sent > $dir/train.in # training data
 
 
-cat $dir/train.in $dir/wordlist.all | grep -v '</s>' | grep -v '<s>' | \
+cat $dir/train.in $dir/wordlist.all | \
   awk '{ for(x=1;x<=NF;x++) count[$x]++; } END{for(w in count){print count[w], w;}}' | \
   sort -nr > $dir/unigram.counts
 
@@ -48,12 +48,22 @@ head -$nwords $dir/unigram.counts | awk '{print $2}' | tee $dir/wordlist.rnn | a
 tail -n +$nwords $dir/unigram.counts > $dir/unk_class.counts
 
 for type in train valid; do
-  cat $dir/$type.in | awk -v w=$dir/wordlist.rnn 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=1;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' | sed "s=^= <s> =g" | sed "s=$= </s>=" > $dir/$type
+  cat $dir/$type.in | awk -v w=$dir/wordlist.rnn 'BEGIN{while((getline<w)>0)d[$1]=1}{for(i=1;i<=NF;i++){if(d[$i]==1){s=$i}else{s="<oos>"} printf("%s ",s)} print""}' > $dir/$type
 done
 
 # OK we'll train the RNNLM on this data.
 
 touch $dir/unk.probs  # dummy file, not used for cued-rnnlm
 
+cp $dir/wordlist.rnn $dir/wordlist.rnn.final
+
+has_oos=`grep "<oos>" $dir/wordlist.rnn.final | wc -l | awk '{print $1}'`
+if [ $has_oos == "0" ]; then
+#  n=`wc -l $dir/wordlist.rnn.final | awk '{print $1}'`
+#  echo n is $n
+  echo "<oos>" >> $dir/wordlist.rnn.final
+fi
+
+
 echo "data preparation finished"
 
diff --git a/egs/ami/s5/local/tensorflow/reader.py b/egs/ami/s5/local/tensorflow/reader.py
index 964a7b5e949..5458b93ea31 100644
--- a/egs/ami/s5/local/tensorflow/reader.py
+++ b/egs/ami/s5/local/tensorflow/reader.py
@@ -1,4 +1,5 @@
 # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#           Modified by Hainan Xu to be used in Kaldi for lattice rescoring 2017
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +15,7 @@
 # ==============================================================================
 
 
-"""Utilities for parsing PTB text files."""
+"""Utilities for parsing RNNLM text files."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -24,23 +25,30 @@
 
 import tensorflow as tf
 
-
-
 def _read_words(filename):
   with tf.gfile.GFile(filename, "r") as f:
     return f.read().decode("utf-8").split()
 #    return f.read().decode("utf-8").replace("\n", "<eos>").split()
 
-
 def _build_vocab(filename):
-  data = _read_words(filename)
-
-  counter = collections.Counter(data)
-  count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
-
-  words, _ = list(zip(*count_pairs))
+#  data = _read_words(filename)
+#
+#  counter = collections.Counter(data)
+#  count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
+#
+#  words, _ = list(zip(*count_pairs))
+#  word_to_id = dict(zip(words, range(len(words))))
+  
+#  word_to_id = {}
+#  new_id = 0
+#  with open(filename, "r") as f:
+#    for word in f:
+#      word_to_id[word] = new_id
+#      new_id = new_id + 1
+#  return word_to_id
+
+  words = _read_words(filename)
   word_to_id = dict(zip(words, range(len(words))))
-
   return word_to_id
 
 
@@ -49,13 +57,13 @@ def _file_to_word_ids(filename, word_to_id):
   return [word_to_id[word] for word in data if word in word_to_id]
 
 
-def ptb_raw_data(data_path=None):
-  """Load PTB raw data from data directory "data_path".
+def rnnlm_raw_data(data_path, vocab_path):
+  """Load RNNLM raw data from data directory "data_path".
 
-  Reads PTB text files, converts strings to integer ids,
+  Reads RNNLM text files, converts strings to integer ids,
   and performs mini-batching of the inputs.
 
-  The PTB dataset comes from Tomas Mikolov's webpage:
+  The RNNLM dataset comes from Tomas Mikolov's webpage:
 
   http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
 
@@ -65,14 +73,14 @@ def ptb_raw_data(data_path=None):
 
   Returns:
     tuple (train_data, valid_data, test_data, vocabulary)
-    where each of the data objects can be passed to PTBIterator.
+    where each of the data objects can be passed to RNNLMIterator.
   """
 
   train_path = os.path.join(data_path, "train")
   valid_path = os.path.join(data_path, "valid")
 #  test_path = os.path.join(data_path, "eval.txt")
 
-  word_to_id = _build_vocab(train_path)
+  word_to_id = _build_vocab(vocab_path)
   train_data = _file_to_word_ids(train_path, word_to_id)
   valid_data = _file_to_word_ids(valid_path, word_to_id)
 #  test_data = _file_to_word_ids(test_path, word_to_id)
@@ -81,14 +89,14 @@ def ptb_raw_data(data_path=None):
 #  return train_data, valid_data, test_data, vocabulary, word_to_id
 
 
-def ptb_producer(raw_data, batch_size, num_steps, name=None):
-  """Iterate on the raw PTB data.
+def rnnlm_producer(raw_data, batch_size, num_steps, name=None):
+  """Iterate on the raw RNNLM data.
 
   This chunks up raw_data into batches of examples and returns Tensors that
   are drawn from these batches.
 
   Args:
-    raw_data: one of the raw data outputs from ptb_raw_data.
+    raw_data: one of the raw data outputs from rnnlm_raw_data.
     batch_size: int, the batch size.
     num_steps: int, the number of unrolls.
     name: the name of this operation (optional).
@@ -100,7 +108,7 @@ def ptb_producer(raw_data, batch_size, num_steps, name=None):
   Raises:
     tf.errors.InvalidArgumentError: if batch_size or num_steps are too high.
   """
-  with tf.name_scope(name, "PTBProducer", [raw_data, batch_size, num_steps]):
+  with tf.name_scope(name, "RNNLMProducer", [raw_data, batch_size, num_steps]):
     raw_data = tf.convert_to_tensor(raw_data, name="raw_data", dtype=tf.int32)
 
     data_len = tf.size(raw_data)
diff --git a/egs/ami/s5/local/tensorflow/run.sh b/egs/ami/s5/local/tensorflow/run.sh
index ac016200524..7e3150482cc 100755
--- a/egs/ami/s5/local/tensorflow/run.sh
+++ b/egs/ami/s5/local/tensorflow/run.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 mic=ihm
-ngram_order=3
+ngram_order=4
 model_type=small
 stage=1
 weight=0.5
@@ -11,29 +11,21 @@ weight=0.5
 
 set -e
 
-dir=data/new_tensorflow/$model_type
+dir=data/tensorflow/$model_type
 mkdir -p $dir
 
 if [ $stage -le 1 ]; then
-  local/tensorflow/train_rnnlm.sh $dir
+  local/tensorflow/prep_data.sh $dir
 fi
 
+mkdir -p $dir/
 if [ $stage -le 2 ]; then
-  mkdir -p $dir/
-  python local/tensorflow/rnnlm.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm --wordlist_save_path=$dir/wordlist.rnn.final
-fi
-
-has_oos=`grep "<oos>" $dir/wordlist.rnn.final | wc -l | awk '{print $1}'`
-if [ $has_oos == "0" ]; then
-  n=`wc -l $dir/wordlist.rnn.final | awk '{print $1}'`
-  echo n is $n
-  echo "<oos> $n" >> $dir/wordlist.rnn.final
+  python local/tensorflow/rnnlm.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm --vocab_path=$dir/wordlist.rnn.final
 fi
 
 final_lm=ami_fsh.o3g.kn
 LM=$final_lm.pr1-7
 
-date
 if [ $stage -le 3 ]; then
 #  for decode_set in dev; do
   for decode_set in dev eval; do
@@ -46,10 +38,9 @@ if [ $stage -le 3 ]; then
       --rnnlm-ver tensorflow  --weight $weight --max-ngram-order $ngram_order \
       data/lang_$LM $dir \
       data/$mic/${decode_set}_hires ${decode_dir} \
-      ${decode_dir}.new.tfrnnlm.lat.${ngram_order}gram.$weight  &
+      ${decode_dir}.tfrnnlm.lat.${ngram_order}gram.$weight  &
 
   done
 fi
 
 wait
-date
diff --git a/egs/ami/s5/local/tensorflow/vanilla_rnnlm.py b/egs/ami/s5/local/tensorflow/vanilla_rnnlm.py
index 642d7b01172..f6bfeae8b75 100644
--- a/egs/ami/s5/local/tensorflow/vanilla_rnnlm.py
+++ b/egs/ami/s5/local/tensorflow/vanilla_rnnlm.py
@@ -183,8 +183,8 @@ def attn_cell():
     grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                       config.max_grad_norm)
 #    optimizer = tf.train.AdamOptimizer()  # TODO
-    optimizer = tf.train.MomentumOptimizer(self._lr, 0.9)  # TODO
-#    optimizer = tf.train.GradientDescentOptimizer(self._lr)  # TODO
+#    optimizer = tf.train.MomentumOptimizer(self._lr, 0.9)  # TODO
+    optimizer = tf.train.GradientDescentOptimizer(self._lr)  # TODO
     self._train_op = optimizer.apply_gradients(
         zip(grads, tvars),
         global_step=tf.contrib.framework.get_or_create_global_step())
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.cc b/src/tensorflow/tensorflow-rnnlm-lib.cc
index b82a3476c52..b1d7ae7eaa1 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.cc
+++ b/src/tensorflow/tensorflow-rnnlm-lib.cc
@@ -85,27 +85,24 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
   oos_ = -1;
   { // input.
     ifstream ifile(rnn_wordlist.c_str());
-    int id;
     string word;
-    int i = -1;
-    while (ifile >> word >> id) {
-      i++;
-      assert(i == id);
+    int id = -1;
+    eos_ = 0;
+    while (ifile >> word) {
+      id++;
       rnn_label_to_word_.push_back(word); // vector[i] = word
 
       int fst_label = fst_word_symbols->Find(word);
       if (fst::SymbolTable::kNoSymbol == fst_label) {
-        if (i < 2) continue; // <s> and </s>
+        if (id == eos_) continue;
 
         KALDI_ASSERT(word == "<oos>" && oos_ == -1);
-        oos_ = i;
+        oos_ = id;
         continue;
       }
       KALDI_ASSERT(fst_label >= 0);
-      fst_label_to_rnn_label_[fst_label] = i;
+      fst_label_to_rnn_label_[fst_label] = id;
     }
-    bos_ = 1;
-    eos_ = 0; // TODO(hxu) need to think carefully about these..
   }
   if (fst_label_to_word_.size() > rnn_label_to_word_.size()) {
     KALDI_ASSERT(oos_ != -1);
@@ -137,7 +134,7 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
     {
       std::vector<Tensor> state;
       Tensor bosword(tensorflow::DT_INT32, {1, 1});
-      bosword.scalar<int32>()() = bos_;
+      bosword.scalar<int32>()() = eos_; // eos_ is more like a sentence boundary
 
       std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
         {"Train/Model/test_word_in", bosword},
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.h b/src/tensorflow/tensorflow-rnnlm-lib.h
index 5011b0b138a..5af2ee37a70 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.h
+++ b/src/tensorflow/tensorflow-rnnlm-lib.h
@@ -21,16 +21,13 @@ namespace tf_rnnlm {
 
 struct KaldiTfRnnlmWrapperOpts {
   std::string unk_symbol;
-  std::string bos_symbol;
   std::string eos_symbol;
 
-  KaldiTfRnnlmWrapperOpts() : unk_symbol("<RNN_UNK>"), bos_symbol("<s>"), eos_symbol("</s>") {}
+  KaldiTfRnnlmWrapperOpts() : unk_symbol("<RNN_UNK>"), eos_symbol("</s>") {}
 
   void Register(OptionsItf *opts) {
     opts->Register("unk-symbol", &unk_symbol, "Symbol for out-of-vocabulary "
                    "words in rnnlm.");
-    opts->Register("bos-symbol", &eos_symbol, "Beginning of setence symbol in "
-                   "rnnlm.");
     opts->Register("eos-symbol", &eos_symbol, "End of setence symbol in "
                    "rnnlm.");
   }
@@ -50,7 +47,6 @@ class KaldiTfRnnlmWrapper {
   }
 
   int32 GetEos() const { return eos_; }
-  int32 GetBos() const { return bos_; }
 
   const Tensor& GetInitialContext() const;
   const Tensor& GetInitialCell() const;
@@ -78,7 +74,6 @@ class KaldiTfRnnlmWrapper {
 
   Session* session_;  // ptf owned here
   int32 eos_;
-  int32 bos_;
   int32 oos_;
 
   KALDI_DISALLOW_COPY_AND_ASSIGN(KaldiTfRnnlmWrapper);

From 7ef2de4affb004f573c5e4edb9d44ceb41baff9e Mon Sep 17 00:00:00 2001
From: Hainan Xu <hxu@b02.clsp.jhu.edu>
Date: Tue, 20 Jun 2017 12:39:12 -0400
Subject: [PATCH 23/30] recipe draft finished

---
 egs/ami/s5/local/tensorflow/lstm.py          |  5 -----
 egs/ami/s5/local/tensorflow/run_vannila.sh   |  2 +-
 egs/ami/s5/local/tensorflow/vanilla_rnnlm.py | 22 ++++++++------------
 3 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/egs/ami/s5/local/tensorflow/lstm.py b/egs/ami/s5/local/tensorflow/lstm.py
index 590744ed4fb..8c6a0765e70 100644
--- a/egs/ami/s5/local/tensorflow/lstm.py
+++ b/egs/ami/s5/local/tensorflow/lstm.py
@@ -339,11 +339,6 @@ def main(_):
   raw_data = reader.rnnlm_raw_data(FLAGS.data_path, FLAGS.vocab_path)
   train_data, valid_data, _, word_map = raw_data
 
-#  with open(FLAGS.wordlist_save_path, "w") as wmap_file:
-#    count_pairs = sorted(word_map.items(), key=lambda x: (x[1], x[0]))
-#    for k, v in count_pairs: 
-#      wmap_file.write(str(k) + " " + str(v) + "\n")
-
   config = get_config()
   config.vocab_size = len(word_map)
   eval_config = get_config()
diff --git a/egs/ami/s5/local/tensorflow/run_vannila.sh b/egs/ami/s5/local/tensorflow/run_vannila.sh
index bdef6f2c42d..71ecd7340ba 100755
--- a/egs/ami/s5/local/tensorflow/run_vannila.sh
+++ b/egs/ami/s5/local/tensorflow/run_vannila.sh
@@ -11,7 +11,7 @@ weight=0.5
 
 set -e
 
-dir=data/vannila_tensorflow/$model_type
+dir=data/vannila_tensorflow_200/$model_type
 mkdir -p $dir
 
 if [ $stage -le 1 ]; then
diff --git a/egs/ami/s5/local/tensorflow/vanilla_rnnlm.py b/egs/ami/s5/local/tensorflow/vanilla_rnnlm.py
index f6bfeae8b75..2fe11222c73 100644
--- a/egs/ami/s5/local/tensorflow/vanilla_rnnlm.py
+++ b/egs/ami/s5/local/tensorflow/vanilla_rnnlm.py
@@ -183,8 +183,8 @@ def attn_cell():
     grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                       config.max_grad_norm)
 #    optimizer = tf.train.AdamOptimizer()  # TODO
-#    optimizer = tf.train.MomentumOptimizer(self._lr, 0.9)  # TODO
-    optimizer = tf.train.GradientDescentOptimizer(self._lr)  # TODO
+    optimizer = tf.train.MomentumOptimizer(self._lr, 0.9)  # TODO
+#    optimizer = tf.train.GradientDescentOptimizer(self._lr)  # TODO
     self._train_op = optimizer.apply_gradients(
         zip(grads, tvars),
         global_step=tf.contrib.framework.get_or_create_global_step())
@@ -237,15 +237,15 @@ class TestConfig(object):
 class SmallConfig(object):
   """Small config."""
   init_scale = 0.1
-  learning_rate = 0.1
-  max_grad_norm = 0.5
-  num_layers = 2
+  learning_rate = 0.2
+  max_grad_norm = 1
+  num_layers = 1
   num_steps = 20
   hidden_size = 200
-  max_epoch = 10
-  max_max_epoch = 40
+  max_epoch = 4
+  max_max_epoch = 20
   keep_prob = 1
-  lr_decay = 0.5
+  lr_decay = 0.95
   batch_size = 64
 
 class MediumConfig(object):
@@ -330,11 +330,6 @@ def main(_):
   raw_data = reader.rnnlm_raw_data(FLAGS.data_path, FLAGS.vocab_path)
   train_data, valid_data, _, word_map = raw_data
 
-#  with open(FLAGS.wordlist_save_path, "w") as wmap_file:
-#    count_pairs = sorted(word_map.items(), key=lambda x: (x[1], x[0]))
-#    for k, v in count_pairs: 
-#      wmap_file.write(str(k) + " " + str(v) + "\n")
-
   config = get_config()
   config.vocab_size = len(word_map)
   eval_config = get_config()
@@ -362,6 +357,7 @@ def main(_):
     with sv.managed_session() as session:
       for i in range(config.max_max_epoch):
         lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
+
         m.assign_lr(session, config.learning_rate * lr_decay)
 
         print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))

From 8787364ec9191f37f25dcb408f55144ed33b637b Mon Sep 17 00:00:00 2001
From: Hainan Xu <hxu@b02.clsp.jhu.edu>
Date: Tue, 20 Jun 2017 19:03:46 -0400
Subject: [PATCH 24/30] add new objf; still debugging

---
 egs/ami/s5/local/tensorflow/lstm_fast.py | 409 +++++++++++++++++++++++
 egs/ami/s5/local/tensorflow/run.sh       |   4 +-
 egs/ami/s5/local/tensorflow/run_fast.sh  |  49 +++
 3 files changed, 460 insertions(+), 2 deletions(-)
 create mode 100644 egs/ami/s5/local/tensorflow/lstm_fast.py
 create mode 100755 egs/ami/s5/local/tensorflow/run_fast.sh

diff --git a/egs/ami/s5/local/tensorflow/lstm_fast.py b/egs/ami/s5/local/tensorflow/lstm_fast.py
new file mode 100644
index 00000000000..45533eee958
--- /dev/null
+++ b/egs/ami/s5/local/tensorflow/lstm_fast.py
@@ -0,0 +1,409 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#           Modified by Hainan Xu to be used in Kaldi for lattice rescoring 2017
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import sys
+
+sys.path.insert(0,"/home/hxu/.local/lib/python2.7/site-packages/")
+
+import inspect
+import time
+
+import numpy as np
+import tensorflow as tf
+
+import reader
+
+flags = tf.flags
+logging = tf.logging
+
+flags.DEFINE_string(
+    "model", "small",
+    "A type of model. Possible options are: small, medium, large.")
+flags.DEFINE_string("data_path", None,
+                    "Where the training/test data is stored.")
+flags.DEFINE_string("vocab_path", None,
+                    "Where the wordlist file is stored.")
+flags.DEFINE_string("save_path", None,
+                    "Model output directory.")
+flags.DEFINE_bool("use_fp16", False,
+                  "Train using 16-bit floats instead of 32bit floats")
+
+FLAGS = flags.FLAGS
+
+
+def data_type():
+  return tf.float16 if FLAGS.use_fp16 else tf.float32  # float dtype selected by --use_fp16
+
+def new_softmax(labels, logits):
+  """Per-example loss: -logit[target] + sum_j exp(logit_j) - 1.
+
+  Logits are first clamped to be <= 0.  Unlike the standard softmax
+  cross-entropy (kept as a comment at the end), the sum of exponentials is
+  used directly rather than its log.
+
+  Returns one loss value per row of `logits`.
+  """
+  logits = tf.minimum(logits, 0)
+  target = tf.reshape(labels, [-1])
+  exp_logits = tf.exp(logits)
+  row_sums = tf.reduce_sum(exp_logits, 1) # this is the negative part of the objf
+
+  # Pair each row index with its target id to gather that row's target logit.
+  target_col = tf.expand_dims(target, 1)
+  row_ids = tf.expand_dims(tf.range(tf.shape(target)[0]), 1)
+  ind = tf.concat([row_ids, target_col], 1)
+  res = tf.gather_nd(logits, ind)
+  return -res + row_sums - 1
+#  return -res + tf.log(row_sums) # this is the original softmax
+
+class RNNLMInput(object):
+  """The input data: batch sizes plus the input/target tensors from reader.rnnlm_producer."""
+
+  def __init__(self, config, data, name=None):
+    self.batch_size = batch_size = config.batch_size
+    self.num_steps = num_steps = config.num_steps
+    self.epoch_size = ((len(data) // batch_size) - 1) // num_steps  # number of batches run_epoch runs per epoch
+    self.input_data, self.targets = reader.rnnlm_producer(
+        data, batch_size, num_steps, name=name)
+
+
+class RNNLMModel(object):
+  """The RNNLM model: unrolled training graph plus single-step test_* nodes."""
+
+  def __init__(self, is_training, config, input_):
+    self._input = input_
+
+    batch_size = input_.batch_size
+    num_steps = input_.num_steps
+    size = config.hidden_size
+    vocab_size = config.vocab_size
+
+    # Slightly better results can be obtained with forget gate biases
+    # initialized to 1 but the hyperparameters of the model would need to be
+    # different than reported in the paper.
+    def lstm_cell():
+      # With the latest TensorFlow source code (as of Mar 27, 2017),
+      # the BasicLSTMCell will need a reuse parameter which is unfortunately not
+      # defined in TensorFlow 1.0. To maintain backwards compatibility, we add
+      # an argument check here:
+      if 'reuse' in inspect.getargspec(
+          tf.contrib.rnn.BasicLSTMCell.__init__).args:
+        return tf.contrib.rnn.BasicLSTMCell(
+            size, forget_bias=0.0, state_is_tuple=True,
+            reuse=tf.get_variable_scope().reuse)
+      else:
+        return tf.contrib.rnn.BasicLSTMCell(
+            size, forget_bias=0.0, state_is_tuple=True)
+    attn_cell = lstm_cell
+    if is_training and config.keep_prob < 1:
+      def attn_cell():
+        return tf.contrib.rnn.DropoutWrapper(
+            lstm_cell(), output_keep_prob=config.keep_prob)
+    self.cell = tf.contrib.rnn.MultiRNNCell(
+        [attn_cell() for _ in range(config.num_layers)], state_is_tuple=True)
+
+    self._initial_state = self.cell.zero_state(batch_size, data_type())
+    self._initial_state_single = self.cell.zero_state(1, data_type())
+
+    self.initial = tf.reshape(tf.stack(axis=0, values=self._initial_state_single), [config.num_layers, 2, 1, size], name="test_initial_state")
+
+    # The named test_* nodes below evaluate one word at a time (batch size 1).
+    # Their placeholders use data_type() so they match the model variables.
+    # first implement the less efficient version
+    test_word_in = tf.placeholder(tf.int32, [1, 1], name="test_word_in")
+
+    state_placeholder = tf.placeholder(data_type(), [config.num_layers, 2, 1, size], name="test_state_in")
+    # unpacking the input state context
+    layer_states = tf.unstack(state_placeholder, axis=0)
+    test_input_state = tuple(
+               [tf.contrib.rnn.LSTMStateTuple(layer_states[idx][0],layer_states[idx][1])
+                 for idx in range(config.num_layers)]
+    )
+
+    with tf.device("/cpu:0"):
+      self.embedding = tf.get_variable(
+          "embedding", [vocab_size, size], dtype=data_type())
+
+      inputs = tf.nn.embedding_lookup(self.embedding, input_.input_data)
+      test_inputs = tf.nn.embedding_lookup(self.embedding, test_word_in)
+
+    # test time
+    with tf.variable_scope("RNN"):
+      (test_cell_output, test_output_state) = self.cell(test_inputs[:, 0, :], test_input_state)
+
+    test_state_out = tf.reshape(tf.stack(axis=0, values=test_output_state), [config.num_layers, 2, 1, size], name="test_state_out")
+    test_cell_out = tf.reshape(test_cell_output, [1, size], name="test_cell_out")
+    # above is the first part of the graph for test
+    # test-word-in
+    #               > ---- > test-state-out
+    # test-state-in        > test-cell-out
+
+
+    # below is the 2nd part of the graph for test
+    # test-word-out
+    #               > prob(word | test-word-out)
+    # test-cell-in
+
+    test_word_out = tf.placeholder(tf.int32, [1, 1], name="test_word_out")
+    cellout_placeholder = tf.placeholder(data_type(), [1, size], name="test_cell_in")
+
+    softmax_w = tf.get_variable(
+        "softmax_w", [size, vocab_size], dtype=data_type())
+    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
+
+    test_logits = tf.matmul(cellout_placeholder, softmax_w) + softmax_b
+    test_softmaxed = tf.nn.softmax(test_logits)
+
+    p_word = test_softmaxed[0, test_word_out[0,0]]
+    test_out = tf.identity(p_word, name="test_out")
+
+    if is_training and config.keep_prob < 1:
+      inputs = tf.nn.dropout(inputs, config.keep_prob)
+
+    # Simplified version of models/tutorials/rnn/rnn.py's rnn().
+    # This builds an unrolled LSTM for tutorial purposes only.
+    # In general, use the rnn() or state_saving_rnn() from rnn.py.
+    #
+    # The alternative version of the code below is:
+    #
+    # inputs = tf.unstack(inputs, num=num_steps, axis=1)
+    # outputs, state = tf.contrib.rnn.static_rnn(
+    #     cell, inputs, initial_state=self._initial_state)
+    outputs = []
+    state = self._initial_state
+    with tf.variable_scope("RNN"):
+      for time_step in range(num_steps):
+        tf.get_variable_scope().reuse_variables()  # every step, incl. step 0: the test-time call above already ran the cell in scope "RNN"
+        (cell_output, state) = self.cell(inputs[:, time_step, :], state)
+        outputs.append(cell_output)
+
+    output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])
+    logits = tf.matmul(output, softmax_w) + softmax_b
+    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
+        [logits],
+        [tf.reshape(input_.targets, [-1])],
+        [tf.ones([batch_size * num_steps], dtype=data_type())],
+        softmax_loss_function=new_softmax)
+    self._cost = cost = tf.reduce_sum(loss) / batch_size
+    self._final_state = state
+
+    if not is_training:
+      return
+
+    self._lr = tf.Variable(0.0, trainable=False)
+    tvars = tf.trainable_variables()
+    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
+                                      config.max_grad_norm)
+    optimizer = tf.train.GradientDescentOptimizer(self._lr)
+    self._train_op = optimizer.apply_gradients(
+        zip(grads, tvars),
+        global_step=tf.contrib.framework.get_or_create_global_step())
+
+    self._new_lr = tf.placeholder(
+        tf.float32, shape=[], name="new_learning_rate")
+    self._lr_update = tf.assign(self._lr, self._new_lr)
+
+  def assign_lr(self, session, lr_value):
+    session.run(self._lr_update, feed_dict={self._new_lr: lr_value})
+
+  @property
+  def input(self):
+    return self._input
+
+  @property
+  def initial_state(self):
+    return self._initial_state
+
+  @property
+  def cost(self):
+    return self._cost
+
+  @property
+  def final_state(self):
+    return self._final_state
+
+  @property
+  def lr(self):
+    return self._lr
+
+  @property
+  def train_op(self):
+    return self._train_op
+
+class TestConfig(object):
+  """Tiny config, for testing."""
+  init_scale = 0.1  # default variable initializer is uniform in [-init_scale, init_scale]
+  learning_rate = 1.0  # initial SGD learning rate
+  max_grad_norm = 1  # global-norm gradient clipping threshold
+  num_layers = 1  # stacked LSTM layers
+  num_steps = 2  # time steps unrolled per training batch
+  hidden_size = 2  # LSTM units per layer; also the embedding width
+  max_epoch = 1  # epochs run at the initial learning rate
+  max_max_epoch = 1  # total training epochs
+  keep_prob = 1.0  # dropout keep probability; dropout is only added when < 1
+  lr_decay = 0.5  # per-epoch learning-rate multiplier after max_epoch
+  batch_size = 20  # sequences per batch
+
+class SmallConfig(object):
+  """Small config."""
+  init_scale = 0.1  # default variable initializer is uniform in [-init_scale, init_scale]
+  learning_rate = 1.0  # initial SGD learning rate
+  max_grad_norm = 5  # global-norm gradient clipping threshold
+  num_layers = 2  # stacked LSTM layers
+  num_steps = 20  # time steps unrolled per training batch
+  hidden_size = 200  # LSTM units per layer; also the embedding width
+  max_epoch = 4  # epochs run at the initial learning rate
+  max_max_epoch = 13  # total training epochs
+  keep_prob = 1.0  # dropout keep probability; dropout is only added when < 1
+  lr_decay = 0.5  # per-epoch learning-rate multiplier after max_epoch
+  batch_size = 64  # sequences per batch
+
+
+class MediumConfig(object):
+  """Medium config."""
+  init_scale = 0.05  # default variable initializer is uniform in [-init_scale, init_scale]
+  learning_rate = 1.0  # initial SGD learning rate
+  max_grad_norm = 5  # global-norm gradient clipping threshold
+  num_layers = 2  # stacked LSTM layers
+  num_steps = 35  # time steps unrolled per training batch
+  hidden_size = 650  # LSTM units per layer; also the embedding width
+  max_epoch = 6  # epochs run at the initial learning rate
+  max_max_epoch = 39  # total training epochs
+  keep_prob = 0.5  # dropout keep probability (dropout is active in training since < 1)
+  lr_decay = 0.8  # per-epoch learning-rate multiplier after max_epoch
+  batch_size = 20  # sequences per batch
+
+
+class LargeConfig(object):
+  """Large config."""
+  init_scale = 0.04  # default variable initializer is uniform in [-init_scale, init_scale]
+  learning_rate = 1.0  # initial SGD learning rate
+  max_grad_norm = 10  # global-norm gradient clipping threshold
+  num_layers = 2  # stacked LSTM layers
+  num_steps = 35  # time steps unrolled per training batch
+  hidden_size = 1500  # LSTM units per layer; also the embedding width
+  max_epoch = 14  # epochs run at the initial learning rate
+  max_max_epoch = 55  # total training epochs
+  keep_prob = 0.35  # dropout keep probability (dropout is active in training since < 1)
+  lr_decay = 1 / 1.15  # per-epoch learning-rate multiplier after max_epoch
+  batch_size = 20  # sequences per batch
+
+
+
+def run_epoch(session, model, eval_op=None, verbose=False):
+  """Runs the model for one epoch and returns exp(total cost / total steps)."""
+  start_time = time.time()
+  costs = 0.0
+  iters = 0
+  state = session.run(model.initial_state)
+  # epoch_size // 10 is 0 for epochs shorter than 10 steps; clamp to avoid % 0.
+  log_every = max(model.input.epoch_size // 10, 1)
+  fetches = {
+      "cost": model.cost,
+      "final_state": model.final_state,
+  }
+  if eval_op is not None:
+    fetches["eval_op"] = eval_op
+
+  for step in range(model.input.epoch_size):
+    feed_dict = {}
+    for i, (c, h) in enumerate(model.initial_state):
+      feed_dict[c] = state[i].c
+      feed_dict[h] = state[i].h
+
+    vals = session.run(fetches, feed_dict)
+    cost = vals["cost"]
+    state = vals["final_state"]
+
+    costs += cost
+    iters += model.input.num_steps
+
+    if verbose and step % log_every == 10:
+      print ("cost is ", costs)
+      print ("avg cost is ", costs / iters)
+      print("%.3f perplexity: %.3f speed: %.0f wps" %
+            (step * 1.0 / model.input.epoch_size, np.exp(costs / iters),
+             iters * model.input.batch_size / (time.time() - start_time)))
+
+  return np.exp(costs / iters)
+
+
+def get_config():
+  """Return a fresh config object for the --model flag.
+
+  Raises:
+    ValueError: if FLAGS.model is not small, medium, large or test.
+  """
+  configs = {"small": SmallConfig, "medium": MediumConfig,
+             "large": LargeConfig, "test": TestConfig}
+  if FLAGS.model not in configs:
+    raise ValueError("Invalid model: %s" % FLAGS.model)
+  return configs[FLAGS.model]()
+
+
+def main(_):
+  """Trains the RNNLM on --data_path and saves it to --save_path if set."""
+  if not FLAGS.data_path:
+    raise ValueError("Must set --data_path to RNNLM data directory")
+  if not FLAGS.vocab_path:
+    raise ValueError("Must set --vocab_path to the RNNLM wordlist file")
+
+  raw_data = reader.rnnlm_raw_data(FLAGS.data_path, FLAGS.vocab_path)
+  train_data, valid_data, _, word_map = raw_data
+
+  config = get_config()
+  config.vocab_size = len(word_map)  # one model output per wordlist entry
+
+  with tf.Graph().as_default():
+    initializer = tf.random_uniform_initializer(-config.init_scale,
+                                                config.init_scale)
+
+    with tf.name_scope("Train"):
+      train_input = RNNLMInput(config=config, data=train_data, name="TrainInput")
+      with tf.variable_scope("Model", reuse=None, initializer=initializer):
+        m = RNNLMModel(is_training=True, config=config, input_=train_input)
+      tf.summary.scalar("Training Loss", m.cost)
+      tf.summary.scalar("Learning Rate", m.lr)
+
+    with tf.name_scope("Valid"):
+      valid_input = RNNLMInput(config=config, data=valid_data, name="ValidInput")
+      with tf.variable_scope("Model", reuse=True, initializer=initializer):
+        mvalid = RNNLMModel(is_training=False, config=config, input_=valid_input)
+      tf.summary.scalar("Validation Loss", mvalid.cost)
+
+    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
+    with sv.managed_session() as session:
+      for i in range(config.max_max_epoch):
+        lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
+        m.assign_lr(session, config.learning_rate * lr_decay)
+
+        print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
+        train_perplexity = run_epoch(session, m, eval_op=m.train_op,
+                                     verbose=True)
+
+        print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
+        valid_perplexity = run_epoch(session, mvalid)
+        print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
+
+      if FLAGS.save_path:
+        print("Saving model to %s." % FLAGS.save_path)
+        sv.saver.save(session, FLAGS.save_path)
+
+if __name__ == "__main__":
+  tf.app.run()
diff --git a/egs/ami/s5/local/tensorflow/run.sh b/egs/ami/s5/local/tensorflow/run.sh
index 7e3150482cc..52989a73ca6 100755
--- a/egs/ami/s5/local/tensorflow/run.sh
+++ b/egs/ami/s5/local/tensorflow/run.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 mic=ihm
 ngram_order=4
-model_type=small
+model_type=test
 stage=1
 weight=0.5
 
@@ -20,7 +20,7 @@ fi
 
 mkdir -p $dir/
 if [ $stage -le 2 ]; then
-  python local/tensorflow/rnnlm.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm --vocab_path=$dir/wordlist.rnn.final
+  $decode_cmd $dir/train.log python local/tensorflow/rnnlm.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm --vocab_path=$dir/wordlist.rnn.final
 fi
 
 final_lm=ami_fsh.o3g.kn
diff --git a/egs/ami/s5/local/tensorflow/run_fast.sh b/egs/ami/s5/local/tensorflow/run_fast.sh
new file mode 100755
index 00000000000..f0d3753ff58
--- /dev/null
+++ b/egs/ami/s5/local/tensorflow/run_fast.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+mic=ihm
+ngram_order=4
+model_type=small
+stage=1
+weight=0.5
+
+. ./utils/parse_options.sh
+. ./cmd.sh
+. ./path.sh
+
+set -e
+
+dir=data/auto_tensorflow/$model_type
+mkdir -p $dir
+
+if [ $stage -le 1 ]; then
+  local/tensorflow/prep_data.sh $dir
+fi
+
+mkdir -p $dir/
+if [ $stage -le 2 ]; then
+  python local/tensorflow/lstm_fast.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm --vocab_path=$dir/wordlist.rnn.final
+#  $decode_cmd $dir/train.log python local/tensorflow/rnnlm.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm --vocab_path=$dir/wordlist.rnn.final
+fi
+
+exit
+
+final_lm=ami_fsh.o3g.kn
+LM=$final_lm.pr1-7
+
+if [ $stage -le 3 ]; then
+#  for decode_set in dev; do
+  for decode_set in dev eval; do
+    basedir=exp/$mic/nnet3/tdnn_sp/
+    decode_dir=${basedir}/decode_${decode_set}
+
+    # Lattice rescoring
+    steps/lmrescore_rnnlm_lat.sh \
+      --cmd "$tensorflow_cmd --mem 16G" \
+      --rnnlm-ver tensorflow  --weight $weight --max-ngram-order $ngram_order \
+      data/lang_$LM $dir \
+      data/$mic/${decode_set}_hires ${decode_dir} \
+      ${decode_dir}.tfrnnlm.lat.${ngram_order}gram.$weight  &
+
+  done
+fi
+
+wait

From f83c0063071972a3cdce44de0a897deee4ab9513 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hxu@b02.clsp.jhu.edu>
Date: Wed, 21 Jun 2017 12:58:16 -0400
Subject: [PATCH 25/30] new objf working

---
 egs/ami/s5/local/tensorflow/lstm.py          |  2 +-
 egs/ami/s5/local/tensorflow/lstm_fast.py     | 29 +++++++++-----------
 egs/ami/s5/local/tensorflow/run.sh           |  3 +-
 egs/ami/s5/local/tensorflow/run_fast.sh      |  6 ++--
 egs/ami/s5/local/tensorflow/vanilla_rnnlm.py |  2 +-
 src/tensorflow/tensorflow-rnnlm-lib.cc       | 11 ++++++--
 6 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/egs/ami/s5/local/tensorflow/lstm.py b/egs/ami/s5/local/tensorflow/lstm.py
index 8c6a0765e70..1aba92b129b 100644
--- a/egs/ami/s5/local/tensorflow/lstm.py
+++ b/egs/ami/s5/local/tensorflow/lstm.py
@@ -147,7 +147,7 @@ def attn_cell():
     softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
 
     test_logits = tf.matmul(cellout_placeholder, softmax_w) + softmax_b
-    test_softmaxed = tf.nn.softmax(test_logits)
+    test_softmaxed = tf.nn.log_softmax(test_logits)
 
     p_word = test_softmaxed[0, test_word_out[0,0]]
     test_out = tf.identity(p_word, name="test_out")
diff --git a/egs/ami/s5/local/tensorflow/lstm_fast.py b/egs/ami/s5/local/tensorflow/lstm_fast.py
index 45533eee958..836ec6d45b8 100644
--- a/egs/ami/s5/local/tensorflow/lstm_fast.py
+++ b/egs/ami/s5/local/tensorflow/lstm_fast.py
@@ -51,24 +51,23 @@
 def data_type():
   return tf.float16 if FLAGS.use_fp16 else tf.float32
 
+def f(x):
+  x1 = tf.minimum(0.0, x)
+
+  x2 = tf.maximum(0.0, x)
+
+  return tf.exp(x1) + x2
+
 def new_softmax(labels, logits):
-#  logits = -logits;
-#  logits = tf.nn.relu(logits)
-#  logits = -logits;
-#  print (labels, logits)
   logits = tf.minimum(logits, 0)
   target = tf.reshape(labels, [-1])
-  exp_logits = tf.exp(logits)
-  row_sums = tf.reduce_sum(exp_logits, 1) # this is the negative part of the objf
-#  print (sums)
+  f_logits = f(logits)
+  row_sums = tf.reduce_sum(f_logits, 1) # this is the negative part of the objf
 
   t2 = tf.expand_dims(target, 1)
   range = tf.expand_dims(tf.range(tf.shape(target)[0]), 1)
   ind = tf.concat([range, t2], 1)
   res = tf.gather_nd(logits, ind)
-#  print (res)
-#  positive_part = tf.reduce_sum(res, 1)
-#  print (positive_part)
 
   return -res + row_sums - 1
 #  return -res + tf.log(row_sums) # this is the original softmax
@@ -166,11 +165,11 @@ def attn_cell():
     softmax_w = tf.get_variable(
         "softmax_w", [size, vocab_size], dtype=data_type())
     softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
+    softmax_b = softmax_b - 9.0
 
-    test_logits = tf.matmul(cellout_placeholder, softmax_w) + softmax_b
-    test_softmaxed = tf.nn.softmax(test_logits)
+    test_logits = tf.matmul(cellout_placeholder, tf.transpose(tf.nn.embedding_lookup(tf.transpose(softmax_w), test_word_out[0]))) + softmax_b[test_word_out[0,0]]
 
-    p_word = test_softmaxed[0, test_word_out[0,0]]
+    p_word = test_logits[0, 0]
     test_out = tf.identity(p_word, name="test_out")
 
     if is_training and config.keep_prob < 1:
@@ -263,7 +262,7 @@ class TestConfig(object):
 class SmallConfig(object):
   """Small config."""
   init_scale = 0.1
-  learning_rate = 1.0
+  learning_rate = 1
   max_grad_norm = 5
   num_layers = 2
   num_steps = 20
@@ -335,8 +334,6 @@ def run_epoch(session, model, eval_op=None, verbose=False):
     iters += model.input.num_steps
 
     if verbose and step % (model.input.epoch_size // 10) == 10:
-      print ("cost is ", costs)
-      print ("avg cost is ", costs / iters)
       print("%.3f perplexity: %.3f speed: %.0f wps" %
             (step * 1.0 / model.input.epoch_size, np.exp(costs / iters),
              iters * model.input.batch_size / (time.time() - start_time)))
diff --git a/egs/ami/s5/local/tensorflow/run.sh b/egs/ami/s5/local/tensorflow/run.sh
index 52989a73ca6..b1aa2d06614 100755
--- a/egs/ami/s5/local/tensorflow/run.sh
+++ b/egs/ami/s5/local/tensorflow/run.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 mic=ihm
 ngram_order=4
-model_type=test
+model_type=small
 stage=1
 weight=0.5
 
@@ -27,7 +27,6 @@ final_lm=ami_fsh.o3g.kn
 LM=$final_lm.pr1-7
 
 if [ $stage -le 3 ]; then
-#  for decode_set in dev; do
   for decode_set in dev eval; do
     basedir=exp/$mic/nnet3/tdnn_sp/
     decode_dir=${basedir}/decode_${decode_set}
diff --git a/egs/ami/s5/local/tensorflow/run_fast.sh b/egs/ami/s5/local/tensorflow/run_fast.sh
index f0d3753ff58..890119a7006 100755
--- a/egs/ami/s5/local/tensorflow/run_fast.sh
+++ b/egs/ami/s5/local/tensorflow/run_fast.sh
@@ -20,12 +20,10 @@ fi
 
 mkdir -p $dir/
 if [ $stage -le 2 ]; then
-  python local/tensorflow/lstm_fast.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm --vocab_path=$dir/wordlist.rnn.final
+  $decode_cmd $dir/train.log python local/tensorflow/lstm_fast.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm --vocab_path=$dir/wordlist.rnn.final
 #  $decode_cmd $dir/train.log python local/tensorflow/rnnlm.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm --vocab_path=$dir/wordlist.rnn.final
 fi
 
-exit
-
 final_lm=ami_fsh.o3g.kn
 LM=$final_lm.pr1-7
 
@@ -41,7 +39,7 @@ if [ $stage -le 3 ]; then
       --rnnlm-ver tensorflow  --weight $weight --max-ngram-order $ngram_order \
       data/lang_$LM $dir \
       data/$mic/${decode_set}_hires ${decode_dir} \
-      ${decode_dir}.tfrnnlm.lat.${ngram_order}gram.$weight  &
+      ${decode_dir}.fast.tfrnnlm.lat.${ngram_order}gram.$weight  &
 
   done
 fi
diff --git a/egs/ami/s5/local/tensorflow/vanilla_rnnlm.py b/egs/ami/s5/local/tensorflow/vanilla_rnnlm.py
index 2fe11222c73..6e5c72f6adb 100644
--- a/egs/ami/s5/local/tensorflow/vanilla_rnnlm.py
+++ b/egs/ami/s5/local/tensorflow/vanilla_rnnlm.py
@@ -141,7 +141,7 @@ def attn_cell():
     softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
 
     test_logits = tf.matmul(cellout_placeholder, softmax_w) + softmax_b
-    test_softmaxed = tf.nn.softmax(test_logits)
+    test_softmaxed = tf.nn.log_softmax(test_logits)
 
     p_word = test_softmaxed[0, test_word_out[0,0]]
     test_out = tf.identity(p_word, name="test_out")
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.cc b/src/tensorflow/tensorflow-rnnlm-lib.cc
index b1d7ae7eaa1..6c84ded5702 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.cc
+++ b/src/tensorflow/tensorflow-rnnlm-lib.cc
@@ -203,10 +203,16 @@ BaseFloat KaldiTfRnnlmWrapper::GetLogProb(
 
   float ans;
   if (word != oos_) {
-    ans = log(outputs[0].scalar<float>()());
+    ans = outputs[0].scalar<float>()();
   } else {
-    ans = log(outputs[0].scalar<float>()() / (num_total_words - num_rnn_words));
+    ans = outputs[0].scalar<float>()() - log (num_total_words - num_rnn_words);
   }
+
+//  if (word != oos_) {
+//    ans = log(outputs[0].scalar<float>()());
+//  } else {
+//    ans = log(outputs[0].scalar<float>()() / (num_total_words - num_rnn_words));
+//  }
 //  std::ostringstream his_str;
 //  for (int i = 0; i < wseq.size(); i++) {
 //    his_str << rnn_label_to_word_[wseq[i]] << "(" << wseq[i] << ") ";
@@ -214,6 +220,7 @@ BaseFloat KaldiTfRnnlmWrapper::GetLogProb(
 
 //  KALDI_LOG << "Computing logprob of word " << rnn_label_to_word_[word] << "(" << word << ")"
 //            << " given history " << his_str.str() << " is " << exp(ans);
+//  KALDI_LOG << "prob is " << outputs[0].scalar<float>()();
   return ans;
 }
 

From beeb56c6f3a733e516a330a595ccbbb85d9d7996 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hxu@b02.clsp.jhu.edu>
Date: Thu, 22 Jun 2017 14:29:06 -0400
Subject: [PATCH 26/30] fix small issue

---
 egs/ami/s5/local/tensorflow/lstm_fast.py |  11 +-
 egs/ami/s5/local/tensorflow/run_fast.sh  |   6 +-
 src/tensorflow/tensorflow-rnnlm-lib.cc   | 149 +++++++++++------------
 src/tensorflow/tensorflow-rnnlm-lib.h    |  22 ++--
 src/tfbin/lattice-lmrescore-tf-rnnlm.cc  |   2 +-
 5 files changed, 95 insertions(+), 95 deletions(-)

diff --git a/egs/ami/s5/local/tensorflow/lstm_fast.py b/egs/ami/s5/local/tensorflow/lstm_fast.py
index 836ec6d45b8..e5b7bcc91a2 100644
--- a/egs/ami/s5/local/tensorflow/lstm_fast.py
+++ b/egs/ami/s5/local/tensorflow/lstm_fast.py
@@ -51,17 +51,18 @@
 def data_type():
   return tf.float16 if FLAGS.use_fp16 else tf.float32
 
+# this function does the following:
+# return exp(x) if x < 0
+#        x + 1 if x >= 0   (exp(min(0, x)) contributes exp(0) = 1 on this branch)
 def f(x):
   x1 = tf.minimum(0.0, x)
-
   x2 = tf.maximum(0.0, x)
-
   return tf.exp(x1) + x2
 
 def new_softmax(labels, logits):
-  logits = tf.minimum(logits, 0)
   target = tf.reshape(labels, [-1])
-  f_logits = f(logits)
+  f_logits = tf.exp(logits)
+#  f_logits = f(logits)
   row_sums = tf.reduce_sum(f_logits, 1) # this is the negative part of the objf
 
   t2 = tf.expand_dims(target, 1)
@@ -270,7 +271,7 @@ class SmallConfig(object):
   max_epoch = 4
   max_max_epoch = 13
   keep_prob = 1.0
-  lr_decay = 0.5
+  lr_decay = 0.8
   batch_size = 64
 
 
diff --git a/egs/ami/s5/local/tensorflow/run_fast.sh b/egs/ami/s5/local/tensorflow/run_fast.sh
index 890119a7006..86007258d41 100755
--- a/egs/ami/s5/local/tensorflow/run_fast.sh
+++ b/egs/ami/s5/local/tensorflow/run_fast.sh
@@ -11,7 +11,7 @@ weight=0.5
 
 set -e
 
-dir=data/auto_tensorflow/$model_type
+dir=data/fast_tensorflow/$model_type
 mkdir -p $dir
 
 if [ $stage -le 1 ]; then
@@ -20,8 +20,8 @@ fi
 
 mkdir -p $dir/
 if [ $stage -le 2 ]; then
-  $decode_cmd $dir/train.log python local/tensorflow/lstm_fast.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm --vocab_path=$dir/wordlist.rnn.final
-#  $decode_cmd $dir/train.log python local/tensorflow/rnnlm.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm --vocab_path=$dir/wordlist.rnn.final
+  python local/tensorflow/lstm_fast.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm --vocab_path=$dir/wordlist.rnn.final
+#  $decode_cmd $dir/train.log python local/tensorflow/lstm_fast.py --data_path=$dir --model=$model_type --save_path=$dir/rnnlm --vocab_path=$dir/wordlist.rnn.final
 fi
 
 final_lm=ami_fsh.o3g.kn
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.cc b/src/tensorflow/tensorflow-rnnlm-lib.cc
index 6c84ded5702..4e2d6bc6695 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.cc
+++ b/src/tensorflow/tensorflow-rnnlm-lib.cc
@@ -1,4 +1,5 @@
-// lm/kaldi-rnnlm.cc
+// Copyright 2017 Hainan Xu
+// wrapper for tensorflow rnnlm
 
 #include <utility>
 #include <fstream>
@@ -11,54 +12,52 @@
 #include "util/stl-utils.h"
 #include "util/text-utils.h"
 
-using tensorflow::Status;
-
 namespace kaldi {
+using std::ifstream;
 using tf_rnnlm::KaldiTfRnnlmWrapper;
 using tf_rnnlm::TfRnnlmDeterministicFst;
-using std::ifstream;
+using tensorflow::Status;
 
-KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
-    const KaldiTfRnnlmWrapperOpts &opts,
-    const std::string &rnn_wordlist,
-    const std::string &word_symbol_table_rxfilename,
-    const std::string &unk_prob_rspecifier,
-    const std::string &tf_model_path) {
-  // read the tf model
-  {
-    string graph_path = tf_model_path + ".meta";
+void KaldiTfRnnlmWrapper::ReadTfModel(const std::string &tf_model_path) {
+  string graph_path = tf_model_path + ".meta";
 
-    Status status = tensorflow::NewSession(tensorflow::SessionOptions(), &session_);
-    if (!status.ok()) {
-      KALDI_ERR << status.ToString();
-    }
+  Status status = tensorflow::NewSession(tensorflow::SessionOptions(), &session_);
+  if (!status.ok()) {
+    KALDI_ERR << status.ToString();
+  }
 
-    tensorflow::MetaGraphDef graph_def;
-    status = tensorflow::ReadBinaryProto(tensorflow::Env::Default(), graph_path, &graph_def);
-    if (!status.ok()) {
-      KALDI_ERR << status.ToString();
-    }
+  tensorflow::MetaGraphDef graph_def;
+  status = tensorflow::ReadBinaryProto(tensorflow::Env::Default(), graph_path, &graph_def);
+  if (!status.ok()) {
+    KALDI_ERR << status.ToString();
+  }
 
-    // Add the graph to the session
-    status = session_->Create(graph_def.graph_def());
-    if (!status.ok()) {
-      KALDI_ERR << status.ToString();
-    }
+  // Add the graph to the session
+  status = session_->Create(graph_def.graph_def());
+  if (!status.ok()) {
+    KALDI_ERR << status.ToString();
+  }
 
-    Tensor checkpointPathTensor(tensorflow::DT_STRING, tensorflow::TensorShape());
-    checkpointPathTensor.scalar<std::string>()() = tf_model_path;
-    
-    status = session_->Run(
-              {{ graph_def.saver_def().filename_tensor_name(), checkpointPathTensor },},
-              {},
-              {graph_def.saver_def().restore_op_name()},
-              nullptr);
-    if (!status.ok()) {
-      KALDI_ERR << status.ToString();
-    }
+  Tensor checkpointPathTensor(tensorflow::DT_STRING, tensorflow::TensorShape());
+  checkpointPathTensor.scalar<std::string>()() = tf_model_path;
+  
+  status = session_->Run(
+            {{ graph_def.saver_def().filename_tensor_name(), checkpointPathTensor },},
+            {},
+            {graph_def.saver_def().restore_op_name()},
+            nullptr);
+  if (!status.ok()) {
+    KALDI_ERR << status.ToString();
   }
+}
 
-//  GetInitialContext(&initial_context_);
+KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
+    const KaldiTfRnnlmWrapperOpts &opts,
+    const std::string &rnn_wordlist,
+    const std::string &word_symbol_table_rxfilename,
+    const std::string &unk_prob_rspecifier,
+    const std::string &tf_model_path): opts_(opts) {
+  ReadTfModel(tf_model_path);
 
   fst::SymbolTable *fst_word_symbols = NULL;
   if (!(fst_word_symbols =
@@ -79,9 +78,9 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
   }
 
   fst_label_to_rnn_label_.resize(fst_word_symbols->NumSymbols(), -1);
-
   num_total_words = fst_word_symbols->NumSymbols();
 
+  // read rnn wordlist and then generate ngram-label-to-rnn-label map
   oos_ = -1;
   { // input.
     ifstream ifile(rnn_wordlist.c_str());
@@ -94,9 +93,12 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
 
       int fst_label = fst_word_symbols->Find(word);
       if (fst::SymbolTable::kNoSymbol == fst_label) {
-        if (id == eos_) continue;
-
-        KALDI_ASSERT(word == "<oos>" && oos_ == -1);
+        if (id == eos_) {
+          KALDI_ASSERT(word == opts_.eos_symbol);
+          continue;
+        }
+//        KALDI_LOG << word << " " << opts_.unk_symbol << " " << oos_;
+        KALDI_ASSERT(word == opts_.unk_symbol && oos_ == -1);
         oos_ = id;
         continue;
       }
@@ -107,9 +109,9 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
   if (fst_label_to_word_.size() > rnn_label_to_word_.size()) {
     KALDI_ASSERT(oos_ != -1);
   }
-//  rnn_label_to_word_.push_back("<OOS>");
   num_rnn_words = rnn_label_to_word_.size();
   
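+  // return early if the wordlist has no oos symbol. NOTE(review): this also skips the AcquireInitialTensors() call at the end of the constructor -- confirm intended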
   if (oos_ == -1) {
     return;
   }
@@ -119,34 +121,36 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
     }
   }
 
+  AcquireInitialTensors();
+}
+
+void KaldiTfRnnlmWrapper::AcquireInitialTensors() {
+  Status status;
+  // get the initial context
   {
-    Status status;
-    // get the initial context
-    {
-      std::vector<Tensor> state;
-      status = session_->Run(std::vector<std::pair<string, tensorflow::Tensor>>(), {"Train/Model/test_initial_state"}, {}, &state);
-      if (!status.ok()) {
-        KALDI_ERR << status.ToString();
-      }
-      initial_context_ = state[0];
+    std::vector<Tensor> state;
+    status = session_->Run(std::vector<std::pair<string, tensorflow::Tensor>>(), {"Train/Model/test_initial_state"}, {}, &state);
+    if (!status.ok()) {
+      KALDI_ERR << status.ToString();
     }
+    initial_context_ = state[0];
+  }
 
-    {
-      std::vector<Tensor> state;
-      Tensor bosword(tensorflow::DT_INT32, {1, 1});
-      bosword.scalar<int32>()() = eos_; // eos_ is more like a sentence boundary
+  {
+    std::vector<Tensor> state;
+    Tensor bosword(tensorflow::DT_INT32, {1, 1});
+    bosword.scalar<int32>()() = eos_; // eos_ is more like a sentence boundary
 
-      std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
-        {"Train/Model/test_word_in", bosword},
-        {"Train/Model/test_state_in", initial_context_},
-      };
+    std::vector<std::pair<string, tensorflow::Tensor>> inputs = {
+      {"Train/Model/test_word_in", bosword},
+      {"Train/Model/test_state_in", initial_context_},
+    };
 
-      status = session_->Run(inputs, {"Train/Model/test_cell_out"}, {}, &state);
-      if (!status.ok()) {
-        KALDI_ERR << status.ToString();
-      }
-      initial_cell_ = state[0];
+    status = session_->Run(inputs, {"Train/Model/test_cell_out"}, {}, &state);
+    if (!status.ok()) {
+      KALDI_ERR << status.ToString();
     }
+    initial_cell_ = state[0];
   }
 }
 
@@ -171,11 +175,9 @@ BaseFloat KaldiTfRnnlmWrapper::GetLogProb(
       {"Train/Model/test_word_out", thisword},
       {"Train/Model/test_state_in", context_in},
       {"Train/Model/test_cell_in", cell_in},
-//      {"Train/Model/test_cell_in", cell_in},
     };
 
     // The session will initialize the outputs
-
     // Run the session, evaluating our "c" operation from the graph
     Status status = session_->Run(inputs,
         {"Train/Model/test_out",
@@ -208,16 +210,6 @@ BaseFloat KaldiTfRnnlmWrapper::GetLogProb(
     ans = outputs[0].scalar<float>()() - log (num_total_words - num_rnn_words);
   }
 
-//  if (word != oos_) {
-//    ans = log(outputs[0].scalar<float>()());
-//  } else {
-//    ans = log(outputs[0].scalar<float>()() / (num_total_words - num_rnn_words));
-//  }
-//  std::ostringstream his_str;
-//  for (int i = 0; i < wseq.size(); i++) {
-//    his_str << rnn_label_to_word_[wseq[i]] << "(" << wseq[i] << ") ";
-//  }
-
 //  KALDI_LOG << "Computing logprob of word " << rnn_label_to_word_[word] << "(" << word << ")"
 //            << " given history " << his_str.str() << " is " << exp(ans);
 //  KALDI_LOG << "prob is " << outputs[0].scalar<float>()();
@@ -240,7 +232,6 @@ TfRnnlmDeterministicFst::TfRnnlmDeterministicFst(int32 max_ngram_order,
 
   // Uses empty history for <s>.
   std::vector<Label> bos;
-//  std::vector<float> bos_context(rnnlm->GetHiddenLayerSize(), 1.0);
 
   const Tensor& initial_context = rnnlm_->GetInitialContext();
   const Tensor& initial_cell = rnnlm_->GetInitialCell();
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.h b/src/tensorflow/tensorflow-rnnlm-lib.h
index 5af2ee37a70..d35dc88273e 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.h
+++ b/src/tensorflow/tensorflow-rnnlm-lib.h
@@ -23,7 +23,7 @@ struct KaldiTfRnnlmWrapperOpts {
   std::string unk_symbol;
   std::string eos_symbol;
 
-  KaldiTfRnnlmWrapperOpts() : unk_symbol("<RNN_UNK>"), eos_symbol("</s>") {}
+  KaldiTfRnnlmWrapperOpts() : unk_symbol("<oos>"), eos_symbol("</s>") {}
 
   void Register(OptionsItf *opts) {
     opts->Register("unk-symbol", &unk_symbol, "Symbol for out-of-vocabulary "
@@ -40,7 +40,6 @@ class KaldiTfRnnlmWrapper {
                     const std::string &word_symbol_table_rxfilename,
                     const std::string &unk_prob_rspecifier,
                     const std::string &tf_model_path);
-//                    Session* session);
 
   ~KaldiTfRnnlmWrapper() {
     session_->Close();
@@ -48,18 +47,21 @@ class KaldiTfRnnlmWrapper {
 
   int32 GetEos() const { return eos_; }
 
+  // get an all-zero Tensor of the size that matches the hidden state of the TF model
   const Tensor& GetInitialContext() const;
+
+  // get the 2nd-to-last layer of RNN when feeding input of
+  // (initial-context, sentence-boundary)
   const Tensor& GetInitialCell() const;
 
   // compute p(word | wseq) and return the log of that
   // the computation used the input cell,
   // which is the 2nd-to-last layer of the RNNLM associated with history wseq;
   //
-  // and we generate (context_out, new_cell) by passing (context_in, word) into the nnet
+  // and we generate (context_out, new_cell) by passing (context_in, word) into the model
   BaseFloat GetLogProb(int32 word,
-///                       const std::vector<int32> &wseq,
-                       const Tensor &context_in,
-                       const Tensor &cell_in,
+                       const Tensor &context_in, // context to pass into RNN
+                       const Tensor &cell_in,  // 2nd-to-last layer
                        Tensor *context_out,
                        Tensor *new_cell);
 
@@ -67,12 +69,18 @@ class KaldiTfRnnlmWrapper {
   std::vector<std::string> rnn_label_to_word_;
   std::vector<std::string> fst_label_to_word_;
  private:
+  void ReadTfModel(const std::string &tf_model_path);
+
+  // do queries on the session to get the initial tensors (cell + context)
+  void AcquireInitialTensors();
+
+  KaldiTfRnnlmWrapperOpts opts_;
   Tensor initial_context_;
   Tensor initial_cell_;
   int32 num_total_words;
   int32 num_rnn_words;
 
-  Session* session_;  // ptf owned here
+  Session* session_;  // owned here
   int32 eos_;
   int32 oos_;
 
diff --git a/src/tfbin/lattice-lmrescore-tf-rnnlm.cc b/src/tfbin/lattice-lmrescore-tf-rnnlm.cc
index 0278759151f..171654f7efb 100644
--- a/src/tfbin/lattice-lmrescore-tf-rnnlm.cc
+++ b/src/tfbin/lattice-lmrescore-tf-rnnlm.cc
@@ -38,7 +38,7 @@ int main(int argc, char *argv[]) {
         "composing with the wrapped LM using a special type of composition\n"
         "algorithm. Determinization will be applied on the composed lattice.\n"
         "\n"
-        "Usage: lattice-lmrescore-rnnlm [options] <rnnlm-wordlist> \\\n"
+        "Usage: lattice-lmrescore-tf-rnnlm [options] <rnnlm-wordlist> \\\n"
         "             <word-symbol-table-rxfilename> <lattice-rspecifier> \\\n"
         "             <rnnlm-rxfilename> <lattice-wspecifier>\n"
         " e.g.: lattice-lmrescore-rnnlm --lm-scale=-1.0 words.txt \\\n"

From 023f2baf71f2541415b9a0c7365906be023f5ab0 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hxu@b02.clsp.jhu.edu>
Date: Fri, 23 Jun 2017 14:28:59 -0400
Subject: [PATCH 27/30] add better handling of OOS words

---
 egs/ami/s5/local/tensorflow/prep_data.sh |  2 +-
 egs/ami/s5/local/tensorflow/run_fast.sh  |  6 +++--
 egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh  |  2 +-
 src/tensorflow/tensorflow-rnnlm-lib.cc   | 34 ++++++++++++++++++++++--
 src/tensorflow/tensorflow-rnnlm-lib.h    |  5 +++-
 src/tfbin/lattice-lmrescore-tf-rnnlm.cc  | 28 +++++++++++--------
 6 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/egs/ami/s5/local/tensorflow/prep_data.sh b/egs/ami/s5/local/tensorflow/prep_data.sh
index a763aaf15bd..49825781c7c 100755
--- a/egs/ami/s5/local/tensorflow/prep_data.sh
+++ b/egs/ami/s5/local/tensorflow/prep_data.sh
@@ -53,7 +53,7 @@ done
 
 # OK we'll train the RNNLM on this data.
 
-touch $dir/unk.probs  # dummy file, not used for cued-rnnlm
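+cat $dir/unk_class.counts | awk '{print $2, $1}' > $dir/unk.probs  # swap the two fields (presumably to "word count" per line, as the TF rescoring code reads them -- confirm); no longer an empty dummy file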
 
 cp $dir/wordlist.rnn $dir/wordlist.rnn.final
 
diff --git a/egs/ami/s5/local/tensorflow/run_fast.sh b/egs/ami/s5/local/tensorflow/run_fast.sh
index 86007258d41..629a7e064fc 100755
--- a/egs/ami/s5/local/tensorflow/run_fast.sh
+++ b/egs/ami/s5/local/tensorflow/run_fast.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 mic=ihm
-ngram_order=4
+ngram_order=3
 model_type=small
 stage=1
 weight=0.5
@@ -27,6 +27,7 @@ fi
 final_lm=ami_fsh.o3g.kn
 LM=$final_lm.pr1-7
 
+date
 if [ $stage -le 3 ]; then
 #  for decode_set in dev; do
   for decode_set in dev eval; do
@@ -39,9 +40,10 @@ if [ $stage -le 3 ]; then
       --rnnlm-ver tensorflow  --weight $weight --max-ngram-order $ngram_order \
       data/lang_$LM $dir \
       data/$mic/${decode_set}_hires ${decode_dir} \
-      ${decode_dir}.fast.tfrnnlm.lat.${ngram_order}gram.$weight  &
+      ${decode_dir}.unk.fast.tfrnnlm.lat.${ngram_order}gram.$weight  &
 
   done
 fi
 
 wait
+date
diff --git a/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh b/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
index ceac46b5eb9..d3e6ca73dd4 100755
--- a/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
+++ b/egs/wsj/s5/steps/lmrescore_rnnlm_lat.sh
@@ -58,7 +58,7 @@ fi
 
 if [ "$rnnlm_ver" == "tensorflow" ]; then
   rescoring_binary="lattice-lmrescore-tf-rnnlm"
-  first_arg=$rnnlm_dir/wordlist.rnn.final
+  first_arg="$first_arg $rnnlm_dir/wordlist.rnn.final"
 fi
 
 oldlm=$oldlang/G.fst
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.cc b/src/tensorflow/tensorflow-rnnlm-lib.cc
index 4e2d6bc6695..7513b9207a7 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.cc
+++ b/src/tensorflow/tensorflow-rnnlm-lib.cc
@@ -18,6 +18,28 @@ using tf_rnnlm::KaldiTfRnnlmWrapper;
 using tf_rnnlm::TfRnnlmDeterministicFst;
 using tensorflow::Status;
 
+void SetUnkPenalties(const string &filename, const fst::SymbolTable& fst_word_symbols,
+                     std::vector<float> *out) {
+  if (filename == "")
+    return;
+  out->resize(fst_word_symbols.NumSymbols(), 0);  // default is 0
+  ifstream ifile(filename.c_str());
+  string word;
+  float count, total_count = 0;
+  while (ifile >> word >> count) {
+    int id = fst_word_symbols.Find(word);
+    KALDI_ASSERT(id != fst::SymbolTable::kNoSymbol);
+    (*out)[id] = count;
+    total_count += count;
+  }
+
+  for (int i = 0; i < out->size(); i++) {
+    if ((*out)[i] != 0) {
+      (*out)[i] = log ((*out)[i] / total_count);
+    }
+  }
+}
+
 void KaldiTfRnnlmWrapper::ReadTfModel(const std::string &tf_model_path) {
   string graph_path = tf_model_path + ".meta";
 
@@ -55,7 +77,7 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
     const KaldiTfRnnlmWrapperOpts &opts,
     const std::string &rnn_wordlist,
     const std::string &word_symbol_table_rxfilename,
-    const std::string &unk_prob_rspecifier,
+    const std::string &unk_prob_file,
     const std::string &tf_model_path): opts_(opts) {
   ReadTfModel(tf_model_path);
 
@@ -122,6 +144,7 @@ KaldiTfRnnlmWrapper::KaldiTfRnnlmWrapper(
   }
 
   AcquireInitialTensors();
+  SetUnkPenalties(unk_prob_file, *fst_word_symbols, &unk_probs_);
 }
 
 void KaldiTfRnnlmWrapper::AcquireInitialTensors() {
@@ -156,6 +179,7 @@ void KaldiTfRnnlmWrapper::AcquireInitialTensors() {
 
 BaseFloat KaldiTfRnnlmWrapper::GetLogProb(
     int32 word,
+    int32 fst_word,
 //    const std::vector<int32> &wseq,
     const Tensor &context_in,
     const Tensor &cell_in,
@@ -207,7 +231,11 @@ BaseFloat KaldiTfRnnlmWrapper::GetLogProb(
   if (word != oos_) {
     ans = outputs[0].scalar<float>()();
   } else {
-    ans = outputs[0].scalar<float>()() - log (num_total_words - num_rnn_words);
+    if (unk_probs_.size() == 0) {
+      ans = outputs[0].scalar<float>()() - log (num_total_words - num_rnn_words);
+    } else {
+      ans = outputs[0].scalar<float>()() + unk_probs_[fst_word];
+    } 
   }
 
 //  KALDI_LOG << "Computing logprob of word " << rnn_label_to_word_[word] << "(" << word << ")"
@@ -249,6 +277,7 @@ fst::StdArc::Weight TfRnnlmDeterministicFst::Final(StateId s) {
 
   std::vector<Label> wseq = state_to_wseq_[s];
   BaseFloat logprob = rnnlm_->GetLogProb(rnnlm_->GetEos(), // wseq,
+                                         -1,
                                          state_to_context_[s], state_to_cell_[s],
                                          NULL, NULL);
   return Weight(-logprob);
@@ -265,6 +294,7 @@ bool TfRnnlmDeterministicFst::GetArc(StateId s, Label ilabel, fst::StdArc *oarc)
 
   int32 rnn_word = rnnlm_->fst_label_to_rnn_label_[ilabel];
   BaseFloat logprob = rnnlm_->GetLogProb(rnn_word, // wseq,
+                                         ilabel,
                                          state_to_context_[s],
                                          state_to_cell_[s],
                                          &new_context,
diff --git a/src/tensorflow/tensorflow-rnnlm-lib.h b/src/tensorflow/tensorflow-rnnlm-lib.h
index d35dc88273e..58a73981992 100644
--- a/src/tensorflow/tensorflow-rnnlm-lib.h
+++ b/src/tensorflow/tensorflow-rnnlm-lib.h
@@ -38,7 +38,7 @@ class KaldiTfRnnlmWrapper {
   KaldiTfRnnlmWrapper(const KaldiTfRnnlmWrapperOpts &opts,
                     const std::string &rnn_wordlist,
                     const std::string &word_symbol_table_rxfilename,
-                    const std::string &unk_prob_rspecifier,
+                    const std::string &unk_prob_file,
                     const std::string &tf_model_path);
 
   ~KaldiTfRnnlmWrapper() {
@@ -60,6 +60,7 @@ class KaldiTfRnnlmWrapper {
   //
   // and we generate (context_out, new_cell) by passing (context_in, word) into the model
   BaseFloat GetLogProb(int32 word,
+                       int32 fst_word,
                        const Tensor &context_in, // context to pass into RNN
                        const Tensor &cell_in,  // 2nd-to-last layer
                        Tensor *context_out,
@@ -84,6 +85,8 @@ class KaldiTfRnnlmWrapper {
   int32 eos_;
   int32 oos_;
 
+  std::vector<float> unk_probs_;
+
   KALDI_DISALLOW_COPY_AND_ASSIGN(KaldiTfRnnlmWrapper);
 };
 
diff --git a/src/tfbin/lattice-lmrescore-tf-rnnlm.cc b/src/tfbin/lattice-lmrescore-tf-rnnlm.cc
index 171654f7efb..5574a01c063 100644
--- a/src/tfbin/lattice-lmrescore-tf-rnnlm.cc
+++ b/src/tfbin/lattice-lmrescore-tf-rnnlm.cc
@@ -59,24 +59,30 @@ int main(int argc, char *argv[]) {
 
     po.Read(argc, argv);
 
-    if (po.NumArgs() != 4 && po.NumArgs() != 5) {
+    if (po.NumArgs() != 6 && po.NumArgs() != 5) {
       po.PrintUsage();
       exit(1);
     }
 
     std::string lats_rspecifier, rnn_word_list,
-        word_symbols_rxfilename, rnnlm_rxfilename, lats_wspecifier;
-    KALDI_ASSERT (po.NumArgs() == 5);
-
-    rnn_word_list = po.GetArg(1);
-    word_symbols_rxfilename = po.GetArg(2);
-    lats_rspecifier = po.GetArg(3);
-    rnnlm_rxfilename = po.GetArg(4);
-    lats_wspecifier = po.GetArg(5);
-
+        word_symbols_rxfilename, rnnlm_rxfilename, lats_wspecifier, unk_prob_file;
+    if (po.NumArgs() == 5) {
+      rnn_word_list = po.GetArg(1);
+      word_symbols_rxfilename = po.GetArg(2);
+      lats_rspecifier = po.GetArg(3);
+      rnnlm_rxfilename = po.GetArg(4);
+      lats_wspecifier = po.GetArg(5);
+    } else {
+      unk_prob_file = po.GetArg(1);
+      rnn_word_list = po.GetArg(2);
+      word_symbols_rxfilename = po.GetArg(3);
+      lats_rspecifier = po.GetArg(4);
+      rnnlm_rxfilename = po.GetArg(5);
+      lats_wspecifier = po.GetArg(6);
+    }
     // Reads the language model.
     KaldiTfRnnlmWrapper rnnlm(opts, rnn_word_list, word_symbols_rxfilename,
-                                "", rnnlm_rxfilename);
+                                unk_prob_file, rnnlm_rxfilename);
 
     // Reads and writes as compact lattice.
     SequentialCompactLatticeReader compact_lattice_reader(lats_rspecifier);

From 9053aa1deed5659a3fd21c7aaeb945e54f55c190 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainanx@cltdell01gpu.clt.spoken.com>
Date: Fri, 23 Jun 2017 17:41:50 -0400
Subject: [PATCH 28/30] small change in install_tensorflow script

---
 src/tfbin/Makefile          |  2 +-
 tools/install_tensorflow.sh | 10 ++++------
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/tfbin/Makefile b/src/tfbin/Makefile
index a105f9830e3..0c75db0e5cc 100644
--- a/src/tfbin/Makefile
+++ b/src/tfbin/Makefile
@@ -19,7 +19,7 @@ ADDLIBS = ../lat/kaldi-lat.a ../lm/kaldi-lm.a ../fstext/kaldi-fstext.a \
           ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a ../util/kaldi-util.a \
           ../thread/kaldi-thread.a ../matrix/kaldi-matrix.a \
           ../base/kaldi-base.a ../tensorflow/kaldi-tensorflow-rnnlm.a \
-          $(CURDIR)/../../tools/tensorflow/bazel-bin/tensorflow/tensorflow_cc.so
+          $(CURDIR)/../../tools/tensorflow/bazel-bin/tensorflow/libtensorflow_cc.so
 
 LDLIBS +=  -lz -ldl -fPIC -lrt
 LDLIBS += $(OTHERLIBS) -L$(TENSORFLOW)/bazel-bin/tensorflow -ltensorflow_cc
diff --git a/tools/install_tensorflow.sh b/tools/install_tensorflow.sh
index 28d97e65567..f96cb48df0d 100755
--- a/tools/install_tensorflow.sh
+++ b/tools/install_tensorflow.sh
@@ -2,10 +2,8 @@
 
 set -e
 
-export HOME=/export/b02/hxu
-export JAVA_HOME=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121
-export PATH=/export/b02/hxu/TensorFlow/java/jdk1.8.0_121/bin/:$PATH
-export PATH=$PWD/bazel/output/:$PATH
+export HOME=$PWD/tensorflow_build/
+mkdir -p $HOME
 
 [ ! -f bazel.zip ] && wget https://github.com/bazelbuild/bazel/releases/download/0.5.1/bazel-0.5.1-dist.zip -O bazel.zip
 mkdir -p bazel
@@ -14,11 +12,11 @@ unzip ../bazel.zip
 ./compile.sh
 cd ../
 
-## now bazel is built
+# now bazel is built
 git clone https://github.com/tensorflow/tensorflow
 cd tensorflow
 ./configure
 
 tensorflow/contrib/makefile/download_dependencies.sh 
 bazel build --copt=-msse4.2 //tensorflow:libtensorflow.so
-#bazel build //tensorflow:libtensorflow_cc.so
+bazel build --copt=-msse4.2 //tensorflow:libtensorflow_cc.so

From 77f5d71f367bff6270f2ef9778c7613f84a064b3 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainanx@cltdell01gpu.clt.spoken.com>
Date: Wed, 5 Jul 2017 13:04:26 -0400
Subject: [PATCH 29/30] add install python tf script

---
 ...stall_tensorflow.sh => install_tensorflow_cc.sh} |  1 +
 tools/install_tensorflow_py.sh                      | 13 +++++++++++++
 2 files changed, 14 insertions(+)
 rename tools/{install_tensorflow.sh => install_tensorflow_cc.sh} (93%)
 create mode 100644 tools/install_tensorflow_py.sh

diff --git a/tools/install_tensorflow.sh b/tools/install_tensorflow_cc.sh
similarity index 93%
rename from tools/install_tensorflow.sh
rename to tools/install_tensorflow_cc.sh
index f96cb48df0d..d0e454a8b89 100755
--- a/tools/install_tensorflow.sh
+++ b/tools/install_tensorflow_cc.sh
@@ -2,6 +2,7 @@
 
 set -e
 
+# first part: for compiling with kaldi
 export HOME=$PWD/tensorflow_build/
 mkdir -p $HOME
 
diff --git a/tools/install_tensorflow_py.sh b/tools/install_tensorflow_py.sh
new file mode 100644
index 00000000000..e5877b32b5f
--- /dev/null
+++ b/tools/install_tensorflow_py.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+export HOME=$PWD/tensorflow_build/
+
+has_gpu=false
+
+tf_source=https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.2.0-cp27-none-linux_x86_64.whl
+
+if [ $has_gpu != "true" ]; then
+  tf_source=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.2.0-cp27-none-linux_x86_64.whl
+fi
+
+pip install --user $tf_source

From 70f349f1f0fcc4e08097d6fbf7d4c8b5034abe73 Mon Sep 17 00:00:00 2001
From: Hainan Xu <hainanx@cltdell01gpu.clt.spoken.com>
Date: Wed, 5 Jul 2017 13:16:13 -0400
Subject: [PATCH 30/30] make install script executable (chmod +x)

---
 tools/install_tensorflow_py.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 tools/install_tensorflow_py.sh

diff --git a/tools/install_tensorflow_py.sh b/tools/install_tensorflow_py.sh
old mode 100644
new mode 100755