diff --git a/src/nnetbin/nnet-train-frmshuff.cc b/src/nnetbin/nnet-train-frmshuff.cc index 5b7a6af6f60..cc50e33ea42 100644 --- a/src/nnetbin/nnet-train-frmshuff.cc +++ b/src/nnetbin/nnet-train-frmshuff.cc @@ -159,7 +159,7 @@ int main(int argc, char *argv[]) { CuMatrix feats_transf, nnet_out, obj_diff; - Timer time; + Timer time, time_io; KALDI_LOG << (crossvalidate ? "CROSS-VALIDATION" : "TRAINING") << " STARTED"; @@ -167,6 +167,8 @@ int main(int argc, char *argv[]) { num_no_tgt_mat = 0, num_other_error = 0; + double time_io_accu = 0.0; + // main loop, while (!feature_reader.Done()) { #if HAVE_CUDA == 1 @@ -174,6 +176,7 @@ int main(int argc, char *argv[]) { CuDevice::Instantiate().CheckGpuHealth(); #endif // fill the randomizer, + time_io.Reset(); for ( ; !feature_reader.Done(); feature_reader.Next()) { if (feature_randomizer.IsFull()) { // break the loop without calling Next(), @@ -219,6 +222,10 @@ int main(int argc, char *argv[]) { weights.Scale(w); } + // accumulate the I/O time, + time_io_accu += time_io.Elapsed(); + time_io.Reset(); // to be sure we don't count 2x, + // skip too long utterances (or we run out of memory), if (mat.NumRows() > max_frames) { KALDI_WARN << "Utterance too long, skipping! " << utt @@ -299,13 +306,7 @@ int main(int argc, char *argv[]) { weights_randomizer.AddData(weights); num_done++; - // report the speed, - if (num_done % 5000 == 0) { - double time_now = time.Elapsed(); - KALDI_VLOG(1) << "After " << num_done << " utterances: " - << "time elapsed = " << time_now / 60 << " min; " - << "processed " << total_frames / time_now << " frames per sec."; - } + time_io.Reset(); // reset before reading next feature matrix, } // randomize, @@ -350,11 +351,11 @@ int main(int argc, char *argv[]) { // 1st mini-batch : show what happens in network, if (total_frames == 0) { - KALDI_VLOG(1) << "### After " << total_frames << " frames,"; - KALDI_VLOG(1) << nnet.InfoPropagate(); + KALDI_LOG << "### After " << total_frames << " frames,"; + KALDI_LOG << nnet.InfoPropagate(); if (!crossvalidate) { - KALDI_VLOG(1) << nnet.InfoBackPropagate(); - KALDI_VLOG(1) << nnet.InfoGradient(); + KALDI_LOG << nnet.InfoBackPropagate(); + KALDI_LOG << nnet.InfoGradient(); } } @@ -380,11 +381,11 @@ int main(int argc, char *argv[]) { } // main loop, // after last mini-batch : show what happens in network, - KALDI_VLOG(1) << "### After " << total_frames << " frames,"; - KALDI_VLOG(1) << nnet.InfoPropagate(); + KALDI_LOG << "### After " << total_frames << " frames,"; + KALDI_LOG << nnet.InfoPropagate(); if (!crossvalidate) { - KALDI_VLOG(1) << nnet.InfoBackPropagate(); - KALDI_VLOG(1) << nnet.InfoGradient(); + KALDI_LOG << nnet.InfoBackPropagate(); + KALDI_LOG << nnet.InfoGradient(); } if (!crossvalidate) { @@ -397,7 +398,8 @@ int main(int argc, char *argv[]) { << "[" << (crossvalidate ? "CROSS-VALIDATION" : "TRAINING") << ", " << (randomize ? "RANDOMIZED" : "NOT-RANDOMIZED") << ", " << time.Elapsed() / 60 << " min, processing " - << total_frames / time.Elapsed() << " frames per sec.]"; + << total_frames / time.Elapsed() << " frames per sec;" + << " i/o time " << 100.*time_io_accu/time.Elapsed() << "%]"; if (objective_function == "xent") { KALDI_LOG << xent.ReportPerClass(); diff --git a/src/nnetbin/nnet-train-multistream-perutt.cc b/src/nnetbin/nnet-train-multistream-perutt.cc index 7b68dee05e5..3694cf29e01 100644 --- a/src/nnetbin/nnet-train-multistream-perutt.cc +++ b/src/nnetbin/nnet-train-multistream-perutt.cc @@ -282,7 +282,6 @@ int main(int argc, char *argv[]) { os << frame_num_utt[i] << " "; } os << "]"; - KALDI_LOG << "frame_num_utt[" << frame_num_utt.size() << "]" << os.str(); } // Reset all the streams (we have new sentences), @@ -301,31 +300,20 @@ int main(int argc, char *argv[]) { // 1st model update : show what happens in network, if (total_frames == 0) { - KALDI_VLOG(1) << "### After " << total_frames << " frames,"; - KALDI_VLOG(1) << nnet.Info(); - KALDI_VLOG(1) << nnet.InfoPropagate(); + KALDI_LOG << "### After " << total_frames << " frames,"; + KALDI_LOG << nnet.Info(); + KALDI_LOG << nnet.InfoPropagate(); if (!crossvalidate) { - KALDI_VLOG(1) << nnet.InfoBackPropagate(); - KALDI_VLOG(1) << nnet.InfoGradient(); + KALDI_LOG << nnet.InfoBackPropagate(); + KALDI_LOG << nnet.InfoGradient(); } } - int32 tmp_done = num_done; kaldi::int64 tmp_frames = total_frames; num_done += frame_num_utt.size(); total_frames += std::accumulate(frame_num_utt.begin(), frame_num_utt.end(), 0); - // report the speed, - int32 N = 5000; - if (tmp_done / N != num_done / N) { - double time_now = time.Elapsed(); - KALDI_VLOG(1) << "After " << num_done << " utterances, " - << "(" << total_frames/360000.0 << "h), " - << "time elapsed = " << time_now / 60 << " min; " - << "processed " << total_frames / time_now << " frames per sec."; - } - // monitor the NN training (--verbose=2), int32 F = 25000; if (GetVerboseLevel() >= 3) { @@ -343,14 +331,12 @@ int main(int argc, char *argv[]) { } // after last model update : show what happens in network, - if (GetVerboseLevel() >= 1) { // vlog-1 - KALDI_VLOG(1) << "### After " << total_frames << " frames,"; - KALDI_VLOG(1) << nnet.Info(); - KALDI_VLOG(1) << nnet.InfoPropagate(); - if (!crossvalidate) { - KALDI_VLOG(1) << nnet.InfoBackPropagate(); - KALDI_VLOG(1) << nnet.InfoGradient(); - } + KALDI_LOG << "### After " << total_frames << " frames,"; + KALDI_LOG << nnet.Info(); + KALDI_LOG << nnet.InfoPropagate(); + if (!crossvalidate) { + KALDI_LOG << nnet.InfoBackPropagate(); + KALDI_LOG << nnet.InfoGradient(); } if (!crossvalidate) { diff --git a/src/nnetbin/nnet-train-multistream.cc b/src/nnetbin/nnet-train-multistream.cc index 5607072aa94..133c49e02a5 100644 --- a/src/nnetbin/nnet-train-multistream.cc +++ b/src/nnetbin/nnet-train-multistream.cc @@ -360,7 +360,6 @@ int main(int argc, char *argv[]) { os << frame_num_utt[i] << " "; } os << "]"; - KALDI_LOG << "frame_num_utt[" << frame_num_utt.size() << "]" << os.str(); } @@ -387,31 +386,20 @@ int main(int argc, char *argv[]) { // 1st minibatch : show what happens in network, if (total_frames == 0) { - KALDI_VLOG(1) << "### After " << total_frames << " frames,"; - KALDI_VLOG(1) << nnet.Info(); - KALDI_VLOG(1) << nnet.InfoPropagate(); + KALDI_LOG << "### After " << total_frames << " frames,"; + KALDI_LOG << nnet.Info(); + KALDI_LOG << nnet.InfoPropagate(); if (!crossvalidate) { - KALDI_VLOG(1) << nnet.InfoBackPropagate(); - KALDI_VLOG(1) << nnet.InfoGradient(); + KALDI_LOG << nnet.InfoBackPropagate(); + KALDI_LOG << nnet.InfoGradient(); } } - int32 tmp_done = num_done; kaldi::int64 tmp_frames = total_frames; num_done += std::accumulate(new_utt_flags.begin(), new_utt_flags.end(), 0); total_frames += std::accumulate(frame_num_utt.begin(), frame_num_utt.end(), 0); - // report the speed, - int32 N = 5000; - if (tmp_done / N != num_done / N) { - double time_now = time.Elapsed(); - KALDI_VLOG(1) << "After " << num_done << " utterances, " - << "(" << total_frames/360000.0 << "h), " - << "time elapsed = " << time_now / 60 << " min; " - << "processed " << total_frames / time_now << " frames per sec."; - } - // monitor the NN training (--verbose=2), int32 F = 25000; if (GetVerboseLevel() >= 2) { @@ -429,12 +417,12 @@ int main(int argc, char *argv[]) { } // after last minibatch : show what happens in network, - KALDI_VLOG(1) << "### After " << total_frames << " frames,"; - KALDI_VLOG(1) << nnet.Info(); - KALDI_VLOG(1) << nnet.InfoPropagate(); + KALDI_LOG << "### After " << total_frames << " frames,"; + KALDI_LOG << nnet.Info(); + KALDI_LOG << nnet.InfoPropagate(); if (!crossvalidate) { - KALDI_VLOG(1) << nnet.InfoBackPropagate(); - KALDI_VLOG(1) << nnet.InfoGradient(); + KALDI_LOG << nnet.InfoBackPropagate(); + KALDI_LOG << nnet.InfoGradient(); } if (!crossvalidate) { diff --git a/src/nnetbin/nnet-train-perutt.cc b/src/nnetbin/nnet-train-perutt.cc index 90e681d0f0b..8f417e1b608 100644 --- a/src/nnetbin/nnet-train-perutt.cc +++ b/src/nnetbin/nnet-train-perutt.cc @@ -238,11 +238,11 @@ int main(int argc, char *argv[]) { // 1st minibatch : show what happens in network, if (total_frames == 0) { - KALDI_VLOG(1) << "### After " << total_frames << " frames,"; - KALDI_VLOG(1) << nnet.InfoPropagate(); + KALDI_LOG << "### After " << total_frames << " frames,"; + KALDI_LOG << nnet.InfoPropagate(); if (!crossvalidate) { - KALDI_VLOG(1) << nnet.InfoBackPropagate(); - KALDI_VLOG(1) << nnet.InfoGradient(); + KALDI_LOG << nnet.InfoBackPropagate(); + KALDI_LOG << nnet.InfoGradient(); } } @@ -265,27 +265,14 @@ int main(int argc, char *argv[]) { num_done++; total_frames += frm_weights.Sum(); - - // do this every 5000 utterances, - if (num_done % 5000 == 0) { - // report the speed, - double time_now = time.Elapsed(); - KALDI_VLOG(1) << "After " << num_done << " utterances: " - << "time elapsed = " << time_now / 60 << " min; " - << "processed " << total_frames / time_now << " frames per sec."; -#if HAVE_CUDA == 1 - // check that GPU computes accurately, - CuDevice::Instantiate().CheckGpuHealth(); -#endif - } } // main loop, // after last minibatch : show what happens in network, - KALDI_VLOG(1) << "### After " << total_frames << " frames,"; - KALDI_VLOG(1) << nnet.InfoPropagate(); + KALDI_LOG << "### After " << total_frames << " frames,"; + KALDI_LOG << nnet.InfoPropagate(); if (!crossvalidate) { - KALDI_VLOG(1) << nnet.InfoBackPropagate(); - KALDI_VLOG(1) << nnet.InfoGradient(); + KALDI_LOG << nnet.InfoBackPropagate(); + KALDI_LOG << nnet.InfoGradient(); } if (!crossvalidate) {