Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions src/nnet3/nnet-analyze.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1280,12 +1280,13 @@ void Analyzer::Init(const Nnet &nnet, const NnetComputation &computation) {
&matrix_accesses);
}

void GetSegmentEnds(const NnetComputation &computation,
std::vector<int32> *command_indexes) {
void GetCommandsOfType(const NnetComputation &computation,
CommandType t,
std::vector<int32> *command_indexes) {
int32 num_commands = computation.commands.size();
command_indexes->clear();
for (int32 c = 0; c < num_commands; c++)
if (computation.commands[c].command_type == kNoOperationMarker)
if (computation.commands[c].command_type == t)
command_indexes->push_back(c);
}

Expand Down
12 changes: 5 additions & 7 deletions src/nnet3/nnet-analyze.h
Original file line number Diff line number Diff line change
Expand Up @@ -423,13 +423,11 @@ class ComputationChecker {
};


/// This utility function works out from a computation, the locations of the
/// 'segment ends'. This is useful for online compilation, where the
/// computation has multiple segments corresponding to new pieces of input data
/// to process. The implementation of the function is extremely simple; it
/// just gives you the locations of commands of type 'kNoOperationMarker'.
void GetSegmentEnds(const NnetComputation &computation,
std::vector<int32> *command_indexes);
/// This utility function works out from a computation, the command-indexes of
/// the commands of the given type.
void GetCommandsOfType(const NnetComputation &computation,
CommandType t,
std::vector<int32> *command_indexes);

/// This is a convenience interface for class ComputationChecker. Call it with
/// check_rewrite = true only if the computation is pre-optimization.
Expand Down
91 changes: 39 additions & 52 deletions src/nnet3/nnet-optimize-utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3301,8 +3301,7 @@ class ComputationLoopedOptimizer {
private:

// Figures out the time shift between the successive computation requests.
static int32 FindTimeShift(const NnetComputation &computation,
const std::vector<int32> &segment_ends);
static int32 FindTimeShift(const NnetComputation &computation);

// This function creates a mapping from a matrix-index > 0,
// to a pair (unique_id, time_offset) that represents the debug-info
Expand Down Expand Up @@ -3440,20 +3439,20 @@ class ComputationLoopedOptimizer {



/// Given a list of command indexes ('segment_end_commands') which are
/// Given a list of command indexes ('splice_point_commands') which are
/// expected to be command indexes of the kNoOperationMarker at segment
/// boundaries, this function outputs for each of these command indexes a list
/// of matrices which are 'active' at that point in time. By 'active' we mean
/// that the matrix has been written to before that time (note, we don't count
/// initialization with zeros as being written to); and will be read after
/// that time. These is the list of matrices that 'need to be in scope'
/// at those points in time. '*active_matrices' is indexed by the
/// same index as 'segment_end_commands', and is then a list of active
/// same index as 'splice_point_commands', and is then a list of active
/// matrices, in numerical order of matrix index.
/// Note: for each i, (*active_matrices)[i] will be sorted and unique.
static void FindActiveMatrices(const NnetComputation &computation,
const Analyzer &analyzer,
const std::vector<int32> &segment_end_commands,
const std::vector<int32> &splice_point_commands,
std::vector<std::vector<int32> > *active_matrices);


Expand All @@ -3462,13 +3461,14 @@ class ComputationLoopedOptimizer {
Analyzer analyzer_;
std::vector<std::pair<int32, int32> > matrix_to_pair_;

std::vector<int32> segment_end_commands_;
std::vector<int32> splice_point_commands_;
};

// static
int32 ComputationLoopedOptimizer::FindTimeShift(
const NnetComputation &computation,
const std::vector<int32> &segment_ends) {
const NnetComputation &computation) {
std::vector<int32> segment_ends;
GetCommandsOfType(computation, kNoOperationMarker, &segment_ends);
KALDI_ASSERT(segment_ends.size() >= 3);
// Ignore the first segment as it tends to be a special case
// (it has more left context),
Expand Down Expand Up @@ -3647,19 +3647,9 @@ bool ComputationLoopedOptimizer::FindFirstRepeat(
// differ.
KALDI_ASSERT(num_segments >= 2);

bool perform_time_offset_check = true;
if (normalized_active_pairs.back().empty()) {
// If there are no variables active after the end of the last-but-one segment
// (which is the last element in segment_ends, since we remove the end of the
// very last segment), then don't perform the check related to
// time-offsets, it's not relevant. [this would probably be a computation
// that doesn't require any context].
perform_time_offset_check = false;
}
for (int32 s = 0; s < num_segments; s++) {
for (int32 t = s + 1; t < num_segments; t++) {
if ((!perform_time_offset_check ||
time_offsets[t]-time_offsets[s] == (t-s) * time_shift_per_segment) &&
if ((time_offsets[t]-time_offsets[s] == (t-s) * time_shift_per_segment) &&
normalized_active_pairs[s] == normalized_active_pairs[t]) {
*seg1 = s;
*seg2 = t;
Expand Down Expand Up @@ -3696,35 +3686,35 @@ void ComputationLoopedOptimizer::PairListToMatrixList(
void ComputationLoopedOptimizer::FindActiveMatrices(
const NnetComputation &computation,
const Analyzer &analyzer,
const std::vector<int32> &segment_end_commands,
const std::vector<int32> &splice_point_commands,
std::vector<std::vector<int32> > *active_matrices) {
int32 num_matrices = computation.matrices.size();
int32 num_segments = segment_end_commands.size();
int32 num_splice_points = splice_point_commands.size();
active_matrices->clear();
active_matrices->resize(num_segments);
active_matrices->resize(num_splice_points);
// this object just makes available some extra functions, vs. the Analyzer
// object.
ComputationAnalysis analysis(computation, analyzer);
KALDI_ASSERT(IsSortedAndUniq(segment_end_commands));
KALDI_ASSERT(IsSortedAndUniq(splice_point_commands));

// the following vector gives us, for each matrix index, a submatrix index
// that covers the whole of that matrix (needed by interface of 'analysis' object).
std::vector<int32> whole_submatrices;
computation.GetWholeSubmatrices(&whole_submatrices);
for (int32 m = 1; m < num_matrices; m++) {
// the following are command indexes, comparable with the indexes
// in 'segment_end_commands'.
// in 'splice_point_commands'.
int32 s = whole_submatrices[m], // submatrix consisting of the whole of
// 'm'.
first_access = analysis.FirstAccess(s),
last_access = analysis.LastAccess(s);
for (int32 seg = 0; seg < num_segments; seg++) {
int32 segment_end = segment_end_commands[seg];
if (first_access < segment_end && last_access > segment_end) {
for (int32 i = 0; i < num_splice_points; i++) {
int32 splice_point = splice_point_commands[i];
if (first_access < splice_point && last_access > splice_point) {
// If the block of time during which the matrix is accessed, includes
// this segment end-point, then the matrix is considered 'active' at
// that time.
(*active_matrices)[seg].push_back(m);
// this command index, then the matrix is considered 'active' at this
// time.
(*active_matrices)[i].push_back(m);
}
}
}
Expand Down Expand Up @@ -3860,8 +3850,8 @@ void ComputationLoopedOptimizer::FormInfiniteLoop(
KALDI_ASSERT(static_cast<int32>(computation->commands.size()) >=
command2 + 1 && command1 < command2);
KALDI_ASSERT(
computation->commands[command1].command_type == kNoOperationMarker &&
computation->commands[command2].command_type == kNoOperationMarker);
computation->commands[command1].command_type == kNoOperationPermanent &&
computation->commands[command2].command_type == kNoOperationPermanent);
// Remove any commands after 'command2'.
computation->commands.resize(command2 + 1);
computation->commands[command2].command_type = kGotoLabel;
Expand All @@ -3880,25 +3870,22 @@ bool ComputationLoopedOptimizer::Optimize() {
"You must request matrix debug info when compiling "
"looped computations.");

// get the indexes of the separator commands at the ends of segments.
std::vector<int32> segment_ends;
GetSegmentEnds(*computation_, &segment_ends);
int32 time_shift_per_segment = FindTimeShift(*computation_,
segment_ends);

// Ignore the end of the very last segment- it is not a candidate for a
// 'splice point'. What we're doing here is like creating a tape loop; we
// have to find a place where the list of variables is the same except for a
// time offset.
// [note: it's not exactly like a tape loop because the prologue can
// vary... the sequence is of the form like a b b b b b .. ]
segment_ends.pop_back();
// get the indexes of potential splice points, one per segment of the
// computation. We locate the splice points where kNoOperationPermanent is.
// This is guaranteed to be after the inputs have been received, and before
// the bulk of the computation in the segment, and of course before we provide
// the output. It happens that by choosing this as the splice point we avoid
// certain problems that would arise, for instance, if we chose the segment
// boundaries (kNoOperationMarker).
std::vector<int32> splice_points;
GetCommandsOfType(*computation_, kNoOperationPermanent,
&splice_points);
int32 time_shift_per_segment = FindTimeShift(*computation_);


std::vector<std::vector<int32> > active_matrices;
// Find the list of matrices active at each of those segment-end-command
// times.
FindActiveMatrices(*computation_, analyzer_, segment_ends,
// Find the list of matrices active at each of the potential splice points.
FindActiveMatrices(*computation_, analyzer_, splice_points,
&active_matrices);

// Find a representation of the matrices of the computation as pairs
Expand All @@ -3912,16 +3899,16 @@ bool ComputationLoopedOptimizer::Optimize() {
unordered_map<std::pair<int32, int32>, int32, PairHasher<int32> > pair_to_matrix;
GetPairToMatrixMap(matrix_to_pair, &pair_to_matrix);

// get lists of matrix per segment in the pair representation.
// get lists of matrix per splice-point in the pair representation.
std::vector<std::vector<std::pair<int32, int32> > > pair_lists;
ConvertListsToPairLists(active_matrices, matrix_to_pair,
&pair_lists);

std::vector<int32> time_offsets;
NormalizePairLists(&pair_lists, &time_offsets);

// Note: seg1 and seg2 are indexes into 'segment_ends', representing
// points in time (that happen to be the ends of segments).
// Note: seg1 and seg2 are indexes into 'splice_points', representing
// potential splice points (located near the beginnings of segments).
int32 seg1, seg2;
if (!FindFirstRepeat(pair_lists,
time_offsets,
Expand All @@ -3945,7 +3932,7 @@ bool ComputationLoopedOptimizer::Optimize() {
time_difference);


FormInfiniteLoop(segment_ends[seg1], segment_ends[seg2], computation_);
FormInfiniteLoop(splice_points[seg1], splice_points[seg2], computation_);

AddMatrixSwapCommands(seg1_matrices, seg2_matrices, computation_);

Expand Down
108 changes: 11 additions & 97 deletions src/nnet3/nnet-optimize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ bool NnetOptimizeOptions::operator == (const NnetOptimizeOptions &other) const {
}

// move commands that resize matrices to as late/early as possible.
// (however, keep input and output commands where they were; it
// creates other headaches if we move those).
void MoveSizingCommands(const Nnet &nnet, NnetComputation *computation) {
ComputationVariables variables;
variables.Init(*computation);
Expand Down Expand Up @@ -163,15 +165,20 @@ void MoveSizingCommands(const Nnet &nnet, NnetComputation *computation) {
}
if (first_access_command != -1) {
KALDI_ASSERT(first_access_command > ma.allocate_command);
// move the initialization command to just before the first access.
commands[ma.allocate_command].first = first_access_command * 3 - 1;
// move the initialization command to just before the first access,
// it wasn't a kAcceptInput command.
if (commands[ma.allocate_command].second->command_type != kAcceptInput)
commands[ma.allocate_command].first = first_access_command * 3 - 1;
}
}
if (ma.deallocate_command != -1) {
if (!ma.accesses.empty()) {
int32 last_access_command = ma.accesses.back().command_index;
// move the destruction command to just after the last access.
commands[ma.deallocate_command].first = last_access_command * 3 + 1;
// move the destruction command to just after the last access, if it
// wasn't a kProvideOutput command.
if (commands[ma.deallocate_command].second->command_type !=
kProvideOutput)
commands[ma.deallocate_command].first = last_access_command * 3 + 1;
}
}
}
Expand Down Expand Up @@ -437,7 +444,6 @@ void Optimize(const NnetOptimizeOptions &config,
if (GetVerboseLevel() >= 4)
CheckComputation(nnet, *computation, true);


{ // Call LimitDerivativeTimes(); it's important that this
// should come before other optimizations (search for "insist" in
// nnet-optimize-utils.cc for the reasons).
Expand Down Expand Up @@ -831,98 +837,9 @@ static void SplitComputationIntoSegments(
segments->push_back(std::pair<int32, int32>(cur_start, num_commands));
}

// This is a helper function used in ConsolidateIoOperations().
//
// Suppose we had something like this before ConsolidateIoOperations() (as would
// be printed by Print()

// c90: output m50 to user [for node: 'output']
// ...
// c100: [label for goto statement]
// c101: # computation segment separator [e.g., begin backward commands]
// ...
// c105: m62 = user input [for node: 'input']
// ...
// c190: output m79 to user [for node: 'output']
// ...
// c200: goto c100
//
// this would get reordered to the following by ConsolidateIoOperations
// (the bulk of the code, before this function is called):
//
// c99: [label for goto statement]
// c100: output m50 to user [for node: 'output']
// c101: # computation segment separator [e.g., begin backward commands]
// c102: m62 = user input [for node: 'input']
// ...
// c199: goto c199
// c200: output m79 to user [for node: 'output']
//
// Now command c200 is unreachable, but there is a similar command at c100
// (after the goto) that will substitute. However, the matrix indexes are different.
// So we need to change the above so that the last two commands read:
// c199: m50.swap(m79}
// c200: goto c199
void FixGotoOutputReordering(const Nnet &nnet,
NnetComputation *computation) {
FixGotoLabel(computation); // make sure the destination label of the goto statement was
// correct.
int32 goto_command_index = -1;
for (int32 c = computation->commands.size() - 1; c >= 0; c--)
if (computation->commands[c].command_type == kGotoLabel)
goto_command_index = c;
KALDI_ASSERT(goto_command_index > 0);
int32 goto_label_index = computation->commands[goto_command_index].arg1;

std::vector<int32> output_commands_after_goto,
output_commands_after_label;
for (int32 c = goto_command_index + 1;
c < static_cast<int32>(computation->commands.size()); c++) {
KALDI_ASSERT(computation->commands[c].command_type == kProvideOutput);
output_commands_after_goto.push_back(c);
}
for (int32 c = goto_label_index + 1;
c < goto_command_index; c++) { // note: we break from this loop.
CommandType t = computation->commands[c].command_type;
if (t == kProvideOutput)
output_commands_after_label.push_back(c);
else if (t != kNoOperationMarker && t != kAcceptInput)
break;
}
if (output_commands_after_goto.size() != output_commands_after_label.size()) {
computation->Print(std::cerr, nnet);
KALDI_ERR << "Could not fix goto/output reordering, size mismatch.";
}
NnetComputation::Command goto_command = computation->commands[goto_command_index];
// be we'll be replacing the final kProvideOutput commands with
// kAllocMatrixFromOther [i.e. swap commands], and moving them one command
// backward; later we'll put the goto command at the end.
for (size_t i = 0; i < output_commands_after_goto.size(); i++) {
int32 c1 = output_commands_after_label[i],
c2 = output_commands_after_goto[i],
new_c2 = c2 - 1;
int32 s1 = computation->commands[c1].arg1,
s2 = computation->commands[c2].arg1;
// The following assert checks that the network node-index is the same...
// the idea is that the outputs should have been provided in the same order.
// I can think of no reason why the order might be different.
KALDI_ASSERT(computation->commands[c1].arg2 ==
computation->commands[c1].arg2);
computation->commands[new_c2].command_type = kAllocMatrixFromOther;
computation->commands[new_c2].arg1 = s1;
computation->commands[new_c2].arg2 = s2;
}
// ... and move the goto command to the end.
computation->commands.back() = goto_command;
}


void ConsolidateIoOperations(const Nnet &nnet,
NnetComputation *computation) {
bool ends_with_goto =
(!computation->commands.empty() &&
computation->commands.back().command_type == kGotoLabel);

// These segments, represented as (start-index, end-index),
// are segments of the computation separated by kNoOperationMarker.
std::vector<std::pair<int32, int32> > segments;
Expand Down Expand Up @@ -971,9 +888,6 @@ void ConsolidateIoOperations(const Nnet &nnet,
KALDI_ASSERT(c == segment_end);
}
computation->commands.swap(reordered_commands);

if (ends_with_goto)
FixGotoOutputReordering(nnet, computation);
}


Expand Down