diff --git a/.gitattributes b/.gitattributes index 5a815654b4c..bede44edf8a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -15,4 +15,6 @@ windows/INSTALL* eol=native windows/NewGuidCmd.exe.config text eol=crlf windows/NewGuidCmd.exe binary +# Prevent git changing CR-LF to LF when archiving (patch requires CR-LF on Windows). +**/*.patch -text diff --git a/tools/extras/openfstwin-1.3.4.patch b/tools/extras/openfstwin-1.3.4.patch index e142341f5ba..858a61160fa 100644 --- a/tools/extras/openfstwin-1.3.4.patch +++ b/tools/extras/openfstwin-1.3.4.patch @@ -1,425 +1,425 @@ -diff --git a/src/include/fst/fst.h b/src/include/fst/fst.h -index 5ad3b52..d9c0ca6 100644 ---- a/src/include/fst/fst.h -+++ b/src/include/fst/fst.h -@@ -45,6 +45,12 @@ DECLARE_bool(fst_align); - - namespace fst { - -+ typedef ::int64 int64; -+ typedef ::uint64 uint64; -+ typedef ::int32 int32; -+ typedef ::uint32 uint32; -+ -+ - bool OPENFSTDLL IsFstHeader(istream &, const string &); //ChangedPD - - class FstHeader; -diff --git a/src/include/fst/interval-set.h b/src/include/fst/interval-set.h -index c4362f2..58cad44 100644 ---- a/src/include/fst/interval-set.h -+++ b/src/include/fst/interval-set.h -@@ -37,38 +37,38 @@ template - class IntervalSet { - public: - struct Interval { -- T begin; -- T end; -+ T begin_; -+ T end_; - -- Interval() : begin(-1), end(-1) {} -+ Interval() : begin_(-1), end_(-1) {} - -- Interval(T b, T e) : begin(b), end(e) {} -+ Interval(T b, T e) : begin_(b), end_(e) {} - - bool operator<(const Interval &i) const { -- return begin < i.begin || (begin == i.begin && end > i.end); -+ return begin_ < i.begin_ || (begin_ == i.begin_ && end_ > i.end_); - } - - bool operator==(const Interval &i) const { -- return begin == i.begin && end == i.end; -+ return begin_ == i.begin_ && end_ == i.end_; - } - - bool operator!=(const Interval &i) const { -- return begin != i.begin || end != i.end; -+ return begin_ != i.begin_ || end_ != i.end_; - } - - istream &Read(istream &strm) { - T n; - ReadType(strm, &n); -- begin = n; -+ begin_ = n; - ReadType(strm, &n); -- end = n; -+ end_ = n; - return strm; - } - - ostream &Write(ostream &strm) const { -- T n = begin; -+ T n = begin_; - WriteType(strm, n); -- n = end; -+ n = end_; - WriteType(strm, n); - return strm; - } -@@ -108,7 +108,7 @@ class IntervalSet { - lower_bound(intervals_.begin(), intervals_.end(), interval); - if (lb == intervals_.begin()) - return false; -- return (--lb)->end > value; -+ return (--lb)->end_ > value; - } - - // Requires intervals be normalized. -@@ -123,7 +123,7 @@ class IntervalSet { - - bool Singleton() const { - return intervals_.size() == 1 && -- intervals_[0].begin + 1 == intervals_[0].end; -+ intervals_[0].begin_ + 1 == intervals_[0].end_; - } - - -@@ -178,17 +178,17 @@ void IntervalSet::Normalize() { - T size = 0; - for (T i = 0; i < intervals_.size(); ++i) { - Interval &inti = intervals_[i]; -- if (inti.begin == inti.end) -+ if (inti.begin_ == inti.end_) - continue; - for (T j = i + 1; j < intervals_.size(); ++j) { - Interval &intj = intervals_[j]; -- if (intj.begin > inti.end) -+ if (intj.begin_ > inti.end_) - break; -- if (intj.end > inti.end) -- inti.end = intj.end; -+ if (intj.end_ > inti.end_) -+ inti.end_ = intj.end_; - ++i; - } -- count_ += inti.end - inti.begin; -+ count_ += inti.end_ - inti.begin_; - intervals_[size++] = inti; - } - intervals_.resize(size); -@@ -208,17 +208,17 @@ void IntervalSet::Intersect(const IntervalSet &iset, - oset->count_ = 0; - - while (it1 != intervals_.end() && it2 != iintervals->end()) { -- if (it1->end <= it2->begin) { -+ if (it1->end_ <= it2->begin_) { - ++it1; -- } else if (it2->end <= it1->begin) { -+ } else if (it2->end_ <= it1->begin_) { - ++it2; - } else { - Interval interval; -- interval.begin = max(it1->begin, it2->begin); -- interval.end = min(it1->end, it2->end); -+ interval.begin_ = max(it1->begin_, it2->begin_); -+ interval.end_ = min(it1->end_, it2->end_); - ointervals->push_back(interval); -- oset->count_ += interval.end - interval.begin; -- if (it1->end < it2->end) -+ oset->count_ += interval.end_ - interval.begin_; -+ if (it1->end_ < it2->end_) - ++it1; - else - ++it2; -@@ -235,21 +235,21 @@ void IntervalSet::Complement(T maxval, IntervalSet *oset) const { - oset->count_ = 0; - - Interval interval; -- interval.begin = 0; -+ interval.begin_ = 0; - for (typename vector::const_iterator it = intervals_.begin(); - it != intervals_.end(); - ++it) { -- interval.end = min(it->begin, maxval); -- if (interval.begin < interval.end) { -+ interval.end_ = min(it->begin_, maxval); -+ if (interval.begin_ < interval.end_) { - ointervals->push_back(interval); -- oset->count_ += interval.end - interval.begin; -+ oset->count_ += interval.end_ - interval.begin_; - } -- interval.begin = it->end; -+ interval.begin_ = it->end_; - } -- interval.end = maxval; -- if (interval.begin < interval.end) { -+ interval.end_ = maxval; -+ if (interval.begin_ < interval.end_) { - ointervals->push_back(interval); -- oset->count_ += interval.end - interval.begin; -+ oset->count_ += interval.end_ - interval.begin_; - } - } - -@@ -263,7 +263,7 @@ void IntervalSet::Difference(const IntervalSet &iset, - oset->count_ = 0; - } else { - IntervalSet cset; -- iset.Complement(intervals_.back().end, &cset); -+ iset.Complement(intervals_.back().end_, &cset); - Intersect(cset, oset); - } - } -@@ -277,9 +277,9 @@ bool IntervalSet::Overlaps(const IntervalSet &iset) const { - typename vector::const_iterator it2 = intervals->begin(); - - while (it1 != intervals_.end() && it2 != intervals->end()) { -- if (it1->end <= it2->begin) { -+ if (it1->end_ <= it2->begin_) { - ++it1; -- } else if (it2->end <= it1->begin) { -+ } else if (it2->end_ <= it1->begin_) { - ++it2; - } else { - return true; -@@ -300,21 +300,21 @@ bool IntervalSet::StrictlyOverlaps(const IntervalSet &iset) const { - bool overlap = false; // point in both intervals_ and intervals - - while (it1 != intervals_.end() && it2 != intervals->end()) { -- if (it1->end <= it2->begin) { // no overlap - it1 first -+ if (it1->end_ <= it2->begin_) { // no overlap - it1 first - only1 = true; - ++it1; -- } else if (it2->end <= it1->begin) { // no overlap - it2 first -+ } else if (it2->end_ <= it1->begin_) { // no overlap - it2 first - only2 = true; - ++it2; -- } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals -+ } else if (it2->begin_ == it1->begin_ && it2->end_ == it1->end_) { // equals - overlap = true; - ++it1; - ++it2; -- } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2 -+ } else if (it2->begin_ <= it1->begin_ && it2->end_ >= it1->end_) { // 1 c 2 - only2 = true; - overlap = true; - ++it1; -- } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1 -+ } else if (it1->begin_ <= it2->begin_ && it1->end_ >= it2->end_) { // 2 c 1 - only1 = true; - overlap = true; - ++it2; -@@ -346,11 +346,11 @@ bool IntervalSet::Contains(const IntervalSet &iset) const { - typename vector::const_iterator it2 = intervals->begin(); - - while (it1 != intervals_.end() && it2 != intervals->end()) { -- if (it1->end <= it2->begin) { // no overlap - it1 first -+ if (it1->end_ <= it2->begin_) { // no overlap - it1 first - ++it1; -- } else if (it2->begin < it1->begin || it2->end > it1->end) { // no C -+ } else if (it2->begin_ < it1->begin_ || it2->end_ > it1->end_) { // no C - return false; -- } else if (it2->end == it1->end) { -+ } else if (it2->end_ == it1->end_) { - ++it1; - ++it2; - } else { -@@ -370,7 +370,7 @@ ostream &operator<<(ostream &strm, const IntervalSet &s) { - ++it) { - if (it != intervals->begin()) - strm << ","; -- strm << "[" << it->begin << "," << it->end << ")"; -+ strm << "[" << it->begin_ << "," << it->end_ << ")"; - } - strm << "}"; - return strm; -diff --git a/src/include/fst/label-reachable.h b/src/include/fst/label-reachable.h -index a7c3360..491ef7d 100644 ---- a/src/include/fst/label-reachable.h -+++ b/src/include/fst/label-reachable.h -@@ -359,9 +359,9 @@ class LabelReachable { - iiter = intervals->begin(); - iiter != intervals->end(); ++iiter) { - begin_low = LowerBound(aiter, end_low, aiter_end, -- aiter_input, iiter->begin); -+ aiter_input, iiter->begin_); - end_low = LowerBound(aiter, begin_low, aiter_end, -- aiter_input, iiter->end); -+ aiter_input, iiter->end_); - if (end_low - begin_low > 0) { - if (reach_begin_ < 0) - reach_begin_ = begin_low; -diff --git a/src/include/fst/minimize.h b/src/include/fst/minimize.h -index 3fbe3ba..6e9dd3d 100644 ---- a/src/include/fst/minimize.h -+++ b/src/include/fst/minimize.h -@@ -134,7 +134,14 @@ class CyclicMinimizer { - typedef typename A::Weight Weight; - typedef ReverseArc RevA; - -- CyclicMinimizer(const ExpandedFst& fst) { -+ CyclicMinimizer(const ExpandedFst& fst): -+ // tell the Partition data-member to expect multiple repeated -+ // calls to SplitOn with the same element if we are non-deterministic. -+ P_(fst.Properties(kIDeterministic, true) == 0) { -+ if(fst.Properties(kIDeterministic, true) == 0) -+ CHECK(Weight::Properties() & kIdempotent); // this minimization -+ // algorithm for non-deterministic FSTs can only work with idempotent -+ // semirings. - Initialize(fst); - Compute(fst); - } -@@ -315,7 +322,13 @@ class AcyclicMinimizer { - typedef typename A::StateId ClassId; - typedef typename A::Weight Weight; - -- AcyclicMinimizer(const ExpandedFst& fst) { -+ AcyclicMinimizer(const ExpandedFst& fst): -+ // tell the Partition data-member to expect multiple repeated -+ // calls to SplitOn with the same element if we are non-deterministic. -+ partition_(fst.Properties(kIDeterministic, true) == 0) { -+ if(fst.Properties(kIDeterministic, true) == 0) -+ CHECK(Weight::Properties() & kIdempotent); // minimization for -+ // non-deterministic FSTs can only work with idempotent semirings. - Initialize(fst); - Refine(fst); - } -@@ -531,13 +544,7 @@ template - void Minimize(MutableFst* fst, - MutableFst* sfst = 0, - float delta = kDelta) { -- uint64 props = fst->Properties(kAcceptor | kIDeterministic| -- kWeighted | kUnweighted, true); -- if (!(props & kIDeterministic)) { -- FSTERROR() << "FST is not deterministic"; -- fst->SetProperties(kError, kError); -- return; -- } -+ uint64 props = fst->Properties(kAcceptor | kWeighted | kUnweighted, true); - - if (!(props & kAcceptor)) { // weighted transducer - VectorFst< GallicArc > gfst; -diff --git a/src/include/fst/partition.h b/src/include/fst/partition.h -index dcee67b..40b849a 100644 ---- a/src/include/fst/partition.h -+++ b/src/include/fst/partition.h -@@ -43,8 +43,8 @@ class Partition { - friend class PartitionIterator; - - struct Element { -- Element() : value(0), next(0), prev(0) {} -- Element(T v) : value(v), next(0), prev(0) {} -+ Element() : value(0), next(0), prev(0) {} -+ Element(T v) : value(v), next(0), prev(0) {} - - T value; - Element* next; -@@ -52,9 +52,11 @@ class Partition { - }; - - public: -- Partition() {} -+ Partition(bool allow_repeated_split): -+ allow_repeated_split_(allow_repeated_split) {} - -- Partition(T num_states) { -+ Partition(bool allow_repeated_split, T num_states): -+ allow_repeated_split_(allow_repeated_split) { - Initialize(num_states); - } - -@@ -137,16 +139,16 @@ class Partition { - if (class_size_[class_id] == 1) return; - - // first time class is split -- if (split_size_[class_id] == 0) -+ if (split_size_[class_id] == 0) { - visited_classes_.push_back(class_id); -- -+ class_split_[class_id] = classes_[class_id]; -+ } - // increment size of split (set of element at head of chain) - split_size_[class_id]++; -- -+ - // update split point -- if (class_split_[class_id] == 0) -- class_split_[class_id] = classes_[class_id]; -- if (class_split_[class_id] == elements_[element_id]) -+ if (class_split_[class_id] != 0 -+ && class_split_[class_id] == elements_[element_id]) - class_split_[class_id] = elements_[element_id]->next; - - // move to head of chain in same class -@@ -157,24 +159,31 @@ class Partition { - // class indices of the newly created class. Returns the new_class id - // or -1 if no new class was created. - T SplitRefine(T class_id) { -+ -+ Element* split_el = class_split_[class_id]; - // only split if necessary -- if (class_size_[class_id] == split_size_[class_id]) { -- class_split_[class_id] = 0; -+ //if (class_size_[class_id] == split_size_[class_id]) { -+ if(split_el == NULL) { // we split on everything... - split_size_[class_id] = 0; - return -1; - } else { -- - T new_class = AddClass(); -+ -+ if(allow_repeated_split_) { // split_size_ is possibly -+ // inaccurate, so work it out exactly. -+ size_t split_count; Element *e; -+ for(split_count=0,e=classes_[class_id]; -+ e != split_el; split_count++, e=e->next); -+ split_size_[class_id] = split_count; -+ } - size_t remainder = class_size_[class_id] - split_size_[class_id]; - if (remainder < split_size_[class_id]) { // add smaller -- Element* split_el = class_split_[class_id]; - classes_[new_class] = split_el; -- class_size_[class_id] = split_size_[class_id]; -- class_size_[new_class] = remainder; - split_el->prev->next = 0; - split_el->prev = 0; -+ class_size_[class_id] = split_size_[class_id]; -+ class_size_[new_class] = remainder; - } else { -- Element* split_el = class_split_[class_id]; - classes_[new_class] = classes_[class_id]; - class_size_[class_id] = remainder; - class_size_[new_class] = split_size_[class_id]; -@@ -245,10 +254,16 @@ class Partition { - vector class_size_; - - // size of split for each class -+ // in the nondeterministic case, split_size_ is actually an upper -+ // bound on the size of split for each class. - vector split_size_; - - // set of visited classes to be used in split refine - vector visited_classes_; -+ -+ // true if input fst was deterministic: we can make -+ // certain assumptions in this case that speed up the algorithm. -+ bool allow_repeated_split_; - }; - - -diff --git a/src/include/fst/state-reachable.h b/src/include/fst/state-reachable.h -index 6d0c971..1da922e 100644 ---- a/src/include/fst/state-reachable.h -+++ b/src/include/fst/state-reachable.h -@@ -112,7 +112,7 @@ class IntervalReachVisitor { - void FinishState(StateId s, StateId p, const A *arc) { - if (index_ >= 0 && fst_.Final(s) != Weight::Zero()) { - vector *intervals = (*isets_)[s].Intervals(); -- (*intervals)[0].end = index_; // Update tree interval end -+ (*intervals)[0].end_ = index_; // Update tree interval end - } - (*isets_)[s].Normalize(); - if (p != kNoStateId) +diff --git a/src/include/fst/fst.h b/src/include/fst/fst.h +index 5ad3b52..d9c0ca6 100644 +--- a/src/include/fst/fst.h ++++ b/src/include/fst/fst.h +@@ -45,6 +45,12 @@ DECLARE_bool(fst_align); + + namespace fst { + ++ typedef ::int64 int64; ++ typedef ::uint64 uint64; ++ typedef ::int32 int32; ++ typedef ::uint32 uint32; ++ ++ + bool OPENFSTDLL IsFstHeader(istream &, const string &); //ChangedPD + + class FstHeader; +diff --git a/src/include/fst/interval-set.h b/src/include/fst/interval-set.h +index c4362f2..58cad44 100644 +--- a/src/include/fst/interval-set.h ++++ b/src/include/fst/interval-set.h +@@ -37,38 +37,38 @@ template + class IntervalSet { + public: + struct Interval { +- T begin; +- T end; ++ T begin_; ++ T end_; + +- Interval() : begin(-1), end(-1) {} ++ Interval() : begin_(-1), end_(-1) {} + +- Interval(T b, T e) : begin(b), end(e) {} ++ Interval(T b, T e) : begin_(b), end_(e) {} + + bool operator<(const Interval &i) const { +- return begin < i.begin || (begin == i.begin && end > i.end); ++ return begin_ < i.begin_ || (begin_ == i.begin_ && end_ > i.end_); + } + + bool operator==(const Interval &i) const { +- return begin == i.begin && end == i.end; ++ return begin_ == i.begin_ && end_ == i.end_; + } + + bool operator!=(const Interval &i) const { +- return begin != i.begin || end != i.end; ++ return begin_ != i.begin_ || end_ != i.end_; + } + + istream &Read(istream &strm) { + T n; + ReadType(strm, &n); +- begin = n; ++ begin_ = n; + ReadType(strm, &n); +- end = n; ++ end_ = n; + return strm; + } + + ostream &Write(ostream &strm) const { +- T n = begin; ++ T n = begin_; + WriteType(strm, n); +- n = end; ++ n = end_; + WriteType(strm, n); + return strm; + } +@@ -108,7 +108,7 @@ class IntervalSet { + lower_bound(intervals_.begin(), intervals_.end(), interval); + if (lb == intervals_.begin()) + return false; +- return (--lb)->end > value; ++ return (--lb)->end_ > value; + } + + // Requires intervals be normalized. +@@ -123,7 +123,7 @@ class IntervalSet { + + bool Singleton() const { + return intervals_.size() == 1 && +- intervals_[0].begin + 1 == intervals_[0].end; ++ intervals_[0].begin_ + 1 == intervals_[0].end_; + } + + +@@ -178,17 +178,17 @@ void IntervalSet::Normalize() { + T size = 0; + for (T i = 0; i < intervals_.size(); ++i) { + Interval &inti = intervals_[i]; +- if (inti.begin == inti.end) ++ if (inti.begin_ == inti.end_) + continue; + for (T j = i + 1; j < intervals_.size(); ++j) { + Interval &intj = intervals_[j]; +- if (intj.begin > inti.end) ++ if (intj.begin_ > inti.end_) + break; +- if (intj.end > inti.end) +- inti.end = intj.end; ++ if (intj.end_ > inti.end_) ++ inti.end_ = intj.end_; + ++i; + } +- count_ += inti.end - inti.begin; ++ count_ += inti.end_ - inti.begin_; + intervals_[size++] = inti; + } + intervals_.resize(size); +@@ -208,17 +208,17 @@ void IntervalSet::Intersect(const IntervalSet &iset, + oset->count_ = 0; + + while (it1 != intervals_.end() && it2 != iintervals->end()) { +- if (it1->end <= it2->begin) { ++ if (it1->end_ <= it2->begin_) { + ++it1; +- } else if (it2->end <= it1->begin) { ++ } else if (it2->end_ <= it1->begin_) { + ++it2; + } else { + Interval interval; +- interval.begin = max(it1->begin, it2->begin); +- interval.end = min(it1->end, it2->end); ++ interval.begin_ = max(it1->begin_, it2->begin_); ++ interval.end_ = min(it1->end_, it2->end_); + ointervals->push_back(interval); +- oset->count_ += interval.end - interval.begin; +- if (it1->end < it2->end) ++ oset->count_ += interval.end_ - interval.begin_; ++ if (it1->end_ < it2->end_) + ++it1; + else + ++it2; +@@ -235,21 +235,21 @@ void IntervalSet::Complement(T maxval, IntervalSet *oset) const { + oset->count_ = 0; + + Interval interval; +- interval.begin = 0; ++ interval.begin_ = 0; + for (typename vector::const_iterator it = intervals_.begin(); + it != intervals_.end(); + ++it) { +- interval.end = min(it->begin, maxval); +- if (interval.begin < interval.end) { ++ interval.end_ = min(it->begin_, maxval); ++ if (interval.begin_ < interval.end_) { + ointervals->push_back(interval); +- oset->count_ += interval.end - interval.begin; ++ oset->count_ += interval.end_ - interval.begin_; + } +- interval.begin = it->end; ++ interval.begin_ = it->end_; + } +- interval.end = maxval; +- if (interval.begin < interval.end) { ++ interval.end_ = maxval; ++ if (interval.begin_ < interval.end_) { + ointervals->push_back(interval); +- oset->count_ += interval.end - interval.begin; ++ oset->count_ += interval.end_ - interval.begin_; + } + } + +@@ -263,7 +263,7 @@ void IntervalSet::Difference(const IntervalSet &iset, + oset->count_ = 0; + } else { + IntervalSet cset; +- iset.Complement(intervals_.back().end, &cset); ++ iset.Complement(intervals_.back().end_, &cset); + Intersect(cset, oset); + } + } +@@ -277,9 +277,9 @@ bool IntervalSet::Overlaps(const IntervalSet &iset) const { + typename vector::const_iterator it2 = intervals->begin(); + + while (it1 != intervals_.end() && it2 != intervals->end()) { +- if (it1->end <= it2->begin) { ++ if (it1->end_ <= it2->begin_) { + ++it1; +- } else if (it2->end <= it1->begin) { ++ } else if (it2->end_ <= it1->begin_) { + ++it2; + } else { + return true; +@@ -300,21 +300,21 @@ bool IntervalSet::StrictlyOverlaps(const IntervalSet &iset) const { + bool overlap = false; // point in both intervals_ and intervals + + while (it1 != intervals_.end() && it2 != intervals->end()) { +- if (it1->end <= it2->begin) { // no overlap - it1 first ++ if (it1->end_ <= it2->begin_) { // no overlap - it1 first + only1 = true; + ++it1; +- } else if (it2->end <= it1->begin) { // no overlap - it2 first ++ } else if (it2->end_ <= it1->begin_) { // no overlap - it2 first + only2 = true; + ++it2; +- } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals ++ } else if (it2->begin_ == it1->begin_ && it2->end_ == it1->end_) { // equals + overlap = true; + ++it1; + ++it2; +- } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2 ++ } else if (it2->begin_ <= it1->begin_ && it2->end_ >= it1->end_) { // 1 c 2 + only2 = true; + overlap = true; + ++it1; +- } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1 ++ } else if (it1->begin_ <= it2->begin_ && it1->end_ >= it2->end_) { // 2 c 1 + only1 = true; + overlap = true; + ++it2; +@@ -346,11 +346,11 @@ bool IntervalSet::Contains(const IntervalSet &iset) const { + typename vector::const_iterator it2 = intervals->begin(); + + while (it1 != intervals_.end() && it2 != intervals->end()) { +- if (it1->end <= it2->begin) { // no overlap - it1 first ++ if (it1->end_ <= it2->begin_) { // no overlap - it1 first + ++it1; +- } else if (it2->begin < it1->begin || it2->end > it1->end) { // no C ++ } else if (it2->begin_ < it1->begin_ || it2->end_ > it1->end_) { // no C + return false; +- } else if (it2->end == it1->end) { ++ } else if (it2->end_ == it1->end_) { + ++it1; + ++it2; + } else { +@@ -370,7 +370,7 @@ ostream &operator<<(ostream &strm, const IntervalSet &s) { + ++it) { + if (it != intervals->begin()) + strm << ","; +- strm << "[" << it->begin << "," << it->end << ")"; ++ strm << "[" << it->begin_ << "," << it->end_ << ")"; + } + strm << "}"; + return strm; +diff --git a/src/include/fst/label-reachable.h b/src/include/fst/label-reachable.h +index a7c3360..491ef7d 100644 +--- a/src/include/fst/label-reachable.h ++++ b/src/include/fst/label-reachable.h +@@ -359,9 +359,9 @@ class LabelReachable { + iiter = intervals->begin(); + iiter != intervals->end(); ++iiter) { + begin_low = LowerBound(aiter, end_low, aiter_end, +- aiter_input, iiter->begin); ++ aiter_input, iiter->begin_); + end_low = LowerBound(aiter, begin_low, aiter_end, +- aiter_input, iiter->end); ++ aiter_input, iiter->end_); + if (end_low - begin_low > 0) { + if (reach_begin_ < 0) + reach_begin_ = begin_low; +diff --git a/src/include/fst/minimize.h b/src/include/fst/minimize.h +index 3fbe3ba..6e9dd3d 100644 +--- a/src/include/fst/minimize.h ++++ b/src/include/fst/minimize.h +@@ -134,7 +134,14 @@ class CyclicMinimizer { + typedef typename A::Weight Weight; + typedef ReverseArc RevA; + +- CyclicMinimizer(const ExpandedFst& fst) { ++ CyclicMinimizer(const ExpandedFst& fst): ++ // tell the Partition data-member to expect multiple repeated ++ // calls to SplitOn with the same element if we are non-deterministic. ++ P_(fst.Properties(kIDeterministic, true) == 0) { ++ if(fst.Properties(kIDeterministic, true) == 0) ++ CHECK(Weight::Properties() & kIdempotent); // this minimization ++ // algorithm for non-deterministic FSTs can only work with idempotent ++ // semirings. + Initialize(fst); + Compute(fst); + } +@@ -315,7 +322,13 @@ class AcyclicMinimizer { + typedef typename A::StateId ClassId; + typedef typename A::Weight Weight; + +- AcyclicMinimizer(const ExpandedFst& fst) { ++ AcyclicMinimizer(const ExpandedFst& fst): ++ // tell the Partition data-member to expect multiple repeated ++ // calls to SplitOn with the same element if we are non-deterministic. ++ partition_(fst.Properties(kIDeterministic, true) == 0) { ++ if(fst.Properties(kIDeterministic, true) == 0) ++ CHECK(Weight::Properties() & kIdempotent); // minimization for ++ // non-deterministic FSTs can only work with idempotent semirings. + Initialize(fst); + Refine(fst); + } +@@ -531,13 +544,7 @@ template + void Minimize(MutableFst* fst, + MutableFst* sfst = 0, + float delta = kDelta) { +- uint64 props = fst->Properties(kAcceptor | kIDeterministic| +- kWeighted | kUnweighted, true); +- if (!(props & kIDeterministic)) { +- FSTERROR() << "FST is not deterministic"; +- fst->SetProperties(kError, kError); +- return; +- } ++ uint64 props = fst->Properties(kAcceptor | kWeighted | kUnweighted, true); + + if (!(props & kAcceptor)) { // weighted transducer + VectorFst< GallicArc > gfst; +diff --git a/src/include/fst/partition.h b/src/include/fst/partition.h +index dcee67b..40b849a 100644 +--- a/src/include/fst/partition.h ++++ b/src/include/fst/partition.h +@@ -43,8 +43,8 @@ class Partition { + friend class PartitionIterator; + + struct Element { +- Element() : value(0), next(0), prev(0) {} +- Element(T v) : value(v), next(0), prev(0) {} ++ Element() : value(0), next(0), prev(0) {} ++ Element(T v) : value(v), next(0), prev(0) {} + + T value; + Element* next; +@@ -52,9 +52,11 @@ class Partition { + }; + + public: +- Partition() {} ++ Partition(bool allow_repeated_split): ++ allow_repeated_split_(allow_repeated_split) {} + +- Partition(T num_states) { ++ Partition(bool allow_repeated_split, T num_states): ++ allow_repeated_split_(allow_repeated_split) { + Initialize(num_states); + } + +@@ -137,16 +139,16 @@ class Partition { + if (class_size_[class_id] == 1) return; + + // first time class is split +- if (split_size_[class_id] == 0) ++ if (split_size_[class_id] == 0) { + visited_classes_.push_back(class_id); +- ++ class_split_[class_id] = classes_[class_id]; ++ } + // increment size of split (set of element at head of chain) + split_size_[class_id]++; +- ++ + // update split point +- if (class_split_[class_id] == 0) +- class_split_[class_id] = classes_[class_id]; +- if (class_split_[class_id] == elements_[element_id]) ++ if (class_split_[class_id] != 0 ++ && class_split_[class_id] == elements_[element_id]) + class_split_[class_id] = elements_[element_id]->next; + + // move to head of chain in same class +@@ -157,24 +159,31 @@ class Partition { + // class indices of the newly created class. Returns the new_class id + // or -1 if no new class was created. + T SplitRefine(T class_id) { ++ ++ Element* split_el = class_split_[class_id]; + // only split if necessary +- if (class_size_[class_id] == split_size_[class_id]) { +- class_split_[class_id] = 0; ++ //if (class_size_[class_id] == split_size_[class_id]) { ++ if(split_el == NULL) { // we split on everything... + split_size_[class_id] = 0; + return -1; + } else { +- + T new_class = AddClass(); ++ ++ if(allow_repeated_split_) { // split_size_ is possibly ++ // inaccurate, so work it out exactly. ++ size_t split_count; Element *e; ++ for(split_count=0,e=classes_[class_id]; ++ e != split_el; split_count++, e=e->next); ++ split_size_[class_id] = split_count; ++ } + size_t remainder = class_size_[class_id] - split_size_[class_id]; + if (remainder < split_size_[class_id]) { // add smaller +- Element* split_el = class_split_[class_id]; + classes_[new_class] = split_el; +- class_size_[class_id] = split_size_[class_id]; +- class_size_[new_class] = remainder; + split_el->prev->next = 0; + split_el->prev = 0; ++ class_size_[class_id] = split_size_[class_id]; ++ class_size_[new_class] = remainder; + } else { +- Element* split_el = class_split_[class_id]; + classes_[new_class] = classes_[class_id]; + class_size_[class_id] = remainder; + class_size_[new_class] = split_size_[class_id]; +@@ -245,10 +254,16 @@ class Partition { + vector class_size_; + + // size of split for each class ++ // in the nondeterministic case, split_size_ is actually an upper ++ // bound on the size of split for each class. + vector split_size_; + + // set of visited classes to be used in split refine + vector visited_classes_; ++ ++ // true if input fst was deterministic: we can make ++ // certain assumptions in this case that speed up the algorithm. ++ bool allow_repeated_split_; + }; + + +diff --git a/src/include/fst/state-reachable.h b/src/include/fst/state-reachable.h +index 6d0c971..1da922e 100644 +--- a/src/include/fst/state-reachable.h ++++ b/src/include/fst/state-reachable.h +@@ -112,7 +112,7 @@ class IntervalReachVisitor { + void FinishState(StateId s, StateId p, const A *arc) { + if (index_ >= 0 && fst_.Final(s) != Weight::Zero()) { + vector *intervals = (*isets_)[s].Intervals(); +- (*intervals)[0].end = index_; // Update tree interval end ++ (*intervals)[0].end_ = index_; // Update tree interval end + } + (*isets_)[s].Normalize(); + if (p != kNoStateId)