Skip to content

Commit

Permalink
Replace more PointerVector by std::vector for src/training
Browse files Browse the repository at this point in the history
Signed-off-by: Stefan Weil <[email protected]>
  • Loading branch information
stweil committed Mar 19, 2021
1 parent dae5acc commit cac116d
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 35 deletions.
21 changes: 0 additions & 21 deletions src/ccutil/genericvector.h
Original file line number Diff line number Diff line change
Expand Up @@ -401,27 +401,6 @@ class PointerVector : public GenericVector<T *> {
GenericVector<T *>::truncate(size);
}

// Compact the vector by deleting elements for which delete_cb returns
// true. delete_cb is a permanent callback and will be deleted.
void compact(std::function<bool(const T *)> delete_cb) {
int new_size = 0;
int old_index = 0;
// Until the callback returns true, the elements stay the same.
while (old_index < GenericVector<T *>::size_used_ &&
!delete_cb(GenericVector<T *>::data_[old_index++])) {
++new_size;
}
// Now just copy anything else that gets false from delete_cb.
for (; old_index < GenericVector<T *>::size_used_; ++old_index) {
if (!delete_cb(GenericVector<T *>::data_[old_index])) {
GenericVector<T *>::data_[new_size++] = GenericVector<T *>::data_[old_index];
} else {
delete GenericVector<T *>::data_[old_index];
}
}
GenericVector<T *>::size_used_ = new_size;
}

// Clear the array, calling the clear callback function if any.
// All the owned callbacks are also deleted.
// If you don't want the callbacks to be deleted, before calling clear, set
Expand Down
30 changes: 21 additions & 9 deletions src/training/common/trainingsampleset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,15 @@ TrainingSampleSet::TrainingSampleSet(const FontInfoTable &font_table)
, fontinfo_table_(font_table) {}

// Destructor: deletes every pointer stored in samples_, then deletes
// font_class_array_.
TrainingSampleSet::~TrainingSampleSet() {
  for (auto it = samples_.begin(); it != samples_.end(); ++it) {
    delete *it;
  }
  delete font_class_array_;
}

// Writes to the given file. Returns false in case of error.
bool TrainingSampleSet::Serialize(FILE *fp) const {
if (!samples_.Serialize(fp))
if (!tesseract::Serialize(fp, samples_))
return false;
if (!unicharset_.save_to_file(fp))
return false;
Expand All @@ -106,7 +109,7 @@ bool TrainingSampleSet::Serialize(FILE *fp) const {
// Reads from the given file. Returns false in case of error.
// If swap is true, assumes a big/little-endian swap is needed.
bool TrainingSampleSet::DeSerialize(bool swap, FILE *fp) {
if (!samples_.DeSerialize(swap, fp))
if (!tesseract::DeSerialize(swap, fp, samples_))
return false;
num_raw_samples_ = samples_.size();
if (!unicharset_.load_from_file(fp))
Expand Down Expand Up @@ -498,17 +501,26 @@ void TrainingSampleSet::KillSample(TrainingSample *sample) {
// Deletes all samples with zero features marked by KillSample.
void TrainingSampleSet::DeleteDeadSamples() {
using namespace std::placeholders; // for _1
samples_.compact(std::bind(&TrainingSampleSet::DeleteableSample, this, _1));
auto old_it = samples_.begin();
for (; old_it < samples_.end(); ++old_it) {
if (*old_it == nullptr || (*old_it)->class_id() < 0) {
break;
}
}
auto new_it = old_it;
for (; old_it < samples_.end(); ++old_it) {
if (*old_it == nullptr || (*old_it)->class_id() < 0) {
delete *old_it;
} else {
*new_it = *old_it;
++new_it;
}
}
samples_.resize(new_it - samples_.begin() + 1);
num_raw_samples_ = samples_.size();
// Samples must be re-organized now we have deleted a few.
}

// Predicate used when pruning samples: reports true for a null sample
// pointer or for a sample whose class_id() is negative, false otherwise.
bool TrainingSampleSet::DeleteableSample(const TrainingSample *sample) {
  if (sample == nullptr) {
    return true;
  }
  return sample->class_id() < 0;
}

// Construct an array to access the samples by font,class pair.
void TrainingSampleSet::OrganizeByFontAndClass() {
// Font indexes are sparse, so we used a map to compact them, so we can
Expand Down
6 changes: 1 addition & 5 deletions src/training/common/trainingsampleset.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,6 @@ class TrainingSampleSet {
// must be called after as the samples have been renumbered.
void DeleteDeadSamples();

// Callback function returns true if the given sample is to be deleted, due
// to having a negative classid.
bool DeleteableSample(const TrainingSample *sample);

// Construct an array to access the samples by font,class pair.
void OrganizeByFontAndClass();

Expand Down Expand Up @@ -254,7 +250,7 @@ class TrainingSampleSet {
std::vector<FontClassDistance> distance_cache;
};

PointerVector<TrainingSample> samples_;
std::vector<TrainingSample *> samples_;
// Number of samples before replication/randomization.
int num_raw_samples_;
// Character set we are training for.
Expand Down

0 comments on commit cac116d

Please sign in to comment.