Skip to content

Commit

Permalink
Merge pull request #3592 from stweil/unsigned
Browse files (browse the repository at this point in the history)
Fix compiler warnings (mainly signed / unsigned mismatches) and modernize some code
egorpugin authored Oct 10, 2021
2 parents 0aad8b8 + d935502 commit 5a36943
Showing 86 changed files with 685 additions and 777 deletions.
12 changes: 6 additions & 6 deletions src/ccmain/applybox.cpp
Original file line number Diff line number Diff line change
@@ -243,7 +243,7 @@ void Tesseract::MaximallyChopWord(const std::vector<TBOX> &boxes, BLOCK *block,
std::vector<BLOB_CHOICE *> blob_choices;
ASSERT_HOST(!word_res->chopped_word->blobs.empty());
auto rating = static_cast<float>(INT8_MAX);
for (int i = 0; i < word_res->chopped_word->NumBlobs(); ++i) {
for (unsigned i = 0; i < word_res->chopped_word->NumBlobs(); ++i) {
// The rating and certainty are not quite arbitrary. Since
// select_blob_to_chop uses the worst certainty to choose, they all have
// to be different, so starting with INT8_MAX, subtract 1/8 for each blob
@@ -257,7 +257,7 @@ void Tesseract::MaximallyChopWord(const std::vector<TBOX> &boxes, BLOCK *block,
rating -= 0.125f;
}
const double e = exp(1.0); // The base of natural logs.
int blob_number;
unsigned blob_number;
int right_chop_index = 0;
if (!assume_fixed_pitch_char_segment) {
// We only chop if the language is not fixed pitch like CJK.
@@ -613,8 +613,8 @@ bool Tesseract::FindSegmentation(const std::vector<UNICHAR_ID> &target_text, WER
/// @param best_rating
/// @param best_segmentation
void Tesseract::SearchForText(const std::vector<BLOB_CHOICE_LIST *> *choices, int choices_pos,
int choices_length, const std::vector<UNICHAR_ID> &target_text,
int text_index, float rating, std::vector<int> *segmentation,
unsigned choices_length, const std::vector<UNICHAR_ID> &target_text,
unsigned text_index, float rating, std::vector<int> *segmentation,
float *best_rating, std::vector<int> *best_segmentation) {
const UnicharAmbigsVector &table = getDict().getUnicharAmbigs().dang_ambigs();
for (unsigned length = 1; length <= choices[choices_pos].size(); ++length) {
@@ -625,12 +625,12 @@ void Tesseract::SearchForText(const std::vector<BLOB_CHOICE_LIST *> *choices, in
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); choice_it.forward()) {
const BLOB_CHOICE *choice = choice_it.data();
choice_rating = choice->rating();
UNICHAR_ID class_id = choice->unichar_id();
auto class_id = choice->unichar_id();
if (class_id == target_text[text_index]) {
break;
}
// Search ambigs table.
if (class_id < table.size() && table[class_id] != nullptr) {
if (static_cast<size_t>(class_id) < table.size() && table[class_id] != nullptr) {
AmbigSpec_IT spec_it(table[class_id]);
for (spec_it.mark_cycle_pt(); !spec_it.cycled_list(); spec_it.forward()) {
const AmbigSpec *ambig_spec = spec_it.data();
32 changes: 16 additions & 16 deletions src/ccmain/control.cpp
Original file line number Diff line number Diff line change
@@ -227,7 +227,7 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC *monitor, PAGE_RES_IT
}
}
if (word->word->tess_failed) {
int s;
unsigned s;
for (s = 0; s < word->lang_words.size() && word->lang_words[s]->tess_failed; ++s) {
}
// If all are failed, skip it. Image words are skipped by this test.
@@ -727,7 +727,7 @@ void Tesseract::script_pos_pass(PAGE_RES *page_res) {
// Scan for upper/lower.
int num_upper = 0;
int num_lower = 0;
for (int i = 0; i < word->best_choice->length(); ++i) {
for (unsigned i = 0; i < word->best_choice->length(); ++i) {
if (word->uch_set->get_isupper(word->best_choice->unichar_id(i))) {
++num_upper;
} else if (word->uch_set->get_islower(word->best_choice->unichar_id(i))) {
@@ -743,7 +743,7 @@ void Tesseract::script_pos_pass(PAGE_RES *page_res) {
}

// Helper finds the gap between the index word and the next.
static void WordGap(const PointerVector<WERD_RES> &words, int index, int *right, int *next_left) {
static void WordGap(const PointerVector<WERD_RES> &words, unsigned index, int *right, int *next_left) {
*right = -INT32_MAX;
*next_left = INT32_MAX;
if (index < words.size()) {
@@ -756,13 +756,13 @@ static void WordGap(const PointerVector<WERD_RES> &words, int index, int *right,

// Factored helper computes the rating, certainty, badness and validity of
// the permuter of the words in [first_index, end_index).
static void EvaluateWordSpan(const PointerVector<WERD_RES> &words, int first_index, int end_index,
static void EvaluateWordSpan(const PointerVector<WERD_RES> &words, unsigned first_index, unsigned end_index,
float *rating, float *certainty, bool *bad, bool *valid_permuter) {
if (end_index <= first_index) {
*bad = true;
*valid_permuter = false;
}
for (int index = first_index; index < end_index && index < words.size(); ++index) {
for (unsigned index = first_index; index < end_index && index < words.size(); ++index) {
WERD_CHOICE *choice = words[index]->best_choice;
if (choice == nullptr) {
*bad = true;
@@ -790,11 +790,11 @@ static int SelectBestWords(double rating_ratio, double certainty_margin, bool de
// boundary at the end.
std::vector<WERD_RES *> out_words;
// Index into each word vector (best, new).
int b = 0, n = 0;
unsigned b = 0, n = 0;
int num_best = 0, num_new = 0;
while (b < best_words->size() || n < new_words->size()) {
// Start of the current run in each.
int start_b = b, start_n = n;
auto start_b = b, start_n = n;
while (b < best_words->size() || n < new_words->size()) {
int b_right = -INT32_MAX;
int next_b_left = INT32_MAX;
@@ -884,7 +884,7 @@ int Tesseract::RetryWithLanguage(const WordData &word_data, WordRecognizer recog
*in_word = nullptr;
}
if (debug) {
for (int i = 0; i < new_words.size(); ++i) {
for (unsigned i = 0; i < new_words.size(); ++i) {
new_words[i]->DebugTopChoice("Lang result");
}
}
@@ -896,7 +896,7 @@ int Tesseract::RetryWithLanguage(const WordData &word_data, WordRecognizer recog

// Helper returns true if all the words are acceptable.
static bool WordsAcceptable(const PointerVector<WERD_RES> &words) {
for (int w = 0; w < words.size(); ++w) {
for (unsigned w = 0; w < words.size(); ++w) {
if (words[w]->tess_failed || !words[w]->tess_accepted) {
return false;
}
@@ -1597,10 +1597,10 @@ void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK *b
word->fix_hyphens();
}
/* Don't trust fix_quotes! - though I think I've fixed the bug */
if (word->best_choice->length() != word->box_word->length()) {
if (static_cast<unsigned>(word->best_choice->length()) != word->box_word->length()) {
tprintf(
"POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d;"
" #Blobs=%d\n",
" #Blobs=%u\n",
word->best_choice->debug_string().c_str(), word->best_choice->length(),
word->box_word->length());
}
@@ -1621,7 +1621,7 @@ void Tesseract::match_word_pass_n(int pass_n, WERD_RES *word, ROW *row, BLOCK *b
static BLOB_CHOICE *FindBestMatchingChoice(UNICHAR_ID char_id, WERD_RES *word_res) {
// Find the corresponding best BLOB_CHOICE from any position in the word_res.
BLOB_CHOICE *best_choice = nullptr;
for (int i = 0; i < word_res->best_choice->length(); ++i) {
for (unsigned i = 0; i < word_res->best_choice->length(); ++i) {
BLOB_CHOICE *choice = FindMatchingChoice(char_id, word_res->GetBlobChoices(i));
if (choice != nullptr) {
if (best_choice == nullptr || choice->rating() < best_choice->rating()) {
@@ -1637,7 +1637,7 @@ static BLOB_CHOICE *FindBestMatchingChoice(UNICHAR_ID char_id, WERD_RES *word_re
// in the best_choice.
static void CorrectRepcharChoices(BLOB_CHOICE *blob_choice, WERD_RES *word_res) {
WERD_CHOICE *word = word_res->best_choice;
for (int i = 0; i < word_res->best_choice->length(); ++i) {
for (unsigned i = 0; i < word_res->best_choice->length(); ++i) {
BLOB_CHOICE *choice =
FindMatchingChoice(blob_choice->unichar_id(), word_res->GetBlobChoices(i));
if (choice == nullptr) {
@@ -1646,7 +1646,7 @@ static void CorrectRepcharChoices(BLOB_CHOICE *blob_choice, WERD_RES *word_res)
}
}
// Correct any incorrect results in word.
for (int i = 0; i < word->length(); ++i) {
for (unsigned i = 0; i < word->length(); ++i) {
if (word->unichar_id(i) != blob_choice->unichar_id()) {
word->set_unichar_id(blob_choice->unichar_id(), i);
}
@@ -1666,7 +1666,7 @@ void Tesseract::fix_rep_char(PAGE_RES_IT *page_res_it) {

// Find the frequency of each unique character in the word.
SortHelper<UNICHAR_ID> rep_ch(word.length());
for (int i = 0; i < word.length(); ++i) {
for (unsigned i = 0; i < word.length(); ++i) {
rep_ch.Add(word.unichar_id(i), 1);
}

@@ -1951,7 +1951,7 @@ void Tesseract::set_word_fonts(WERD_RES *word) {
if (tessedit_debug_fonts) {
tprintf("Examining fonts in %s\n", word->best_choice->debug_string().c_str());
}
for (int b = 0; b < word->best_choice->length(); ++b) {
for (unsigned b = 0; b < word->best_choice->length(); ++b) {
const BLOB_CHOICE *choice = word->GetBlobChoice(b);
if (choice == nullptr) {
continue;
4 changes: 2 additions & 2 deletions src/ccmain/docqual.cpp
Original file line number Diff line number Diff line change
@@ -64,7 +64,7 @@ int16_t Tesseract::word_outline_errs(WERD_RES *word) {
int16_t err_count = 0;

if (word->rebuild_word != nullptr) {
for (int b = 0; b < word->rebuild_word->NumBlobs(); ++b) {
for (unsigned b = 0; b < word->rebuild_word->NumBlobs(); ++b) {
TBLOB *blob = word->rebuild_word->blobs[b];
err_count += count_outline_errs(word->best_choice->unichar_string()[i], blob->NumOutlines());
i++;
@@ -911,7 +911,7 @@ bool Tesseract::noise_outlines(TWERD *word) {
int16_t max_dimension;
float small_limit = kBlnXHeight * crunch_small_outlines_size;

for (int b = 0; b < word->NumBlobs(); ++b) {
for (unsigned b = 0; b < word->NumBlobs(); ++b) {
TBLOB *blob = word->blobs[b];
for (TESSLINE *ol = blob->outlines; ol != nullptr; ol = ol->next) {
outline_count++;
2 changes: 1 addition & 1 deletion src/ccmain/equationdetect.cpp
Original file line number Diff line number Diff line change
@@ -742,7 +742,7 @@ int EquationDetect::CountAlignment(const std::vector<int> &sorted_vec, const int

// Search right side.
index = pos + 1 - sorted_vec.begin();
while (index < sorted_vec.size() && sorted_vec[index++] - val < kDistTh) {
while (static_cast<size_t>(index) < sorted_vec.size() && sorted_vec[index++] - val < kDistTh) {
count++;
}

17 changes: 8 additions & 9 deletions src/ccmain/fixspace.cpp
Original file line number Diff line number Diff line change
@@ -262,7 +262,7 @@ int16_t Tesseract::eval_word_spacing(WERD_RES_LIST &word_res_list) {
int16_t total_score = 0;
int16_t word_count = 0;
int16_t done_word_count = 0;
int16_t i;
int i;
int16_t offset;
int16_t prev_word_score = 0;
bool prev_word_done = false;
@@ -684,7 +684,6 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {

int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, float *worst_noise_score) {
float noise_score[512];
int i;
int min_noise_blob; // 1st contender
int max_noise_blob; // last contender
int non_noise_count;
@@ -697,7 +696,7 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, float *worst_noise_score
}

// Normalised.
int blob_count = word_res->box_word->length();
auto blob_count = word_res->box_word->length();
ASSERT_HOST(blob_count <= 512);
if (blob_count < 5) {
return -1; // too short to split
@@ -712,7 +711,7 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, float *worst_noise_score
}
#endif

for (i = 0; i < blob_count && i < word_res->rebuild_word->NumBlobs(); i++) {
for (unsigned i = 0; i < blob_count && i < word_res->rebuild_word->NumBlobs(); i++) {
TBLOB *blob = word_res->rebuild_word->blobs[i];
if (word_res->reject_map[i].accepted()) {
noise_score[i] = non_noise_limit;
@@ -731,7 +730,8 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, float *worst_noise_score
/* Now find the worst one which is far enough away from the end of the word */

non_noise_count = 0;
for (i = 0; i < blob_count && non_noise_count < fixsp_non_noise_limit; i++) {
int i;
for (i = 0; static_cast<unsigned>(i) < blob_count && non_noise_count < fixsp_non_noise_limit; i++) {
if (noise_score[i] >= non_noise_limit) {
non_noise_count++;
}
@@ -760,7 +760,7 @@ int16_t Tesseract::worst_noise_blob(WERD_RES *word_res, float *worst_noise_score

*worst_noise_score = small_limit;
worst_noise_blob = -1;
for (i = min_noise_blob; i <= max_noise_blob; i++) {
for (auto i = min_noise_blob; i <= max_noise_blob; i++) {
if (noise_score[i] < *worst_noise_score) {
worst_noise_blob = i;
*worst_noise_score = noise_score[i];
@@ -838,7 +838,6 @@ int16_t Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) {
WERD_RES_IT word_it(&word_res_list);
WERD_RES *word;
int16_t score = 0;
int16_t i;
float small_limit = kBlnXHeight * fixsp_small_outlines_size;

for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
@@ -849,9 +848,9 @@ int16_t Tesseract::fp_eval_word_spacing(WERD_RES_LIST &word_res_list) {
if (word->done || word->tess_accepted || word->best_choice->permuter() == SYSTEM_DAWG_PERM ||
word->best_choice->permuter() == FREQ_DAWG_PERM ||
word->best_choice->permuter() == USER_DAWG_PERM || safe_dict_word(word) > 0) {
int num_blobs = word->rebuild_word->NumBlobs();
auto num_blobs = word->rebuild_word->NumBlobs();
UNICHAR_ID space = word->uch_set->unichar_to_id(" ");
for (i = 0; i < word->best_choice->length() && i < num_blobs; ++i) {
for (unsigned i = 0; i < word->best_choice->length() && i < num_blobs; ++i) {
TBLOB *blob = word->rebuild_word->blobs[i];
if (word->best_choice->unichar_id(i) == space || blob_noise_score(blob) < small_limit) {
score -= 1; // penalise possibly erroneous non-space
12 changes: 2 additions & 10 deletions src/ccmain/linerec.cpp
Original file line number Diff line number Diff line change
@@ -269,22 +269,14 @@ void Tesseract::SearchWords(PointerVector<WERD_RES> *words) {
if (stopper_dict == nullptr) {
stopper_dict = &getDict();
}
bool any_nonspace_delimited = false;
for (int w = 0; w < words->size(); ++w) {
WERD_RES *word = (*words)[w];
if (word->best_choice != nullptr && word->best_choice->ContainsAnyNonSpaceDelimited()) {
any_nonspace_delimited = true;
break;
}
}
for (int w = 0; w < words->size(); ++w) {
for (unsigned w = 0; w < words->size(); ++w) {
WERD_RES *word = (*words)[w];
if (word->best_choice == nullptr) {
// It is a dud.
word->SetupFake(lstm_recognizer_->GetUnicharset());
} else {
// Set the best state.
for (int i = 0; i < word->best_choice->length(); ++i) {
for (unsigned i = 0; i < word->best_choice->length(); ++i) {
int length = word->best_choice->state(i);
word->best_state.push_back(length);
}
8 changes: 4 additions & 4 deletions src/ccmain/ltrresultiterator.cpp
Original file line number Diff line number Diff line change
@@ -335,10 +335,10 @@ char *LTRResultIterator::WordNormedUTF8Text() const {
WERD_CHOICE *best_choice = it_->word()->best_choice;
const UNICHARSET *unicharset = it_->word()->uch_set;
ASSERT_HOST(best_choice != nullptr);
for (int i = 0; i < best_choice->length(); ++i) {
for (unsigned i = 0; i < best_choice->length(); ++i) {
ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
}
int length = ocr_text.length() + 1;
auto length = ocr_text.length() + 1;
char *result = new char[length];
strncpy(result, ocr_text.c_str(), length);
return result;
@@ -404,7 +404,7 @@ ChoiceIterator::ChoiceIterator(const LTRResultIterator &result_it) {
strcmp(word_res_->CTC_symbol_choices[0][0].first, " ")) {
blanks_before_word_ = 0;
}
auto index = *tstep_index_;
unsigned index = *tstep_index_;
index += blanks_before_word_;
if (index < word_res_->CTC_symbol_choices.size()) {
LSTM_choices_ = &word_res_->CTC_symbol_choices[index];
@@ -484,7 +484,7 @@ float ChoiceIterator::Confidence() const {

// Returns the set of timesteps which belong to the current symbol
std::vector<std::vector<std::pair<const char *, float>>> *ChoiceIterator::Timesteps() const {
int offset = *tstep_index_ + blanks_before_word_;
unsigned offset = *tstep_index_ + blanks_before_word_;
if (offset >= word_res_->segmented_timesteps.size() || !oemLSTM_) {
return nullptr;
}
4 changes: 2 additions & 2 deletions src/ccmain/osdetect.cpp
Original file line number Diff line number Diff line change
@@ -381,7 +381,7 @@ bool OrientationDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list() && choice == nullptr;
choice_it.forward()) {
int choice_script = choice_it.data()->script_id();
int s = 0;
unsigned s = 0;
for (s = 0; s < allowed_scripts_->size(); ++s) {
if ((*allowed_scripts_)[s] == choice_script) {
choice = choice_it.data();
@@ -477,7 +477,7 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST *scores) {
int id = choice->script_id();
if (allowed_scripts_ != nullptr && !allowed_scripts_->empty()) {
// Check that the choice is in an allowed script.
int s = 0;
size_t s = 0;
for (s = 0; s < allowed_scripts_->size(); ++s) {
if ((*allowed_scripts_)[s] == id) {
break;
9 changes: 4 additions & 5 deletions src/ccmain/output.cpp
Original file line number Diff line number Diff line change
@@ -101,7 +101,6 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,
bool force_eol) { // override tilde crunch?
WERD_RES *word = page_res_it.word();
const UNICHARSET &uchset = *word->uch_set;
int i;
bool need_reject = false;
UNICHAR_ID space = uchset.unichar_to_id(" ");

@@ -181,15 +180,15 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,
if (!word->word->flag(W_REP_CHAR) || !tessedit_write_rep_codes) {
if (tessedit_zero_rejection) {
/* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
for (i = 0; i < word->best_choice->length(); ++i) {
for (unsigned i = 0; i < word->best_choice->length(); ++i) {
if (word->reject_map[i].rejected()) {
word->reject_map[i].setrej_minimal_rej_accept();
}
}
}
if (tessedit_minimal_rejection) {
/* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */
for (i = 0; i < word->best_choice->length(); ++i) {
for (unsigned i = 0; i < word->best_choice->length(); ++i) {
if ((word->best_choice->unichar_id(i) != space) && word->reject_map[i].rejected()) {
word->reject_map[i].setrej_minimal_rej_accept();
}
@@ -365,7 +364,7 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) {

int16_t Tesseract::count_alphas(const WERD_CHOICE &word) {
int count = 0;
for (int i = 0; i < word.length(); ++i) {
for (unsigned i = 0; i < word.length(); ++i) {
if (word.unicharset()->get_isalpha(word.unichar_id(i))) {
count++;
}
@@ -375,7 +374,7 @@ int16_t Tesseract::count_alphas(const WERD_CHOICE &word) {

int16_t Tesseract::count_alphanums(const WERD_CHOICE &word) {
int count = 0;
for (int i = 0; i < word.length(); ++i) {
for (unsigned i = 0; i < word.length(); ++i) {
if (word.unicharset()->get_isalpha(word.unichar_id(i)) ||
word.unicharset()->get_isdigit(word.unichar_id(i))) {
count++;
Loading

0 comments on commit 5a36943

Please sign in to comment.