Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix some issues which were reported by Codacy #4266

Merged
merged 2 commits
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion INSTALL.GIT.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ all languages).

git clone https://github.com/tesseract-ocr/tessdata.git tesseract-ocr.tessdata


You need an Internet connection and [curl](https://curl.haxx.se/) to compile `ScrollView.jar`
because the build will automatically download
[piccolo2d-core-3.0.1.jar](https://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-core/3.0.1/piccolo2d-core-3.0.1.jar) and
Expand Down
4 changes: 2 additions & 2 deletions src/api/pdfrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,13 +242,13 @@ static void GetWordBaseline(int writing_direction, int ppi, int height, int word
double word_length;
double x, y;
{
int px = word_x1;
int py = word_y1;
double l2 = dist2(line_x1, line_y1, line_x2, line_y2);
if (l2 == 0) {
x = line_x1;
y = line_y1;
} else {
int px = word_x1;
int py = word_y1;
double t = ((px - line_x2) * (line_x2 - line_x1) + (py - line_y2) * (line_y2 - line_y1)) / l2;
x = line_x2 + t * (line_x2 - line_x1);
y = line_y2 + t * (line_y2 - line_y1);
Expand Down
3 changes: 2 additions & 1 deletion src/ccmain/applybox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,10 +258,10 @@ void Tesseract::MaximallyChopWord(const std::vector<TBOX> &boxes, BLOCK *block,
}
const double e = exp(1.0); // The base of natural logs.
unsigned blob_number;
int right_chop_index = 0;
if (!assume_fixed_pitch_char_segment) {
// We only chop if the language is not fixed pitch like CJK.
SEAM *seam = nullptr;
int right_chop_index = 0;
while ((seam = chop_one_blob(boxes, blob_choices, word_res, &blob_number)) != nullptr) {
word_res->InsertSeam(blob_number, seam);
BLOB_CHOICE *left_choice = blob_choices[blob_number];
Expand Down Expand Up @@ -685,6 +685,7 @@ void Tesseract::SearchForText(const std::vector<BLOB_CHOICE_LIST *> *choices, in
void Tesseract::TidyUp(PAGE_RES *page_res) {
int ok_blob_count = 0;
int bad_blob_count = 0;
// TODO: check usage of ok_word_count.
int ok_word_count = 0;
int unlabelled_words = 0;
PAGE_RES_IT pr_it(page_res);
Expand Down
18 changes: 8 additions & 10 deletions src/ccmain/control.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -949,6 +949,7 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next
}
real_word->AddSelectedOutlines(wanted, wanted_blobs, wanted_outlines, nullptr);
AssignDiacriticsToNewBlobs(outlines, pass, real_word, pr_it, &word_wanted, &target_blobs);
// TODO: check code.
int non_overlapped = 0;
int non_overlapped_used = 0;
for (unsigned i = 0; i < word_wanted.size(); ++i) {
Expand Down Expand Up @@ -1121,9 +1122,9 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
C_BLOB *blob,
const std::vector<C_OUTLINE *> &outlines,
int num_outlines, std::vector<bool> *ok_outlines) {
std::string best_str;
float target_cert = certainty_threshold;
if (blob != nullptr) {
std::string best_str;
float target_c2;
target_cert = ClassifyBlobAsWord(pass, pr_it, blob, best_str, &target_c2);
if (debug_noise_removal) {
Expand Down Expand Up @@ -1797,9 +1798,6 @@ Allow a single hyphen in a lower case word
}

bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
bool show_map_detail = false;
int16_t i;

if (!test_pt) {
return false;
}
Expand All @@ -1811,6 +1809,7 @@ bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
if (location < 0) {
return true; // For breakpoint use
}
bool show_map_detail = false;
tessedit_rejection_debug.set_value(true);
debug_x_ht_level.set_value(2);
tprintf("\n\nTESTWD::");
Expand Down Expand Up @@ -1864,7 +1863,7 @@ bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
tprintf("\n");
if (show_map_detail) {
tprintf("\"%s\"\n", word->best_choice->unichar_string().c_str());
for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
for (unsigned i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
word->reject_map[i].full_print(debug_fp);
}
Expand All @@ -1891,13 +1890,12 @@ static void find_modal_font( // good chars in word
int16_t *font_out, // output font
int8_t *font_count // output count
) {
int16_t font; // font index
int32_t count; // pile count

if (fonts->get_total() > 0) {
font = static_cast<int16_t>(fonts->mode());
// font index
int16_t font = static_cast<int16_t>(fonts->mode());
*font_out = font;
count = fonts->pile_count(font);
// pile count
int32_t count = fonts->pile_count(font);
*font_count = count < INT8_MAX ? count : INT8_MAX;
fonts->add(font, -*font_count);
} else {
Expand Down
28 changes: 11 additions & 17 deletions src/ccmain/docqual.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ int16_t Tesseract::word_blob_quality(WERD_RES *word) {
}

int16_t Tesseract::word_outline_errs(WERD_RES *word) {
int16_t i = 0;
int16_t err_count = 0;

if (word->rebuild_word != nullptr) {
int16_t i = 0;
for (unsigned b = 0; b < word->rebuild_word->NumBlobs(); ++b) {
TBLOB *blob = word->rebuild_word->blobs[b];
err_count += count_outline_errs(word->best_choice->unichar_string()[i], blob->NumOutlines());
Expand Down Expand Up @@ -209,13 +209,8 @@ void Tesseract::unrej_good_quality_words( // unreject potential

void Tesseract::doc_and_block_rejection( // reject big chunks
PAGE_RES_IT &page_res_it, bool good_quality_doc) {
int16_t block_no = 0;
int16_t row_no = 0;
BLOCK_RES *current_block;
ROW_RES *current_row;

bool rej_word;
bool prev_word_rejected;
int16_t char_quality = 0;
int16_t accepted_char_quality;

Expand All @@ -238,16 +233,17 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
WERD_RES *word;
while ((word = page_res_it.word()) != nullptr) {
current_block = page_res_it.block();
block_no = current_block->block->pdblk.index();
int16_t block_no = current_block->block->pdblk.index();
if (current_block->char_count > 0 &&
(current_block->rej_count * 100.0 / current_block->char_count) >
tessedit_reject_block_percent) {
if (tessedit_debug_block_rejection) {
tprintf("REJECTING BLOCK %d #chars: %d; #Rejects: %d\n", block_no,
current_block->char_count, current_block->rej_count);
}
prev_word_rejected = false;
bool prev_word_rejected = false;
while ((word = page_res_it.word()) != nullptr && (page_res_it.block() == current_block)) {
bool rej_word;
if (tessedit_preserve_blk_rej_perfect_wds) {
rej_word = word->reject_map.reject_count() > 0 ||
word->reject_map.length() < tessedit_preserve_min_wd_len;
Expand Down Expand Up @@ -284,9 +280,9 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
}

/* Walk rows in block testing for row rejection */
row_no = 0;
int16_t row_no = 0;
while (page_res_it.word() != nullptr && page_res_it.block() == current_block) {
current_row = page_res_it.row();
ROW_RES *current_row = page_res_it.row();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use `auto` for this declaration?

row_no++;
/* Reject whole row if:
fraction of chars on row which are rejected exceed a limit AND
Expand All @@ -302,9 +298,10 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
tprintf("REJECTING ROW %d #chars: %d; #Rejects: %d\n", row_no,
current_row->char_count, current_row->rej_count);
}
prev_word_rejected = false;
bool prev_word_rejected = false;
while ((word = page_res_it.word()) != nullptr && page_res_it.row() == current_row) {
/* Preserve words on good docs unless they are mostly rejected*/
bool rej_word;
if (!tessedit_row_rej_good_docs && good_quality_doc) {
rej_word = word->reject_map.reject_count() /
static_cast<float>(word->reject_map.length()) >
Expand Down Expand Up @@ -448,20 +445,18 @@ void Tesseract::tilde_crunch(PAGE_RES_IT &page_res_it) {
}

bool Tesseract::terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level) {
float rating_per_ch;
int adjusted_len;
int crunch_mode = 0;

if (word->best_choice->unichar_string().empty() ||
(strspn(word->best_choice->unichar_string().c_str(), " ") ==
word->best_choice->unichar_string().size())) {
crunch_mode = 1;
} else {
adjusted_len = word->reject_map.length();
int adjusted_len = word->reject_map.length();
if (adjusted_len > crunch_rating_max) {
adjusted_len = crunch_rating_max;
}
rating_per_ch = word->best_choice->rating() / adjusted_len;
float rating_per_ch = word->best_choice->rating() / adjusted_len;

if (rating_per_ch > crunch_terrible_rating) {
crunch_mode = 2;
Expand Down Expand Up @@ -528,7 +523,6 @@ bool Tesseract::potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_leve
}

void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) {
WERD_RES *word;
PAGE_RES_IT copy_it;
bool deleting_from_bol = false;
bool marked_delete_point = false;
Expand All @@ -539,7 +533,7 @@ void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) {

page_res_it.restart_page();
while (page_res_it.word() != nullptr) {
word = page_res_it.word();
WERD_RES *word = page_res_it.word();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use `auto` for this declaration?


delete_mode = word_deletable(word, debug_delete_mode);
if (delete_mode != CR_NONE) {
Expand Down
20 changes: 7 additions & 13 deletions src/ccmain/fixspace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,6 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_R
void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) {
int16_t best_score;
WERD_RES_LIST current_perm;
int16_t current_score;
bool improved = false;

best_score = eval_word_spacing(best_perm); // default score
Expand All @@ -183,7 +182,7 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *

while ((best_score != PERFECT_WERDS) && !current_perm.empty()) {
match_current_words(current_perm, row, block);
current_score = eval_word_spacing(current_perm);
int16_t current_score = eval_word_spacing(current_perm);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use `auto` for this declaration?

dump_words(current_perm, current_score, 2, improved);
if (current_score > best_score) {
best_perm.clear();
Expand All @@ -201,11 +200,10 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *
void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) {
WERD_RES_IT src_it(&src_list);
WERD_RES_IT new_it(&new_list);
WERD_RES *src_wd;
WERD_RES *new_wd;

for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
src_wd = src_it.data();
WERD_RES *src_wd = src_it.data();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use `auto` for this declaration?

if (!src_wd->combination) {
new_wd = WERD_RES::deep_copy(src_wd);
new_wd->combination = false;
Expand Down Expand Up @@ -393,8 +391,6 @@ void transform_to_next_perm(WERD_RES_LIST &words) {
WERD_RES_IT prev_word_it(&words);
WERD_RES *word;
WERD_RES *prev_word;
WERD_RES *combo;
WERD *copy_word;
int16_t prev_right = -INT16_MAX;
TBOX box;
int16_t gap;
Expand Down Expand Up @@ -425,12 +421,13 @@ void transform_to_next_perm(WERD_RES_LIST &words) {
gap = box.left() - prev_right;
if (gap <= min_gap) {
prev_word = prev_word_it.data();
WERD_RES *combo;
if (prev_word->combination) {
combo = prev_word;
} else {
/* Make a new combination and insert before
* the first word being joined. */
copy_word = new WERD;
auto *copy_word = new WERD;
*copy_word = *(prev_word->word);
// deep copy
combo = new WERD_RES(copy_word);
Expand Down Expand Up @@ -546,7 +543,6 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block)
WERD_RES *word_res;
WERD_RES_LIST sub_word_list;
WERD_RES_IT sub_word_list_it(&sub_word_list);
int16_t blob_index;
int16_t new_length;
float junk;

Expand All @@ -556,7 +552,7 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block)
return;
}

blob_index = worst_noise_blob(word_res, &junk);
auto blob_index = worst_noise_blob(word_res, &junk);
if (blob_index < 0) {
return;
}
Expand Down Expand Up @@ -623,7 +619,6 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
WERD_RES_IT worst_word_it;
float worst_noise_score = 9999;
int worst_blob_index = -1; // Noisiest blob of noisiest wd
int blob_index; // of wds noisiest blob
float noise_score; // of wds noisiest blob
WERD_RES *word_res;
C_BLOB_IT blob_it;
Expand All @@ -636,7 +631,7 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
int16_t i;

for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
blob_index = worst_noise_blob(word_it.data(), &noise_score);
auto blob_index = worst_noise_blob(word_it.data(), &noise_score);
if (blob_index > -1 && worst_noise_score > noise_score) {
worst_noise_score = noise_score;
worst_blob_index = blob_index;
Expand Down Expand Up @@ -806,7 +801,6 @@ float Tesseract::blob_noise_score(TBLOB *blob) {
void fixspace_dbg(WERD_RES *word) {
TBOX box = word->word->bounding_box();
const bool show_map_detail = false;
int16_t i;

box.print();
tprintf(" \"%s\" ", word->best_choice->unichar_string().c_str());
Expand All @@ -816,7 +810,7 @@ void fixspace_dbg(WERD_RES *word) {
tprintf("\n");
if (show_map_detail) {
tprintf("\"%s\"\n", word->best_choice->unichar_string().c_str());
for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
for (unsigned i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
word->reject_map[i].full_print(debug_fp);
}
Expand Down
2 changes: 1 addition & 1 deletion src/ccmain/output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,11 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,
bool force_eol) { // override tilde crunch?
WERD_RES *word = page_res_it.word();
const UNICHARSET &uchset = *word->uch_set;
bool need_reject = false;
UNICHAR_ID space = uchset.unichar_to_id(" ");

if ((word->unlv_crunch_mode != CR_NONE || word->best_choice->empty()) &&
!tessedit_zero_kelvin_rejection && !tessedit_word_for_word) {
bool need_reject = false;
if ((word->unlv_crunch_mode != CR_DELETE) &&
(!stats_.tilde_crunch_written ||
((word->unlv_crunch_mode == CR_KEEP_SPACE) && (word->word->space() > 0) &&
Expand Down
2 changes: 1 addition & 1 deletion src/ccmain/paragraphs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2407,8 +2407,8 @@ static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, RowI
// Set up text, lword_text, and rword_text (mostly for debug printing).
std::string fake_text;
PageIterator pit(static_cast<const PageIterator &>(it));
bool first_word = true;
if (!pit.Empty(RIL_WORD)) {
bool first_word = true;
do {
fake_text += "x";
if (first_word) {
Expand Down
9 changes: 4 additions & 5 deletions src/ccmain/pgedit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -703,9 +703,7 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
WERD_RES *word_res = pr_it->word();
WERD *word = word_res->word;
TBOX word_bb; // word bounding box
int word_height; // ht of word BB
bool displayed_something = false;
float shift; // from bot left

if (color_mode != CM_RAINBOW && word_res->box_word != nullptr) {
# ifndef DISABLED_LEGACY_ENGINE
Expand Down Expand Up @@ -842,13 +840,14 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
if (text.length() > 0) {
word_bb = word->bounding_box();
image_win->Pen(ScrollView::RED);
word_height = word_bb.height();
int text_height = 0.50 * word_height;
auto word_height = word_bb.height();
int text_height = word_height / 2;
if (text_height > 20) {
text_height = 20;
}
image_win->TextAttributes("Arial", text_height, false, false, false);
shift = (word_height < word_bb.width()) ? 0.25 * word_height : 0.0f;
// from bot left
float shift = (word_height < word_bb.width()) ? 0.25f * word_height : 0.0f;
image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height, text.c_str());
if (blame.length() > 0) {
image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height - text_height,
Expand Down
6 changes: 2 additions & 4 deletions src/ccmain/reject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,6 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
int16_t i;
int16_t offset;
bool non_conflict_set_char; // non conf set a/n?
bool conflict = false;
bool allow_1s;
ACCEPTABLE_WERD_TYPE word_type;
bool dict_perm_type;
bool dict_word_ok;
Expand Down Expand Up @@ -411,11 +409,11 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
Else reject all conflict chs
*/
if (word_contains_non_1_digit(word, lengths)) {
allow_1s =
bool allow_1s =
(alpha_count(word, lengths) == 0) || (word_res->best_choice->permuter() == NUMBER_PERM);

int16_t offset;
conflict = false;
bool conflict = false;
for (i = 0, offset = 0; word[offset] != '\0';
offset += word_res->best_choice->unichar_lengths()[i++]) {
if ((!allow_1s || (word[offset] != '1')) &&
Expand Down
Loading
Loading