Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix <Unicode> output and issues reported by Coverity Scan for PAGE XML renderer #4234

Merged
merged 2 commits into from
May 3, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 16 additions & 14 deletions src/api/pagerenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -746,8 +746,9 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) {

float x1, y1, x2, y2, word_conf, line_conf, block_conf;

tesseract::Orientation orientation_block;
tesseract::WritingDirection writing_direction_block;
tesseract::Orientation orientation_block = ORIENTATION_PAGE_UP;
tesseract::WritingDirection writing_direction_block =
WRITING_DIRECTION_LEFT_TO_RIGHT;
tesseract::TextlineOrder textline_order_block;

Pta *block_top_pts = ptaCreate(0);
Expand Down Expand Up @@ -834,24 +835,22 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) {
block_conf = ((res_it->Confidence(RIL_BLOCK)) / 100.);
page_str << "\t\t<TextRegion id=\"r" << rcnt << "\" " << "custom=\""
<< "readingOrder {index:" << rcnt << ";} ";
if (writing_direction_block != 0) {
if (writing_direction_block != WRITING_DIRECTION_LEFT_TO_RIGHT) {
page_str << "readingDirection {"
<< WritingDirectionToStr(writing_direction_block) << ";} ";
}
page_str << "orientation {" << orientation_block << ";}\">\n";
page_str << "\t\t\t";
if ((!POLYGONFLAG ||
(orientation_block != 0 && orientation_block != 2)) &&
if ((!POLYGONFLAG || (orientation_block != ORIENTATION_PAGE_UP &&
orientation_block != ORIENTATION_PAGE_DOWN)) &&
LEVELFLAG == 0) {
AddBoxToPAGE(res_it, RIL_BLOCK, page_str);
}
}

// Writing direction changes at a per-word granularity
// tesseract::WritingDirection writing_direction_before;
tesseract::WritingDirection writing_direction;

writing_direction = writing_direction_block;
auto writing_direction = writing_direction_block;
if (writing_direction_block != WRITING_DIRECTION_TOP_TO_BOTTOM) {
switch (res_it->WordDirection()) {
case DIR_LEFT_TO_RIGHT:
Expand All @@ -868,14 +867,19 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) {
bool ttb_flag = (writing_direction == WRITING_DIRECTION_TOP_TO_BOTTOM);
// TODO: Rework polygon handling if line is skewed (90 or 180 degress),
// for now using LinePts
bool skewed_flag = (orientation_block != 0 && orientation_block != 2);
bool skewed_flag = (orientation_block != ORIENTATION_PAGE_UP &&
orientation_block != ORIENTATION_PAGE_DOWN);

if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
// writing_direction_before = writing_direction;
line_conf = ((res_it->Confidence(RIL_TEXTLINE)) / 100.);
line_content << HOcrEscape(res_it->GetUTF8Text(RIL_TEXTLINE)).c_str();
std::string textline = res_it->GetUTF8Text(RIL_TEXTLINE);
if (textline.back() == '\n') {
textline.erase(textline.length() - 1);
}
line_content << HOcrEscape(textline.c_str());
line_str << "\t\t\t<TextLine id=\"r" << rcnt << "l" << lcnt << "\" ";
if (writing_direction != 0 &&
if (writing_direction != WRITING_DIRECTION_LEFT_TO_RIGHT &&
writing_direction != writing_direction_block) {
line_str << "readingDirection=\""
<< WritingDirectionToStr(writing_direction) << "\" ";
Expand Down Expand Up @@ -1078,12 +1082,10 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) {
region_content << line_content.str();
line_content.str("");
if (!last_word_in_cblock) {
region_content << "\n\t\t\t\t\t";
region_content << '\n';
}
lcnt++;
wcnt = 0;
} else {
line_content << " ";
}

// Write region information to the output
Expand Down
Loading