Skip to content

Commit 40a944c

Browse files
authored
pdfrenderer.cpp: Ignore non-text blocks
Fix tesseract-ocr#3957.
1 parent 490611e commit 40a944c

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

src/api/pdfrenderer.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,10 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
354354
const std::unique_ptr</*non-const*/ ResultIterator> res_it(api->GetIterator());
355355
while (!res_it->Empty(RIL_BLOCK)) {
356356
if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
357+
auto block_type = res_it->BlockType();
358+
if (!res_it->PTIsTextType(block_type)) {
359+
continue; // ignore non-text blocks
360+
}
357361
pdf_str << "BT\n3 Tr"; // Begin text object, use invisible ink
358362
old_fontsize = 0; // Every block will declare its fontsize
359363
new_block = true; // Every block will declare its affine matrix

0 commit comments

Comments
 (0)