Skip to content
Christian Clausner edited this page May 13, 2019 · 9 revisions

Loading page content from XML and iterating over content

try {
		
	//Load page
	Page page = PageXmlInputOutput.readPage("c:\\temp\\text.xml");
			
	//Iterate over regions
	for (int i=0; i<page.getLayout().getRegionCount(); i++) {
		Region region = page.getLayout().getRegion(i);
	}
			
	//Iterate over regions using iterator
	for (RegionIterator it = new RegionIterator(page.getLayout(), RegionType.TextRegion, null); it.hasNext(); ) {
		TextRegion region = (TextRegion)it.next();

		//Iterate over nested text lines
		for (int j=0; j<region.getTextObjectCount(); j++) {
			TextLine textLine = (TextLine)region.getTextObject(j);
		}
	}
			
	//Iterate over all text lines using iterator
	for (LowLevelTextObjectIterator it = new LowLevelTextObjectIterator(page.getLayout(), LowLevelTextType.TextLine, null); it.hasNext(); ) {
		TextLine textLine = (TextLine)it.next();
	}			
			
} catch (Exception e) {
	e.printStackTrace();
}

Working with text objects

// Illustrative fragment: 'region' is not declared here and is assumed to come
// from elsewhere (e.g. from iterating a page layout) -- confirm it really is a
// text region before casting.
TextRegion textRegion = (TextRegion)region;

// Visit each child text object of the region by index; the children are cast to TextLine
for (int i=0; i<textRegion.getTextObjectCount(); i++) {
	TextLine textLine = (TextLine)textRegion.getTextObject(i);

	// A text line exposes the same count-based access to its own child text
	// objects (presumably words -- verify against the library documentation)
	for (int j=0; j<textLine.getTextObjectCount(); j++) {
		// Placeholder: per-child processing goes here
		...
	}
}
// Adding / removing child text objects (arguments elided in this example)
textRegion.addTextObject(...);
textRegion.removeTextObject(...);

// Retrieve the text of the region as a String
String text = textRegion.getText();
Clone this wiki locally