Skip to content

Commit

Permalink
alts: Added alternative function calls
Browse files Browse the repository at this point in the history
  • Loading branch information
SheatNoisette committed Nov 14, 2021
1 parent 0215e9c commit 51f5297
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 10 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,8 @@ vesseract
*.so
*.dylib
*.dll

# Tesseract
*.txt
*.xml
*.xml
*.box
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ langs := vesseract.get_languages() or { panic(err) }
println("$langs")
// Get alto xml - Requires Tesseract >4.1.0
alto := vesseract.image_to_alto_xml('sample/demo.png') or { panic(err) }
alto := vesseract.image_to_alto_xml_path('sample/demo.png') or { panic(err) }
// "XML: <?xml version="1.0" encoding="UTF-8"?> ... "
println("XML: $alto")
```
Expand Down
13 changes: 13 additions & 0 deletions alternatives.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module vesseract

// Convenience wrapper around image_to_alto_xml() that only needs an image path.
// The language is fixed to 'eng' and no extra arguments are supplied.
// Returns whatever image_to_alto_xml() returns for those parameters.
[inline]
pub fn image_to_alto_xml_path(image_path string) ?string {
	// Build the parameter struct explicitly instead of using trailing struct arguments
	params := Tesseract{
		image: image_path
		lang: 'eng'
		args: ''
	}
	return image_to_alto_xml(params)
}

// Convenience wrapper around image_to_string() that only needs a file path.
// The language is fixed to 'eng' and no extra arguments are supplied.
// Returns whatever image_to_string() returns for those parameters.
[inline]
pub fn image_to_string_path(filepath string) ?string {
	// Build the parameter struct explicitly instead of using trailing struct arguments
	params := Tesseract{
		image: filepath
		lang: 'eng'
		args: ''
	}
	return image_to_string(params)
}
21 changes: 14 additions & 7 deletions vesseract.v
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,17 @@ module vesseract

import os

// Used for bounding box detection
// Groups a piece of text with two integer coordinate pairs. All fields are
// public and read-only outside the module (declared under `pub:`).
// NOTE(review): presumably (x1, y1) and (x2, y2) are opposite corners of the
// box - confirm the corner order and origin against the code that fills it.
pub struct Tesseract_box {
pub:
// Text associated with the box (presumably one recognized symbol - TODO confirm)
letter string
// First coordinate pair
x1 int
y1 int
// Second coordinate pair
x2 int
y2 int
}

// Used as a parameter
pub struct Tesseract {
pub:
// Image path
Expand All @@ -12,6 +23,7 @@ pub:
lang string = 'eng'
}

// Used to make it easier to get tesseract version
pub struct Tesseract_version {
pub:
major int
Expand Down Expand Up @@ -43,11 +55,6 @@ pub fn image_to_string(t Tesseract) ?string {
return str[..str.len - 2]
}

// Variant of image_to_string, only a file path is required
pub fn image_to_string_path(filepath string) ?string {
return image_to_string(image: filepath, lang: 'eng', args: '')
}

// Get installed languages from Tesseract-OCR
// return a list of languages code
pub fn get_languages() ?[]string {
Expand Down Expand Up @@ -104,7 +111,7 @@ pub fn get_tesseract_version() ?Tesseract_version {
}

// Get alto representation from Tesseract-OCR as XML format
pub fn image_to_alto_xml(image string) ?string {
pub fn image_to_alto_xml(t Tesseract) ?string {
// Tesseract option: -c tessedit_create_alto=1

// Check version for alto support
Expand All @@ -119,7 +126,7 @@ pub fn image_to_alto_xml(image string) ?string {
xml_filename := id + '.xml'

// Run tesseract
run_tesseract([image, id, '-c tessedit_create_alto=1']) or { return err }
run_tesseract([t.image, id, '-c tessedit_create_alto=1', t.args]) or { return err }

// Read output
xml := os.read_file(xml_filename) or { return err }
Expand Down
7 changes: 6 additions & 1 deletion vesseract_test.v
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ fn test_image_to_string_empty() {
}

fn test_image_to_alto_xml() {
xml := image_to_alto_xml('sample/demo.png') or { panic(err) }
xml := image_to_alto_xml(image: 'sample/demo.png', lang: 'eng', args: '') or { panic(err) }
assert xml.contains('http://www.loc.gov/standards/alto/ns-v3#')
}

// Checks that the path-only ALTO helper produces output containing the
// ALTO v3 namespace URI for the sample image; panics if the call fails.
fn test_image_to_alto_xml_path() {
	alto_namespace := 'http://www.loc.gov/standards/alto/ns-v3#'
	output := image_to_alto_xml_path('sample/demo.png') or { panic(err) }
	assert output.contains(alto_namespace)
}

0 comments on commit 51f5297

Please sign in to comment.