Skip to content

Commit

Permalink
readme/vesseract: Added is_language_code_supported
Browse files Browse the repository at this point in the history
  • Loading branch information
SheatNoisette committed Nov 14, 2021
1 parent f6a6897 commit 9aeebb2
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 8 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,19 @@ version := vesseract.get_tesseract_version() or { panic(err) }
// 4.1.0 installed: "Tesseract 4 - 1 - 0 detected!"
println("Tesseract $version.major - $version.minor - $version.patch detected!")
// Get languages supported by Tesseract
langs := vesseract.get_languages() or { panic(err) }
// Example: "['afr', 'amh', 'ara', 'asm', ... 'uzb_cyrl', 'vie', 'yid', 'yor']"
println("$langs")
// Get ALTO XML output - Requires Tesseract >4.1.0
alto := vesseract.image_to_alto_xml_path('sample/demo.png') or { panic(err) }
// "XML: <?xml version="1.0" encoding="UTF-8"?> ... "
println("XML: $alto")
// Get bounding boxes for letters
boxes := vesseract.image_to_boxes(image: 'sample/demo.png', lang: 'eng', args: '') or { panic(err) }
println("$boxes")
Expand All @@ -53,6 +56,14 @@ println("$boxes")
page: 0
}, ... "
// Check if a language model exists in Tesseract
if (vesseract.is_language_code_supported("fra")) {
// Do stuff...
} else {
// Use another model instead...
}
```
# License

Expand Down
2 changes: 1 addition & 1 deletion v.mod
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Module {
name: 'vesseract'
description: 'A Tesseract-OCR wrapper for V!'
version: '0.0.2'
version: '0.0.3'
license: 'MIT'
dependencies: []
}
42 changes: 35 additions & 7 deletions vesseract.v
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,14 @@ pub fn image_to_string(t Tesseract) ?string {
return str[..str.len - 2]
}

// Get installed languages from Tesseract-OCR
// return a list of languages code
pub fn get_languages() ?[]string {
// Language list
mut langs_supported := []string{}

// Generate a map containing all of the languages supported by tesseract
fn get_language_map() ?map[string]bool {
// Get tesseract langs
t_result := run_tesseract(['--list-langs']) or { return err }

// Language list
mut langs_supported := map[string]bool{}

// Split
content := t_result.split('\n')

Expand All @@ -74,13 +73,42 @@ pub fn get_languages() ?[]string {

// Filter empty lines
if line.len > 0 {
langs_supported << content[i]
langs_supported[content[i]] = true
}
}

return langs_supported
}

// Get installed languages from Tesseract-OCR
// return a list of languages code
//
// The codes are taken from the keys of the map built by get_language_map().
// Any error reported while building that map is propagated to the caller.
pub fn get_languages() ?[]string {
	// get_language_map() already invokes `tesseract --list-langs` and parses
	// its output, so Tesseract is not called a second time here.
	lang_map := get_language_map() or { return err }

	// Only the language codes (the map keys) are of interest to callers
	return lang_map.keys()
}

// Tell whether `code` is one of the language codes known to Tesseract-OCR.
// Deliberately returns a plain bool instead of an optional so that it can be
// used directly inside a condition.
// Yields false when the language map cannot be obtained (Tesseract error or
// not available), true when the code is present in the map.
pub fn is_language_code_supported(code string) bool {
	// Fetch the map of languages; any failure is treated as "not supported"
	known_langs := get_language_map() or { return false }

	// Membership test on the map keys
	return code in known_langs
}

// Get tesseract-OCR version
pub fn get_tesseract_version() ?Tesseract_version {
// Get tesseract version
Expand Down
5 changes: 5 additions & 0 deletions vesseract_test.v
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ fn tr_find(list []string, item string) bool {
return false
}

// Checks that a known language code is reported as supported and that an
// arbitrary unknown code is rejected.
fn test_is_language_code_supported() {
	// 'eng' is expected to be installed on the machine running the tests
	assert is_language_code_supported("eng")
	// An unknown code must not be reported as supported
	assert !is_language_code_supported("UNKNOWN")
}

fn test_get_languages() {
langs := get_languages() or { panic(err) }

Expand Down

0 comments on commit 9aeebb2

Please sign in to comment.