From e66d43390782f056b9be6e4aee4bf35c214a2f2d Mon Sep 17 00:00:00 2001 From: "zdenop@gmail.com" Date: Sun, 10 Nov 2013 20:59:11 +0000 Subject: [PATCH] fix issue 938: change tessdata-dir/datadir rules; implement --tessdata-dir option git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@907 d0cd1f9f-072b-0410-8dd7-cf729c803f20 --- api/baseapi.cpp | 4 +++ api/tesseractmain.cpp | 55 ++++++++++++++++++++++++----------------- ccutil/mainblk.cpp | 57 +++++++++++++++++++++++-------------------- 3 files changed, 67 insertions(+), 49 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index c5e3242c2a..a4ee691fc6 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -263,6 +263,10 @@ int TessBaseAPI::Init(const char* datapath, const char* language, datapath_ = new STRING(datapath); else *datapath_ = datapath; + if ((strcmp(datapath_->string(), "") == 0) && + (strcmp(tesseract_->datadir.string(), "") != 0)) + *datapath_ = tesseract_->datadir; + if (language_ == NULL) language_ = new STRING(language); else diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index 32bccc4b5a..039a9fbc48 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -24,7 +24,6 @@ #include "allheaders.h" #include "baseapi.h" -#include "basedir.h" #include "renderer.h" #include "strngs.h" #include "tprintf.h" @@ -56,6 +55,7 @@ int main(int argc, char **argv) { const char* lang = "eng"; const char* image = NULL; const char* output = NULL; + const char* datapath = NULL; bool noocr = false; bool list_langs = false; bool print_parameters = false; @@ -66,6 +66,12 @@ int main(int argc, char **argv) { if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) { lang = argv[arg + 1]; ++arg; + } else if (strcmp(argv[arg], "--tessdata-dir") == 0 && arg + 1 < argc) { + datapath = argv[arg + 1]; + ++arg; + } else if (strcmp(argv[arg], "--list-langs") == 0) { + noocr = true; + list_langs = true; } else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) { pagesegmode = 
static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1])); ++arg; @@ -89,40 +95,43 @@ int main(int argc, char **argv) { } if (output == NULL && noocr == false) { - fprintf(stderr, "Usage:%s imagename outputbase|stdout [-l lang] " - "[-psm pagesegmode] [-c configvar=value] " - "[configfile...]\n\n", argv[0]); + fprintf(stderr, "Usage:\n %s imagename outputbase|stdout [options...] " + "[configfile...]\n\n", argv[0]); + + fprintf(stderr, "OCR options:\n"); + fprintf(stderr, " --tessdata-dir /path\tspecify location of tessdata" + " path\n"); + fprintf(stderr, " -l lang[+lang]\tspecify language(s) used for OCR\n"); + fprintf(stderr, " -c configvar=value\tset value for control parameter.\n" + "\t\t\tMultiple -c arguments are allowed.\n"); + fprintf(stderr, " -psm pagesegmode\tspecify page segmentation mode.\n"); + fprintf(stderr, "These options must occur before any configfile.\n\n"); fprintf(stderr, "pagesegmode values are:\n" - "0 = Orientation and script detection (OSD) only.\n" - "1 = Automatic page segmentation with OSD.\n" - "2 = Automatic page segmentation, but no OSD, or OCR\n" - "3 = Fully automatic page segmentation, but no OSD. (Default)\n" - "4 = Assume a single column of text of variable sizes.\n" - "5 = Assume a single uniform block of vertically aligned text.\n" - "6 = Assume a single uniform block of text.\n" - "7 = Treat the image as a single text line.\n" - "8 = Treat the image as a single word.\n" - "9 = Treat the image as a single word in a circle.\n" - "10 = Treat the image as a single character.\n"); - fprintf(stderr, "multiple -c arguments are allowed.\n"); - fprintf(stderr, "-l lang, -psm pagesegmode and any -c options must occur" - "before any configfile.\n\n"); + " 0 = Orientation and script detection (OSD) only.\n" + " 1 = Automatic page segmentation with OSD.\n" + " 2 = Automatic page segmentation, but no OSD, or OCR\n" + " 3 = Fully automatic page segmentation, but no OSD. 
(Default)\n" + " 4 = Assume a single column of text of variable sizes.\n" + " 5 = Assume a single uniform block of vertically aligned text.\n" + " 6 = Assume a single uniform block of text.\n" + " 7 = Treat the image as a single text line.\n" + " 8 = Treat the image as a single word.\n" + " 9 = Treat the image as a single word in a circle.\n" + " 10 = Treat the image as a single character.\n\n"); fprintf(stderr, "Single options:\n"); fprintf(stderr, " -v --version: version info\n"); fprintf(stderr, " --list-langs: list available languages for tesseract " - "engine\n"); + "engine. Can be used with --tessdata-dir.\n"); fprintf(stderr, " --print-parameters: print tesseract parameters to the " - "stdout\n"); + "stdout.\n"); exit(1); } tesseract::TessBaseAPI api; - STRING tessdata_dir; - truncate_path(argv[0], &tessdata_dir); api.SetOutputName(output); - int rc = api.Init(tessdata_dir.string(), lang, tesseract::OEM_DEFAULT, + int rc = api.Init(datapath, lang, tesseract::OEM_DEFAULT, &(argv[arg]), argc - arg, NULL, NULL, false); if (rc) { diff --git a/ccutil/mainblk.cpp b/ccutil/mainblk.cpp index f88a61a07d..a6e46ebe4d 100644 --- a/ccutil/mainblk.cpp +++ b/ccutil/mainblk.cpp @@ -27,7 +27,7 @@ #include <stdlib.h> #include "ccutil.h" -#define VARDIR "configs/" /*variables files */ +#define VARDIR "configs/" /**< variables files */ #define EXTERN const ERRCODE NO_PATH = @@ -41,39 +41,44 @@ namespace tesseract { * Main for mithras demo program. Read the arguments and set up globals. **********************************************************************/ -void CCUtil::main_setup( /*main demo program */ - const char *argv0, //program name - const char *basename //name of image - ) { - imagebasename = basename; /*name of image */ +/** + * @brief CCUtil::main_setup - set location of tessdata and name of image + * + * @param argv0 - path to the directory with language files and config files. 
+ * An actual value of argv0 is used if not NULL, otherwise TESSDATA_PREFIX is + * used if not NULL, next try to use compiled in -DTESSDATA_PREFIX. If none of + * these is successful - use current directory. + * @param basename - name of image + */ +void CCUtil::main_setup(const char *argv0, const char *basename) { + imagebasename = basename; /**< name of image */ - // TESSDATA_PREFIX Environment variable overrules everything. - // Compiled in -DTESSDATA_PREFIX is next. - // An actual value of argv0 is used if not NULL, otherwise current directory. - if (!getenv("TESSDATA_PREFIX")) { + if (argv0 != NULL) { + datadir = argv0; + } else { + if (getenv("TESSDATA_PREFIX")) { + datadir = getenv("TESSDATA_PREFIX"); + } else { #ifdef TESSDATA_PREFIX #define _STR(a) #a #define _XSTR(a) _STR(a) datadir = _XSTR(TESSDATA_PREFIX); #undef _XSTR #undef _STR -#else - if (argv0 != NULL) { - datadir = argv0; - // Remove tessdata from the end if present, as we will add it back! - int length = datadir.length(); - if (length >= 8 && strcmp(&datadir[length - 8], "tessdata") == 0) - datadir.truncate_at(length - 8); - else if (length >= 9 && strcmp(&datadir[length - 9], "tessdata/") == 0) - datadir.truncate_at(length - 9); - if (datadir.length() == 0) - datadir = "./"; - } else { - datadir = "./"; - } #endif + } + } + + // datadir may still be empty: + if (datadir.length() == 0) { + datadir = "./"; } else { - datadir = getenv("TESSDATA_PREFIX"); + // Remove tessdata from the end if present, as we will add it back! 
+ int length = datadir.length(); + if (length >= 8 && strcmp(&datadir[length - 8], "tessdata") == 0) + datadir.truncate_at(length - 8); + else if (length >= 9 && strcmp(&datadir[length - 9], "tessdata/") == 0) + datadir.truncate_at(length - 9); } // check for missing directory separator @@ -82,6 +87,6 @@ void CCUtil::main_setup( /*main demo program */ if ((strcmp(lastchar, "/") != 0) && (strcmp(lastchar, "\\") != 0)) datadir += "/"; - datadir += m_data_sub_dir; /*data directory */ + datadir += m_data_sub_dir; /**< data directory */ } } // namespace tesseract