Skip to content

Commit

Permalink
fix issue 938: change tessdata-dir/datadir rules; implement --tessdat…
Browse files Browse the repository at this point in the history
…a-dir option

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@907 d0cd1f9f-072b-0410-8dd7-cf729c803f20
  • Loading branch information
[email protected] committed Nov 10, 2013
1 parent 7bf559d commit e66d433
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 49 deletions.
4 changes: 4 additions & 0 deletions api/baseapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,10 @@ int TessBaseAPI::Init(const char* datapath, const char* language,
datapath_ = new STRING(datapath);
else
*datapath_ = datapath;
if ((strcmp(datapath_->string(), "") == 0) &&
(strcmp(tesseract_->datadir.string(), "") != 0))
*datapath_ = tesseract_->datadir;

if (language_ == NULL)
language_ = new STRING(language);
else
Expand Down
55 changes: 32 additions & 23 deletions api/tesseractmain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

#include "allheaders.h"
#include "baseapi.h"
#include "basedir.h"
#include "renderer.h"
#include "strngs.h"
#include "tprintf.h"
Expand Down Expand Up @@ -56,6 +55,7 @@ int main(int argc, char **argv) {
const char* lang = "eng";
const char* image = NULL;
const char* output = NULL;
const char* datapath = NULL;
bool noocr = false;
bool list_langs = false;
bool print_parameters = false;
Expand All @@ -66,6 +66,12 @@ int main(int argc, char **argv) {
if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) {
lang = argv[arg + 1];
++arg;
} else if (strcmp(argv[arg], "--tessdata-dir") == 0 && arg + 1 < argc) {
datapath = argv[arg + 1];
++arg;
} else if (strcmp(argv[arg], "--list-langs") == 0) {
noocr = true;
list_langs = true;
} else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) {
pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1]));
++arg;
Expand All @@ -89,40 +95,43 @@ int main(int argc, char **argv) {
}

if (output == NULL && noocr == false) {
fprintf(stderr, "Usage:%s imagename outputbase|stdout [-l lang] "
"[-psm pagesegmode] [-c configvar=value] "
"[configfile...]\n\n", argv[0]);
fprintf(stderr, "Usage:\n %s imagename outputbase|stdout [options...] "
"[configfile...]\n\n", argv[0]);

fprintf(stderr, "OCR options:\n");
fprintf(stderr, " --tessdata-dir /path\tspecify location of tessdata"
" path\n");
fprintf(stderr, " -l lang[+lang]\tspecify language(s) used for OCR\n");
fprintf(stderr, " -c configvar=value\tset value for control parameter.\n"
"\t\t\tMultiple -c arguments are allowed.\n");
fprintf(stderr, " -psm pagesegmode\tspecify page segmentation mode.\n");
fprintf(stderr, "These options must occur before any configfile.\n\n");
fprintf(stderr,
"pagesegmode values are:\n"
"0 = Orientation and script detection (OSD) only.\n"
"1 = Automatic page segmentation with OSD.\n"
"2 = Automatic page segmentation, but no OSD, or OCR\n"
"3 = Fully automatic page segmentation, but no OSD. (Default)\n"
"4 = Assume a single column of text of variable sizes.\n"
"5 = Assume a single uniform block of vertically aligned text.\n"
"6 = Assume a single uniform block of text.\n"
"7 = Treat the image as a single text line.\n"
"8 = Treat the image as a single word.\n"
"9 = Treat the image as a single word in a circle.\n"
"10 = Treat the image as a single character.\n");
fprintf(stderr, "multiple -c arguments are allowed.\n");
fprintf(stderr, "-l lang, -psm pagesegmode and any -c options must occur"
"before any configfile.\n\n");
" 0 = Orientation and script detection (OSD) only.\n"
" 1 = Automatic page segmentation with OSD.\n"
" 2 = Automatic page segmentation, but no OSD, or OCR\n"
" 3 = Fully automatic page segmentation, but no OSD. (Default)\n"
" 4 = Assume a single column of text of variable sizes.\n"
" 5 = Assume a single uniform block of vertically aligned text.\n"
" 6 = Assume a single uniform block of text.\n"
" 7 = Treat the image as a single text line.\n"
" 8 = Treat the image as a single word.\n"
" 9 = Treat the image as a single word in a circle.\n"
" 10 = Treat the image as a single character.\n\n");
fprintf(stderr, "Single options:\n");
fprintf(stderr, " -v --version: version info\n");
fprintf(stderr, " --list-langs: list available languages for tesseract "
"engine\n");
"engine. Can be used with --tessdata-dir.\n");
fprintf(stderr, " --print-parameters: print tesseract parameters to the "
"stdout\n");
"stdout.\n");
exit(1);
}

tesseract::TessBaseAPI api;

STRING tessdata_dir;
truncate_path(argv[0], &tessdata_dir);
api.SetOutputName(output);
int rc = api.Init(tessdata_dir.string(), lang, tesseract::OEM_DEFAULT,
int rc = api.Init(datapath, lang, tesseract::OEM_DEFAULT,
&(argv[arg]), argc - arg, NULL, NULL, false);

if (rc) {
Expand Down
57 changes: 31 additions & 26 deletions ccutil/mainblk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#include <stdlib.h>

This comment was marked as spam.

Copy link
@pranitdas09
#include "ccutil.h"

#define VARDIR "configs/" /*variables files */
#define VARDIR "configs/" /**< variables files */
#define EXTERN

const ERRCODE NO_PATH =
Expand All @@ -41,39 +41,44 @@ namespace tesseract {
* Main for mithras demo program. Read the arguments and set up globals.
**********************************************************************/

void CCUtil::main_setup( /*main demo program */
const char *argv0, //program name
const char *basename //name of image
) {
imagebasename = basename; /*name of image */
/**
* @brief CCUtil::main_setup - set location of tessdata and name of image
*
* @param argv0 - path to the directory with language files and config files.
* The actual value of argv0 is used if it is not NULL; otherwise the
* TESSDATA_PREFIX environment variable is used if it is set; otherwise the
* compiled-in -DTESSDATA_PREFIX is tried. If none of these yields a path,
* the current directory is used.
* @param basename - name of image
*/
void CCUtil::main_setup(const char *argv0, const char *basename) {
imagebasename = basename; /**< name of image */

// TESSDATA_PREFIX Environment variable overrules everything.
// Compiled in -DTESSDATA_PREFIX is next.
// An actual value of argv0 is used if not NULL, otherwise current directory.
if (!getenv("TESSDATA_PREFIX")) {
if (argv0 != NULL) {
datadir = argv0;
} else {
if (getenv("TESSDATA_PREFIX")) {
datadir = getenv("TESSDATA_PREFIX");
} else {
#ifdef TESSDATA_PREFIX
#define _STR(a) #a
#define _XSTR(a) _STR(a)
datadir = _XSTR(TESSDATA_PREFIX);
#undef _XSTR
#undef _STR
#else
if (argv0 != NULL) {
datadir = argv0;
// Remove tessdata from the end if present, as we will add it back!
int length = datadir.length();
if (length >= 8 && strcmp(&datadir[length - 8], "tessdata") == 0)
datadir.truncate_at(length - 8);
else if (length >= 9 && strcmp(&datadir[length - 9], "tessdata/") == 0)
datadir.truncate_at(length - 9);
if (datadir.length() == 0)
datadir = "./";
} else {
datadir = "./";
}
#endif
}
}

// datadir may still be empty:
if (datadir.length() == 0) {
datadir = "./";
} else {
datadir = getenv("TESSDATA_PREFIX");
// Remove tessdata from the end if present, as we will add it back!
int length = datadir.length();
if (length >= 8 && strcmp(&datadir[length - 8], "tessdata") == 0)
datadir.truncate_at(length - 8);
else if (length >= 9 && strcmp(&datadir[length - 9], "tessdata/") == 0)
datadir.truncate_at(length - 9);
}

// check for missing directory separator
Expand All @@ -82,6 +87,6 @@ void CCUtil::main_setup( /*main demo program */
if ((strcmp(lastchar, "/") != 0) && (strcmp(lastchar, "\\") != 0))
datadir += "/";

datadir += m_data_sub_dir; /*data directory */
datadir += m_data_sub_dir; /**< data directory */
}
} // namespace tesseract

0 comments on commit e66d433

Please sign in to comment.