Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 34 additions & 30 deletions pkgs/applications/graphics/tesseract/default.nix
Original file line number Diff line number Diff line change
@@ -1,10 +1,38 @@
{ stdenv, fetchFromGitHub, autoreconfHook, pkgconfig
, leptonica, libpng, libtiff, icu, pango, opencl-headers

# Supported list of languages or `null' for all available languages
, enableLanguages ? null
# if you want just a specific list of languages, optionally specify a hash
# to make tessdata a fixed output derivation.
, enableLanguagesHash ? (if enableLanguages == null # all languages
then "1h48xfzabhn0ldbx5ib67cp9607pr0zpblsy8z6fs4knn0zznfnw"
else null)
}:

let tessdata = stdenv.mkDerivation ({
name = "tessdata";
src = fetchFromGitHub {
owner = "tesseract-ocr";
repo = "tessdata";
rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d";
# when updating don't forget to update the default value fo enableLanguagesHash
sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7";
};
buildCommand = ''
cd $src;
for lang in ${if enableLanguages==null then "*.traineddata" else stdenv.lib.concatMapStringsSep " " (x: x+".traineddata") enableLanguages} ; do
install -Dt $out/share/tessdata $src/$lang ;
done;
'';
preferLocalBuild = true;
} // (stdenv.lib.optionalAttrs (enableLanguagesHash != null) {
# when a hash is given, we make this a fixed output derivation.
outputHashMode = "recursive";
outputHashAlgo = "sha256";
outputHash = enableLanguagesHash;
}));
in

stdenv.mkDerivation rec {
name = "tesseract-${version}";
version = "3.05.00";
Expand All @@ -16,41 +44,17 @@ stdenv.mkDerivation rec {
sha256 = "11wrpcfl118wxsv2c3w2scznwb48c4547qml42s2bpdz079g8y30";
};

tessdata = fetchFromGitHub {
owner = "tesseract-ocr";
repo = "tessdata";
rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d";
sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7";
};
enableParallelBuilding = true;

nativeBuildInputs = [ pkgconfig autoreconfHook ];
buildInputs = [ leptonica libpng libtiff icu pango opencl-headers ];

LIBLEPT_HEADERSDIR = "${leptonica}/include";

# Copy the .traineddata files of the languages specified in enableLanguages
# into `$out/share/tessdata' and check afterwards if copying was successful.
postInstall = let
mkArg = lang: "-iname ${stdenv.lib.escapeShellArg "${lang}.traineddata"}";
mkFindArgs = stdenv.lib.concatMapStringsSep " -o " mkArg;
findLangArgs = if enableLanguages != null
then "\\( ${mkFindArgs enableLanguages} \\)"
else "-iname '*.traineddata'";
in ''
numLangs="$(find "$tessdata" -mindepth 1 -maxdepth 1 -type f \
${findLangArgs} -exec cp -t "$out/share/tessdata" {} + -print | wc -l)"

${if enableLanguages != null then ''
expected=${toString (builtins.length enableLanguages)}
'' else ''
expected="$(ls -1 "$tessdata/"*.traineddata | wc -l)"
''}

if [ "$numLangs" -ne "$expected" ]; then
echo "Expected $expected languages, but $numLangs" \
"were copied to \`$out/share/tessdata'" >&2
exit 1
fi
postInstall = ''
for i in ${tessdata}/share/tessdata/*; do
ln -s $i $out/share/tessdata;
done
'';

meta = {
Expand Down