From 0f9169517d5a8651ee245d083b7a75f31383d7f0 Mon Sep 17 00:00:00 2001 From: Shana Moore Date: Tue, 3 Oct 2023 15:16:06 -0700 Subject: [PATCH] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20revert=20tesseract=20best?= =?UTF-8?q?=20changes=20to=20dockerfile?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change belongs in the knapsack directory instead, so the Tesseract "best" training-data install is removed from this Dockerfile and the HocrGenerator's additional Tesseract options are reset to nil. --- Dockerfile | 5 ----- config/application.rb | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index c9d3ecdd9..d13b8e942 100644 --- a/Dockerfile +++ b/Dockerfile @@ -47,11 +47,6 @@ RUN wget https://github.com/ImageMagick/ImageMagick/archive/refs/tags/7.1.0-57.t && rm -rf ImageMagick* \ && rm -rf /var/cache/apk/* -# Install "best" training data for Tesseract -RUN echo "📚 Installing Tesseract Best (training data)!" && \ - cd /usr/share/tessdata/ && \ - wget https://github.com/tesseract-ocr/tessdata_best/blob/main/eng.traineddata?raw=true -O eng_best.traineddata - ARG VIPS_VERSION=8.11.3 RUN set -x -o pipefail \ diff --git a/config/application.rb b/config/application.rb index 9244f821c..2c6ceefb2 100644 --- a/config/application.rb +++ b/config/application.rb @@ -77,7 +77,7 @@ class Application < Rails::Application # IiifPrint::DerivativeRodeoService, # Hyrax::FileSetDerivativesService] - DerivativeRodeo::Generators::HocrGenerator.additional_tessearct_options = "-l eng_best" + DerivativeRodeo::Generators::HocrGenerator.additional_tessearct_options = nil # Allows us to use decorator files Dir.glob(File.join(File.dirname(__FILE__), "../app/**/*_decorator*.rb")).sort.each do |c|