tetherto · olyasir · Feb 11, 2026 · Feb 9, 2026 · Feb 9, 2026 · Feb 9, 2026
@@ -255,8 +255,8 @@ jobs:
         if: needs.setup.outputs.qvac_needed == 'true'
         working-directory: ${{ env.PKG_DIR }}
         run: |
-          mkdir -p models/ocr/rec_512
-          aws s3 cp s3://tether-ai-dev/qvac_models_compiled/ocr/rec_512/ models/ocr/rec_512/ --recursive --exclude "*" \
+          mkdir -p models/ocr/rec_dyn
+          aws s3 cp s3://tether-ai-dev/qvac_models_compiled/ocr/rec_dyn/ models/ocr/rec_dyn/ --recursive --exclude "*" \
             --include "detector_craft.onnx" \
             --include "recognizer_latin.onnx"
           echo "Downloaded QVAC OCR models:"

@@ -55,7 +55,7 @@ jobs:
         if: steps.version.outputs.bumped == 'true'
         uses: softprops/action-gh-release@v2
         with:
-          tag_name: v${{ steps.version.outputs.current }}
+          tag_name: ocr-onnx-v${{ steps.version.outputs.current }}
           name: QVAC OCR Addon v${{ steps.version.outputs.current }}
           body_path: ${{ inputs.workdir }}/release-notes/v${{ steps.version.outputs.current }}.md
         env:

@@ -151,8 +151,8 @@ jobs:
         working-directory: ${{ inputs.workdir || env.PKG_DIR }}
         shell: bash
         run: |
-          mkdir -p models/ocr/rec_512
-          aws s3 cp s3://tether-ai-dev/qvac_models_compiled/ocr/rec_512/ models/ocr/rec_512/ --recursive
+          mkdir -p models/ocr/rec_dyn
+          aws s3 cp s3://tether-ai-dev/qvac_models_compiled/ocr/rec_dyn/ models/ocr/rec_dyn/ --recursive
 
       - name: Run integration test
         working-directory: ${{ inputs.workdir || env.PKG_DIR }}

@@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+# [0.1.6] - 2026-02-09
+
+### Changed
+
+- Replaced fixed-width recognizer preprocessing with EasyOCR-style dynamic-width resizing for improved OCR accuracy. Images are now always scaled to the model height, with the width scaled proportionally, using LANCZOS4 interpolation; previously they were scaled (aspect-preserving) to fit within a fixed 512px-wide input.
+- Switched to dynamic-width recognizer models (`rec_dyn`). Batch inference now uses per-batch proportional width instead of fixed `RECOGNIZER_MODEL_WIDTH`.
+- Updated default model path from `rec_512` to `rec_dyn` across tests, benchmarks, and scripts.
+- Replaced English recognizer with Latin recognizer in unit tests (`recognizer_english.onnx` → `recognizer_latin.onnx`).
+- Added `--model-dir` CLI option to batch OCR CLI, evaluate script, and QVAC OCR backend for configurable model directory.
+
+### Fixed
+
+- Improved Portuguese OCR accuracy (minor punctuation corrections in test expected outputs).
+
 # [0.1.2] - 2026-01-16
 
 ### Changed

@@ -262,18 +262,35 @@ cv::Mat normalizeAndPad(const cv::Mat &img, int channels, int height, int maxWid
 }
 
 /**
- * @brief resizes the image to fit recognizer input sizes
+ * @brief calculates the proportional width for EasyOCR-style resizing
  *
- * The image is not simply resized to recognizer input format. After height is adjusted, the portion corresponding to [new image width,
- * recognizerImageWidth] is padded with the last column of the image
+ * Always scales height to RECOGNIZER_MODEL_HEIGHT, width is proportional to aspect ratio.
+ * This matches EasyOCR's preprocessing approach.
+ *
+ * @param width : original image width
+ * @param height : original image height
+ * @return int : the proportional width after resizing to model height
+ */
+int calculateProportionalWidth(int width, int height) {
+  // Degenerate (empty) images: dividing by a zero height yields inf/NaN, and converting
+  // that to int is undefined behaviour. Fall back to the 1-pixel minimum instead.
+  if (width <= 0 || height <= 0) {
+    return 1;
+  }
+  // Width that keeps the aspect ratio once the height is scaled to RECOGNIZER_MODEL_HEIGHT,
+  // rounded up so a partial trailing column is not dropped.
+  float ratio = static_cast<float>(width) / static_cast<float>(height);
+  int newWidth = static_cast<int>(std::ceil(RECOGNIZER_MODEL_HEIGHT * ratio));
+  return std::max(1, newWidth);  // Ensure at least 1 pixel width
+}
+
+/**
+ * @brief resizes the image to fit recognizer input sizes (EasyOCR-style)
+ *
+ * Always scales height to RECOGNIZER_MODEL_HEIGHT (64), width is proportional.
+ * The image is then padded to targetWidth for batching.
  *
  * It also receives contrast treatment according to adjustContrast
  *
  * @param subImage : image to be treated
+ * @param targetWidth : target width for padding (typically max width in batch)
  * @param adjustContrast : target contrast
  * @return adjusted image
  */
-cv::Mat alignAndCollate(const SubImage &subImage, double adjustContrast = 0.0) {
+cv::Mat alignAndCollate(const SubImage &subImage, int targetWidth, double adjustContrast = 0.0) {
   cv::Mat image = subImage.image;
   int width = image.cols;
   int height = image.rows;
@@ -285,25 +302,21 @@ cv::Mat alignAndCollate(const SubImage &subImage, double adjustContrast = 0.0) {
     image = adjustContrastGrey(image, adjustContrast);
   }
 
-  // Aspect-preserving resize (ExecutorTorch approach)
-  // Scale both dimensions by the same ratio to fit within RECOGNIZER_MODEL_WIDTH x RECOGNIZER_MODEL_HEIGHT
-  float heightRatio = static_cast<float>(RECOGNIZER_MODEL_HEIGHT) / static_cast<float>(height);
-  float widthRatio = static_cast<float>(RECOGNIZER_MODEL_WIDTH) / static_cast<float>(width);
-  float resizeRatio = std::min(heightRatio, widthRatio);
+  // EasyOCR-style resize: always scale height to model height, width proportional
+  int proportionalWidth = calculateProportionalWidth(width, height);
 
-  int resizedW = static_cast<int>(std::round(static_cast<float>(width) * resizeRatio));
-  int resizedH = static_cast<int>(std::round(static_cast<float>(height) * resizeRatio));
-
-  // Clamp to model dimensions
-  resizedW = std::min(resizedW, RECOGNIZER_MODEL_WIDTH);
-  resizedH = std::min(resizedH, RECOGNIZER_MODEL_HEIGHT);
+  // Use LANCZOS interpolation like EasyOCR
+  cv::Mat resizedImage;
+  cv::resize(image, resizedImage, cv::Size(proportionalWidth, RECOGNIZER_MODEL_HEIGHT), 0, 0, cv::INTER_LANCZOS4);
 
-  // Use INTER_AREA for downscaling, INTER_CUBIC for upscaling
-  int interpolation = (resizeRatio < 1.0F) ? cv::INTER_AREA : cv::INTER_CUBIC;
+  return normalizeAndPad(resizedImage, 1 /*grayscale*/, RECOGNIZER_MODEL_HEIGHT, targetWidth);
+}
 
-  cv::Mat resizedImage;
-  cv::resize(image, resizedImage, cv::Size(resizedW, resizedH), 0, 0, interpolation);
-  return normalizeAndPad(resizedImage, 1 /*grayscale*/, RECOGNIZER_MODEL_HEIGHT, RECOGNIZER_MODEL_WIDTH);
+/**
+ * @brief Legacy version for backward compatibility - uses fixed RECOGNIZER_MODEL_WIDTH
+ *
+ * Forwards to the targetWidth overload, passing RECOGNIZER_MODEL_WIDTH as the padding width.
+ *
+ * Overload selection: a double second argument (e.g. 0.0) picks this overload, whereas an int
+ * second argument is interpreted as targetWidth by the other overload.
+ *
+ * NOTE(review): the resize is no longer clamped to RECOGNIZER_MODEL_WIDTH, so the resized image
+ * may be wider than the padding width passed here - confirm normalizeAndPad handles that case.
+ *
+ * @param subImage : image to be treated
+ * @param adjustContrast : target contrast
+ * @return adjusted image
+ */
+cv::Mat alignAndCollate(const SubImage &subImage, double adjustContrast = 0.0) {
+  return alignAndCollate(subImage, RECOGNIZER_MODEL_WIDTH, adjustContrast);
 }
 
 /**
@@ -799,19 +812,21 @@ cv::Mat StepRecognizeText::runInferenceOnImg(const cv::Mat &img) {
   return preds.clone();
 }
 
-cv::Mat StepRecognizeText::runBatchInference(const std::vector<cv::Mat> &images) {
+cv::Mat StepRecognizeText::runBatchInference(const std::vector<cv::Mat> &images, int dynamicWidth) {
   auto t0 = std::chrono::high_resolution_clock::now();
   if (images.empty()) {
     return cv::Mat();
   }
 
   const int batchSize = static_cast<int>(images.size());
-  QLOG(qvac_lib_inference_addon_cpp::logger::Priority::DEBUG,
-       "[Recognition] runBatchInference called with batch_size=" + std::to_string(batchSize));
   const int height = RECOGNIZER_MODEL_HEIGHT;
-  const int width = RECOGNIZER_MODEL_WIDTH;
+  const int width = dynamicWidth;
   const int numChannels = 1;
 
+  QLOG(qvac_lib_inference_addon_cpp::logger::Priority::DEBUG,
+       "[Recognition] runBatchInference called with batch_size=" + std::to_string(batchSize) +
+       ", dynamic_width=" + std::to_string(width));
+
   // Create batch tensor: [batch, channels, height, width]
   std::vector<float> batchData(batchSize * numChannels * height * width);
 
@@ -918,17 +933,28 @@ std::vector<InferredText> StepRecognizeText::processImgList() {
     size_t batchEnd = std::min(batchStart + static_cast<size_t>(batchSize), allIndices.size());
     size_t currentBatchSize = batchEnd - batchStart;
 
-    // Prepare images ONLY for this batch
+    // Calculate max proportional width for this batch (EasyOCR-style dynamic batching)
+    int maxProportionalWidth = 0;
+    for (size_t i = batchStart; i < batchEnd; i++) {
+      auto &idx = allIndices[i];
+      auto &subImage = imgListOfLists_[idx.listIdx][idx.imgIdx];
+      int propWidth = calculateProportionalWidth(subImage.image.cols, subImage.image.rows);
+      maxProportionalWidth = std::max(maxProportionalWidth, propWidth);
+    }
+    // Ensure minimum width for model stability
+    maxProportionalWidth = std::max(maxProportionalWidth, RECOGNIZER_MODEL_HEIGHT);
+
+    // Prepare images ONLY for this batch, using dynamic max width
     std::vector<cv::Mat> preparedImages;
     preparedImages.reserve(currentBatchSize);
     for (size_t i = batchStart; i < batchEnd; i++) {
       auto &idx = allIndices[i];
       auto &subImage = imgListOfLists_[idx.listIdx][idx.imgIdx];
-      cv::Mat preparedImg = alignAndCollate(subImage, 0.0);
+      cv::Mat preparedImg = alignAndCollate(subImage, maxProportionalWidth, 0.0);
       preparedImages.push_back(preparedImg);
     }
 
-    cv::Mat batchPreds = runBatchInference(preparedImages);
+    cv::Mat batchPreds = runBatchInference(preparedImages, maxProportionalWidth);
 
     // Decode results and populate SubImages for this batch
     for (size_t i = 0; i < currentBatchSize; i++) {
@@ -966,16 +992,26 @@ std::vector<InferredText> StepRecognizeText::processImgList() {
       for (size_t batchStart = 0; batchStart < lowConfidenceIndices.size(); batchStart += batchSize) {
         size_t batchEnd = std::min(batchStart + static_cast<size_t>(batchSize), lowConfidenceIndices.size());
 
+        // Calculate max proportional width for contrast batch
+        int maxProportionalWidth = 0;
+        for (size_t j = batchStart; j < batchEnd; j++) {
+          auto &idx = lowConfidenceIndices[j];
+          auto &subImage = imgListOfLists_[idx.listIdx][idx.imgIdx];
+          int propWidth = calculateProportionalWidth(subImage.image.cols, subImage.image.rows);
+          maxProportionalWidth = std::max(maxProportionalWidth, propWidth);
+        }
+        maxProportionalWidth = std::max(maxProportionalWidth, RECOGNIZER_MODEL_HEIGHT);
+
         std::vector<cv::Mat> contrastImages;
         contrastImages.reserve(batchEnd - batchStart);
         for (size_t j = batchStart; j < batchEnd; j++) {
           auto &idx = lowConfidenceIndices[j];
           auto &subImage = imgListOfLists_[idx.listIdx][idx.imgIdx];
-          cv::Mat contrastImg = alignAndCollate(subImage, TARGET_ADJUSTED_CONTRAST);
+          cv::Mat contrastImg = alignAndCollate(subImage, maxProportionalWidth, TARGET_ADJUSTED_CONTRAST);
           contrastImages.push_back(contrastImg);
         }
 
-        cv::Mat contrastPreds = runBatchInference(contrastImages);
+        cv::Mat contrastPreds = runBatchInference(contrastImages, maxProportionalWidth);
 
         for (size_t j = 0; j < contrastImages.size(); j++) {
           auto &idx = lowConfidenceIndices[batchStart + j];

@@ -106,12 +106,13 @@ struct StepRecognizeText {
   cv::Mat runInferenceOnImg(const cv::Mat &img);
 
   /**
-   * @brief runs ONNX batch inference on multiple images
+   * @brief runs ONNX batch inference on multiple images with dynamic width
    *
    * @param images : vector of prepared recognizer inputs
+   * @param dynamicWidth : width in pixels shared by every image in the batch (images are expected to be padded to this width already)
    * @return cv::Mat : the recognizer predictions with shape [batch, seq_len, num_chars]
    */
-  cv::Mat runBatchInference(const std::vector<cv::Mat> &images);
+  cv::Mat runBatchInference(const std::vector<cv::Mat> &images, int dynamicWidth);
 
   /**
    * @brief processes the sub image to run recognizer inference and populate text and confidence score

@@ -25,6 +25,7 @@ def __init__(
         language: str = "en",
         timeout: int = 600,
         batch_size: int = 50,
+        model_dir: str = "rec_dyn",
         **kwargs
     ):
         """Initialize QVAC OCR backend.
@@ -35,13 +36,15 @@ def __init__(
             language: Language code for OCR (e.g., 'en')
             timeout: Timeout in seconds for batch operations
             batch_size: Number of images to process in one batch
+            model_dir: Model directory name (e.g., 'rec_dyn' or 'rec_512')
             **kwargs: Additional arguments passed to parent
         """
         super().__init__(name="qvac", **kwargs)
         self.bare_path = bare_path
         self.language = language
         self.timeout = timeout
         self.batch_size = batch_size
+        self.model_dir = model_dir
 
         # Determine addon path
         if addon_path:
@@ -110,7 +113,8 @@ def _run_batch(self, image_paths: List[str]) -> dict:
                 str(self.batch_cli_script),
                 "--input", input_file,
                 "--output", output_file,
-                "--lang", self.language
+                "--lang", self.language,
+                "--model-dir", self.model_dir
             ]
 
             result = subprocess.run(

@@ -326,6 +326,11 @@ def print_summary(all_results: Dict[str, Dict[str, Dict[str, Any]]]) -> None:
     default=None,
     help="Filter samples by image_path containing this string (e.g., 'HierText')"
 )
+@click.option(
+    "--model-dir",
+    default="rec_dyn",
+    help="Model directory name within models/ocr/ (default: rec_dyn)"
+)
 def main(
     dataset_path: str,
     backends: str,
@@ -336,7 +341,8 @@ def main(
     limit: int,
     qvac_addon_path: Optional[str],
     gpu: bool,
-    dataset_filter: Optional[str]
+    dataset_filter: Optional[str],
+    model_dir: str
 ):
     """OCR Quality Evaluation Framework.
 
@@ -385,8 +391,10 @@ def main(
         # Create backend
         try:
             backend_kwargs = {"gpu": gpu}
-            if backend_name == "qvac" and qvac_addon_path:
-                backend_kwargs["addon_path"] = qvac_addon_path
+            if backend_name == "qvac":
+                backend_kwargs["model_dir"] = model_dir
+                if qvac_addon_path:
+                    backend_kwargs["addon_path"] = qvac_addon_path
 
             backend = create_backend(backend_name, **backend_kwargs)
             backend.initialize()

@@ -17,6 +17,7 @@ const args = process.argv.slice(2)
 let language = 'en'
 let inputFile = null
 let outputFile = null
+let modelDir = 'rec_dyn'
 
 for (let i = 0; i < args.length; i++) {
   if (args[i] === '--lang' && args[i + 1]) {
@@ -28,6 +29,9 @@ for (let i = 0; i < args.length; i++) {
   } else if (args[i] === '--output' && args[i + 1]) {
     outputFile = args[i + 1]
     i++
+  } else if (args[i] === '--model-dir' && args[i + 1]) {
+    modelDir = args[i + 1]
+    i++
   }
 }
 
@@ -95,8 +99,8 @@ async function main () {
     model = new ONNXOcr({
       params: {
         langList: [language],
-        pathDetector: './models/ocr/rec_512/detector_craft.onnx',
-        pathRecognizerPrefix: './models/ocr/rec_512/recognizer_',
+        pathDetector: `./models/ocr/${modelDir}/detector_craft.onnx`,
+        pathRecognizerPrefix: `./models/ocr/${modelDir}/recognizer_`,
         useGPU: false,
         // Match EasyOCR defaults for fair comparison
         magRatio: 1.0,

@@ -42,8 +42,8 @@ async function main () {
     const model = new ONNXOcr({
       params: {
         langList: [language],
-        pathDetector: './models/ocr/rec_512/detector_craft.onnx',
-        pathRecognizerPrefix: './models/ocr/rec_512/recognizer_',
+        pathDetector: './models/ocr/rec_dyn/detector_craft.onnx',
+        pathRecognizerPrefix: './models/ocr/rec_dyn/recognizer_',
         useGPU: false
       },
       opts: {

@@ -1,6 +1,6 @@
 {
   "name": "@qvac/ocr-onnx",
-  "version": "0.1.5",
+  "version": "0.1.6",
   "description": "OCR addon for qvac",
   "addon": true,
   "engines": {

@@ -0,0 +1,19 @@
+# QVAC OCR Addon v0.1.6 Release Notes
+
+This release introduces EasyOCR-style dynamic-width recognizer preprocessing, improving OCR accuracy by keeping text regions at full model height instead of scaling wide regions down to fit a fixed width.
+
+## Accuracy Improvements
+
+### Dynamic-Width Recognizer Preprocessing
+
+The text recognizer pipeline now uses EasyOCR-style dynamic-width resizing instead of the previous fixed-width (512px) approach. Images are resized proportionally to the model height (64px) using LANCZOS4 interpolation, and each batch uses the maximum proportional width across all images in the batch (with a minimum of 64px); narrower images are padded to that width. Aspect ratio is preserved as before, but wide text regions are no longer scaled down to fit within 512px, leading to better recognition accuracy.
+
+### Dynamic-Width Models
+
+Switched from fixed-width recognizer models (`rec_512`) to dynamic-width models (`rec_dyn`) that accept variable input widths. The English recognizer has been replaced by the Latin recognizer, which covers all Latin-script languages including English.
+
+## Benchmark Tooling
+
+### Configurable Model Directory
+
+Added `--model-dir` CLI option to the batch OCR CLI (`ocr_batch_cli.js`), evaluation script (`evaluate.py`), and QVAC OCR backend (`qvac_ocr_backend.py`), allowing easy switching between model directories for benchmarking.
@@ -19,8 +19,8 @@ set -e
 # Configuration
 REGION="${AWS_REGION:-eu-central-1}"
 BUCKET="${S3_BUCKET:-tether-ai-dev}"
-# Use rec_512 subdirectory - matches desktop workflow
-BASE_PATH="qvac_models_compiled/ocr/rec_512"
+# Use rec_dyn subdirectory - dynamic width models
+BASE_PATH="qvac_models_compiled/ocr/rec_dyn"
 
 echo "🔑 Generating presigned URLs for OCR models..."
 echo "   Region: $REGION"

@@ -63,8 +63,8 @@ test('Full OCR test suite', { timeout: 40 * 60 * 1000, skip: isMobile }, async f
 
     const onnxOcr = new ONNXOcr({
       params: {
-        pathDetector: 'models/ocr/rec_512/detector_craft.onnx',
-        pathRecognizer: `models/ocr/rec_512/recognizer_${recognizerModelName}.onnx`,
+        pathDetector: 'models/ocr/rec_dyn/detector_craft.onnx',
+        pathRecognizer: `models/ocr/rec_dyn/recognizer_${recognizerModelName}.onnx`,
         langList: testCase.langList,
         useGPU: false,
         timeout