tetherto · Alok-Ranjan23 · Feb 11, 2026 · Feb 11, 2026 · Feb 11, 2026 · Feb 11, 2026
diff --git a/.github/workflows/benchmark-qvac-lib-inference-addon-onnx-ocr-fasttext.yml b/.github/workflows/benchmark-qvac-lib-inference-addon-onnx-ocr-fasttext.yml
@@ -255,8 +255,8 @@ jobs:
         if: needs.setup.outputs.qvac_needed == 'true'
         working-directory: ${{ env.PKG_DIR }}
         run: |
-          mkdir -p models/ocr/rec_512
-          aws s3 cp s3://tether-ai-dev/qvac_models_compiled/ocr/rec_512/ models/ocr/rec_512/ --recursive --exclude "*" \
+          mkdir -p models/ocr/rec_dyn
+          aws s3 cp s3://tether-ai-dev/qvac_models_compiled/ocr/rec_dyn/ models/ocr/rec_dyn/ --recursive --exclude "*" \
             --include "detector_craft.onnx" \
             --include "recognizer_latin.onnx"
           echo "Downloaded QVAC OCR models:"

diff --git a/.github/workflows/create-github-release-qvac-lib-inference-addon-onnx-ocr-fasttext.yml b/.github/workflows/create-github-release-qvac-lib-inference-addon-onnx-ocr-fasttext.yml
@@ -55,7 +55,7 @@ jobs:
         if: steps.version.outputs.bumped == 'true'
         uses: softprops/action-gh-release@v2
         with:
-          tag_name: v${{ steps.version.outputs.current }}
+          tag_name: ocr-onnx-v${{ steps.version.outputs.current }}
           name: QVAC OCR Addon v${{ steps.version.outputs.current }}
           body_path: ${{ inputs.workdir }}/release-notes/v${{ steps.version.outputs.current }}.md
         env:

diff --git a/.github/workflows/integration-test-qvac-lib-inference-addon-onnx-ocr-fasttext.yml b/.github/workflows/integration-test-qvac-lib-inference-addon-onnx-ocr-fasttext.yml
@@ -151,8 +151,8 @@ jobs:
         working-directory: ${{ inputs.workdir || env.PKG_DIR }}
         shell: bash
         run: |
-          mkdir -p models/ocr/rec_512
-          aws s3 cp s3://tether-ai-dev/qvac_models_compiled/ocr/rec_512/ models/ocr/rec_512/ --recursive
+          mkdir -p models/ocr/rec_dyn
+          aws s3 cp s3://tether-ai-dev/qvac_models_compiled/ocr/rec_dyn/ models/ocr/rec_dyn/ --recursive
 
       - name: Run integration test
         working-directory: ${{ inputs.workdir || env.PKG_DIR }}

diff --git a/.github/workflows/prebuilds-qvac-lib-infer-nmtcpp.yml b/.github/workflows/prebuilds-qvac-lib-infer-nmtcpp.yml
@@ -56,19 +56,14 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - os: ubuntu-24.04
-            platform: linux
-            arch: x64
-          - os: ubuntu-24.04-arm64-private
-            platform: linux
-            arch: arm64
           # Linux builds on Ubuntu 22.04 for glibc compatibility
           - os: ubuntu-22.04
             platform: linux
             arch: x64
           - os: ubuntu-22.04-arm
             platform: linux
             arch: arm64
+          # Android build requires Ubuntu 24.04 for NDK tooling
           - os: ubuntu-24.04
             platform: android
             arch: arm64
@@ -116,7 +111,7 @@ jobs:
           echo "Matrix tags: ${{ matrix.tags }}"
           echo "PKG_DIR: ${{ env.PKG_DIR }}"
 
-      - if: ${{ matrix.os == 'ubuntu-24.04' || matrix.os == 'ubuntu-24.04-arm64-private' || matrix.os == 'ubuntu-22.04' || matrix.os == 'ubuntu-22.04-arm' }}
+      - if: ${{ matrix.os == 'ubuntu-22.04' || matrix.os == 'ubuntu-22.04-arm' || matrix.os == 'ubuntu-24.04' }}
         name: Update c++ tools
         run: |
           wget https://apt.llvm.org/llvm.sh
@@ -236,7 +231,7 @@ jobs:
           echo "VCPKG_ROOT=$VCPKG_ROOT" >> $GITHUB_ENV
           echo "$VCPKG_ROOT" >> $GITHUB_PATH
 
-      - if: ${{ matrix.os == 'ubuntu-24.04' || matrix.os == 'ubuntu-24.04-arm64-private' || matrix.os == 'ubuntu-22.04' || matrix.os == 'ubuntu-22.04-arm' }}
+      - if: ${{ matrix.os == 'ubuntu-22.04' || matrix.os == 'ubuntu-22.04-arm' || matrix.os == 'ubuntu-24.04' }}
         name: Configure vcpkg in linux
         run: echo "VCPKG_ROOT=$VCPKG_INSTALLATION_ROOT" >> $GITHUB_ENV
 
@@ -268,15 +263,10 @@ jobs:
           env | sort
         continue-on-error: true
 
-      - if: ${{ matrix.os == 'ubuntu-24.04' || matrix.os == 'ubuntu-24.04-arm64-private' || matrix.os == 'ubuntu-22.04' || matrix.os == 'ubuntu-22.04-arm' }}
+      - if: ${{ matrix.os == 'ubuntu-22.04' || matrix.os == 'ubuntu-22.04-arm' || matrix.os == 'ubuntu-24.04' }}
         name: Update apt sources
         run: sudo apt-get update
 
-      - if: ${{ matrix.os == 'ubuntu-24.04' && matrix.arch == 'arm64' }}
-        name: Install tooling for cross compilation - ubuntu arm64
-        run: |
-          sudo apt-get install gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc6-dev-arm64-cross lld
-
       - if: ${{ matrix.os == 'windows-2022' }}
         name: Configure cmake generator in windows
         run: echo "CMAKE_GENERATOR=Visual Studio 17 2022" >> $env:GITHUB_ENV
@@ -290,20 +280,8 @@ jobs:
       - if: ${{ matrix.os == 'windows-2022' && matrix.arch == 'arm64' }}
         run: echo "CMAKE_GENERATOR_PLATFORM=ARM64" >> $env:GITHUB_ENV
 
-      - if: ${{ matrix.os == 'ubuntu-24.04' }}
-        name: Install vulkan in linux x64
-        run: |
-          wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
-          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list http://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list
-          sudo apt update
-          sudo apt install -y vulkan-sdk
-
-      - if: ${{ matrix.os == 'ubuntu-24.04-arm64-private' }}
-        name: Build Vulkan SDK for linux arm64 (with S3 cache)
-        env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          AWS_DEFAULT_REGION: us-east-1
+      - if: ${{ matrix.os == 'ubuntu-22.04' || matrix.os == 'ubuntu-22.04-arm' }}
+        name: Download Vulkan SDK for ubuntu-22.04
         run: |
           sudo apt install -y xz-utils
           wget -q -O /tmp/vulkansdk.tar.xz https://sdk.lunarg.com/sdk/download/latest/linux/vulkan_sdk.tar.xz
@@ -312,34 +290,20 @@ jobs:
           cd vulkan
           tar xf /tmp/vulkansdk.tar.xz --strip-components=1
 
-          # Extract SDK major.minor version from README.txt (e.g., "1.4" from "1.4.341.0")
-          SDK_VERSION=$(grep -o 'sdk/[0-9]*\.[0-9]*' README.txt | head -1 | sed 's|sdk/||')
-          S3_BUCKET="tether-ai-dev"
-          S3_KEY="vulkan-sdk-cache/linux-arm64-${SDK_VERSION}.tar.gz"
-
-          echo "Vulkan SDK version: ${SDK_VERSION}"
-
-          # Try to download cached build from S3
-          if aws s3 cp "s3://${S3_BUCKET}/${S3_KEY}" /tmp/vulkan-arm64-cache.tar.gz 2>/dev/null; then
-            echo "Found cached Vulkan SDK, extracting..."
-            tar xzf /tmp/vulkan-arm64-cache.tar.gz -C ~/vulkan
-            rm /tmp/vulkan-arm64-cache.tar.gz
-          else
-            echo "No cache found, building Vulkan SDK for ARM64..."
-            ./vulkansdk --maxjobs
-
-            # Upload the compiled SDK to S3 for future runs
-            echo "Uploading compiled SDK to S3..."
-            tar czf /tmp/vulkan-arm64-cache.tar.gz aarch64
-            aws s3 cp /tmp/vulkan-arm64-cache.tar.gz "s3://${S3_BUCKET}/${S3_KEY}"
-            rm /tmp/vulkan-arm64-cache.tar.gz
-          fi
+      - if: ${{ matrix.os == 'ubuntu-22.04' }}
+        name: Setup Vulkan SDK path for ubuntu-22.04 x64
+        run: |
+          # Export the x86_64 SDK root to later steps through the job environment file.
+          # Assumes the download step unpacked the SDK tarball under ~/vulkan.
+          # NOTE(review): unlike the arm64 step, PATH and LD_LIBRARY_PATH are not
+          # extended here; confirm nothing downstream needs the SDK tools on PATH.
+          echo "VULKAN_SDK=$HOME/vulkan/x86_64" >> "$GITHUB_ENV"
 
-      - if: ${{ matrix.os == 'ubuntu-24.04-arm64-private' }}
-        name: Setup Vulkan SDK environment for linux arm64
+      - if: ${{ matrix.os == 'ubuntu-22.04-arm' }}
+        name: Build Vulkan SDK for ubuntu-22.04 arm64
         run: |
           VULKAN_SDK=~/vulkan/aarch64
           echo "VULKAN_SDK=$VULKAN_SDK" >> $GITHUB_ENV
+          cd ~/vulkan
+          ./vulkansdk --maxjobs
+
           echo "PATH=$VULKAN_SDK/bin:$PATH" >> $GITHUB_ENV
           echo "LD_LIBRARY_PATH=$VULKAN_SDK/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> $GITHUB_ENV
           echo "VK_ADD_LAYER_PATH=$VULKAN_SDK/share/vulkan/explicit_layer.d" >> $GITHUB_ENV

diff --git a/packages/qvac-lib-inference-addon-onnx-ocr-fasttext/CHANGELOG.md b/packages/qvac-lib-inference-addon-onnx-ocr-fasttext/CHANGELOG.md
@@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+# [0.1.6] - 2026-02-09
+
+### Changed
+
+- Replaced fixed-width recognizer preprocessing with EasyOCR-style dynamic-width resizing for improved OCR accuracy. Each crop is now scaled to the model height with a proportional width (LANCZOS4 interpolation) and padded to the widest crop in its batch, instead of being fitted into a fixed 512px-wide input.
+- Switched to dynamic-width recognizer models (`rec_dyn`). Batch inference now uses per-batch proportional width instead of fixed `RECOGNIZER_MODEL_WIDTH`.
+- Updated default model path from `rec_512` to `rec_dyn` across tests, benchmarks, and scripts.
+- Replaced English recognizer with Latin recognizer in unit tests (`recognizer_english.onnx` → `recognizer_latin.onnx`).
+- Added `--model-dir` CLI option to batch OCR CLI, evaluate script, and QVAC OCR backend for configurable model directory.
+
+### Fixed
+
+- Improved Portuguese OCR accuracy (minor punctuation corrections in test expected outputs).
+
 # [0.1.2] - 2026-01-16
 
 ### Changed

diff --git a/packages/qvac-lib-inference-addon-onnx-ocr-fasttext/addon/pipeline/StepRecognizeText.cpp b/packages/qvac-lib-inference-addon-onnx-ocr-fasttext/addon/pipeline/StepRecognizeText.cpp
@@ -262,18 +262,35 @@ cv::Mat normalizeAndPad(const cv::Mat &img, int channels, int height, int maxWid
 }
 
 /**
- * @brief resizes the image to fit recognizer input sizes
+ * @brief calculates the proportional width for EasyOCR-style resizing
  *
- * The image is not simply resized to recognizer input format. After height is adjusted, the portion corresponding to [new image width,
- * recognizerImageWidth] is padded with the last column of the image
+ * Always scales height to RECOGNIZER_MODEL_HEIGHT, width is proportional to aspect ratio.
+ * This matches EasyOCR's preprocessing approach.
+ *
+ * @param width : original image width
+ * @param height : original image height
+ * @return int : the proportional width after resizing to model height
+ */
+int calculateProportionalWidth(int width, int height) {
+  // An empty crop (zero rows) would make the ratio inf or NaN, and converting that to
+  // int is undefined behaviour. Report the 1-pixel minimum instead of dividing.
+  // Non-positive widths already collapsed to 1 below; they are short-circuited here too.
+  if (width <= 0 || height <= 0) {
+    return 1;
+  }
+
+  // Width that preserves the aspect ratio once the height becomes
+  // RECOGNIZER_MODEL_HEIGHT, rounded up to a whole pixel.
+  const float ratio = static_cast<float>(width) / static_cast<float>(height);
+  const int newWidth = static_cast<int>(std::ceil(RECOGNIZER_MODEL_HEIGHT * ratio));
+  return std::max(1, newWidth);  // Ensure at least 1 pixel width
+}
+
+/**
+ * @brief resizes the image to fit recognizer input sizes (EasyOCR-style)
+ *
+ * Always scales height to RECOGNIZER_MODEL_HEIGHT (64), width is proportional.
+ * The image is then padded to targetWidth for batching.
  *
  * It also receives contrast treatment according to adjustContrast
  *
  * @param subImage : image to be treated
+ * @param targetWidth : target width for padding (typically max width in batch)
  * @param adjustContrast : target contrast
  * @return adjusted image
  */
-cv::Mat alignAndCollate(const SubImage &subImage, double adjustContrast = 0.0) {
+cv::Mat alignAndCollate(const SubImage &subImage, int targetWidth, double adjustContrast = 0.0) {
   cv::Mat image = subImage.image;
   int width = image.cols;
   int height = image.rows;
@@ -285,25 +302,21 @@ cv::Mat alignAndCollate(const SubImage &subImage, double adjustContrast = 0.0) {
     image = adjustContrastGrey(image, adjustContrast);
   }
 
-  // Aspect-preserving resize (ExecutorTorch approach)
-  // Scale both dimensions by the same ratio to fit within RECOGNIZER_MODEL_WIDTH x RECOGNIZER_MODEL_HEIGHT
-  float heightRatio = static_cast<float>(RECOGNIZER_MODEL_HEIGHT) / static_cast<float>(height);
-  float widthRatio = static_cast<float>(RECOGNIZER_MODEL_WIDTH) / static_cast<float>(width);
-  float resizeRatio = std::min(heightRatio, widthRatio);
+  // EasyOCR-style resize: always scale height to model height, width proportional
+  int proportionalWidth = calculateProportionalWidth(width, height);
 
-  int resizedW = static_cast<int>(std::round(static_cast<float>(width) * resizeRatio));
-  int resizedH = static_cast<int>(std::round(static_cast<float>(height) * resizeRatio));
-
-  // Clamp to model dimensions
-  resizedW = std::min(resizedW, RECOGNIZER_MODEL_WIDTH);
-  resizedH = std::min(resizedH, RECOGNIZER_MODEL_HEIGHT);
+  // Use LANCZOS interpolation like EasyOCR
+  cv::Mat resizedImage;
+  cv::resize(image, resizedImage, cv::Size(proportionalWidth, RECOGNIZER_MODEL_HEIGHT), 0, 0, cv::INTER_LANCZOS4);
 
-  // Use INTER_AREA for downscaling, INTER_CUBIC for upscaling
-  int interpolation = (resizeRatio < 1.0F) ? cv::INTER_AREA : cv::INTER_CUBIC;
+  return normalizeAndPad(resizedImage, 1 /*grayscale*/, RECOGNIZER_MODEL_HEIGHT, targetWidth);
+}
 
-  cv::Mat resizedImage;
-  cv::resize(image, resizedImage, cv::Size(resizedW, resizedH), 0, 0, interpolation);
-  return normalizeAndPad(resizedImage, 1 /*grayscale*/, RECOGNIZER_MODEL_HEIGHT, RECOGNIZER_MODEL_WIDTH);
+/**
+ * @brief fixed-width overload kept for callers that predate dynamic-width batching
+ *
+ * Forwards to the targetWidth overload, using RECOGNIZER_MODEL_WIDTH as the target width.
+ * Overload resolution selects this version when the second argument is a floating-point
+ * contrast value (or is omitted); pass an int width to reach the dynamic-width overload.
+ *
+ * @param subImage : image to be treated
+ * @param adjustContrast : target contrast
+ * @return adjusted image
+ */
+cv::Mat alignAndCollate(const SubImage &subImage, double adjustContrast = 0.0) {
+  const int fixedWidth = RECOGNIZER_MODEL_WIDTH;
+  return alignAndCollate(subImage, fixedWidth, adjustContrast);
 }
 
 /**
@@ -799,19 +812,21 @@ cv::Mat StepRecognizeText::runInferenceOnImg(const cv::Mat &img) {
   return preds.clone();
 }
 
-cv::Mat StepRecognizeText::runBatchInference(const std::vector<cv::Mat> &images) {
+cv::Mat StepRecognizeText::runBatchInference(const std::vector<cv::Mat> &images, int dynamicWidth) {
   auto t0 = std::chrono::high_resolution_clock::now();
   if (images.empty()) {
     return cv::Mat();
   }
 
   const int batchSize = static_cast<int>(images.size());
-  QLOG(qvac_lib_inference_addon_cpp::logger::Priority::DEBUG,
-       "[Recognition] runBatchInference called with batch_size=" + std::to_string(batchSize));
   const int height = RECOGNIZER_MODEL_HEIGHT;
-  const int width = RECOGNIZER_MODEL_WIDTH;
+  const int width = dynamicWidth;
   const int numChannels = 1;
 
+  QLOG(qvac_lib_inference_addon_cpp::logger::Priority::DEBUG,
+       "[Recognition] runBatchInference called with batch_size=" + std::to_string(batchSize) +
+       ", dynamic_width=" + std::to_string(width));
+
   // Create batch tensor: [batch, channels, height, width]
   std::vector<float> batchData(batchSize * numChannels * height * width);
 
@@ -918,17 +933,28 @@ std::vector<InferredText> StepRecognizeText::processImgList() {
     size_t batchEnd = std::min(batchStart + static_cast<size_t>(batchSize), allIndices.size());
     size_t currentBatchSize = batchEnd - batchStart;
 
-    // Prepare images ONLY for this batch
+    // Calculate max proportional width for this batch (EasyOCR-style dynamic batching)
+    int maxProportionalWidth = 0;
+    for (size_t i = batchStart; i < batchEnd; i++) {
+      auto &idx = allIndices[i];
+      auto &subImage = imgListOfLists_[idx.listIdx][idx.imgIdx];
+      int propWidth = calculateProportionalWidth(subImage.image.cols, subImage.image.rows);
+      maxProportionalWidth = std::max(maxProportionalWidth, propWidth);
+    }
+    // Ensure minimum width for model stability
+    maxProportionalWidth = std::max(maxProportionalWidth, RECOGNIZER_MODEL_HEIGHT);
+
+    // Prepare images ONLY for this batch, using dynamic max width
     std::vector<cv::Mat> preparedImages;
     preparedImages.reserve(currentBatchSize);
     for (size_t i = batchStart; i < batchEnd; i++) {
       auto &idx = allIndices[i];
       auto &subImage = imgListOfLists_[idx.listIdx][idx.imgIdx];
-      cv::Mat preparedImg = alignAndCollate(subImage, 0.0);
+      cv::Mat preparedImg = alignAndCollate(subImage, maxProportionalWidth, 0.0);
       preparedImages.push_back(preparedImg);
     }
 
-    cv::Mat batchPreds = runBatchInference(preparedImages);
+    cv::Mat batchPreds = runBatchInference(preparedImages, maxProportionalWidth);
 
     // Decode results and populate SubImages for this batch
     for (size_t i = 0; i < currentBatchSize; i++) {
@@ -966,16 +992,26 @@ std::vector<InferredText> StepRecognizeText::processImgList() {
       for (size_t batchStart = 0; batchStart < lowConfidenceIndices.size(); batchStart += batchSize) {
         size_t batchEnd = std::min(batchStart + static_cast<size_t>(batchSize), lowConfidenceIndices.size());
 
+        // Calculate max proportional width for contrast batch
+        int maxProportionalWidth = 0;
+        for (size_t j = batchStart; j < batchEnd; j++) {
+          auto &idx = lowConfidenceIndices[j];
+          auto &subImage = imgListOfLists_[idx.listIdx][idx.imgIdx];
+          int propWidth = calculateProportionalWidth(subImage.image.cols, subImage.image.rows);
+          maxProportionalWidth = std::max(maxProportionalWidth, propWidth);
+        }
+        maxProportionalWidth = std::max(maxProportionalWidth, RECOGNIZER_MODEL_HEIGHT);
+
         std::vector<cv::Mat> contrastImages;
         contrastImages.reserve(batchEnd - batchStart);
         for (size_t j = batchStart; j < batchEnd; j++) {
           auto &idx = lowConfidenceIndices[j];
           auto &subImage = imgListOfLists_[idx.listIdx][idx.imgIdx];
-          cv::Mat contrastImg = alignAndCollate(subImage, TARGET_ADJUSTED_CONTRAST);
+          cv::Mat contrastImg = alignAndCollate(subImage, maxProportionalWidth, TARGET_ADJUSTED_CONTRAST);
           contrastImages.push_back(contrastImg);
         }
 
-        cv::Mat contrastPreds = runBatchInference(contrastImages);
+        cv::Mat contrastPreds = runBatchInference(contrastImages, maxProportionalWidth);
 
         for (size_t j = 0; j < contrastImages.size(); j++) {
           auto &idx = lowConfidenceIndices[batchStart + j];

diff --git a/packages/qvac-lib-inference-addon-onnx-ocr-fasttext/addon/pipeline/StepRecognizeText.hpp b/packages/qvac-lib-inference-addon-onnx-ocr-fasttext/addon/pipeline/StepRecognizeText.hpp
@@ -106,12 +106,13 @@ struct StepRecognizeText {
   cv::Mat runInferenceOnImg(const cv::Mat &img);
 
   /**
-   * @brief runs ONNX batch inference on multiple images
+   * @brief runs ONNX batch inference on multiple images with dynamic width
    *
    * @param images : vector of prepared recognizer inputs
+   * @param dynamicWidth : the width of input images (for dynamic-width models)
    * @return cv::Mat : the recognizer predictions with shape [batch, seq_len, num_chars]
    */
-  cv::Mat runBatchInference(const std::vector<cv::Mat> &images);
+  cv::Mat runBatchInference(const std::vector<cv::Mat> &images, int dynamicWidth);
 
   /**
    * @brief processes the sub image to run recognizer inference and populate text and confidence score

diff --git a/...ib-inference-addon-onnx-ocr-fasttext/benchmarks/quality_eval/backends/qvac_ocr_backend.py b/...ib-inference-addon-onnx-ocr-fasttext/benchmarks/quality_eval/backends/qvac_ocr_backend.py
@@ -25,6 +25,7 @@ def __init__(
         language: str = "en",
         timeout: int = 600,
         batch_size: int = 50,
+        model_dir: str = "rec_dyn",
         **kwargs
     ):
         """Initialize QVAC OCR backend.
@@ -35,13 +36,15 @@ def __init__(
             language: Language code for OCR (e.g., 'en')
             timeout: Timeout in seconds for batch operations
             batch_size: Number of images to process in one batch
+            model_dir: Model directory name (e.g., 'rec_dyn' or 'rec_512')
             **kwargs: Additional arguments passed to parent
         """
         super().__init__(name="qvac", **kwargs)
         self.bare_path = bare_path
         self.language = language
         self.timeout = timeout
         self.batch_size = batch_size
+        self.model_dir = model_dir
 
         # Determine addon path
         if addon_path:
@@ -110,7 +113,8 @@ def _run_batch(self, image_paths: List[str]) -> dict:
                 str(self.batch_cli_script),
                 "--input", input_file,
                 "--output", output_file,
-                "--lang", self.language
+                "--lang", self.language,
+                "--model-dir", self.model_dir
             ]
 
             result = subprocess.run(