From dd3d7263d3def827496b0e4fe883a093947e6d0c Mon Sep 17 00:00:00 2001 From: olyasir Date: Fri, 20 Mar 2026 14:39:28 +0200 Subject: [PATCH 1/3] fix[notask]: resolve code scanning security findings in nmtcpp and ocr-onnx Fix ReDoS vulnerabilities in indic-processor URL and numeral regexes by removing nested quantifiers. Fix ReDoS in sacremoses tokenizer protected patterns by requiring opening quotes to eliminate ambiguous backtracking. Fix incomplete string replacement in indic_normalize by using global regex for pipe character substitution. Replace insecure tempfile.mktemp with NamedTemporaryFile in ocr-onnx benchmark script. --- .../ocr-onnx/benchmarks/quality_eval/benchmark_100.py | 4 +++- .../indic-processor-deps/indicnlp/indic_normalize.js | 8 ++++---- .../indic-processor-deps/sacremoses/tokenizer.js | 4 ++-- .../qvac-lib-infer-nmtcpp/third-party/indic-processor.js | 4 ++-- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/packages/ocr-onnx/benchmarks/quality_eval/benchmark_100.py b/packages/ocr-onnx/benchmarks/quality_eval/benchmark_100.py index 0f6ce8f395..8328b6fd59 100644 --- a/packages/ocr-onnx/benchmarks/quality_eval/benchmark_100.py +++ b/packages/ocr-onnx/benchmarks/quality_eval/benchmark_100.py @@ -90,7 +90,9 @@ def run_qvac_benchmark(images): f.write(img + '\n') input_file = f.name - output_file = tempfile.mktemp(suffix='.jsonl') + output_fd = tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) + output_file = output_fd.name + output_fd.close() try: # Run QVAC batch CLI diff --git a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/indicnlp/indic_normalize.js b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/indicnlp/indic_normalize.js index bd9127aba8..222c99b445 100644 --- a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/indicnlp/indic_normalize.js +++ b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/indicnlp/indic_normalize.js @@ -427,7 +427,7 @@ class DevanagariNormalizer extends BaseNormalizer { } // replace pipe character for poorna virama - text = text.replace('\u007c', '\u0964') + text = text.replace(/\u007c/g, '\u0964') // correct visarga text = text.replace(/([ऀ-ॿ]):/, '$1\u0903') @@ -565,7 +565,7 @@ class GurmukhiNormalizer extends BaseNormalizer { text = text.replace('\u0a65', '\u0965') // replace pipe character for poorna virama - text = text.replace('\u007c', '\u0964') + text = text.replace(/\u007c/g, '\u0964') // correct visarga text = text.replace(/([਀-੿]):/, '$1\u0a03') @@ -790,9 +790,9 @@ class BengaliNormalizer extends BaseNormalizer { text = text.replace('\u09e5', '\u0965') // replace pipe character for poorna virama - text = text.replace('\u007c', '\u0964') + text = text.replace(/\u007c/g, '\u0964') // replace bengali currency numerator four for poorna virama (it looks similar and is used as a substitute) - text = text.replace('\u09f7', '\u0964') + text = text.replace(/\u09f7/g, '\u0964') // two part dependent vowels text = text.replace('\u09c7\u09be', '\u09cb') diff --git a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/sacremoses/tokenizer.js b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/sacremoses/tokenizer.js index 4eaeccec5c..4369af7a33 100644 --- a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/sacremoses/tokenizer.js +++ b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/sacremoses/tokenizer.js @@ -331,8 +331,8 @@ class MosesTokenizer { // Protected patterns this.BASIC_PROTECTED_PATTERN_1 = /<\/?\S+\/?>/ - this.BASIC_PROTECTED_PATTERN_2 = /<\S+( [a-zA-Z0-9]+="?[^"]*")+ ?\/?>/ - this.BASIC_PROTECTED_PATTERN_3 = /<\S+( [a-zA-Z0-9]+='?[^']*')+ ?\/?>/ + this.BASIC_PROTECTED_PATTERN_2 = /<\S+(?: [a-zA-Z0-9]+="[^"]*")+ ?\/?>/ + this.BASIC_PROTECTED_PATTERN_3 = /<\S+(?: [a-zA-Z0-9]+='[^']*')+ ?\/?>/ this.BASIC_PROTECTED_PATTERN_4 = /[\w\-_.]+@([\w\-_]+\.)+[a-zA-Z]{2,}/ this.BASIC_PROTECTED_PATTERN_5 = /(https?|ftp):\/\/[^:/\s]+(\/\w+)*\/[\w\-.]+/ diff --git a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js index f12ab0d84d..7873bfb4fa 100644 --- a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js +++ b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js @@ -220,9 +220,9 @@ class IndicProcessor { this._END_BRACKET_SPACE_PUNC_REGEX = /\) ([.!:?;,])/g this._URL_PATTERN = - /\b(? Date: Fri, 20 Mar 2026 15:36:49 +0200 Subject: [PATCH 2/3] fix[notask]: resolve polynomial ReDoS in numeral and other patterns Fix _NUMERAL_PATTERN by replacing ambiguous \d+\.?\d* with \d+(?:\.\d+)? to eliminate overlapping digit quantifiers. Fix _OTHER_PATTERN by bounding the prefix to {0,100} to prevent polynomial backtracking when no separator is found. --- packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js index 7873bfb4fa..e1d214560c 100644 --- a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js +++ b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js @@ -222,9 +222,9 @@ class IndicProcessor { this._URL_PATTERN = /\b(? Date: Fri, 20 Mar 2026 16:06:17 +0200 Subject: [PATCH 3/3] fix[notask]: bound regex quantifiers to eliminate polynomial ReDoS Replace unbounded \d+ with \d{1,20} and \w+ with \w{1,100} in _NUMERAL_PATTERN and _OTHER_PATTERN to make backtracking constant-time regardless of input length. No real-world numeral exceeds 20 digits and no hashtag/mention exceeds 100 chars. --- packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js index e1d214560c..a0dbc5802e 100644 --- a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js +++ b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js @@ -222,9 +222,9 @@ class IndicProcessor { this._URL_PATTERN = /\b(?