diff --git a/packages/ocr-onnx/benchmarks/quality_eval/benchmark_100.py b/packages/ocr-onnx/benchmarks/quality_eval/benchmark_100.py index 0f6ce8f395..8328b6fd59 100644 --- a/packages/ocr-onnx/benchmarks/quality_eval/benchmark_100.py +++ b/packages/ocr-onnx/benchmarks/quality_eval/benchmark_100.py @@ -90,7 +90,9 @@ def run_qvac_benchmark(images): f.write(img + '\n') input_file = f.name - output_file = tempfile.mktemp(suffix='.jsonl') + output_fd = tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) + output_file = output_fd.name + output_fd.close() try: # Run QVAC batch CLI diff --git a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/indicnlp/indic_normalize.js b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/indicnlp/indic_normalize.js index bd9127aba8..222c99b445 100644 --- a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/indicnlp/indic_normalize.js +++ b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/indicnlp/indic_normalize.js @@ -427,7 +427,7 @@ class DevanagariNormalizer extends BaseNormalizer { } // replace pipe character for poorna virama - text = text.replace('\u007c', '\u0964') + text = text.replace(/\u007c/g, '\u0964') // correct visarga text = text.replace(/([ऀ-ॿ]):/, '$1\u0903') @@ -565,7 +565,7 @@ class GurmukhiNormalizer extends BaseNormalizer { text = text.replace('\u0a65', '\u0965') // replace pipe character for poorna virama - text = text.replace('\u007c', '\u0964') + text = text.replace(/\u007c/g, '\u0964') // correct visarga text = text.replace(/([਀-੿]):/, '$1\u0a03') @@ -790,9 +790,9 @@ class BengaliNormalizer extends BaseNormalizer { text = text.replace('\u09e5', '\u0965') // replace pipe character for poorna virama - text = text.replace('\u007c', '\u0964') + text = text.replace(/\u007c/g, '\u0964') // replace bengali currency numerator four for poorna virama (it looks similar and is used as a substitute) - text = text.replace('\u09f7', '\u0964') + text = text.replace(/\u09f7/g, '\u0964') // two part dependent vowels text = text.replace('\u09c7\u09be', '\u09cb') diff --git a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/sacremoses/tokenizer.js b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/sacremoses/tokenizer.js index 4eaeccec5c..4369af7a33 100644 --- a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/sacremoses/tokenizer.js +++ b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor-deps/sacremoses/tokenizer.js @@ -331,8 +331,8 @@ class MosesTokenizer { // Protected patterns this.BASIC_PROTECTED_PATTERN_1 = /<\/?\S+\/?>/ - this.BASIC_PROTECTED_PATTERN_2 = /<\S+( [a-zA-Z0-9]+="?[^"]*")+ ?\/?>/ - this.BASIC_PROTECTED_PATTERN_3 = /<\S+( [a-zA-Z0-9]+='?[^']*')+ ?\/?>/ + this.BASIC_PROTECTED_PATTERN_2 = /<\S+(?: [a-zA-Z0-9]+="[^"]*")+ ?\/?>/ + this.BASIC_PROTECTED_PATTERN_3 = /<\S+(?: [a-zA-Z0-9]+='[^']*')+ ?\/?>/ this.BASIC_PROTECTED_PATTERN_4 = /[\w\-_.]+@([\w\-_]+\.)+[a-zA-Z]{2,}/ this.BASIC_PROTECTED_PATTERN_5 = /(https?|ftp):\/\/[^:/\s]+(\/\w+)*\/[\w\-.]+/ diff --git a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js index f12ab0d84d..a0dbc5802e 100644 --- a/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js +++ b/packages/qvac-lib-infer-nmtcpp/third-party/indic-processor.js @@ -220,11 +220,11 @@ class IndicProcessor { this._END_BRACKET_SPACE_PUNC_REGEX = /\) ([.!:?;,])/g this._URL_PATTERN = - /\b(?