From 50d97ba1f1dc9e7a5efc900db79a7bcf2a3b42c9 Mon Sep 17 00:00:00 2001
From: Alessandro Ristori <81309500+RistoAle97@users.noreply.github.com>
Date: Fri, 12 May 2023 17:44:27 +0200
Subject: [PATCH] Fixed _tokenizer_13a

The behaviour is the same as the original implementation now
---
 src/torchmetrics/functional/text/sacre_bleu.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/torchmetrics/functional/text/sacre_bleu.py b/src/torchmetrics/functional/text/sacre_bleu.py
index cb3a45b94f1..2f489887697 100644
--- a/src/torchmetrics/functional/text/sacre_bleu.py
+++ b/src/torchmetrics/functional/text/sacre_bleu.py
@@ -174,7 +174,7 @@ def _tokenize_base(cls, line: str) -> str:
 
     @classmethod
     def _tokenize_13a(cls, line: str) -> str:
-        """Tokenizes an line using a relatively minimal tokenization that is equivalent to mteval-v13a, used by WMT.
+        """Tokenizes a line using a relatively minimal tokenization that is equivalent to mteval-v13a, used by WMT.
 
         Args:
             line: input sentence
@@ -193,7 +193,7 @@ def _tokenize_13a(cls, line: str) -> str:
             line = line.replace("&lt;", "<")
             line = line.replace("&gt;", ">")
 
-        return cls._tokenize_regex(line)
+        return cls._tokenize_regex(f" {line} ")
 
     @classmethod
     def _tokenize_zh(cls, line: str) -> str: