From 99982714ea881f0292df53c0db5b21f50d7e146c Mon Sep 17 00:00:00 2001
From: gicraveiro
Date: Sun, 6 Feb 2022 22:04:11 +0100
Subject: [PATCH] removed one comment from utils

---
 utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils.py b/utils.py
index b56d7d3..e16da30 100644
--- a/utils.py
+++ b/utils.py
@@ -56,7 +56,7 @@ def clean_corpus(corpus):
     corpus = re.sub("(\s+\-)", r" - ", corpus)
     corpus = re.sub("([a-zA-Z]+)([0-9]+)", r"\1 \2", corpus)
     corpus = re.sub("([0-9]+)([a-zA-Z]+)", r"\1 \2", corpus)
-    corpus = re.sub("([()!,;:\.\?\[\]\|])", r" \1 ", corpus) # including : afterwards didnt change result
+    corpus = re.sub("([()!,;:\.\?\[\]\|])", r" \1 ", corpus)
     corpus = re.sub(" +", " ", corpus)
     return corpus
 