Skip to content

Commit

Permalink
refactor punctuation
Browse files: browse the repository at this point in the history
  • Loading branch information
wq2012 committed Jul 7, 2024
1 parent a97f888 commit 8cc5c51
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 6 deletions.
17 changes: 12 additions & 5 deletions DiarizationLM/diarizationlm/utils.py
Original file line number Diff line number Diff line change
Expand Up
@@ -42,11 +42,18 @@ def normalize_text(text: str) -> str:
# Convert to lower case.
text_lower = text.lower().strip()

# Remove punctuation.
for punc in PUNCTUATIONS:
text_lower = text_lower.replace(punc, "")

return " ".join(text_lower.split())
# Remove punctuations.
words = text_lower.split()
new_words = []
for word in words:
new_word = word
for punc in PUNCTUATIONS:
replaced = new_word.replace(punc, "")
if len(replaced.split()) != 1:
continue
new_word = replaced
new_words.append(new_word)
return " ".join(new_words)


def speakers_transform(speakers: Sequence[str]) -> list[str]:
Expand Down
2 changes: 1 addition & 1 deletion DiarizationLM/setup.py
Original file line number Diff line number Diff line change
Expand Up
@@ -2,7 +2,7 @@

import setuptools

VERSION = "0.0.9"
VERSION = "0.0.10"

with open("README.md", "r") as file_object:
LONG_DESCRIPTION = file_object.read()
Expand Down

0 comments on commit 8cc5c51

Please sign in to comment.