Skip to content
This repository has been archived by the owner on Mar 13, 2023. It is now read-only.

Commit

Permalink
feat: filter more kinds of non-emoji unicode characters (#720)
Browse files Browse the repository at this point in the history
  • Loading branch information
LordOfPolls committed Nov 13, 2022
1 parent 545f64c commit 6f3d6d4
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 2 deletions.
6 changes: 4 additions & 2 deletions naff/models/discord/emoji.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
import string
import unicodedata
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

import attrs
Expand All @@ -22,7 +23,7 @@
__all__ = ("PartialEmoji", "CustomEmoji", "process_emoji_req_format", "process_emoji")

emoji_regex = re.compile(r"<?(a)?:(\w*):(\d*)>?")
unicode_emoji_reg = re.compile(r"[^\w\s,]")
unicode_emoji_reg = re.compile(r"[^\w\s,’‘“”…–—•◦‣⁃⁎⁏⁒⁓⁺⁻⁼⁽⁾ⁿ₊₋₌₍₎]")


@attrs.define(eq=False, order=False, hash=False, kw_only=False)
Expand Down Expand Up @@ -87,7 +88,8 @@ def from_str(cls, emoji_str: str, *, language: str = "alias") -> Optional["Parti

# the regex will match certain special characters, so this acts as a final failsafe
if match not in string.printable:
return cls(name=match)
if unicodedata.category(match) == "So":
return cls(name=match)
return None

def __str__(self) -> str:
Expand Down
31 changes: 31 additions & 0 deletions tests/test_emoji.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,34 @@ def test_numerical_emoji() -> None:
def test_false_positives() -> None:
    """Check that non-emoji characters are never parsed as a PartialEmoji.

    Every character in ``string.printable`` is tried first, followed by a
    hand-picked set of non-ASCII typographic characters (quotes, dashes,
    bullets, superscripts and subscripts). ``PartialEmoji.from_str`` must
    return ``None`` for each of them.
    """
    # Non-ASCII characters that must not be treated as emoji.
    non_emoji_symbols = (
        "’",
        "‘",
        "“",
        "”",
        "…",
        "–",
        "—",
        "•",
        "◦",
        "‣",
        "⁃",
        "⁎",
        "⁏",
        "⁒",
        "⁓",
        "⁺",
        "⁻",
        "⁼",
        "⁽",
        "⁾",
        "ⁿ",
        "₊",
        "₋",
        "₌",
        "₍",
        "₎",
    )

    # Printable ASCII first, then the extra symbols: same order as two loops.
    for candidate in (*string.printable, *non_emoji_symbols):
        assert PartialEmoji.from_str(candidate) is None

0 comments on commit 6f3d6d4

Please sign in to comment.