Skip to content
This repository has been archived by the owner on Mar 13, 2023. It is now read-only.

Commit

Permalink
feat: Improved emoji matching (#712)
Browse files Browse the repository at this point in the history
* tests: implement target tests

* feat: improved emoji matching
  • Loading branch information
LordOfPolls authored Nov 4, 2022
1 parent 97281e7 commit cfd3bb1
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 3 deletions.
14 changes: 11 additions & 3 deletions naff/models/discord/emoji.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
import string
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

import attrs
Expand All @@ -21,6 +22,7 @@
__all__ = ("PartialEmoji", "CustomEmoji", "process_emoji_req_format", "process_emoji")

# Pattern for custom-emoji markup such as "<a:sparklesnek:910496037708374016>":
# an optional "<", an optional "a" flag (group 1), a ":"-delimited run of word
# characters (group 2), a run of digits (group 3) and an optional ">".
# Both the name and the digit run use "*", so either may be empty.
emoji_regex = re.compile(r"<?(a)?:(\w*):(\d*)>?")
# Matches any single character that is not a word character, whitespace or a
# comma. The match can still be an ordinary punctuation character, so callers
# need to filter the result further.
unicode_emoji_reg = re.compile(r"[^\w\s,]")


@attrs.define(eq=False, order=False, hash=False, kw_only=False)
Expand Down Expand Up @@ -77,9 +79,15 @@ def from_str(cls, emoji_str: str, *, language: str = "alias") -> Optional["Parti
_emoji_list = emoji.distinct_emoji_list(emoji_str)
if _emoji_list:
return cls(name=_emoji_list[0])
if len(emoji_str) == 1:
# likely a regional indicator
return cls(name=emoji_str)

# the emoji lib handles *most* emoji, however there are certain ones that it misses
# this acts as a fallback check
if matches := unicode_emoji_reg.search(emoji_str):
match = matches.group()

# the regex will match certain special characters, so this acts as a final failsafe
if match not in string.printable:
return cls(name=match)
return None

def __str__(self) -> str:
Expand Down
53 changes: 53 additions & 0 deletions tests/test_emoji.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import string

import emoji

from naff.models.discord.emoji import PartialEmoji, process_emoji, process_emoji_req_format

__all__ = ()
Expand Down Expand Up @@ -54,3 +58,52 @@ def test_emoji_processing() -> None:
assert str(from_str) == raw_sample

assert PartialEmoji.from_str("<a:sparklesnek:910496037708374016>").animated is True


def test_unicode_recognition() -> None:
    """Check that every entry of ``emoji.EMOJI_DATA`` is parsed by ``from_str``."""
    for known_emoji in emoji.EMOJI_DATA:
        parsed = PartialEmoji.from_str(known_emoji)
        assert parsed is not None


def test_regional_indicators() -> None:
    """Check that each standalone regional-indicator symbol is parsed by ``from_str``."""
    # One symbol per letter, A through Z, each kept as its own string.
    indicator_symbols = (
        "🇦", "🇧", "🇨", "🇩", "🇪", "🇫", "🇬",
        "🇭", "🇮", "🇯", "🇰", "🇱", "🇲", "🇳",
        "🇴", "🇵", "🇶", "🇷", "🇸", "🇹", "🇺",
        "🇻", "🇼", "🇽", "🇾", "🇿",
    )
    for symbol in indicator_symbols:
        parsed = PartialEmoji.from_str(symbol)
        assert parsed is not None


def test_numerical_emoji() -> None:
    """Check that the digit emoji 0 through 9 are parsed by ``from_str``."""
    digit_emoji = (
        "0️⃣",
        "1️⃣",
        "2️⃣",
        "3️⃣",
        "4️⃣",
        "5️⃣",
        "6️⃣",
        "7️⃣",
        "8️⃣",
        "9️⃣",
    )
    for digit in digit_emoji:
        parsed = PartialEmoji.from_str(digit)
        assert parsed is not None


def test_false_positives() -> None:
    """Check that no single character of ``string.printable`` is parsed as an emoji."""
    for plain_char in string.printable:
        parsed = PartialEmoji.from_str(plain_char)
        assert parsed is None

0 comments on commit cfd3bb1

Please sign in to comment.