Skip to content

Commit

Permalink
Move get_emoji_unicode_dict(), get_aliases_unicode_dict() to testutils.py
Browse files Browse the repository at this point in the history
  • Loading branch information
cvzi committed May 16, 2024
1 parent 0a3f747 commit 42c27f5
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 45 deletions.
52 changes: 14 additions & 38 deletions emoji/unicode_codes/__init__.py
Original file line number Diff line number Diff line change
@@ -1,57 +1,33 @@
from typing import Any, Dict, Optional
from typing import Optional
from functools import lru_cache
from emoji.unicode_codes.data_dict import *
from emoji.unicode_codes.data_dict import EMOJI_DATA, STATUS, LANGUAGES

__all__ = [
'get_emoji_by_name', 'get_emoji_unicode_dict', 'get_aliases_unicode_dict',
'get_emoji_by_name',
'EMOJI_DATA', 'STATUS', 'LANGUAGES'
]


_EMOJI_UNICODE: Dict[str, Any] = {lang: None for lang in LANGUAGES} # Cache for the language dicts

_ALIASES_UNICODE: Dict[str, str] = {} # Cache for the aliases dict


@lru_cache(maxsize=4000)
def get_emoji_by_name(name: str, lang: str) -> Optional[str]:
"""Find emoji in a specific language or return None if not found"""
def get_emoji_by_name(name: str, language: str) -> Optional[str]:
"""
Find emoji by short-name in a specific language.
Returns None if not found
:param name: emoji short code e.g. ":banana:"
:param language: language-code e.g. 'es', 'de', etc. or 'alias'
"""

fully_qualified = STATUS['fully_qualified']

if lang == 'alias':
if language == 'alias':
for emj, data in EMOJI_DATA.items():
if name in data.get('alias', []) and data['status'] <= fully_qualified:
return emj
lang = 'en'
language = 'en'

for emj, data in EMOJI_DATA.items():
if data.get(lang) == name and data['status'] <= fully_qualified:
if data.get(language) == name and data['status'] <= fully_qualified:
return emj

return None


def get_emoji_unicode_dict(lang: str) -> Dict[str, Any]:
    """Return the name -> emoji mapping for the language ``lang``.

    The mapping contains every emoji that has a name in ``lang`` and whose
    status is at most ``STATUS['fully_qualified']`` (fully-qualified and
    component emoji). It is built on the first call for a language and
    afterwards served from ``_EMOJI_UNICODE[lang]``.
    """
    cached = _EMOJI_UNICODE[lang]
    if cached is not None:
        return cached

    names_to_emoji: Dict[str, Any] = {}
    for emj, data in EMOJI_DATA.items():
        if lang in data and data['status'] <= STATUS['fully_qualified']:
            names_to_emoji[data[lang]] = emj

    _EMOJI_UNICODE[lang] = names_to_emoji
    return names_to_emoji


def get_aliases_unicode_dict() -> Dict[str, str]:
    """Return the mapping of English names and aliases to emoji.

    Starts from the English name mapping and adds every alias of each emoji
    whose status is at most ``STATUS['fully_qualified']`` (fully-qualified
    and component emoji). The mapping is built on the first call and
    afterwards served from ``_ALIASES_UNICODE``.
    """
    if _ALIASES_UNICODE:
        return _ALIASES_UNICODE

    _ALIASES_UNICODE.update(get_emoji_unicode_dict('en'))
    for emj, data in EMOJI_DATA.items():
        if 'alias' not in data:
            continue
        if data['status'] <= STATUS['fully_qualified']:
            for short_name in data['alias']:
                _ALIASES_UNICODE[short_name] = emj

    return _ALIASES_UNICODE
4 changes: 2 additions & 2 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing_extensions import Literal
import pytest
import emoji.unicode_codes
from testutils import ascii, normalize, all_language_packs, all_language_and_alias_packs
from testutils import ascii, normalize, all_language_packs, all_language_and_alias_packs, get_emoji_unicode_dict


def test_emojize_name_only():
Expand Down Expand Up @@ -108,7 +108,7 @@ def test_emojize_variant():
def remove_variant(s: str) -> str:
return re.sub('[\ufe0e\ufe0f]$', '', s)

english_pack = emoji.unicode_codes.get_emoji_unicode_dict('en')
english_pack = get_emoji_unicode_dict('en')

assert emoji.emojize(
':Taurus:', variant=None) == english_pack[':Taurus:']
Expand Down
19 changes: 16 additions & 3 deletions tests/test_unicode_codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import Set
import emoji.unicode_codes
from testutils import get_language_packs
from testutils import get_language_packs, get_aliases_unicode_dict, get_emoji_unicode_dict


def test_emoji_english_names():
Expand All @@ -17,8 +17,8 @@ def test_compare_normal_and_aliases():
# There should always be more aliases than normal codes
# since the aliases contain the normal codes

english_pack = emoji.unicode_codes.get_emoji_unicode_dict('en')
alias_pack = emoji.unicode_codes.get_aliases_unicode_dict()
english_pack = get_emoji_unicode_dict('en')
alias_pack = get_aliases_unicode_dict()

assert len(english_pack) < len(alias_pack)

Expand All @@ -32,3 +32,16 @@ def test_no_alias_duplicates():
for alias in data['alias']:
assert alias not in all_aliases
all_aliases.add(alias)


def test_get_emoji_by_alias():
    """get_emoji_by_name(..., 'alias') must agree with get_aliases_unicode_dict()."""
    alias_pack = get_aliases_unicode_dict()
    for alias, emj in alias_pack.items():
        found = emoji.unicode_codes.get_emoji_by_name(alias, 'alias')
        assert found == emj


def test_get_emoji_by_name():
    """get_emoji_by_name() must agree with get_emoji_unicode_dict() for every language."""
    for lang in emoji.LANGUAGES:
        language_pack = get_emoji_unicode_dict(lang)
        for name, emj in language_pack.items():
            found = emoji.unicode_codes.get_emoji_by_name(name, lang)
            assert found == emj
32 changes: 30 additions & 2 deletions utils/testutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,42 @@ def is_normalized(form: _NormalizationForm, s: str) -> bool:
return normalize(form, s) == s


# Filled lazily by get_emoji_unicode_dict(); a value of None means the dict for that language has not been built yet
_EMOJI_UNICODE: Dict[str, Any] = {lang: None for lang in emoji.LANGUAGES} # Cache for the language dicts
# Filled lazily by get_aliases_unicode_dict(); an empty dict means it has not been built yet
_ALIASES_UNICODE: Dict[str, str] = {} # Cache for the aliases dict

def get_emoji_unicode_dict(lang: str) -> Dict[str, Any]:
    """Return the name -> emoji mapping for the language ``lang``.

    The mapping contains every emoji that has a name in ``lang`` and whose
    status is at most ``emoji.STATUS['fully_qualified']`` (fully-qualified
    and component emoji). It is built on the first call for a language and
    afterwards served from ``_EMOJI_UNICODE[lang]``.
    """
    cached = _EMOJI_UNICODE[lang]
    if cached is not None:
        return cached

    names_to_emoji: Dict[str, Any] = {}
    for emj, data in emoji.EMOJI_DATA.items():
        if lang in data and data['status'] <= emoji.STATUS['fully_qualified']:
            names_to_emoji[data[lang]] = emj

    _EMOJI_UNICODE[lang] = names_to_emoji
    return names_to_emoji


def get_aliases_unicode_dict() -> Dict[str, str]:
    """Return the mapping of English names and aliases to emoji.

    Starts from the English name mapping and adds every alias of each emoji
    whose status is at most ``emoji.STATUS['fully_qualified']``
    (fully-qualified and component emoji). The mapping is built on the first
    call and afterwards served from ``_ALIASES_UNICODE``.
    """
    if _ALIASES_UNICODE:
        return _ALIASES_UNICODE

    _ALIASES_UNICODE.update(get_emoji_unicode_dict('en'))
    for emj, data in emoji.EMOJI_DATA.items():
        if 'alias' not in data:
            continue
        if data['status'] <= emoji.STATUS['fully_qualified']:
            for short_name in data['alias']:
                _ALIASES_UNICODE[short_name] = emj

    return _ALIASES_UNICODE


def all_language_packs() -> Generator[Tuple[str, Dict[str, Any]], None, None]:
for lang_code in emoji.LANGUAGES:
yield (lang_code, emoji.unicode_codes.get_emoji_unicode_dict(lang_code))
yield (lang_code, get_emoji_unicode_dict(lang_code))


def all_language_and_alias_packs(
) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
yield ('alias', emoji.unicode_codes.get_aliases_unicode_dict())
yield ('alias', get_aliases_unicode_dict())
yield from all_language_packs()


Expand Down

2 comments on commit 42c27f5

@M0rtenB
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm saddened by this change, as I was relying on emoji.unicode_codes.get_aliases_unicode_dict() in my code. Please make it accessible again.

@cvzi
Copy link
Contributor Author

@cvzi cvzi commented on 42c27f5 May 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can just copy the code for this function into your program if you really need it:

emoji/utils/testutils.py

Lines 29 to 51 in e83cf4a

def get_emoji_unicode_dict(lang: str) -> Dict[str, Any]:
    """Return the name -> emoji mapping for the language ``lang``.

    The mapping contains every emoji that has a name in ``lang`` and whose
    status is at most ``emoji.STATUS['fully_qualified']`` (fully-qualified
    and component emoji). It is built on the first call for a language and
    afterwards served from ``_EMOJI_UNICODE[lang]``.
    """
    cached = _EMOJI_UNICODE[lang]
    if cached is not None:
        return cached

    names_to_emoji: Dict[str, Any] = {}
    for emj, data in emoji.EMOJI_DATA.items():
        if lang in data and data['status'] <= emoji.STATUS['fully_qualified']:
            names_to_emoji[data[lang]] = emj

    _EMOJI_UNICODE[lang] = names_to_emoji
    return names_to_emoji
def get_aliases_unicode_dict() -> Dict[str, str]:
    """Return the mapping of English names and aliases to emoji.

    Starts from the English name mapping and adds every alias of each emoji
    whose status is at most ``emoji.STATUS['fully_qualified']``
    (fully-qualified and component emoji). The mapping is built on the first
    call and afterwards served from ``_ALIASES_UNICODE``.
    """
    if _ALIASES_UNICODE:
        return _ALIASES_UNICODE

    _ALIASES_UNICODE.update(get_emoji_unicode_dict('en'))
    for emj, data in emoji.EMOJI_DATA.items():
        if 'alias' not in data:
            continue
        if data['status'] <= emoji.STATUS['fully_qualified']:
            for short_name in data['alias']:
                _ALIASES_UNICODE[short_name] = emj

    return _ALIASES_UNICODE

Please sign in to comment.