Skip to content

Commit

Permalink
Implement Unicode Support
Browse files Browse the repository at this point in the history
closes #93

* fix full-width characters issue (magmax/python-inquirer#432)

* Fix the issue that raises a bunch of OSError exceptions in the test script (#93)

* Fix UnicodeEncodeError when inputting emojis

* add tests for new unicode support
  • Loading branch information
sakkyoi authored Aug 11, 2024
1 parent a3e9b0b commit d1d903d
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 16 deletions.
29 changes: 20 additions & 9 deletions readchar/_win_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,38 @@


def readchar() -> str:
"""Reads a single character from the input stream.
"""Reads a single utf8-character from the input stream.
Blocks until a character is available."""

# manual byte decoding because some bytes in windows are not utf-8 encodable.
return chr(int.from_bytes(msvcrt.getch(), "big"))
# read a single wide character from the input
return msvcrt.getwch()


def readkey() -> str:
"""Reads the next keypress. If an escaped key is pressed, the full
sequence is read and returned as noted in `_win_key.py`."""

# read first character
ch = readchar()

# keys like CTRL+C should cause an interrupt
if ch in config.INTERRUPT_KEYS:
raise KeyboardInterrupt

# if it is a normal character:
if ch not in "\x00\xe0":
return ch
# parse special multi character keys (see key module)
# https://learn.microsoft.com/cpp/c-runtime-library/reference/getch-getwch#remarks
if ch in "\x00\xe0":
# read the second half
# we always return the 0x00 prefix (even when 0xe0 was read); this avoids duplications in the key module
ch = "\x00" + readchar()

# if it is a special key, read the second half:
ch2 = readchar()
# parse unicode surrogates
# https://docs.python.org/3/c-api/unicode.html#c.Py_UNICODE_IS_SURROGATE
if "\uD800" <= ch <= "\uDFFF":
ch += readchar()

return "\x00" + ch2
# round-trip through utf-16 so the two surrogate halves are merged into a
# single code point; the result is then no longer treated as a surrogate pair.
ch = ch.encode("utf-16", errors="surrogatepass").decode("utf-16")

return ch
13 changes: 6 additions & 7 deletions tests/windows/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,20 @@
import pytest


if sys.platform in ("win32", "cygwin"):
import msvcrt


# ignore all tests in this folder if not on windows
def pytest_ignore_collect(path, config):
if sys.platform not in ("win32", "cygwin"):
return True


@pytest.fixture
def patched_stdin():
def patched_stdin(monkeypatch):
class mocked_stdin:
def push(self, string):
for c in string:
msvcrt.ungetch(ord(c).to_bytes(1, "big"))
# Create an iterator from the string
characters = iter(string)

# Patch msvcrt.getwch to return the next character from the iterator.
monkeypatch.setattr("msvcrt.getwch", lambda: next(characters))

return mocked_stdin()
15 changes: 15 additions & 0 deletions tests/windows/test_readchar.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,18 @@ def test_controlCharacters(seq, key, patched_stdin):
def test_CTRL_Characters(seq, key, patched_stdin):
patched_stdin.push(seq)
assert key == readchar()


@pytest.mark.parametrize(
["seq", "key"],
[
("\xe4", "ä"),
("\xe1", "á"),
("\xe5", "å"),
("\xdf", "ß"),
("\u304c", "が"),
],
)
def test_Unicode_Characters(seq, key, patched_stdin):
patched_stdin.push(seq)
assert key == readchar()
19 changes: 19 additions & 0 deletions tests/windows/test_readkey.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,22 @@ def test_navigationKeys(seq, key, patched_stdin):
def test_functionKeys(seq, key, patched_stdin):
patched_stdin.push(seq)
assert key == readkey()


@pytest.mark.parametrize(
["seq", "key"],
[
("\ud83d\ude00", "😀"),
("\ud83d\ude18", "😘"),
("\ud83d\ude09", "😉"),
("\ud83d\udc4d", "👍"),
("\ud83d\udc35", "🐵"),
("\ud83c\udf47", "🍇"),
("\ud83c\udf83", "🎃"),
("\ud83d\udc53", "👓"),
("\ud83c\udfc1", "🏁"),
],
)
def test_UnicodeSurrogates(seq, key, patched_stdin):
patched_stdin.push(seq)
assert key == readkey()

0 comments on commit d1d903d

Please sign in to comment.