-
-
Notifications
You must be signed in to change notification settings - Fork 33.5k
bpo-24665: double-width CJK chars support for textwrap #89
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
6ea78e3
aa94f26
0264d9d
d630821
bfdfb22
8337ce5
cb9812b
54de7aa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,9 +5,10 @@ | |
| # Copyright (C) 2002, 2003 Python Software Foundation. | ||
| # Written by Greg Ward <[email protected]> | ||
|
|
||
| import re | ||
| import re, unicodedata | ||
|
|
||
| __all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten'] | ||
| __all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten', | ||
| 'cjkwide', 'cjklen', 'cjkslices'] | ||
|
|
||
| # Hardcode the recognized whitespace characters to the US-ASCII | ||
| # whitespace characters. The main reason for doing this is that | ||
|
|
@@ -26,6 +27,8 @@ class TextWrapper: | |
| width (default: 70) | ||
| the maximum width of wrapped lines (unless break_long_words | ||
| is false) | ||
| cjk (default: False) | ||
| Handle double-width CJK chars. | ||
| initial_indent (default: "") | ||
| string that will be prepended to the first line of wrapped | ||
| output. Counts towards the line's width. | ||
|
|
@@ -114,6 +117,7 @@ class TextWrapper: | |
|
|
||
| def __init__(self, | ||
| width=70, | ||
| cjk=False, | ||
| initial_indent="", | ||
| subsequent_indent="", | ||
| expand_tabs=True, | ||
|
|
@@ -127,6 +131,7 @@ def __init__(self, | |
| max_lines=None, | ||
| placeholder=' [...]'): | ||
| self.width = width | ||
| self.cjk = cjk | ||
| self.initial_indent = initial_indent | ||
| self.subsequent_indent = subsequent_indent | ||
| self.expand_tabs = expand_tabs | ||
|
|
@@ -139,6 +144,7 @@ def __init__(self, | |
| self.max_lines = max_lines | ||
| self.placeholder = placeholder | ||
|
|
||
| self.len = cjklen if self.cjk else len | ||
|
||
|
|
||
| # -- Private methods ----------------------------------------------- | ||
| # (possibly useful for subclasses to override) | ||
|
|
@@ -215,8 +221,13 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): | |
| # If we're allowed to break long words, then do so: put as much | ||
| # of the next chunk onto the current line as will fit. | ||
| if self.break_long_words: | ||
| cur_line.append(reversed_chunks[-1][:space_left]) | ||
| reversed_chunks[-1] = reversed_chunks[-1][space_left:] | ||
| if self.cjk: | ||
| chunk_start, chunk_end = cjkslices(reversed_chunks[-1], space_left) | ||
| cur_line.append(chunk_start) | ||
| reversed_chunks[-1] = chunk_end | ||
| else: | ||
| cur_line.append(reversed_chunks[-1][:space_left]) | ||
| reversed_chunks[-1] = reversed_chunks[-1][space_left:] | ||
|
|
||
| # Otherwise, we have to preserve the long word intact. Only add | ||
| # it to the current line if there's nothing already there -- | ||
|
|
@@ -246,6 +257,9 @@ def _wrap_chunks(self, chunks): | |
| lines = [] | ||
| if self.width <= 0: | ||
| raise ValueError("invalid width %r (must be > 0)" % self.width) | ||
| elif self.width == 1 and (sum(self.len(chunk) for chunk in chunks) > | ||
| sum(len(chunk) for chunk in chunks)): | ||
| raise ValueError("invalid width 1 (must be > 1 when CJK chars)") | ||
| if self.max_lines is not None: | ||
| if self.max_lines > 1: | ||
| indent = self.subsequent_indent | ||
|
|
@@ -280,7 +294,7 @@ def _wrap_chunks(self, chunks): | |
| del chunks[-1] | ||
|
|
||
| while chunks: | ||
| l = len(chunks[-1]) | ||
| l = self.len(chunks[-1]) | ||
|
|
||
| # Can at least squeeze this chunk onto the current line. | ||
| if cur_len + l <= width: | ||
|
|
@@ -293,7 +307,7 @@ def _wrap_chunks(self, chunks): | |
|
|
||
| # The current line is full, and the next chunk is too big to | ||
| # fit on *any* line (not just this one). | ||
| if chunks and len(chunks[-1]) > width: | ||
| if chunks and self.len(chunks[-1]) > width: | ||
| self._handle_long_word(chunks, cur_line, cur_len, width) | ||
| cur_len = sum(map(len, cur_line)) | ||
|
|
||
|
|
@@ -365,7 +379,7 @@ def fill(self, text): | |
|
|
||
| # -- Convenience interface --------------------------------------------- | ||
|
|
||
| def wrap(text, width=70, **kwargs): | ||
| def wrap(text, width=70, cjk=False, **kwargs): | ||
|
||
| """Wrap a single paragraph of text, returning a list of wrapped lines. | ||
|
|
||
| Reformat the single paragraph in 'text' so it fits in lines of no | ||
|
|
@@ -375,10 +389,10 @@ def wrap(text, width=70, **kwargs): | |
| space. See TextWrapper class for available keyword args to customize | ||
| wrapping behaviour. | ||
| """ | ||
| w = TextWrapper(width=width, **kwargs) | ||
| w = TextWrapper(width=width, cjk=cjk, **kwargs) | ||
| return w.wrap(text) | ||
|
|
||
| def fill(text, width=70, **kwargs): | ||
| def fill(text, width=70, cjk=False, **kwargs): | ||
| """Fill a single paragraph of text, returning a new string. | ||
|
|
||
| Reformat the single paragraph in 'text' to fit in lines of no more | ||
|
|
@@ -387,10 +401,10 @@ def fill(text, width=70, **kwargs): | |
| whitespace characters converted to space. See TextWrapper class for | ||
| available keyword args to customize wrapping behaviour. | ||
| """ | ||
| w = TextWrapper(width=width, **kwargs) | ||
| w = TextWrapper(width=width, cjk=cjk, **kwargs) | ||
| return w.fill(text) | ||
|
|
||
| def shorten(text, width, **kwargs): | ||
| def shorten(text, width, cjk=False, **kwargs): | ||
| """Collapse and truncate the given text to fit in the given width. | ||
|
|
||
| The text first has its whitespace collapsed. If it then fits in | ||
|
|
@@ -402,10 +416,41 @@ def shorten(text, width, **kwargs): | |
| >>> textwrap.shorten("Hello world!", width=11) | ||
| 'Hello [...]' | ||
| """ | ||
| w = TextWrapper(width=width, max_lines=1, **kwargs) | ||
| w = TextWrapper(width=width, cjk=cjk, max_lines=1, **kwargs) | ||
| return w.fill(' '.join(text.strip().split())) | ||
|
|
||
|
|
||
| # -- CJK support ------------------------------------------------------ | ||
|
|
||
| def cjkwide(char): | ||
|
||
| """Return True if char is Fullwidth or Wide, False otherwise. | ||
| Fullwidth and Wide CJK chars are double-width. | ||
| """ | ||
| return unicodedata.east_asian_width(char) in ('F', 'W') | ||
|
||
|
|
||
|
|
||
| def cjklen(text): | ||
|
||
| """Return the real width of text (its len if not a string). | ||
| """ | ||
| if not isinstance(text, str): | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Strange case handling, maybe we should expect only string type text argument in this function...
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again: it's for an handy replacement of the built-in |
||
| return len(text) | ||
|
||
| return sum(2 if cjkwide(char) else 1 for char in text) | ||
|
|
||
|
|
||
| def cjkslices(text, index): | ||
|
||
| """Return the two slices of text cut to the index. | ||
| """ | ||
| if not isinstance(text, str): | ||
| return text[:index], text[index:] | ||
| if cjklen(text) <= index: | ||
| return text, '' | ||
| i = 1 | ||
| # <= and i-1 to catch the last double length char of odd line | ||
| while cjklen(text[:i]) <= index: | ||
| i = i + 1 | ||
|
||
| return text[:i-1], text[i-1:] | ||
|
|
||
|
|
||
| # -- Loosely related functionality ------------------------------------- | ||
|
|
||
| _whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -495,6 +495,7 @@ Lele Gaifax | |
| Santiago Gala | ||
| Yitzchak Gale | ||
| Matthew Gallagher | ||
| Florent Gallaire | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure if that makes lots of sense for such a small change (even though such changes are very welcome of course!) There are commits from loads of people in this repo, but way less entries in this file (adding every contributor would quickly make the file pretty chaotic). IMO it'd be better to only add yourself to this file when you contribute e.g. a major fix, new feature, etc
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I strongly disagree, if persons are missing they should be added.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Florent definitely belongs in ACKS. We have been fairly liberal in adding people, some for as little as a well-crafted sentence or line of code. |
||
| Quentin Gallet-Gilles | ||
| Riccardo Attilio Galli | ||
| Raymund Galvin | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These arguments can be passed as positional argument.
So new argument should be added last.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree