feat(term): ansi: implement wrap and wordwrap (#51)

* feat(term): ansi: implement wrap and wordwrap

  This is based on @muesli's awesome [reflow](https://github.com/muesli/reflow) library. It uses the ANSI parser state machine to find escape codes and runes. Since it uses the ANSI parser state machine, it supports OSC, DCS, and other sequences.

* fix(term): ansi: mike's feedback
charmbracelet · Mar 21, 2024 · 7faadd0 · 7faadd0
1 parent 2f4b840
commit 7faadd0
Show file tree

Hide file tree

Showing 2 changed files with 353 additions and 0 deletions.
diff --git a/exp/term/ansi/wrap.go b/exp/term/ansi/wrap.go
@@ -0,0 +1,243 @@
+package ansi
+
+import (
+	"bytes"
+	"unicode"
+	"unicode/utf8"
+
+	"github.com/charmbracelet/x/exp/term/ansi/parser"
+	"github.com/rivo/uniseg"
+)
+
+// Wrap wraps a string or a block of text to a given line length, breaking word
+// boundaries. This will preserve ANSI escape codes and will account for
+// wide-characters in the string.
+// When preserveSpace is true, spaces at the beginning of a line will be
+// preserved.
+// A limit smaller than 1 disables wrapping: s is returned unchanged.
+func Wrap(s string, limit int, preserveSpace bool) string {
+	if limit < 1 {
+		return s
+	}
+
+	var (
+		cluster      []byte
+		buf          bytes.Buffer
+		curWidth     int  // width of the line currently being built
+		forceNewline bool // true while the current line was started by a forced wrap
+		pstate       = parser.GroundState // initial state
+		b            = []byte(s)
+	)
+
+	addNewline := func() {
+		buf.WriteByte('\n')
+		curWidth = 0
+	}
+
+	i := 0
+	for i < len(b) {
+		state, action := parser.Table.Transition(pstate, b[i])
+
+		switch action {
+		case parser.CollectAction:
+			if w := utf8ByteLen(b[i]); w <= 1 {
+				// Collect sequence intermediate bytes
+				buf.WriteByte(b[i])
+				break
+			}
+
+			// Always decode with a fresh grapheme state (-1). Clusters are
+			// decoded one at a time and single bytes in between are handled
+			// by the other cases, so a state carried over from an earlier
+			// call may not describe the bytes right before this cluster and
+			// the width calculation might be off.
+			var width int
+			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
+			i += len(cluster)
+
+			if curWidth+width > limit {
+				addNewline()
+			}
+			if !preserveSpace && curWidth == 0 && len(cluster) <= 4 {
+				// Skip spaces at the beginning of a line
+				if r, _ := utf8.DecodeRune(cluster); r != utf8.RuneError && unicode.IsSpace(r) {
+					pstate = parser.GroundState
+					continue
+				}
+			}
+
+			buf.Write(cluster)
+			curWidth += width
+			// The whole cluster was consumed here, so return to the ground
+			// state and skip the generic state update and i++ below.
+			pstate = parser.GroundState
+			continue
+		case parser.PrintAction, parser.ExecuteAction:
+			if b[i] == '\n' {
+				// An explicit line break starts a fresh, non-forced line.
+				addNewline()
+				forceNewline = false
+				break
+			}
+
+			if curWidth+1 > limit {
+				addNewline()
+				forceNewline = true
+			}
+
+			// Skip spaces at the beginning of a line
+			if curWidth == 0 {
+				// Only spaces that follow a forced wrap are dropped; spaces
+				// after an explicit '\n' are kept.
+				if !preserveSpace && forceNewline && unicode.IsSpace(rune(b[i])) {
+					break
+				}
+				forceNewline = false
+			}
+
+			buf.WriteByte(b[i])
+			curWidth++
+		default:
+			// Any other action: copy the byte through without counting width.
+			buf.WriteByte(b[i])
+		}
+
+		// We manage the UTF8 state separately manually above.
+		if pstate != parser.Utf8State {
+			pstate = state
+		}
+		i++
+	}
+
+	return buf.String()
+}
+
+// Wordwrap wraps a string or a block of text to a given line length, not
+// breaking word boundaries. This will preserve ANSI escape codes and will
+// account for wide-characters in the string.
+// The breakpoints string is a list of characters that are considered
+// breakpoints for word wrapping. A hyphen (-) is always considered a
+// breakpoint.
+// A limit smaller than 1 disables wrapping: s is returned unchanged.
+func Wordwrap(s string, limit int, breakpoints string) string {
+	if limit < 1 {
+		return s
+	}
+
+	// Add a hyphen to the breakpoints
+	breakpoints += "-"
+
+	var (
+		cluster  []byte
+		buf      bytes.Buffer // finished output
+		word     bytes.Buffer // pending word, including any escape sequence bytes
+		space    bytes.Buffer // pending whitespace seen before the current word
+		curWidth int          // width already written to the current line
+		wordLen  int          // printable width of the pending word
+		pstate   = parser.GroundState // initial state
+		b        = []byte(s)
+	)
+
+	// addSpace flushes the pending whitespace to the output.
+	// NOTE(review): space.Len() is a byte count, so multi-byte whitespace is
+	// counted by its encoded length rather than its display width.
+	addSpace := func() {
+		curWidth += space.Len()
+		buf.Write(space.Bytes())
+		space.Reset()
+	}
+
+	// addWord flushes the pending whitespace and word to the output.
+	addWord := func() {
+		if word.Len() == 0 {
+			return
+		}
+		addSpace()
+		curWidth += wordLen
+		buf.Write(word.Bytes())
+		word.Reset()
+		wordLen = 0
+	}
+
+	// addNewline starts a new output line and drops the pending whitespace.
+	addNewline := func() {
+		buf.WriteByte('\n')
+		curWidth = 0
+		space.Reset()
+	}
+
+	i := 0
+	for i < len(b) {
+		state, action := parser.Table.Transition(pstate, b[i])
+
+		switch action {
+		case parser.CollectAction:
+			if w := utf8ByteLen(b[i]); w <= 1 {
+				// Collect sequence intermediate bytes
+				word.WriteByte(b[i])
+				break
+			}
+
+			// Always decode with a fresh grapheme state (-1). Clusters are
+			// decoded one at a time and single bytes in between are handled
+			// by the other cases, so a carried-over state may not describe
+			// the bytes right before this cluster.
+			var width int
+			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
+			i += len(cluster)
+
+			r, _ := utf8.DecodeRune(cluster)
+			if r != utf8.RuneError && unicode.IsSpace(r) {
+				addWord()
+				space.WriteRune(r)
+			} else if bytes.ContainsAny(cluster, breakpoints) {
+				addSpace()
+				addWord()
+				buf.Write(cluster)
+				// The breakpoint occupies cells on the current line; count
+				// it so later words are measured against the real width.
+				curWidth += width
+			} else {
+				word.Write(cluster)
+				wordLen += width
+				// Break before the pending word once it no longer fits, but
+				// only if the word could fit on a line of its own.
+				if curWidth+space.Len()+wordLen > limit &&
+					wordLen < limit {
+					addNewline()
+				}
+			}
+
+			// The whole cluster was consumed here, so return to the ground
+			// state and skip the generic state update and i++ below.
+			pstate = parser.GroundState
+			continue
+		case parser.PrintAction, parser.ExecuteAction:
+			r := rune(b[i])
+			switch {
+			case r == '\n':
+				if wordLen == 0 {
+					// No printable word is pending: keep the trailing
+					// whitespace only if it still fits on the line.
+					if curWidth+space.Len() > limit {
+						curWidth = 0
+					} else {
+						buf.Write(space.Bytes())
+					}
+					space.Reset()
+				}
+
+				addWord()
+				addNewline()
+			case unicode.IsSpace(r):
+				addWord()
+				space.WriteByte(b[i])
+			case runeContainsAny(r, breakpoints):
+				addSpace()
+				addWord()
+				buf.WriteByte(b[i])
+				// Count the breakpoint character towards the line width.
+				curWidth++
+			default:
+				word.WriteByte(b[i])
+				wordLen++
+				if curWidth+space.Len()+wordLen > limit &&
+					wordLen < limit {
+					addNewline()
+				}
+			}
+
+		default:
+			// Any other action: keep the byte with the pending word without
+			// counting it towards the word's width.
+			word.WriteByte(b[i])
+		}
+
+		// We manage the UTF8 state separately manually above.
+		if pstate != parser.Utf8State {
+			pstate = state
+		}
+		i++
+	}
+
+	// Flush whatever word is still pending at the end of the input.
+	addWord()
+
+	return buf.String()
+}
+
+// runeContainsAny reports whether r is equal to any of the runes in s.
+func runeContainsAny(r rune, s string) bool {
+	matched := false
+	for _, candidate := range s {
+		if matched = candidate == r; matched {
+			break
+		}
+	}
+	return matched
+}
diff --git a/exp/term/ansi/wrap_test.go b/exp/term/ansi/wrap_test.go
@@ -0,0 +1,110 @@
+package ansi_test
+
+import (
+	"testing"
+
+	"github.com/charmbracelet/x/exp/term/ansi"
+)
+
+// cases is the table of inputs, width limits, and expected outputs that
+// TestWrap feeds to ansi.Wrap; preserveSpace is passed through as Wrap's
+// third argument.
+var cases = []struct {
+	name          string
+	input         string
+	limit         int
+	expected      string
+	preserveSpace bool
+}{
+	{"empty string", "", 0, "", true},
+	{"passthrough", "foobar\n ", 0, "foobar\n ", true},
+	{"pass", "foo", 4, "foo", true},
+	{"simple", "foobarfoo", 4, "foob\narfo\no", true},
+	{"lf", "f\no\nobar", 3, "f\no\noba\nr", true},
+	{"lf_space", "foo bar\n  baz", 3, "foo\n ba\nr\n  b\naz", true},
+	{"tab", "foo\tbar", 3, "foo\n\tba\nr", true},
+	{"unicode_space", "foo\xc2\xa0bar", 3, "foo\nbar", false},
+	{"style_nochange", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7, "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", true},
+	{"style", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", 3, "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mju\nst \nano\nthe\nr t\nest\x1B[38;2;249;38;114m\n)\x1B[0m", true},
+	{"style_lf", "I really \x1B[38;2;249;38;114mlove\x1B[0m Go!", 8, "I really\n\x1b[38;2;249;38;114mlove\x1b[0m Go!", false},
+	{"style_emoji", "I really \x1B[38;2;249;38;114mlove u🫧\x1B[0m", 8, "I really\n\x1b[38;2;249;38;114mlove u🫧\x1b[0m", false},
+	{"hyperlink", "I really \x1B]8;;https://example.com/\x1B\\love\x1B]8;;\x1B\\ Go!", 10, "I really \x1b]8;;https://example.com/\x1b\\l\nove\x1b]8;;\x1b\\ Go!", false},
+	{"dcs", "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foobar", 3, "\x1BPq#0;2;0;0;0#1;2;100;100;0#2;2;0;100;0#1~~@@vv@@~~@@~~$#2??}}GG}}??}}??-#1!14@\x1B\\foo\nbar", false},
+	{"begin_with_space", " foo", 4, " foo", false},
+	{"style_dont_affect_wrap", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7, "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", false},
+	{"preserve_style", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", 3, "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mju\nst \nano\nthe\nr t\nest\x1B[38;2;249;38;114m\n)\x1B[0m", false},
+	{"emoji", "foo🫧foobar", 4, "foo\n🫧fo\nobar", false},
+	{"osc8_wrap", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\สวัสดีสวัสดี\x1b]8;;\x1b\\", 8, "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\", false},
+}
+
+// TestWrap runs every entry of cases through ansi.Wrap as a named subtest and
+// reports a mismatch together with the entry's 1-based position in the table.
+func TestWrap(t *testing.T) {
+	for idx := range cases {
+		tc := cases[idx]
+		t.Run(tc.name, func(t *testing.T) {
+			got := ansi.Wrap(tc.input, tc.limit, tc.preserveSpace)
+			if got == tc.expected {
+				return
+			}
+			t.Errorf("case %d, expected %q, got %q", idx+1, tc.expected, got)
+		})
+	}
+}
+
+// wwCases is the table of inputs, width limits, extra breakpoint characters,
+// and expected outputs that TestWordwrap feeds to ansi.Wordwrap.
+var wwCases = []struct {
+	name        string
+	input       string
+	limit       int
+	breakPoints string
+	expected    string
+}{
+	{"empty string", "", 0, "", ""},
+	{"passthrough", "foobar\n ", 0, "", "foobar\n "},
+	{"pass", "foo", 3, "", "foo"},
+	{"toolong", "foobarfoo", 4, "", "foobarfoo"},
+	{"white space", "foo bar foo", 4, "", "foo\nbar\nfoo"},
+	{"broken_at_spaces", "foo bars foobars", 4, "", "foo\nbars\nfoobars"},
+	{"hyphen", "foo-foobar", 4, "-", "foo-\nfoobar"},
+	{"emoji_breakpoint", "foo😃 foobar", 4, "😃", "foo😃\nfoobar"},
+	{"wide_emoji_breakpoint", "foo🫧 foobar", 4, "🫧", "foo🫧\nfoobar"},
+	{"space_breakpoint", "foo --bar", 9, "-", "foo --bar"},
+	{"simple", "foo bars foobars", 4, "", "foo\nbars\nfoobars"},
+	{"limit", "foo bar", 5, "", "foo\nbar"},
+	{"remove white spaces", "foo    \nb   ar   ", 4, "", "foo\nb\nar"},
+	{"white space trail width", "foo\nb\t a\n bar", 4, "", "foo\nb\t a\n bar"},
+	{"explicit_line_break", "foo bar foo\n", 4, "", "foo\nbar\nfoo\n"},
+	{"explicit_breaks", "\nfoo bar\n\n\nfoo\n", 4, "", "\nfoo\nbar\n\n\nfoo\n"},
+	{"example", " This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* foo  \nbar    ", 6, "", " This\nis a\nlist: \n\n\t* foo\n\t* bar\n\n\n\t* foo\nbar"},
+	{"style_code_dont_affect_length", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m", 7, "", "\x1B[38;2;249;38;114mfoo\x1B[0m\x1B[38;2;248;248;242m \x1B[0m\x1B[38;2;230;219;116mbar\x1B[0m"},
+	{"style_code_dont_get_wrapped", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust another test\x1B[38;2;249;38;114m)\x1B[0m", 3, "", "\x1B[38;2;249;38;114m(\x1B[0m\x1B[38;2;248;248;242mjust\nanother\ntest\x1B[38;2;249;38;114m)\x1B[0m"},
+	{"osc8_wrap", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\ สวัสดีสวัสดี\x1b]8;;\x1b\\", 8, "", "สวัสดีสวัสดี\x1b]8;;https://example.com\x1b\\\nสวัสดีสวัสดี\x1b]8;;\x1b\\"},
+}
+
+// TestWordwrap runs every entry of wwCases through ansi.Wordwrap as a named
+// subtest and reports a mismatch together with the entry's 1-based position
+// in the table.
+func TestWordwrap(t *testing.T) {
+	for idx := range wwCases {
+		tc := wwCases[idx]
+		t.Run(tc.name, func(t *testing.T) {
+			got := ansi.Wordwrap(tc.input, tc.limit, tc.breakPoints)
+			if got == tc.expected {
+				return
+			}
+			t.Errorf("case %d, expected %q, got %q", idx+1, tc.expected, got)
+		})
+	}
+}
+
+// TestWrapWordwrap pipes a sentence through ansi.Wordwrap and then ansi.Wrap
+// with the same limit and compares the combined result against a fixed
+// layout. It is skipped for now because it is marked as work in progress.
+func TestWrapWordwrap(t *testing.T) {
+	t.Skip("WIP")
+	input := "the quick brown foxxxxxxxxxxxxxxxx jumped over the lazy dog."
+	limit := 16
+	// Keep the expectation in a single variable so that a failure message
+	// always reports the exact value the output was compared against.
+	// NOTE(review): this is the value the comparison used before; confirm it
+	// is the intended layout when the test is taken out of WIP.
+	expected := "the quick brown\nfoxxxxxxxxxxxxx\nxxxx jumped over\nthe lazy dog."
+	output := ansi.Wordwrap(input, limit, "")
+	t.Logf("output: %q", output)
+	output = ansi.Wrap(output, limit, false)
+	if output != expected {
+		t.Errorf("expected %q, got %q", expected, output)
+	}
+}
+
+// Unused reference layout kept next to the WIP test above.
+// NOTE(review): presumably the TestWrapWordwrap sentence after word wrapping
+// only, with the over-long word left intact on its own line — confirm.
+const _ = `
+ the quick brown
+ foxxxxxxxxxxxxxxxx
+ jumped over the
+ lazy dog.
+`
+
+// Unused reference layout kept next to the WIP test above.
+// NOTE(review): presumably the same sentence hard-wrapped at a fixed column,
+// splitting words mid-way — confirm.
+const _ = `
+ the quick brown
+ foxxxxxxxxxxxxxx
+ xx jumped over t
+ he lazy dog.
+`