Skip to content

Commit

Permalink
text/unicod/bidi: implement API, remove panics
Browse files Browse the repository at this point in the history
The bidi API splits strings with mixed left-to-right (ltr) and
right-to-left (rtl) parts into substrings (segments). Each segment
contains a substring, a direction of text flow (either ltr or rtl)
and the start and end positions in the input.

The paragraph validators do not panic, instead the newParagraph function
returns an error message in case the input is invalid.

Fixes golang/go#42356

Change-Id: I90cafc8fadb0cf6936dfb1ab373586017147d709
Reviewed-on: https://go-review.googlesource.com/c/text/+/267857
Trust: Ian Lance Taylor <[email protected]>
Reviewed-by: Marcel van Lohuizen <[email protected]>
  • Loading branch information
pgundlach authored and ianlancetaylor committed Nov 24, 2020
1 parent 4482a91 commit 967b8f6
Show file tree
Hide file tree
Showing 4 changed files with 584 additions and 57 deletions.
221 changes: 191 additions & 30 deletions unicode/bidi/bidi.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@
// and without notice.
package bidi // import "golang.org/x/text/unicode/bidi"

// TODO:
// The following functionality would not be hard to implement, but hinges on
// the definition of a Segmenter interface. For now this is up to the user.
// - Iterate over paragraphs
// - Segmenter to iterate over runs directly from a given text.
// Also:
// TODO
// - Transformer for reordering?
// - Transformer (validator, really) for Bidi Rule.

import (
"bytes"
)

// This API tries to avoid dealing with embedding levels for now. Under the hood
// these will be computed, but the question is to which extent the user should
// know they exist. We should at some point allow the user to specify an
Expand Down Expand Up @@ -49,7 +48,9 @@ const (
Neutral
)

type options struct{}
type options struct {
defaultDirection Direction
}

// An Option is an option for Bidi processing.
type Option func(*options)
Expand All @@ -66,12 +67,62 @@ type Option func(*options)
// DefaultDirection sets the default direction for a Paragraph. The direction is
// overridden if the text contains directional characters.
func DefaultDirection(d Direction) Option {
panic("unimplemented")
return func(opts *options) {
opts.defaultDirection = d
}
}

// A Paragraph holds a single Paragraph for Bidi processing.
type Paragraph struct {
// buffers
p []byte
o Ordering
opts []Option
types []Class
pairTypes []bracketType
pairValues []rune
runes []rune
options options
}

// Initialize the p.pairTypes, p.pairValues and p.types from the input previously
// set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph
// separator (bidi class B).
//
// The function p.Order() needs these values to be set, so this preparation could be postponed.
// But since the SetBytes and SetStrings functions return the length of the input up to the paragraph
// separator, the whole input needs to be processed anyway and should not be done twice.
//
// The function has the same return values as SetBytes() / SetString()
func (p *Paragraph) prepareInput() (n int, err error) {
p.runes = bytes.Runes(p.p)
bytecount := 0
// clear slices from previous SetString or SetBytes
p.pairTypes = nil
p.pairValues = nil
p.types = nil

for _, r := range p.runes {
props, i := LookupRune(r)
bytecount += i
cls := props.Class()
if cls == B {
return bytecount, nil
}
p.types = append(p.types, cls)
if props.IsOpeningBracket() {
p.pairTypes = append(p.pairTypes, bpOpen)
p.pairValues = append(p.pairValues, r)
} else if props.IsBracket() {
// this must be a closing bracket,
// since IsOpeningBracket is not true
p.pairTypes = append(p.pairTypes, bpClose)
p.pairValues = append(p.pairValues, r)
} else {
p.pairTypes = append(p.pairTypes, bpNone)
p.pairValues = append(p.pairValues, 0)
}
}
return bytecount, nil
}

// SetBytes configures p for the given paragraph text. It replaces text
Expand All @@ -80,70 +131,150 @@ type Paragraph struct {
// consumed from b including this separator. Error may be non-nil if options are
// given.
func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
panic("unimplemented")
p.p = b
p.opts = opts
return p.prepareInput()
}

// SetString configures p for the given paragraph text. It replaces text
// previously set by SetBytes or SetString. If b contains a paragraph separator
// SetString configures s for the given paragraph text. It replaces text
// previously set by SetBytes or SetString. If s contains a paragraph separator
// it will only process the first paragraph and report the number of bytes
// consumed from b including this separator. Error may be non-nil if options are
// consumed from s including this separator. Error may be non-nil if options are
// given.
func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
panic("unimplemented")
p.p = []byte(s)
p.opts = opts
return p.prepareInput()
}

// IsLeftToRight reports whether the principle direction of rendering for this
// paragraphs is left-to-right. If this returns false, the principle direction
// of rendering is right-to-left.
func (p *Paragraph) IsLeftToRight() bool {
panic("unimplemented")
return p.Direction() == LeftToRight
}

// Direction returns the direction of the text of this paragraph.
//
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
func (p *Paragraph) Direction() Direction {
panic("unimplemented")
return p.o.Direction()
}

// TODO: what happens if the position is > len(input)? This should return an error.

// RunAt reports the Run at the given position of the input text.
//
// This method can be used for computing line breaks on paragraphs.
func (p *Paragraph) RunAt(pos int) Run {
panic("unimplemented")
c := 0
runNumber := 0
for i, r := range p.o.runes {
c += len(r)
if pos < c {
runNumber = i
}
}
return p.o.Run(runNumber)
}

func calculateOrdering(levels []level, runes []rune) Ordering {
var curDir Direction

prevDir := Neutral
prevI := 0

o := Ordering{}
// lvl = 0,2,4,...: left to right
// lvl = 1,3,5,...: right to left
for i, lvl := range levels {
if lvl%2 == 0 {
curDir = LeftToRight
} else {
curDir = RightToLeft
}
if curDir != prevDir {
if i > 0 {
o.runes = append(o.runes, runes[prevI:i])
o.directions = append(o.directions, prevDir)
o.startpos = append(o.startpos, prevI)
}
prevI = i
prevDir = curDir
}
}
o.runes = append(o.runes, runes[prevI:])
o.directions = append(o.directions, prevDir)
o.startpos = append(o.startpos, prevI)
return o
}

// Order computes the visual ordering of all the runs in a Paragraph.
func (p *Paragraph) Order() (Ordering, error) {
panic("unimplemented")
if len(p.types) == 0 {
return Ordering{}, nil
}

for _, fn := range p.opts {
fn(&p.options)
}
lvl := level(-1)
if p.options.defaultDirection == RightToLeft {
lvl = 1
}
para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl)
if err != nil {
return Ordering{}, err
}

levels := para.getLevels([]int{len(p.types)})

p.o = calculateOrdering(levels, p.runes)
return p.o, nil
}

// Line computes the visual ordering of runs for a single line starting and
// ending at the given positions in the original text.
func (p *Paragraph) Line(start, end int) (Ordering, error) {
panic("unimplemented")
lineTypes := p.types[start:end]
para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1)
if err != nil {
return Ordering{}, err
}
levels := para.getLevels([]int{len(lineTypes)})
o := calculateOrdering(levels, p.runes[start:end])
return o, nil
}

// An Ordering holds the computed visual order of runs of a Paragraph. Calling
// SetBytes or SetString on the originating Paragraph invalidates an Ordering.
// The methods of an Ordering should only be called by one goroutine at a time.
type Ordering struct{}
type Ordering struct {
runes [][]rune
directions []Direction
startpos []int
}

// Direction reports the directionality of the runs.
//
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
func (o *Ordering) Direction() Direction {
panic("unimplemented")
return o.directions[0]
}

// NumRuns returns the number of runs.
func (o *Ordering) NumRuns() int {
panic("unimplemented")
return len(o.runes)
}

// Run returns the ith run within the ordering.
func (o *Ordering) Run(i int) Run {
panic("unimplemented")
r := Run{
runes: o.runes[i],
direction: o.directions[i],
startpos: o.startpos[i],
}
return r
}

// TODO: perhaps with options.
Expand All @@ -155,16 +286,19 @@ func (o *Ordering) Run(i int) Run {

// A Run is a continuous sequence of characters of a single direction.
type Run struct {
runes []rune
direction Direction
startpos int
}

// String returns the text of the run in its original order.
func (r *Run) String() string {
panic("unimplemented")
return string(r.runes)
}

// Bytes returns the text of the run in its original order.
func (r *Run) Bytes() []byte {
panic("unimplemented")
return []byte(r.String())
}

// TODO: methods for
Expand All @@ -174,25 +308,52 @@ func (r *Run) Bytes() []byte {

// Direction reports the direction of the run.
func (r *Run) Direction() Direction {
panic("unimplemented")
return r.direction
}

// Position of the Run within the text passed to SetBytes or SetString of the
// Pos returns the position of the Run within the text passed to SetBytes or SetString of the
// originating Paragraph value.
func (r *Run) Pos() (start, end int) {
panic("unimplemented")
return r.startpos, r.startpos + len(r.runes) - 1
}

// AppendReverse reverses the order of characters of in, appends them to out,
// and returns the result. Modifiers will still follow the runes they modify.
// Brackets are replaced with their counterparts.
func AppendReverse(out, in []byte) []byte {
panic("unimplemented")
ret := make([]byte, len(in)+len(out))
copy(ret, out)
inRunes := bytes.Runes(in)

for i, r := range inRunes {
prop, _ := LookupRune(r)
if prop.IsBracket() {
inRunes[i] = prop.reverseBracket(r)
}
}

for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 {
inRunes[i], inRunes[j] = inRunes[j], inRunes[i]
}
copy(ret[len(out):], string(inRunes))

return ret
}

// ReverseString reverses the order of characters in s and returns a new string.
// Modifiers will still follow the runes they modify. Brackets are replaced with
// their counterparts.
func ReverseString(s string) string {
panic("unimplemented")
input := []rune(s)
li := len(input)
ret := make([]rune, li)
for i, r := range input {
prop, _ := LookupRune(r)
if prop.IsBracket() {
ret[li-i-1] = prop.reverseBracket(r)
} else {
ret[li-i-1] = r
}
}
return string(ret)
}
Loading

0 comments on commit 967b8f6

Please sign in to comment.