Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions go/mysql/icuregex/internal/icudata/embed.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,17 @@ var Nfkc []byte
// case folding.
// This is used for property checks of characters about composition.
//
//go:embed nfkc_cf.nrm
var NfkcCf []byte
//Unused: go:embed nfkc_cf.nrm
//var NfkcCf []byte

// BrkChar is used for matching against character break
// characters in regular expressions.
//
//go:embed char.brk
var BrkChar []byte
//Unused: go:embed char.brk
//var BrkChar []byte

// BrkWord is used for matching against word break
// characters in regular expressions.
//
//go:embed word.brk
var BrkWord []byte
//Unused: go:embed word.brk
//var BrkWord []byte
125 changes: 125 additions & 0 deletions go/mysql/icuregex/internal/ubidi/loader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
© 2016 and later: Unicode, Inc. and others.
Copyright (C) 2004-2015, International Business Machines Corporation and others.
Copyright 2023 The Vitess Authors.

This file contains code derived from the Unicode Project's ICU library.
License & terms of use for the original code: http://www.unicode.org/copyright.html

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ubidi

import (
"errors"
"sync"

"vitess.io/vitess/go/mysql/icuregex/internal/icudata"
"vitess.io/vitess/go/mysql/icuregex/internal/udata"
"vitess.io/vitess/go/mysql/icuregex/internal/utrie"
)

// Package-level state for the UBiDi property data.
//
// ubidiOnce is the sync.Once that loadUBidi runs the load through; the fields
// of ubidi are populated by readData.
var (
	ubidiOnce sync.Once

	ubidi struct {
		// indexes is the index table; entry 0 holds the number of entries.
		indexes []int32
		// trie holds the per-code-point property values.
		trie *utrie.UTrie2
		// mirrors is the bidi mirroring table.
		mirrors []uint32
		// jg and jg2 hold the values for the first and second
		// Joining_Group code point ranges.
		jg  []uint8
		jg2 []uint8
	}
)

// indexes returns the UBiDi index table. It calls loadUBidi first so the
// data has been loaded before ubidi.indexes is read.
func indexes() []int32 {
loadUBidi()
return ubidi.indexes
}

// trie returns the UBiDi property trie. It calls loadUBidi first so the
// data has been loaded before ubidi.trie is read.
func trie() *utrie.UTrie2 {
loadUBidi()
return ubidi.trie
}

// mirrors returns the bidi mirroring table. It calls loadUBidi first so the
// data has been loaded before ubidi.mirrors is read.
func mirrors() []uint32 {
loadUBidi()
return ubidi.mirrors
}

// jg returns the array read for the first Joining_Group range
// (ixJgStart..ixJgLimit). It calls loadUBidi first so the data has been
// loaded before ubidi.jg is read.
func jg() []uint8 {
loadUBidi()
return ubidi.jg
}

// jg2 returns the array read for the second Joining_Group range
// (ixJgStart2..ixJgLimit2). It calls loadUBidi first so the data has been
// loaded before ubidi.jg2 is read.
func jg2() []uint8 {
loadUBidi()
return ubidi.jg2
}

// loadUBidi parses icudata.UBidi into the package-level ubidi struct. The
// work is run through ubidiOnce, so the accessors can call it on every use.
// It panics if readData reports an error.
func loadUBidi() {
	ubidiOnce.Do(func() {
		data := udata.NewBytes(icudata.UBidi)
		err := readData(data)
		if err != nil {
			panic(err)
		}
	})
}

// readData parses the serialized UBiDi properties from bytes and stores the
// result in the package-level ubidi struct.
//
// It consumes, in order: the data header, an int32 index table whose first
// entry is its own length, a serialized UTrie2 (any remaining bytes up to
// indexes[ixTrieSize] are skipped), the mirroring table, and the two
// Joining_Group arrays.
//
// An error is returned when ReadHeader or UTrie2FromBytes fails, when the
// index table has fewer than ixTop entries, or when the trie is longer than
// the index table declares.
func readData(bytes *udata.Bytes) error {
	err := bytes.ReadHeader(func(info *udata.DataInfo) bool {
		// The data format bytes spell "BiDi"; only format version 2 is accepted.
		return info.DataFormat[0] == 0x42 &&
			info.DataFormat[1] == 0x69 &&
			info.DataFormat[2] == 0x44 &&
			info.DataFormat[3] == 0x69 &&
			info.FormatVersion[0] == 2
	})
	if err != nil {
		return err
	}

	// The first word is the number of index entries, itself included.
	count := int32(bytes.Uint32())
	if count < ixTop {
		return errors.New("indexes[0] too small in ubidi.icu")
	}

	ubidi.indexes = make([]int32, count)
	ubidi.indexes[0] = count

	for i := int32(1); i < count; i++ {
		ubidi.indexes[i] = int32(bytes.Uint32())
	}

	ubidi.trie, err = utrie.UTrie2FromBytes(bytes)
	if err != nil {
		return err
	}

	expectedTrieLength := ubidi.indexes[ixTrieSize]
	trieLength := ubidi.trie.SerializedLength()

	if trieLength > expectedTrieLength {
		return errors.New("ubidi.icu: not enough bytes for the trie")
	}

	// Skip whatever lies between the end of the trie and its declared size.
	bytes.Skip(expectedTrieLength - trieLength)

	if n := ubidi.indexes[ixMirrorLength]; n > 0 {
		ubidi.mirrors = bytes.Uint32Slice(n)
	}
	if n := ubidi.indexes[ixJgLimit] - ubidi.indexes[ixJgStart]; n > 0 {
		ubidi.jg = bytes.Uint8Slice(n)
	}
	if n := ubidi.indexes[ixJgLimit2] - ubidi.indexes[ixJgStart2]; n > 0 {
		ubidi.jg2 = bytes.Uint8Slice(n)
	}

	return nil
}
121 changes: 25 additions & 96 deletions go/mysql/icuregex/internal/ubidi/ubidi.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,6 @@ limitations under the License.

package ubidi

import (
"errors"

"vitess.io/vitess/go/mysql/icuregex/internal/icudata"
"vitess.io/vitess/go/mysql/icuregex/internal/udata"
"vitess.io/vitess/go/mysql/icuregex/internal/utrie"
)

const (
ixIndexTop = iota
ixLength
Expand All @@ -44,72 +36,6 @@ const (
ixTop
)

// ubidi holds the parsed UBiDi property data; its fields are populated by
// readData.
var ubidi struct {
// indexes is the index table; entry 0 holds the number of entries.
indexes []int32
// trie holds the per-code-point property values.
trie *utrie.UTrie2
// mirrors is the bidi mirroring table.
mirrors []uint32
// jg holds the values for the first Joining_Group code point range.
jg []uint8
// jg2 holds the values for the second Joining_Group code point range.
jg2 []uint8
}

// readData parses the serialized UBiDi properties from bytes and stores the
// result in the package-level ubidi struct.
//
// It consumes, in order: the data header, an int32 index table whose first
// entry is its own length, a serialized UTrie2 (any remaining bytes up to
// indexes[ixTrieSize] are skipped), the mirroring table, and the two
// Joining_Group arrays.
//
// An error is returned when ReadHeader or UTrie2FromBytes fails, when the
// index table has fewer than ixTop entries, or when the trie is longer than
// the index table declares.
func readData(bytes *udata.Bytes) error {
	err := bytes.ReadHeader(func(info *udata.DataInfo) bool {
		// The data format bytes spell "BiDi"; only format version 2 is accepted.
		return info.DataFormat[0] == 0x42 &&
			info.DataFormat[1] == 0x69 &&
			info.DataFormat[2] == 0x44 &&
			info.DataFormat[3] == 0x69 &&
			info.FormatVersion[0] == 2
	})
	if err != nil {
		return err
	}

	// The first word is the number of index entries, itself included.
	count := int32(bytes.Uint32())
	if count < ixTop {
		return errors.New("indexes[0] too small in ubidi.icu")
	}

	ubidi.indexes = make([]int32, count)
	ubidi.indexes[0] = count

	for i := int32(1); i < count; i++ {
		ubidi.indexes[i] = int32(bytes.Uint32())
	}

	ubidi.trie, err = utrie.UTrie2FromBytes(bytes)
	if err != nil {
		return err
	}

	expectedTrieLength := ubidi.indexes[ixTrieSize]
	trieLength := ubidi.trie.SerializedLength()

	if trieLength > expectedTrieLength {
		return errors.New("ubidi.icu: not enough bytes for the trie")
	}

	// Skip whatever lies between the end of the trie and its declared size.
	bytes.Skip(expectedTrieLength - trieLength)

	if n := ubidi.indexes[ixMirrorLength]; n > 0 {
		ubidi.mirrors = bytes.Uint32Slice(n)
	}
	if n := ubidi.indexes[ixJgLimit] - ubidi.indexes[ixJgStart]; n > 0 {
		ubidi.jg = bytes.Uint8Slice(n)
	}
	if n := ubidi.indexes[ixJgLimit2] - ubidi.indexes[ixJgStart2]; n > 0 {
		ubidi.jg2 = bytes.Uint8Slice(n)
	}

	return nil
}

// init parses icudata.UBidi into the package-level ubidi struct during
// package initialization and panics if readData reports an error.
func init() {
	data := udata.NewBytes(icudata.UBidi)
	err := readData(data)
	if err != nil {
		panic(err)
	}
}

const (
/* UBIDI_CLASS_SHIFT=0, */ /* bidi class: 5 bits (4..0) */
jtShift = 5 /* joining type: 3 bits (7..5) */
Expand Down Expand Up @@ -362,22 +288,24 @@ type propertySet interface {

func AddPropertyStarts(sa propertySet) {
/* add the start code point of each same-value range of the trie */
ubidi.trie.Enum(nil, func(start, _ rune, _ uint32) bool {
trie().Enum(nil, func(start, _ rune, _ uint32) bool {
sa.AddRune(start)
return true
})

idxs := indexes()
mrs := mirrors()
/* add the code points from the bidi mirroring table */
length := ubidi.indexes[ixMirrorLength]
length := idxs[ixMirrorLength]
for i := int32(0); i < length; i++ {
c := mirrorCodePoint(rune(ubidi.mirrors[i]))
c := mirrorCodePoint(rune(mrs[i]))
sa.AddRuneRange(c, c+1)
}

/* add the code points from the Joining_Group array where the value changes */
start := ubidi.indexes[ixJgStart]
limit := ubidi.indexes[ixJgLimit]
jgArray := ubidi.jg[:]
start := idxs[ixJgStart]
limit := idxs[ixJgLimit]
jgArray := jg()
for {
prev := uint8(0)
for start < limit {
Expand All @@ -393,11 +321,11 @@ func AddPropertyStarts(sa propertySet) {
/* add the limit code point if the last value was not 0 (it is now start==limit) */
sa.AddRune(limit)
}
if limit == ubidi.indexes[ixJgLimit] {
if limit == idxs[ixJgLimit] {
/* switch to the second Joining_Group range */
start = ubidi.indexes[ixJgStart2]
limit = ubidi.indexes[ixJgLimit2]
jgArray = ubidi.jg2[:]
start = idxs[ixJgStart2]
limit = idxs[ixJgLimit2]
jgArray = jg2()
} else {
break
}
Expand All @@ -417,45 +345,46 @@ func mirrorCodePoint(m rune) rune {
}

// IsJoinControl reports whether the joinControlShift flag is set in the
// 16-bit trie value looked up for c.
func IsJoinControl(c rune) bool {
props := ubidi.trie.Get16(c)
props := trie().Get16(c)
return HasFlag(props, joinControlShift)
}

// JoinType returns the joining type of c, extracted from its 16-bit trie
// value by masking with jtMask and shifting right by jtShift.
func JoinType(c rune) JoiningType {
props := ubidi.trie.Get16(c)
props := trie().Get16(c)
return JoiningType((props & jtMask) >> jtShift)
}

// JoinGroup returns the joining group of c. It checks the first
// Joining_Group range [ixJgStart, ixJgLimit) and then the second range
// [ixJgStart2, ixJgLimit2), indexing the matching array by c-start.
// JgNoJoiningGroup is returned when c lies in neither range.
func JoinGroup(c rune) JoiningGroup {
start := ubidi.indexes[ixJgStart]
limit := ubidi.indexes[ixJgLimit]
idxs := indexes()
start := idxs[ixJgStart]
limit := idxs[ixJgLimit]
if start <= c && c < limit {
return JoiningGroup(ubidi.jg[c-start])
return JoiningGroup(jg()[c-start])
}
start = ubidi.indexes[ixJgStart2]
limit = ubidi.indexes[ixJgLimit2]
start = idxs[ixJgStart2]
limit = idxs[ixJgLimit2]
if start <= c && c < limit {
return JoiningGroup(ubidi.jg2[c-start])
return JoiningGroup(jg2()[c-start])
}
return JgNoJoiningGroup
}

// IsMirrored reports whether the isMirroredShift flag is set in the 16-bit
// trie value looked up for c.
func IsMirrored(c rune) bool {
props := ubidi.trie.Get16(c)
props := trie().Get16(c)
return HasFlag(props, isMirroredShift)
}

// IsBidiControl reports whether the bidiControlShift flag is set in the
// 16-bit trie value looked up for c.
func IsBidiControl(c rune) bool {
props := ubidi.trie.Get16(c)
props := trie().Get16(c)
return HasFlag(props, bidiControlShift)
}

// PairedBracketType returns the paired bracket type of c, extracted from its
// 16-bit trie value by masking with bptMask and shifting right by bptShift.
func PairedBracketType(c rune) UPairedBracketType {
props := ubidi.trie.Get16(c)
props := trie().Get16(c)
return UPairedBracketType((props & bptMask) >> bptShift)
}

// Class returns the bidi class of c, taken from its 16-bit trie value by
// masking with classMask.
func Class(c rune) CharDirection {
props := ubidi.trie.Get16(c)
props := trie().Get16(c)
return CharDirection(props & classMask)
}
6 changes: 3 additions & 3 deletions go/mysql/icuregex/internal/ucase/fold.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ func FoldRunes(str []rune) []rune {
- U+0130 has no simple case folding (simple-case-folds to itself).
*/
func Fold(c rune) rune {
props := ucase.trie.Get16(c)
props := trie().Get16(c)
if !hasException(props) {
if isUpperOrTitle(props) {
c += getDelta(props)
Expand Down Expand Up @@ -130,7 +130,7 @@ func Fold(c rune) rune {

func FullFolding(c rune) (rune, []uint16) {
result := c
props := ucase.trie.Get16(c)
props := trie().Get16(c)

if !hasException(props) {
if isUpperOrTitle(props) {
Expand Down Expand Up @@ -222,7 +222,7 @@ func getDelta(props uint16) rune {
}

// getExceptions returns the tail of the exceptions table that starts at
// index props>>4.
func getExceptions(props uint16) []uint16 {
return ucase.exceptions[props>>4:]
return exceptions()[props>>4:]
}

func hasSlot(flags uint16, idx int32) bool {
Expand Down
Loading