Skip to content

Commit

Permalink
charset: clean up some code about charset (#1306)
Browse files Browse the repository at this point in the history
  • Loading branch information
xiongjiwei authored Aug 16, 2021
1 parent b4bbfae commit a9d82a5
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 87 deletions.
99 changes: 34 additions & 65 deletions charset/charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,17 @@ type Collation struct {
IsDefault bool
}

var charsets = make(map[string]*Charset)
var collationsIDMap = make(map[int]*Collation)
var collationsNameMap = make(map[string]*Collation)
var descs = make([]*Desc, 0, len(charsetInfos))
var supportedCollations = make([]*Collation, 0, len(supportedCollationNames))

// All the supported charsets should be in the following table.
var charsetInfos = []*Charset{
{CharsetUTF8, CollationUTF8, make(map[string]*Collation), "UTF-8 Unicode", 3},
{CharsetUTF8MB4, CollationUTF8MB4, make(map[string]*Collation), "UTF-8 Unicode", 4},
{CharsetASCII, CollationASCII, make(map[string]*Collation), "US ASCII", 1},
{CharsetLatin1, CollationLatin1, make(map[string]*Collation), "Latin1", 1},
{CharsetBin, CollationBin, make(map[string]*Collation), "binary", 1},
var charsetInfos = map[string]*Charset{
CharsetUTF8: {CharsetUTF8, CollationUTF8, make(map[string]*Collation), "UTF-8 Unicode", 3},
CharsetUTF8MB4: {CharsetUTF8MB4, CollationUTF8MB4, make(map[string]*Collation), "UTF-8 Unicode", 4},
CharsetASCII: {CharsetASCII, CollationASCII, make(map[string]*Collation), "US ASCII", 1},
CharsetLatin1: {CharsetLatin1, CollationLatin1, make(map[string]*Collation), "Latin1", 1},
CharsetBin: {CharsetBin, CollationBin, make(map[string]*Collation), "binary", 1},
}

// All the names of the supported collations should be in the following table.
Expand All @@ -69,17 +67,14 @@ var supportedCollationNames = map[string]struct{}{
CollationBin: {},
}

// Desc is a charset description.
type Desc struct {
Name string
Desc string
DefaultCollation string
Maxlen int
}

// GetSupportedCharsets gets descriptions for all charsets supported so far.
func GetSupportedCharsets() []*Desc {
return descs
func GetSupportedCharsets() []*Charset {
// Build a new slice on every call; the *Charset elements are the same
// pointers that are stored in charsetInfos, not copies.
charsets := make([]*Charset, 0, len(charsetInfos))
// NOTE(review): charsetInfos is a map, and Go leaves map iteration order
// unspecified, so the order of the returned slice can differ between calls.
// Callers that need a stable order must sort the result themselves.
for _, ch := range charsetInfos {
charsets = append(charsets, ch)
}

return charsets
}

// GetSupportedCollations gets information for all collations supported so far.
Expand All @@ -94,31 +89,26 @@ func ValidCharsetAndCollation(cs string, co string) bool {
if cs == "" {
cs = "utf8"
}
cs = strings.ToLower(cs)
c, ok := charsets[cs]
if !ok {
chs, err := GetCharsetInfo(cs)
if err != nil {
return false
}

if co == "" {
return true
}
co = strings.ToLower(co)
_, ok = c.Collations[co]
_, ok := chs.Collations[co]
return ok
}

// GetDefaultCollation returns the default collation for charset.
func GetDefaultCollation(charset string) (string, error) {
charset = strings.ToLower(charset)
if charset == CharsetBin {
return CollationBin, nil
}
c, ok := charsets[charset]
if !ok {
return "", errors.Errorf("Unknown charset %s", charset)
cs, err := GetCharsetInfo(charset)
if err != nil {
return "", err
}
return c.DefaultCollation, nil
return cs.DefaultCollation, nil
}

// GetDefaultCharsetAndCollate returns the default charset and collation.
Expand All @@ -127,30 +117,12 @@ func GetDefaultCharsetAndCollate() (string, string) {
}

// GetCharsetInfo returns the charset information registered under the lower-cased name cs, or an error if the charset is unknown.
func GetCharsetInfo(cs string) (string, string, error) {
c, ok := charsets[strings.ToLower(cs)]
if !ok {
return "", "", errors.Errorf("Unknown charset %s", cs)
func GetCharsetInfo(cs string) (*Charset, error) {
if c, ok := charsetInfos[strings.ToLower(cs)]; ok {
return c, nil
}
return c.Name, c.DefaultCollation, nil
}

// GetCharsetDesc gets charset descriptions in the local charsets.
func GetCharsetDesc(cs string) (*Desc, error) {
switch strings.ToLower(cs) {
case CharsetUTF8:
return descs[0], nil
case CharsetUTF8MB4:
return descs[1], nil
case CharsetASCII:
return descs[2], nil
case CharsetLatin1:
return descs[3], nil
case CharsetBin:
return descs[4], nil
default:
return nil, errors.Errorf("Unknown charset %s", cs)
}
return nil, errors.Errorf("Unknown charset %s", cs)
}

// GetCharsetInfoByID returns charset and collation for id as cs_number.
Expand Down Expand Up @@ -209,6 +181,8 @@ const (
// CollationLatin1 is the default collation for CharsetLatin1.
CollationLatin1 = "latin1_bin"

CollationGBKBin = "gbk_bin"

CharsetARMSCII8 = "armscii8"
CharsetBig5 = "big5"
CharsetBinary = "binary"
Expand Down Expand Up @@ -475,14 +449,13 @@ var collations = []*Collation{
// AddCharset adds a new charset.
// Use only when adding a custom charset to the parser.
func AddCharset(c *Charset) {
charsets[c.Name] = c
desc := &Desc{
Name: c.Name,
DefaultCollation: c.DefaultCollation,
Desc: c.Desc,
Maxlen: c.Maxlen,
}
descs = append(descs, desc)
charsetInfos[c.Name] = c
}

// RemoveCharset removes the charset stored under the name c from charsetInfos.
// The name is used as the map key exactly as given (it is not lower-cased),
// and removing a name that is not present is a no-op.
// Use only when removing a custom charset from the parser.
func RemoveCharset(c string) {
delete(charsetInfos, c)
}

// AddCollation adds a new collation.
Expand All @@ -495,17 +468,13 @@ func AddCollation(c *Collation) {
supportedCollations = append(supportedCollations, c)
}

if charset, ok := charsets[c.CharsetName]; ok {
if charset, ok := charsetInfos[c.CharsetName]; ok {
charset.Collations[c.Name] = c
}
}

// The init function should always be kept at the end of the file.
func init() {
for _, c := range charsetInfos {
AddCharset(c)
}

for _, c := range collations {
AddCollation(c)
}
Expand Down
15 changes: 4 additions & 11 deletions charset/charset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,6 @@ func (s *testCharsetSuite) TestValidCustomCharset(c *C) {
}
}

func (s *testCharsetSuite) TestGetSupportedCharsets(c *C) {
charset := &Charset{"test", "test_bin", nil, "Test", 5}
charsetInfos = append(charsetInfos, charset)
descs := GetSupportedCharsets()
c.Assert(len(descs), Equals, len(charsetInfos)-1)
}

func testGetDefaultCollation(c *C, charset string, expectCollation string, succ bool) {
b, err := GetDefaultCollation(charset)
if !succ {
Expand Down Expand Up @@ -116,13 +109,13 @@ func (s *testCharsetSuite) TestGetDefaultCollation(c *C) {
charset_num := 0
for _, collate := range collations {
if collate.IsDefault {
if desc, ok := charsets[collate.CharsetName]; ok {
if desc, ok := charsetInfos[collate.CharsetName]; ok {
c.Assert(collate.Name, Equals, desc.DefaultCollation)
charset_num += 1
}
}
}
c.Assert(charset_num, Equals, len(charsets))
c.Assert(charset_num, Equals, len(charsetInfos))
}

func (s *testCharsetSuite) TestSupportedCollations(c *C) {
Expand Down Expand Up @@ -159,7 +152,7 @@ func (s *testCharsetSuite) TestGetCharsetDesc(c *C) {
{"", "utf8_bin", false},
}
for _, tt := range tests {
desc, err := GetCharsetDesc(tt.cs)
desc, err := GetCharsetInfo(tt.cs)
if !tt.succ {
c.Assert(err, NotNil)
} else {
Expand Down Expand Up @@ -187,6 +180,6 @@ func BenchmarkGetCharsetDesc(b *testing.B) {
cs := charsets[index]

for i := 0; i < b.N; i++ {
GetCharsetDesc(cs)
GetCharsetInfo(cs)
}
}
2 changes: 1 addition & 1 deletion digester.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ func (d *sqlDigester) normalize(sql string) {

if currTok.tok == identifier {
if strings.HasPrefix(currTok.lit, "_") {
_, _, err := charset.GetCharsetInfo(currTok.lit[1:])
_, err := charset.GetCharsetInfo(currTok.lit[1:])
if err == nil {
currTok.tok = underscoreCS
goto APPEND
Expand Down
4 changes: 2 additions & 2 deletions misc.go
Original file line number Diff line number Diff line change
Expand Up @@ -990,10 +990,10 @@ func handleIdent(lval *yySymType) int {
if !strings.HasPrefix(s, "_") {
return identifier
}
cs, _, err := charset.GetCharsetInfo(s[1:])
cs, err := charset.GetCharsetInfo(s[1:])
if err != nil {
return identifier
}
lval.ident = cs
lval.ident = cs.Name
return underscoreCS
}
8 changes: 4 additions & 4 deletions parser.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -9611,14 +9611,14 @@ CharsetName:
StringName
{
// Validate input charset name to keep the same behavior as parser of MySQL.
name, _, err := charset.GetCharsetInfo($1)
cs, err := charset.GetCharsetInfo($1)
if err != nil {
yylex.AppendError(ErrUnknownCharacterSet.GenWithStackByArgs($1))
return 1
}
// Use charset name returned from charset.GetCharsetInfo(),
// to keep lower case of input for generated column restore.
$$ = name
$$ = cs.Name
}
| binaryType
{
Expand Down Expand Up @@ -11622,14 +11622,14 @@ OptCharsetWithOptBinary:
}
| "UNICODE"
{
name, _, err := charset.GetCharsetInfo("ucs2")
cs, err := charset.GetCharsetInfo("ucs2")
if err != nil {
yylex.AppendError(ErrUnknownCharacterSet.GenWithStackByArgs("ucs2"))
return 1
}
$$ = &ast.OptBinary{
IsBinary: false,
Charset: name,
Charset: cs.Name,
}
}
| "BYTE"
Expand Down

0 comments on commit a9d82a5

Please sign in to comment.