From a9d82a55820622ba0d3c1dde238b582387b607cb Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Mon, 16 Aug 2021 14:27:59 +0800 Subject: [PATCH] charset: clean up some code about charset (#1306) --- charset/charset.go | 99 ++++++++++++++--------------------------- charset/charset_test.go | 15 ++----- digester.go | 2 +- misc.go | 4 +- parser.go | 8 ++-- parser.y | 8 ++-- 6 files changed, 49 insertions(+), 87 deletions(-) diff --git a/charset/charset.go b/charset/charset.go index 69a669235..68bf9e53c 100644 --- a/charset/charset.go +++ b/charset/charset.go @@ -45,19 +45,17 @@ type Collation struct { IsDefault bool } -var charsets = make(map[string]*Charset) var collationsIDMap = make(map[int]*Collation) var collationsNameMap = make(map[string]*Collation) -var descs = make([]*Desc, 0, len(charsetInfos)) var supportedCollations = make([]*Collation, 0, len(supportedCollationNames)) // All the supported charsets should be in the following table. -var charsetInfos = []*Charset{ - {CharsetUTF8, CollationUTF8, make(map[string]*Collation), "UTF-8 Unicode", 3}, - {CharsetUTF8MB4, CollationUTF8MB4, make(map[string]*Collation), "UTF-8 Unicode", 4}, - {CharsetASCII, CollationASCII, make(map[string]*Collation), "US ASCII", 1}, - {CharsetLatin1, CollationLatin1, make(map[string]*Collation), "Latin1", 1}, - {CharsetBin, CollationBin, make(map[string]*Collation), "binary", 1}, +var charsetInfos = map[string]*Charset{ + CharsetUTF8: {CharsetUTF8, CollationUTF8, make(map[string]*Collation), "UTF-8 Unicode", 3}, + CharsetUTF8MB4: {CharsetUTF8MB4, CollationUTF8MB4, make(map[string]*Collation), "UTF-8 Unicode", 4}, + CharsetASCII: {CharsetASCII, CollationASCII, make(map[string]*Collation), "US ASCII", 1}, + CharsetLatin1: {CharsetLatin1, CollationLatin1, make(map[string]*Collation), "Latin1", 1}, + CharsetBin: {CharsetBin, CollationBin, make(map[string]*Collation), "binary", 1}, } // All the names supported collations should be in the following table. 
@@ -69,17 +67,14 @@ var supportedCollationNames = map[string]struct{}{ CollationBin: {}, } -// Desc is a charset description. -type Desc struct { - Name string - Desc string - DefaultCollation string - Maxlen int -} - // GetSupportedCharsets gets descriptions for all charsets supported so far. -func GetSupportedCharsets() []*Desc { - return descs +func GetSupportedCharsets() []*Charset { + charsets := make([]*Charset, 0, len(charsetInfos)) + for _, ch := range charsetInfos { + charsets = append(charsets, ch) + } + + return charsets } // GetSupportedCollations gets information for all collations supported so far. @@ -94,9 +89,8 @@ func ValidCharsetAndCollation(cs string, co string) bool { if cs == "" { cs = "utf8" } - cs = strings.ToLower(cs) - c, ok := charsets[cs] - if !ok { + chs, err := GetCharsetInfo(cs) + if err != nil { return false } @@ -104,21 +98,17 @@ func ValidCharsetAndCollation(cs string, co string) bool { return true } co = strings.ToLower(co) - _, ok = c.Collations[co] + _, ok := chs.Collations[co] return ok } // GetDefaultCollation returns the default collation for charset. func GetDefaultCollation(charset string) (string, error) { - charset = strings.ToLower(charset) - if charset == CharsetBin { - return CollationBin, nil - } - c, ok := charsets[charset] - if !ok { - return "", errors.Errorf("Unknown charset %s", charset) + cs, err := GetCharsetInfo(charset) + if err != nil { + return "", err } - return c.DefaultCollation, nil + return cs.DefaultCollation, nil } // GetDefaultCharsetAndCollate returns the default charset and collation. @@ -127,30 +117,12 @@ func GetDefaultCharsetAndCollate() (string, string) { } // GetCharsetInfo returns charset and collation for cs as name. 
-func GetCharsetInfo(cs string) (string, string, error) { - c, ok := charsets[strings.ToLower(cs)] - if !ok { - return "", "", errors.Errorf("Unknown charset %s", cs) +func GetCharsetInfo(cs string) (*Charset, error) { + if c, ok := charsetInfos[strings.ToLower(cs)]; ok { + return c, nil } - return c.Name, c.DefaultCollation, nil -} -// GetCharsetDesc gets charset descriptions in the local charsets. -func GetCharsetDesc(cs string) (*Desc, error) { - switch strings.ToLower(cs) { - case CharsetUTF8: - return descs[0], nil - case CharsetUTF8MB4: - return descs[1], nil - case CharsetASCII: - return descs[2], nil - case CharsetLatin1: - return descs[3], nil - case CharsetBin: - return descs[4], nil - default: - return nil, errors.Errorf("Unknown charset %s", cs) - } + return nil, errors.Errorf("Unknown charset %s", cs) } // GetCharsetInfoByID returns charset and collation for id as cs_number. @@ -209,6 +181,8 @@ const ( // CollationLatin1 is the default collation for CharsetLatin1. CollationLatin1 = "latin1_bin" + CollationGBKBin = "gbk_bin" + CharsetARMSCII8 = "armscii8" CharsetBig5 = "big5" CharsetBinary = "binary" @@ -475,14 +449,13 @@ var collations = []*Collation{ // AddCharset adds a new charset. // Use only when adding a custom charset to the parser. func AddCharset(c *Charset) { - charsets[c.Name] = c - desc := &Desc{ - Name: c.Name, - DefaultCollation: c.DefaultCollation, - Desc: c.Desc, - Maxlen: c.Maxlen, - } - descs = append(descs, desc) + charsetInfos[c.Name] = c +} + +// RemoveCharset removes a charset. +// Use only when removing a custom charset from the parser. +func RemoveCharset(c string) { + delete(charsetInfos, c) } // AddCollation adds a new collation. @@ -495,17 +468,13 @@ func AddCollation(c *Collation) { supportedCollations = append(supportedCollations, c) } - if charset, ok := charsets[c.CharsetName]; ok { + if charset, ok := charsetInfos[c.CharsetName]; ok { charset.Collations[c.Name] = c } } // init method always puts to the end of file. 
func init() { - for _, c := range charsetInfos { - AddCharset(c) - } - for _, c := range collations { AddCollation(c) } diff --git a/charset/charset_test.go b/charset/charset_test.go index 3c95a3893..fe562d5eb 100644 --- a/charset/charset_test.go +++ b/charset/charset_test.go @@ -77,13 +77,6 @@ func (s *testCharsetSuite) TestValidCustomCharset(c *C) { } } -func (s *testCharsetSuite) TestGetSupportedCharsets(c *C) { - charset := &Charset{"test", "test_bin", nil, "Test", 5} - charsetInfos = append(charsetInfos, charset) - descs := GetSupportedCharsets() - c.Assert(len(descs), Equals, len(charsetInfos)-1) -} - func testGetDefaultCollation(c *C, charset string, expectCollation string, succ bool) { b, err := GetDefaultCollation(charset) if !succ { @@ -116,13 +109,13 @@ func (s *testCharsetSuite) TestGetDefaultCollation(c *C) { charset_num := 0 for _, collate := range collations { if collate.IsDefault { - if desc, ok := charsets[collate.CharsetName]; ok { + if desc, ok := charsetInfos[collate.CharsetName]; ok { c.Assert(collate.Name, Equals, desc.DefaultCollation) charset_num += 1 } } } - c.Assert(charset_num, Equals, len(charsets)) + c.Assert(charset_num, Equals, len(charsetInfos)) } func (s *testCharsetSuite) TestSupportedCollations(c *C) { @@ -159,7 +152,7 @@ func (s *testCharsetSuite) TestGetCharsetDesc(c *C) { {"", "utf8_bin", false}, } for _, tt := range tests { - desc, err := GetCharsetDesc(tt.cs) + desc, err := GetCharsetInfo(tt.cs) if !tt.succ { c.Assert(err, NotNil) } else { @@ -187,6 +180,6 @@ func BenchmarkGetCharsetDesc(b *testing.B) { cs := charsets[index] for i := 0; i < b.N; i++ { - GetCharsetDesc(cs) + GetCharsetInfo(cs) } } diff --git a/digester.go b/digester.go index d682e539b..4b43cefcd 100644 --- a/digester.go +++ b/digester.go @@ -186,7 +186,7 @@ func (d *sqlDigester) normalize(sql string) { if currTok.tok == identifier { if strings.HasPrefix(currTok.lit, "_") { - _, _, err := charset.GetCharsetInfo(currTok.lit[1:]) + _, err := 
charset.GetCharsetInfo(currTok.lit[1:]) if err == nil { currTok.tok = underscoreCS goto APPEND diff --git a/misc.go b/misc.go index be3b1e6ad..6ad117c2b 100644 --- a/misc.go +++ b/misc.go @@ -990,10 +990,10 @@ func handleIdent(lval *yySymType) int { if !strings.HasPrefix(s, "_") { return identifier } - cs, _, err := charset.GetCharsetInfo(s[1:]) + cs, err := charset.GetCharsetInfo(s[1:]) if err != nil { return identifier } - lval.ident = cs + lval.ident = cs.Name return underscoreCS } diff --git a/parser.go b/parser.go index f680d48ce..d1aeb9254 100644 --- a/parser.go +++ b/parser.go @@ -17521,14 +17521,14 @@ yynewstate: case 1688: { // Validate input charset name to keep the same behavior as parser of MySQL. - name, _, err := charset.GetCharsetInfo(yyS[yypt-0].ident) + cs, err := charset.GetCharsetInfo(yyS[yypt-0].ident) if err != nil { yylex.AppendError(ErrUnknownCharacterSet.GenWithStackByArgs(yyS[yypt-0].ident)) return 1 } // Use charset name returned from charset.GetCharsetInfo(), // to keep lower case of input for generated column restore. - parser.yyVAL.ident = name + parser.yyVAL.ident = cs.Name } case 1689: { @@ -19232,14 +19232,14 @@ yynewstate: } case 2118: { - name, _, err := charset.GetCharsetInfo("ucs2") + cs, err := charset.GetCharsetInfo("ucs2") if err != nil { yylex.AppendError(ErrUnknownCharacterSet.GenWithStackByArgs("ucs2")) return 1 } parser.yyVAL.item = &ast.OptBinary{ IsBinary: false, - Charset: name, + Charset: cs.Name, } } case 2119: diff --git a/parser.y b/parser.y index 5070ff9a9..d61fa71ae 100644 --- a/parser.y +++ b/parser.y @@ -9611,14 +9611,14 @@ CharsetName: StringName { // Validate input charset name to keep the same behavior as parser of MySQL. 
- name, _, err := charset.GetCharsetInfo($1) + cs, err := charset.GetCharsetInfo($1) if err != nil { yylex.AppendError(ErrUnknownCharacterSet.GenWithStackByArgs($1)) return 1 } // Use charset name returned from charset.GetCharsetInfo(), // to keep lower case of input for generated column restore. - $$ = name + $$ = cs.Name } | binaryType { @@ -11622,14 +11622,14 @@ OptCharsetWithOptBinary: } | "UNICODE" { - name, _, err := charset.GetCharsetInfo("ucs2") + cs, err := charset.GetCharsetInfo("ucs2") if err != nil { yylex.AppendError(ErrUnknownCharacterSet.GenWithStackByArgs("ucs2")) return 1 } $$ = &ast.OptBinary{ IsBinary: false, - Charset: name, + Charset: cs.Name, } } | "BYTE"