Skip to content

Commit

Permalink
expression: fix charset conversion warning and error behavior (#51191) (
Browse files Browse the repository at this point in the history
#53227)

close #50295
  • Loading branch information
ti-chi-bot authored May 15, 2024
1 parent 2a04b9c commit d774c73
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 18 deletions.
4 changes: 2 additions & 2 deletions pkg/expression/builtin.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ func newBaseBuiltinFuncWithTp(ctx BuildContext, funcName string, args []Expressi
args[i] = WrapWithCastAsDecimal(ctx, args[i])
case types.ETString:
args[i] = WrapWithCastAsString(ctx, args[i])
args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName)
args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName, false)
case types.ETDatetime:
args[i] = WrapWithCastAsTime(ctx, args[i], types.NewFieldType(mysql.TypeDatetime))
case types.ETTimestamp:
Expand Down Expand Up @@ -253,7 +253,7 @@ func newBaseBuiltinFuncWithFieldTypes(ctx BuildContext, funcName string, args []
args[i] = WrapWithCastAsReal(ctx, args[i])
case types.ETString:
args[i] = WrapWithCastAsString(ctx, args[i])
args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName)
args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName, false)
case types.ETJson:
args[i] = WrapWithCastAsJSON(ctx, args[i])
// https://github.com/pingcap/tidb/issues/44196
Expand Down
2 changes: 1 addition & 1 deletion pkg/expression/builtin_cast.go
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ func (c *castAsStringFunctionClass) getFunction(ctx BuildContext, args []Express
case types.ETString:
// When cast from binary to some other charsets, we should check if the binary is valid or not.
// so we build a from_binary function to do this check.
bf.args[0] = HandleBinaryLiteral(ctx, args[0], &ExprCollation{Charset: c.tp.GetCharset(), Collation: c.tp.GetCollate()}, c.funcName)
bf.args[0] = HandleBinaryLiteral(ctx, args[0], &ExprCollation{Charset: c.tp.GetCharset(), Collation: c.tp.GetCollate()}, c.funcName, true)
sig = &builtinCastStringAsStringSig{bf}
sig.setPbCode(tipb.ScalarFuncSig_CastStringAsString)
default:
Expand Down
11 changes: 7 additions & 4 deletions pkg/expression/builtin_cast_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1393,7 +1393,7 @@ func TestWrapWithCastAsString(t *testing.T) {

cases := []struct {
expr Expression
err bool
warn bool
ret string
}{
{
Expand Down Expand Up @@ -1432,11 +1432,14 @@ func TestWrapWithCastAsString(t *testing.T) {
"-127",
},
}
lastWarningLen := 0
for _, c := range cases {
expr := BuildCastFunction(ctx, c.expr, types.NewFieldType(mysql.TypeVarString))
res, _, err := expr.EvalString(ctx, chunk.Row{})
if c.err {
require.Error(t, err)
res, _, _ := expr.EvalString(ctx, chunk.Row{})
if c.warn {
warns := ctx.GetSessionVars().StmtCtx.GetWarnings()
require.Greater(t, len(warns), lastWarningLen)
lastWarningLen = len(warns)
} else {
require.Equal(t, c.ret, res)
}
Expand Down
46 changes: 38 additions & 8 deletions pkg/expression/builtin_convert_charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ func (b *builtinInternalToBinarySig) vecEvalString(ctx EvalContext, input *chunk
type tidbFromBinaryFunctionClass struct {
baseFunctionClass

tp *types.FieldType
tp *types.FieldType
cannotConvertStringAsWarning bool
}

func (c *tidbFromBinaryFunctionClass) getFunction(ctx BuildContext, args []Expression) (builtinFunc, error) {
Expand All @@ -150,7 +151,7 @@ func (c *tidbFromBinaryFunctionClass) getFunction(ctx BuildContext, args []Expre
return nil, err
}
bf.tp = c.tp
sig = &builtinInternalFromBinarySig{bf}
sig = &builtinInternalFromBinarySig{bf, c.cannotConvertStringAsWarning}
sig.setPbCode(tipb.ScalarFuncSig_FromBinary)
default:
return nil, fmt.Errorf("unexpected argTp: %d", argTp)
Expand All @@ -160,6 +161,9 @@ func (c *tidbFromBinaryFunctionClass) getFunction(ctx BuildContext, args []Expre

type builtinInternalFromBinarySig struct {
baseBuiltinFunc

// TODO: also pass this field when pushing down this function. The behavior of TiDB and TiKV is different on this function now.
cannotConvertStringAsWarning bool
}

func (b *builtinInternalFromBinarySig) Clone() builtinFunc {
Expand All @@ -179,8 +183,20 @@ func (b *builtinInternalFromBinarySig) evalString(ctx EvalContext, row chunk.Row
if err != nil {
strHex := formatInvalidChars(valBytes)
err = errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset())

if b.cannotConvertStringAsWarning {
tc := typeCtx(ctx)
tc.AppendWarning(err)
if sqlMode(ctx).HasStrictMode() {
return "", true, nil
}

return string(ret), false, nil
}

return "", false, err
}
return string(ret), false, err
return string(ret), false, nil
}

func (b *builtinInternalFromBinarySig) vectorized() bool {
Expand Down Expand Up @@ -209,7 +225,21 @@ func (b *builtinInternalFromBinarySig) vecEvalString(ctx EvalContext, input *chu
val, err := enc.Transform(encodedBuf, str, charset.OpDecode)
if err != nil {
strHex := formatInvalidChars(str)
return errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset())
err = errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset())

if b.cannotConvertStringAsWarning {
tc := typeCtx(ctx)
tc.AppendWarning(err)
if sqlMode(ctx).HasStrictMode() {
result.AppendNull()
continue
}

result.AppendBytes(str)
continue
}

return err
}
result.AppendBytes(val)
}
Expand All @@ -232,8 +262,8 @@ func BuildToBinaryFunction(ctx BuildContext, expr Expression) (res Expression) {
}

// BuildFromBinaryFunction builds from_binary function.
func BuildFromBinaryFunction(ctx BuildContext, expr Expression, tp *types.FieldType) (res Expression) {
fc := &tidbFromBinaryFunctionClass{baseFunctionClass{InternalFuncFromBinary, 1, 1}, tp}
func BuildFromBinaryFunction(ctx BuildContext, expr Expression, tp *types.FieldType, cannotConvertStringAsWarning bool) (res Expression) {
fc := &tidbFromBinaryFunctionClass{baseFunctionClass{InternalFuncFromBinary, 1, 1}, tp, cannotConvertStringAsWarning}
f, err := fc.getFunction(ctx, []Expression{expr})
if err != nil {
return expr
Expand Down Expand Up @@ -305,7 +335,7 @@ func init() {
}

// HandleBinaryLiteral wraps `expr` with to_binary or from_binary sig.
func HandleBinaryLiteral(ctx BuildContext, expr Expression, ec *ExprCollation, funcName string) Expression {
func HandleBinaryLiteral(ctx BuildContext, expr Expression, ec *ExprCollation, funcName string, explicitCast bool) Expression {
argChs, dstChs := expr.GetType().GetCharset(), ec.Charset
switch convertFuncsMap[funcName] {
case funcPropNone:
Expand All @@ -326,7 +356,7 @@ func HandleBinaryLiteral(ctx BuildContext, expr Expression, ec *ExprCollation, f
ft := expr.GetType().Clone()
ft.SetCharset(ec.Charset)
ft.SetCollate(ec.Collation)
return BuildFromBinaryFunction(ctx, expr, ft)
return BuildFromBinaryFunction(ctx, expr, ft, explicitCast)
}
}
return expr
Expand Down
3 changes: 2 additions & 1 deletion pkg/expression/distsql_builtin.go
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,8 @@ func getSignatureByPB(ctx BuildContext, sigCode tipb.ScalarFuncSig, tp *tipb.Fie
case tipb.ScalarFuncSig_ToBinary:
f = &builtinInternalToBinarySig{base}
case tipb.ScalarFuncSig_FromBinary:
f = &builtinInternalFromBinarySig{base}
// TODO: set the `cannotConvertStringAsWarning` accordingly
f = &builtinInternalFromBinarySig{base, false}

default:
e = ErrFunctionNotExists.GenWithStackByArgs("FUNCTION", sigCode)
Expand Down
2 changes: 1 addition & 1 deletion pkg/expression/scalar_function.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ func newFunctionImpl(ctx BuildContext, fold int, funcName string, retType *types
case ast.GetVar:
return BuildGetVarFunction(ctx, args[0], retType)
case InternalFuncFromBinary:
return BuildFromBinaryFunction(ctx, args[0], retType), nil
return BuildFromBinaryFunction(ctx, args[0], retType, false), nil
case InternalFuncToBinary:
return BuildToBinaryFunction(ctx, args[0]), nil
case ast.Sysdate:
Expand Down
12 changes: 12 additions & 0 deletions tests/integrationtest/r/expression/charset_and_collation.result
Original file line number Diff line number Diff line change
Expand Up @@ -1991,6 +1991,18 @@ LOCATE('bar' collate utf8mb4_0900_ai_ci, 'FOOBAR' collate utf8mb4_0900_ai_ci)
select 'FOOBAR' collate utf8mb4_0900_ai_ci REGEXP 'foo.*' collate utf8mb4_0900_ai_ci;
'FOOBAR' collate utf8mb4_0900_ai_ci REGEXP 'foo.*' collate utf8mb4_0900_ai_ci
1
select cast(compress('b') as char);
cast(compress('b') as char)
NULL
Level Code Message
Warning 3854 Cannot convert string 'x\x9C...' from binary to utf8mb4
set sql_mode='';
select cast(compress('b') as char);
cast(compress('b') as char)
x
Level Code Message
Warning 3854 Cannot convert string 'x\x9C...' from binary to utf8mb4
set sql_mode=DEFAULT;
set names utf8mb4 collate utf8mb4_0900_ai_ci;
select reverse(cast('[]' as json)) between 'W' and 'm';
reverse(cast('[]' as json)) between 'W' and 'm'
Expand Down
10 changes: 9 additions & 1 deletion tests/integrationtest/t/expression/charset_and_collation.test
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,14 @@ select min(id) from t group by str order by str;
select LOCATE('bar' collate utf8mb4_0900_ai_ci, 'FOOBAR' collate utf8mb4_0900_ai_ci);
select 'FOOBAR' collate utf8mb4_0900_ai_ci REGEXP 'foo.*' collate utf8mb4_0900_ai_ci;

# TestConvertCharsetFromBinary
--enable_warnings
select cast(compress('b') as char);
set sql_mode='';
select cast(compress('b') as char);
--disable_warnings
set sql_mode=DEFAULT;

# TestCollationWithJSONArg
set names utf8mb4 collate utf8mb4_0900_ai_ci;
select reverse(cast('[]' as json)) between 'W' and 'm';
Expand All @@ -838,4 +846,4 @@ select insert('[]', 0, 100, cast('[]' as json)) between 'W' and 'm';
select substr(cast('[]' as json), 1) between 'W' and 'm';
select repeat(cast('[]' as json), 10) between 'W' and 'm';
select export_set(3,cast('[]' as json),'2','-',8) between 'W' and 'm';
set names default;
set names default;

0 comments on commit d774c73

Please sign in to comment.