Skip to content

expression: fix charset conversion warning and error behavior (#51191) #53227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pkg/expression/builtin.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ func newBaseBuiltinFuncWithTp(ctx BuildContext, funcName string, args []Expressi
args[i] = WrapWithCastAsDecimal(ctx, args[i])
case types.ETString:
args[i] = WrapWithCastAsString(ctx, args[i])
args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName)
args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName, false)
case types.ETDatetime:
args[i] = WrapWithCastAsTime(ctx, args[i], types.NewFieldType(mysql.TypeDatetime))
case types.ETTimestamp:
Expand Down Expand Up @@ -253,7 +253,7 @@ func newBaseBuiltinFuncWithFieldTypes(ctx BuildContext, funcName string, args []
args[i] = WrapWithCastAsReal(ctx, args[i])
case types.ETString:
args[i] = WrapWithCastAsString(ctx, args[i])
args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName)
args[i] = HandleBinaryLiteral(ctx, args[i], ec, funcName, false)
case types.ETJson:
args[i] = WrapWithCastAsJSON(ctx, args[i])
// https://github.com/pingcap/tidb/issues/44196
Expand Down
2 changes: 1 addition & 1 deletion pkg/expression/builtin_cast.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ func (c *castAsStringFunctionClass) getFunction(ctx BuildContext, args []Express
case types.ETString:
// When cast from binary to some other charsets, we should check if the binary is valid or not.
// so we build a from_binary function to do this check.
bf.args[0] = HandleBinaryLiteral(ctx, args[0], &ExprCollation{Charset: c.tp.GetCharset(), Collation: c.tp.GetCollate()}, c.funcName)
bf.args[0] = HandleBinaryLiteral(ctx, args[0], &ExprCollation{Charset: c.tp.GetCharset(), Collation: c.tp.GetCollate()}, c.funcName, true)
sig = &builtinCastStringAsStringSig{bf}
sig.setPbCode(tipb.ScalarFuncSig_CastStringAsString)
default:
Expand Down
11 changes: 7 additions & 4 deletions pkg/expression/builtin_cast_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1448,7 +1448,7 @@ func TestWrapWithCastAsString(t *testing.T) {

cases := []struct {
expr Expression
err bool
warn bool
ret string
}{
{
Expand Down Expand Up @@ -1487,11 +1487,14 @@ func TestWrapWithCastAsString(t *testing.T) {
"-127",
},
}
lastWarningLen := 0
for _, c := range cases {
expr := BuildCastFunction(ctx, c.expr, types.NewFieldType(mysql.TypeVarString))
res, _, err := expr.EvalString(ctx, chunk.Row{})
if c.err {
require.Error(t, err)
res, _, _ := expr.EvalString(ctx, chunk.Row{})
if c.warn {
warns := ctx.GetSessionVars().StmtCtx.GetWarnings()
require.Greater(t, len(warns), lastWarningLen)
lastWarningLen = len(warns)
} else {
require.Equal(t, c.ret, res)
}
Expand Down
46 changes: 38 additions & 8 deletions pkg/expression/builtin_convert_charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ func (b *builtinInternalToBinarySig) vecEvalString(ctx EvalContext, input *chunk
type tidbFromBinaryFunctionClass struct {
baseFunctionClass

tp *types.FieldType
tp *types.FieldType
cannotConvertStringAsWarning bool
}

func (c *tidbFromBinaryFunctionClass) getFunction(ctx BuildContext, args []Expression) (builtinFunc, error) {
Expand All @@ -150,7 +151,7 @@ func (c *tidbFromBinaryFunctionClass) getFunction(ctx BuildContext, args []Expre
return nil, err
}
bf.tp = c.tp
sig = &builtinInternalFromBinarySig{bf}
sig = &builtinInternalFromBinarySig{bf, c.cannotConvertStringAsWarning}
sig.setPbCode(tipb.ScalarFuncSig_FromBinary)
default:
return nil, fmt.Errorf("unexpected argTp: %d", argTp)
Expand All @@ -160,6 +161,9 @@ func (c *tidbFromBinaryFunctionClass) getFunction(ctx BuildContext, args []Expre

type builtinInternalFromBinarySig struct {
baseBuiltinFunc

// TODO: also pass this field when pushing down this function. The behavior of TiDB and TiKV is different on this function now.
cannotConvertStringAsWarning bool
}

func (b *builtinInternalFromBinarySig) Clone() builtinFunc {
Expand All @@ -179,8 +183,20 @@ func (b *builtinInternalFromBinarySig) evalString(ctx EvalContext, row chunk.Row
if err != nil {
strHex := formatInvalidChars(valBytes)
err = errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset())

if b.cannotConvertStringAsWarning {
tc := typeCtx(ctx)
tc.AppendWarning(err)
if sqlMode(ctx).HasStrictMode() {
return "", true, nil
}

return string(ret), false, nil
}

return "", false, err
}
return string(ret), false, err
return string(ret), false, nil
}

func (b *builtinInternalFromBinarySig) vectorized() bool {
Expand Down Expand Up @@ -209,7 +225,21 @@ func (b *builtinInternalFromBinarySig) vecEvalString(ctx EvalContext, input *chu
val, err := enc.Transform(encodedBuf, str, charset.OpDecode)
if err != nil {
strHex := formatInvalidChars(str)
return errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset())
err = errCannotConvertString.GenWithStackByArgs(strHex, charset.CharsetBin, b.tp.GetCharset())

if b.cannotConvertStringAsWarning {
tc := typeCtx(ctx)
tc.AppendWarning(err)
if sqlMode(ctx).HasStrictMode() {
result.AppendNull()
continue
}

result.AppendBytes(str)
continue
}

return err
}
result.AppendBytes(val)
}
Expand All @@ -232,8 +262,8 @@ func BuildToBinaryFunction(ctx BuildContext, expr Expression) (res Expression) {
}

// BuildFromBinaryFunction builds from_binary function.
func BuildFromBinaryFunction(ctx BuildContext, expr Expression, tp *types.FieldType) (res Expression) {
fc := &tidbFromBinaryFunctionClass{baseFunctionClass{InternalFuncFromBinary, 1, 1}, tp}
func BuildFromBinaryFunction(ctx BuildContext, expr Expression, tp *types.FieldType, cannotConvertStringAsWarning bool) (res Expression) {
fc := &tidbFromBinaryFunctionClass{baseFunctionClass{InternalFuncFromBinary, 1, 1}, tp, cannotConvertStringAsWarning}
f, err := fc.getFunction(ctx, []Expression{expr})
if err != nil {
return expr
Expand Down Expand Up @@ -305,7 +335,7 @@ func init() {
}

// HandleBinaryLiteral wraps `expr` with to_binary or from_binary sig.
func HandleBinaryLiteral(ctx BuildContext, expr Expression, ec *ExprCollation, funcName string) Expression {
func HandleBinaryLiteral(ctx BuildContext, expr Expression, ec *ExprCollation, funcName string, explicitCast bool) Expression {
argChs, dstChs := expr.GetType().GetCharset(), ec.Charset
switch convertFuncsMap[funcName] {
case funcPropNone:
Expand All @@ -326,7 +356,7 @@ func HandleBinaryLiteral(ctx BuildContext, expr Expression, ec *ExprCollation, f
ft := expr.GetType().Clone()
ft.SetCharset(ec.Charset)
ft.SetCollate(ec.Collation)
return BuildFromBinaryFunction(ctx, expr, ft)
return BuildFromBinaryFunction(ctx, expr, ft, explicitCast)
}
}
return expr
Expand Down
3 changes: 2 additions & 1 deletion pkg/expression/distsql_builtin.go
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,8 @@ func getSignatureByPB(ctx BuildContext, sigCode tipb.ScalarFuncSig, tp *tipb.Fie
case tipb.ScalarFuncSig_ToBinary:
f = &builtinInternalToBinarySig{base}
case tipb.ScalarFuncSig_FromBinary:
f = &builtinInternalFromBinarySig{base}
// TODO: set the `cannotConvertStringAsWarning` accordingly
f = &builtinInternalFromBinarySig{base, false}

default:
e = ErrFunctionNotExists.GenWithStackByArgs("FUNCTION", sigCode)
Expand Down
2 changes: 1 addition & 1 deletion pkg/expression/scalar_function.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ func newFunctionImpl(ctx BuildContext, fold int, funcName string, retType *types
case ast.GetVar:
return BuildGetVarFunction(ctx, args[0], retType)
case InternalFuncFromBinary:
return BuildFromBinaryFunction(ctx, args[0], retType), nil
return BuildFromBinaryFunction(ctx, args[0], retType, false), nil
case InternalFuncToBinary:
return BuildToBinaryFunction(ctx, args[0]), nil
case ast.Sysdate:
Expand Down
12 changes: 12 additions & 0 deletions tests/integrationtest/r/expression/charset_and_collation.result
Original file line number Diff line number Diff line change
Expand Up @@ -1991,3 +1991,15 @@ LOCATE('bar' collate utf8mb4_0900_ai_ci, 'FOOBAR' collate utf8mb4_0900_ai_ci)
select 'FOOBAR' collate utf8mb4_0900_ai_ci REGEXP 'foo.*' collate utf8mb4_0900_ai_ci;
'FOOBAR' collate utf8mb4_0900_ai_ci REGEXP 'foo.*' collate utf8mb4_0900_ai_ci
1
select cast(compress('b') as char);
cast(compress('b') as char)
NULL
Level Code Message
Warning 3854 Cannot convert string 'x\x9C...' from binary to utf8mb4
set sql_mode='';
select cast(compress('b') as char);
cast(compress('b') as char)
x
Level Code Message
Warning 3854 Cannot convert string 'x\x9C...' from binary to utf8mb4
set sql_mode=DEFAULT;
8 changes: 8 additions & 0 deletions tests/integrationtest/t/expression/charset_and_collation.test
Original file line number Diff line number Diff line change
Expand Up @@ -814,3 +814,11 @@ select min(id) from t group by str order by str;
# TestUTF8MB40900AICIStrFunc
select LOCATE('bar' collate utf8mb4_0900_ai_ci, 'FOOBAR' collate utf8mb4_0900_ai_ci);
select 'FOOBAR' collate utf8mb4_0900_ai_ci REGEXP 'foo.*' collate utf8mb4_0900_ai_ci;

# TestConvertCharsetFromBinary
--enable_warnings
select cast(compress('b') as char);
set sql_mode='';
select cast(compress('b') as char);
--disable_warnings
set sql_mode=DEFAULT;