Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,6 @@ case class Like(left: Expression, right: Expression, escapeChar: Char = '\\')
""")
}
} else {
val pattern = ctx.freshName("pattern")
val rightStr = ctx.freshName("rightStr")
// We need double escape to avoid org.codehaus.commons.compiler.CompileException.
// '\\' will cause exception 'Single quote must be backslash-escaped in character literal'.
// '\"' will cause exception 'Line break in literal not allowed'.
Expand All @@ -162,10 +160,17 @@ case class Like(left: Expression, right: Expression, escapeChar: Char = '\\')
} else {
escapeChar
}
val rightStr = ctx.freshName("rightStr")
val pattern = ctx.addMutableState(patternClass, "pattern")
val lastRightStr = ctx.addMutableState(classOf[String].getName, "lastRightStr")

nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
s"""
String $rightStr = $eval2.toString();
$patternClass $pattern = $patternClass.compile($escapeFunc($rightStr, '$newEscapeChar'));
if (!$rightStr.equals($lastRightStr)) {
$pattern = $patternClass.compile($escapeFunc($rightStr, '$newEscapeChar'));
$lastRightStr = $rightStr;
}
${ev.value} = $pattern.matcher($eval1.toString()).matches();
"""
})
Expand Down Expand Up @@ -240,11 +245,16 @@ case class RLike(left: Expression, right: Expression) extends StringRegexExpress
}
} else {
val rightStr = ctx.freshName("rightStr")
val pattern = ctx.freshName("pattern")
val pattern = ctx.addMutableState(patternClass, "pattern")
val lastRightStr = ctx.addMutableState(classOf[String].getName, "lastRightStr")

nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
s"""
String $rightStr = $eval2.toString();
$patternClass $pattern = $patternClass.compile($rightStr);
if (!$rightStr.equals($lastRightStr)) {
Copy link
Member

@HyukjinKwon HyukjinKwon Jun 21, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The positive cases are good enough. The concern I heard is actually that here we add some overhead for the string comparison, and that it could be worse when the strings are very long.

Can we identify the worst cases? It's okay to show the trade-off explicitly. I tend to agree that compiling the pattern once is better in general. Feel free to reopen the PR once we're clear on the trade-off.

cc @rednaxelafx as well FYI.

$pattern = $patternClass.compile($rightStr);
$lastRightStr = $rightStr;
}
${ev.value} = $pattern.matcher($eval1.toString()).find(0);
"""
})
Expand Down