Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,7 @@ protected String visitTableVersion(TableVersionExpression node, Void context)
static String formatStringLiteral(String s)
{
s = s.replace("'", "''");
if (CharMatcher.inRange((char) 0x20, (char) 0x7E).matchesAllOf(s)) {
if (CharMatcher.inRange((char) 0x20, (char) 0x7E).or(CharMatcher.anyOf("\n\r\t")).matchesAllOf(s)) {
return "'" + s + "'";
}

Expand All @@ -720,7 +720,7 @@ static String formatStringLiteral(String s)
while (iterator.hasNext()) {
int codePoint = iterator.nextInt();
checkArgument(codePoint >= 0, "Invalid UTF-8 encoding in characters: %s", s);
if (isAsciiPrintable(codePoint)) {
if (isAsciiPrintable(codePoint) || isWhitespace(codePoint)) {
char ch = (char) codePoint;
if (ch == '\\') {
builder.append(ch);
Expand Down Expand Up @@ -797,6 +797,11 @@ private static boolean isAsciiPrintable(int codePoint)
return true;
}

private static boolean isWhitespace(int codePoint)
{
return codePoint == '\n' || codePoint == '\r' || codePoint == '\t';
}

private static String formatGroupingSet(List<Expression> groupingSet, Optional<List<Expression>> parameters)
{
return format("(%s)", Joiner.on(", ").join(groupingSet.stream()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.sql;

import com.facebook.presto.sql.parser.ParsingOptions;
import com.facebook.presto.sql.parser.SqlParser;
import com.facebook.presto.sql.tree.Expression;
import com.facebook.presto.sql.tree.StringLiteral;
import org.testng.annotations.Test;

import java.util.Optional;

import static com.facebook.presto.sql.ExpressionFormatter.formatStringLiteral;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

public class TestExpressionFormatter
{
private static final SqlParser SQL_PARSER = new SqlParser();

@Test
public void testFormatStringLiteralPreservesNewlines()
{
String input = "line1\nline2\nline3";
String formatted = formatStringLiteral(input);

assertTrue(formatted.contains("\n"), "Newlines should be preserved in output");
assertEquals(formatted, "'line1\nline2\nline3'");
}

@Test
public void testFormatStringLiteralPreservesTabs()
{
String input = "col1\tcol2\tcol3";
String formatted = formatStringLiteral(input);

assertTrue(formatted.contains("\t"), "Tabs should be preserved in output");
assertEquals(formatted, "'col1\tcol2\tcol3'");
}

@Test
public void testFormatStringLiteralPreservesCarriageReturns()
{
String input = "line1\rline2";
String formatted = formatStringLiteral(input);

assertTrue(formatted.contains("\r"), "Carriage returns should be preserved in output");
assertEquals(formatted, "'line1\rline2'");
}

@Test
public void testFormatStringLiteralWithMixedWhitespace()
{
String input = "def foo():\n\treturn 'bar'\r\n";
String formatted = formatStringLiteral(input);

assertTrue(formatted.contains("\n"), "Newlines should be preserved");
assertTrue(formatted.contains("\t"), "Tabs should be preserved");
assertTrue(formatted.contains("\r"), "Carriage returns should be preserved");
assertEquals(formatted, "'def foo():\n\treturn ''bar''\r\n'");
}

@Test
public void testFormatStringLiteralWithPythonCode()
{
String pythonCode = "def process(x):\n" +
"\tif x > 0:\n" +
"\t\treturn x * 2\n" +
"\treturn 0";
String formatted = formatStringLiteral(pythonCode);

assertTrue(formatted.contains("\n"), "Newlines in Python code should be preserved");
assertTrue(formatted.contains("\t"), "Tabs in Python code should be preserved");
assertEquals(formatted, "'" + pythonCode + "'");
}

@Test
public void testFormatStringLiteralPreservesRegularStrings()
{
String input = "hello world";
String formatted = formatStringLiteral(input);
assertEquals(formatted, "'hello world'");
}

@Test
public void testFormatStringLiteralEscapesSingleQuotes()
{
String input = "it's a test";
String formatted = formatStringLiteral(input);
assertEquals(formatted, "'it''s a test'");
}

@Test
public void testFormatStringLiteralWithUnicodeCharacters()
{
String input = "hello \u0001 world";
String formatted = formatStringLiteral(input);

assertTrue(formatted.startsWith("U&'"), "Non-printable characters should use Unicode format");
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Um, I may understand wrong, but isn't your fix to solve this problem? Why still seeing U& in the formatted string?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@feilong-liu The U& format is still needed here because the string contains \u0001 (a non-printable control character) - the fix doesn't eliminate Unicode format entirely, it just ensures that \n\r\t are preserved as literal whitespace characters instead of being incorrectly escaped as \000A, \0009, and \000D. So when a string has both whitespace AND non-printable chars like \u0001, it still uses U&'...' format but now preserves the literal \n\r\t inside it.

assertTrue(formatted.contains("0001"), "Unicode escape should be present");
}

@Test
public void testFormatStringLiteralPreservesWhitespaceInUnicodeMode()
{
String input = "test\n\u0001\ttab";
String formatted = formatStringLiteral(input);

assertTrue(formatted.startsWith("U&'"), "Should use Unicode format for non-printable chars");
assertTrue(formatted.contains("\n"), "Newlines should be preserved even in Unicode mode");
assertTrue(formatted.contains("\t"), "Tabs should be preserved even in Unicode mode");
}

@Test
public void testStringLiteralRoundTrip()
{
String pythonCode = "def add(a, b):\n\treturn a + b";

Expression parsed = SQL_PARSER.createExpression("'" + pythonCode + "'", new ParsingOptions());
String formattedExpression = ExpressionFormatter.formatExpression(parsed, Optional.empty());

Expression reparsed = SQL_PARSER.createExpression(formattedExpression, new ParsingOptions());

assertTrue(parsed instanceof StringLiteral);
assertTrue(reparsed instanceof StringLiteral);
assertEquals(((StringLiteral) reparsed).getValue(), pythonCode);
}

@Test
public void testMultiLineStringWithIndentation()
{
String pythonUdf = "def my_udf(x):\n\tif x > 10:\n\t\treturn x * 2\n\telse:\n\t\treturn x + 5";

Expression parsed = SQL_PARSER.createExpression("'" + pythonUdf + "'", new ParsingOptions());
String formatted = ExpressionFormatter.formatExpression(parsed, Optional.empty());

assertTrue(formatted.contains("\n"), "Multi-line content should preserve newlines");
assertTrue(formatted.contains("\t"), "Indentation should be preserved");
}

@Test
public void testEmptyString()
{
String input = "";
String formatted = formatStringLiteral(input);
assertEquals(formatted, "''");
}

@Test
public void testStringWithOnlyWhitespace()
{
String input = "\n\t\r";
String formatted = formatStringLiteral(input);
assertEquals(formatted, "'\n\t\r'");
}

@Test
public void testStringWithBackslash()
{
String input = "path\\to\\file";
String formatted = formatStringLiteral(input);
assertEquals(formatted, "'path\\to\\file'");
}

@Test
public void testStringWithBackslashAndNewline()
{
String input = "line1\\\nline2";
String formatted = formatStringLiteral(input);
assertEquals(formatted, "'line1\\\nline2'");
}

@Test
public void testStringComparisonSemanticEquivalence()
{
// Verify that literal whitespace and Unicode escapes parse to identical values
String valueWithNewline = "text\nmore\ttext";

Expression literalForm = SQL_PARSER.createExpression("'" + valueWithNewline + "'", new ParsingOptions());
Expression unicodeForm = SQL_PARSER.createExpression("U&'text\\000Amore\\0009text'", new ParsingOptions());

assertTrue(literalForm instanceof StringLiteral);
assertTrue(unicodeForm instanceof StringLiteral);
assertEquals(((StringLiteral) literalForm).getValue(), valueWithNewline);
assertEquals(((StringLiteral) unicodeForm).getValue(), valueWithNewline);
assertEquals(((StringLiteral) literalForm).getValue(), ((StringLiteral) unicodeForm).getValue());
}

@Test
public void testQueryWithNewlineComparisonRoundTrip()
{
// Validate that comparisons maintain correctness through format/parse cycles
String originalValue = "line1\nline2";

Expression parsed = SQL_PARSER.createExpression("column = '" + originalValue + "'", new ParsingOptions());
String formatted = ExpressionFormatter.formatExpression(parsed, Optional.empty());
Expression reparsed = SQL_PARSER.createExpression(formatted, new ParsingOptions());

assertEquals(ExpressionFormatter.formatExpression(parsed, Optional.empty()),
ExpressionFormatter.formatExpression(reparsed, Optional.empty()));
}

@Test
public void testRegexPatternWithNewlinesPreserved()
{
// Verify regex patterns with newlines are preserved through format/parse cycle
String regexPattern = "line1\nline2.*\nline3";

Expression parsed = SQL_PARSER.createExpression("'" + regexPattern + "'", new ParsingOptions());
String formatted = ExpressionFormatter.formatExpression(parsed, Optional.empty());

assertTrue(formatted.contains("\n"), "Regex pattern should preserve literal newlines");

Expression reparsed = SQL_PARSER.createExpression(formatted, new ParsingOptions());
assertEquals(((StringLiteral) reparsed).getValue(), regexPattern,
"Regex pattern value should be preserved through format/parse cycle");
}

@Test
public void testWhitespaceInComparisonPreservesSemantics()
{
// Validate that comparisons against whitespace preserve value semantics
String textWithWhitespace = "before\nmiddle\tafter";

Expression original = SQL_PARSER.createExpression("column = '" + textWithWhitespace + "'", new ParsingOptions());
String reformatted = ExpressionFormatter.formatExpression(original, Optional.empty());
Expression reparsed = SQL_PARSER.createExpression(reformatted, new ParsingOptions());

assertTrue(reformatted.contains("\n") && reformatted.contains("\t"),
"Reformatted version should contain literal whitespace");

assertEquals(ExpressionFormatter.formatExpression(original, Optional.empty()),
ExpressionFormatter.formatExpression(reparsed, Optional.empty()),
"Comparison expression must be semantically preserved through format/parse cycle");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8169,6 +8169,42 @@ public void testRandomizeSemiJoinNullKeyStrategy()
assertQueryWithSameQueryRunner(alwaysSession, query21, disabledSession);
}

@Test
public void testStringLiteralWithWhitespace()
{
// Test newlines
assertQuery("SELECT 'line1\nline2' = 'line1\nline2'", "SELECT true");
assertQuery("SELECT length('line1\nline2')", "SELECT 11");
assertQuery("SELECT * FROM (VALUES ('line1\nline2')) t(x) WHERE x = 'line1\nline2'", "VALUES 'line1\nline2'");

// Test tabs
assertQuery("SELECT 'col1\tcol2' = 'col1\tcol2'", "SELECT true");
assertQuery("SELECT length('col1\tcol2')", "SELECT 9");
assertQuery("SELECT * FROM (VALUES ('col1\tcol2')) t(x) WHERE x = 'col1\tcol2'", "VALUES 'col1\tcol2'");

// Test carriage returns
assertQuery("SELECT 'line1\rline2' = 'line1\rline2'", "SELECT true");
assertQuery("SELECT length('line1\rline2')", "SELECT 11");
assertQuery("SELECT * FROM (VALUES ('line1\rline2')) t(x) WHERE x = 'line1\rline2'", "VALUES 'line1\rline2'");

// Test mixed whitespace
assertQuery("SELECT 'def foo():\n\treturn ''bar''\r\n' = 'def foo():\n\treturn ''bar''\r\n'", "SELECT true");
assertQuery("SELECT length('test\n\ttab')", "SELECT 9");
assertQuery("SELECT * FROM (VALUES ('before\nmiddle\tafter')) t(x) WHERE x = 'before\nmiddle\tafter'", "VALUES 'before\nmiddle\tafter'");

// Test whitespace in joins
assertQuery("SELECT t1.x FROM (VALUES ('a\nb')) t1(x) JOIN (VALUES ('a\nb')) t2(y) ON t1.x = t2.y", "VALUES 'a\nb'");
assertQuery("SELECT t1.x FROM (VALUES ('a\tb')) t1(x) JOIN (VALUES ('a\tb')) t2(y) ON t1.x = t2.y", "VALUES 'a\tb'");

// Test whitespace in group by - verify correct grouping by checking distinct count
assertQuery("SELECT count(DISTINCT x) FROM (VALUES ('a\nb'), ('a\nb'), ('c\td')) t(x)", "SELECT 2");

// Verify that literal whitespace and Unicode escape are semantically equivalent
assertQuery("SELECT 'text\nmore' = U&'text\\000Amore'", "SELECT true");
assertQuery("SELECT 'text\tmore' = U&'text\\0009more'", "SELECT true");
assertQuery("SELECT 'line1\rline2' = U&'line1\\000Dline2'", "SELECT true");
}

private List<MaterializedRow> getNativeWorkerSessionProperties(List<MaterializedRow> inputRows, String sessionPropertyName)
{
return inputRows.stream()
Expand Down
Loading