diff --git a/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java b/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java index b93626d45b9..7b2d7d66684 100644 --- a/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java +++ b/livy/src/main/java/org/apache/zeppelin/livy/LivySparkSQLInterpreter.java @@ -18,6 +18,7 @@ package org.apache.zeppelin.livy; import org.apache.commons.lang.StringUtils; +import static org.apache.commons.lang.StringEscapeUtils.escapeJavaScript; import org.apache.zeppelin.interpreter.*; import org.apache.zeppelin.scheduler.Scheduler; import org.apache.zeppelin.scheduler.SchedulerFactory; @@ -26,7 +27,6 @@ import java.util.List; import java.util.Properties; - /** * Livy SparkSQL Interpreter for Zeppelin. */ @@ -166,15 +166,18 @@ public FormType getFormType() { protected List parseSQLOutput(String output) { List rows = new ArrayList<>(); - String[] lines = output.split("\n"); + // Get first line by breaking on \n. We can guarantee + // that \n marks the end of the first line, but not for + // subsequent lines (as it could be in the cells) + String firstLine = output.split("\n", 2)[0]; // at least 4 lines, even for empty sql output // +---+---+ // | a| b| // +---+---+ // +---+---+ - // use the first line to determinte the position of feach cell - String[] tokens = StringUtils.split(lines[0], "\\+"); + // use the first line to determine the position of each cell + String[] tokens = StringUtils.split(firstLine, "\\+"); // pairs keeps the start/end position of each cell. We parse it from the first row // which use '+' as separator List pairs = new ArrayList<>(); @@ -186,17 +189,26 @@ protected List parseSQLOutput(String output) { pairs.add(new Pair(start, end)); } - for (String line : lines) { + // Use the header line to determine the position + // of subsequent lines + int lineStart = 0; + int lineEnd = firstLine.length(); + while (lineEnd < output.length()) { // Only match format "|....|" // skip line like "+---+---+" and "only showing top 1 row" - if (line.matches("^\\|.*\\|$")) { + String line = output.substring(lineStart, lineEnd); + // Use the DOTALL regex mode to match newlines + if (line.matches("(?s)^\\|.*\\|$")) { List cells = new ArrayList<>(); for (Pair pair : pairs) { - // strip the blank space around the cell - cells.add(line.substring(pair.start, pair.end).trim()); + // strip the blank space around the cell and escape the string + cells.add(escapeJavaScript(line.substring(pair.start, pair.end)).trim()); } rows.add(StringUtils.join(cells, "\t")); } + // Determine position of next line skipping newline + lineStart += firstLine.length() + 1; + lineEnd = lineStart + firstLine.length(); } return rows; } diff --git a/livy/src/test/java/org/apache/zeppelin/livy/LivySQLInterpreterTest.java b/livy/src/test/java/org/apache/zeppelin/livy/LivySQLInterpreterTest.java index 24d70ec2b04..0541b876755 100644 --- a/livy/src/test/java/org/apache/zeppelin/livy/LivySQLInterpreterTest.java +++ b/livy/src/test/java/org/apache/zeppelin/livy/LivySQLInterpreterTest.java @@ -124,5 +124,49 @@ public void testParseSQLOutput() { assertEquals(2, rows.size()); assertEquals("a", rows.get(0)); assertEquals("1", rows.get(1)); + + + // sql output with 3 rows, 3 columns, showing "only showing top 3 rows" with a line break in the data + // +---+---+---+ + // | a| b| c| + // +---+---+---+ + // | 1a| 1b| 1c| + // | 2a| 2 + // b| 2c| + // | 3a| 3b| 3c| + // +---+---+---+ + // only showing top 3 rows + rows = sqlInterpreter.parseSQLOutput("+---+----+---+\n" + + "| a| b| c|\n" + + "+---+----+---+\n" + + "| 1a| 1b| 1c|\n" + + "| 2a| 2\nb| 2c|\n" + + "| 3a| 3b| 3c|\n" + + "+---+---+---+\n" + + "only showing top 3 rows"); + assertEquals(4, rows.size()); + assertEquals("a\tb\tc", rows.get(0)); + assertEquals("1a\t1b\t1c", rows.get(1)); + assertEquals("2a\t2\\nb\t2c", rows.get(2)); + assertEquals("3a\t3b\t3c", rows.get(3)); + + + // sql output with 2 rows and one containing a tab + // +---+---+ + // | a| b| + // +---+---+ + // | 1| \ta| + // | 2| 2b| + // +---+---+ + rows = sqlInterpreter.parseSQLOutput("+---+---+\n" + + "| a| b|\n" + + "+---+---+\n" + + "| 1| \ta|\n" + + "| 2| 2b|\n" + + "+---+---+"); + assertEquals(3, rows.size()); + assertEquals("a\tb", rows.get(0)); + assertEquals("1\t\\ta", rows.get(1)); + assertEquals("2\t2b", rows.get(2)); } }