Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion docs/usage/interpreter/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,13 @@ When you click the ```+Create``` button on the interpreter page, the interpreter

You can create multiple interpreters for the same engine with different interpreter setting. e.g. You can create `spark2` for Spark 2.x and create `spark1` for Spark 1.x.

For each paragraph you write in Zeppelin, you need to specify its interpreter first via `%interpreter_group.interpreter_name`. e.g. `%spark.pyspark`, `%spark.r`
For each paragraph you write in Zeppelin, you need to specify its interpreter first via `%interpreter_group.interpreter_name`. e.g. `%spark.pyspark`, `%spark.r`.

If you specify an interpreter, you can also pass local properties to it (if it needs them). This is done by providing a set of key/value pairs, separated by commas, inside round brackets right after the interpreter name. If a key or value contains characters like `=` or `,`, then you can either escape them with the `\` character or wrap the whole value in double quotes. For example:

```
%cassandra(outputFormat=cql, dateFormat="E, d MMM yy", timeFormat=E\, d MMM yy)
```

## What are the Interpreter Settings?
The interpreter settings are the configuration of a given interpreter on the Zeppelin server. For example, certain properties need to be set for the Apache Hive JDBC interpreter to connect to the Hive server.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,12 @@

package org.apache.zeppelin.notebook;

import org.apache.commons.lang3.StringUtils;

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Parser which is used for parsing the paragraph text.
Expand All @@ -42,6 +41,7 @@
* localProperties: Map(pool->pool_1)
*/
public class ParagraphTextParser {
private static final Logger LOGGER = LoggerFactory.getLogger(ParagraphTextParser.class);

public static class ParseResult {
private String intpText;
Expand All @@ -67,41 +67,112 @@ public Map<String, String> getLocalProperties() {
}
}

private static Pattern REPL_PATTERN =
Pattern.compile("(\\s*)%([\\w\\.]+)(\\(.*?\\))?.*", Pattern.DOTALL);
private static Pattern REPL_PATTERN = Pattern.compile("^(\\s*)%(\\w+(?:\\.\\w+)*)");

/**
 * Parses the local-property list that may follow the interpreter name, e.g. the
 * {@code (pool=pool_1, dateFormat="E, d MMM yy")} part of a paragraph header.
 *
 * Syntax: comma-separated {@code key=value} pairs inside round brackets. A key
 * without a value is stored with the key as its own value. Keys and values may be
 * wrapped in double quotes to protect {@code =} and {@code ,}; a backslash escapes
 * the following character ({@code \n} and {@code \t} produce newline and tab).
 *
 * @param text            the full paragraph text
 * @param startPos        index of the opening '(' in {@code text}
 * @param localProperties output map receiving the parsed key/value pairs
 * @return the index just past the closing ')'
 * @throws RuntimeException if the property list is never closed, an escape
 *         sequence is unfinished, or a property key is empty
 */
private static int parseLocalProperties(
    final String text, int startPos,
    Map<String, String> localProperties) throws RuntimeException {
  startPos++; // skip the opening '('
  String propKey = null;
  boolean insideQuotes = false, parseKey = true, finished = false;
  StringBuilder sb = new StringBuilder();
  while (!finished && startPos < text.length()) {
    char ch = text.charAt(startPos);
    switch (ch) {
      case ')': {
        if (!insideQuotes) {
          if (parseKey) {
            propKey = sb.toString().trim();
            if (!propKey.isEmpty()) {
              // key-only entry: store the key as its own value
              localProperties.put(propKey, propKey);
            }
          } else {
            localProperties.put(propKey, sb.toString().trim());
          }
          finished = true;
        } else {
          sb.append(ch);
        }
        break;
      }
      case '\\': {
        if ((startPos + 1) == text.length()) {
          throw new RuntimeException(
              "Problems by parsing paragraph. Unfinished escape sequence");
        }
        startPos++;
        ch = text.charAt(startPos);
        switch (ch) {
          case 'n':
            sb.append('\n');
            break;
          case 't':
            sb.append('\t');
            break;
          default:
            // any other escaped character (e.g. \" \, \=) is taken literally
            sb.append(ch);
        }
        break;
      }
      case '"': {
        // quotes are delimiters only; they are not part of the key/value
        insideQuotes = !insideQuotes;
        break;
      }
      case '=': {
        if (insideQuotes) {
          sb.append(ch);
        } else {
          propKey = sb.toString().trim();
          sb.delete(0, sb.length());
          parseKey = false;
        }
        break;
      }
      case ',': {
        if (insideQuotes) {
          // BUGFIX: a quoted comma is literal text. Previously the state reset
          // below ran unconditionally, clearing the pending key and buffer and
          // corrupting values such as "E, d MMM yy".
          sb.append(ch);
        } else {
          if (parseKey) {
            // BUGFIX: key-only entries followed by a comma (e.g. "(a,b)") used
            // to throw, because propKey was only ever set by the '=' case.
            propKey = sb.toString().trim();
          }
          if (propKey == null || propKey.trim().isEmpty()) {
            throw new RuntimeException(
                "Problems by parsing paragraph. Local property key is empty");
          }
          if (parseKey) {
            localProperties.put(propKey, propKey);
          } else {
            localProperties.put(propKey, sb.toString().trim());
          }
          propKey = null;
          parseKey = true;
          sb.delete(0, sb.length());
        }
        break;
      }
      default:
        sb.append(ch);
    }
    startPos++;
  }
  if (!finished) {
    throw new RuntimeException(
        "Problems by parsing paragraph. Not finished interpreter configuration");
  }
  return startPos;
}

public static ParseResult parse(String text) {
Map<String, String> localProperties = new HashMap<>();
String intpText = null;
String intpText = "";
String scriptText = null;

Matcher matcher = REPL_PATTERN.matcher(text);
if (matcher.matches()) {
if (matcher.find()) {
String headingSpace = matcher.group(1);
intpText = matcher.group(2);
if (matcher.groupCount() == 3 && matcher.group(3) != null) {
String localPropertiesText = matcher.group(3);
String[] splits = localPropertiesText.substring(1, localPropertiesText.length() - 1)
.split(",");
for (String split : splits) {
String[] kv = split.split("=");
if (StringUtils.isBlank(split) || kv.length == 0) {
continue;
}
if (kv.length > 2) {
throw new RuntimeException("Invalid paragraph properties format: " + split);
}
if (kv.length == 1) {
localProperties.put(kv[0].trim(), kv[0].trim());
} else {
localProperties.put(kv[0].trim(), kv[1].trim());
}
}
scriptText = text.substring(headingSpace.length() + intpText.length() +
localPropertiesText.length() + 1).trim();
} else {
scriptText = text.substring(headingSpace.length() + intpText.length() + 1).trim();
int startPos = headingSpace.length() + intpText.length() + 1;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Put the local property extraction logics into a separate function for more readability.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@zjffdu done. If you ok with overall change, I can push it to master, and rework my changes regarding formatting in Cassandra...

if (text.charAt(startPos) == '(') {
startPos = parseLocalProperties(text, startPos, localProperties);
}
scriptText = text.substring(startPos).trim();
} else {
intpText = "";
scriptText = text;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

package org.apache.zeppelin.notebook;

import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;

import static junit.framework.TestCase.assertEquals;

Expand All @@ -33,30 +35,119 @@ public void testJupyter() {
}

@Test
public void testParagraphText() {
public void testParagraphTextLocalPropertiesAndText() {
ParagraphTextParser.ParseResult parseResult = ParagraphTextParser.parse("%spark.pyspark(pool=pool_1) sc.version");
assertEquals("spark.pyspark", parseResult.getIntpText());
assertEquals(1, parseResult.getLocalProperties().size());
assertEquals("pool_1", parseResult.getLocalProperties().get("pool"));
assertEquals("sc.version", parseResult.getScriptText());
}

// no script text
parseResult = ParagraphTextParser.parse("%spark.pyspark(pool=pool_1)");
@Test
public void testParagraphTextLocalPropertiesNoText() {
ParagraphTextParser.ParseResult parseResult = ParagraphTextParser.parse("%spark.pyspark(pool=pool_1)");
assertEquals("spark.pyspark", parseResult.getIntpText());
assertEquals(1, parseResult.getLocalProperties().size());
assertEquals("pool_1", parseResult.getLocalProperties().get("pool"));
assertEquals("", parseResult.getScriptText());
}

@Test
public void testParagraphTextLocalPropertyNoValueNoText() {
  // A key with no value is stored with the key itself as the value.
  final String paragraph = "%spark.pyspark(pool)";
  ParagraphTextParser.ParseResult result = ParagraphTextParser.parse(paragraph);
  assertEquals("spark.pyspark", result.getIntpText());
  assertEquals("", result.getScriptText());
  assertEquals(1, result.getLocalProperties().size());
  assertEquals("pool", result.getLocalProperties().get("pool"));
}

// no paragraph local properties
parseResult = ParagraphTextParser.parse("%spark.pyspark sc.version");
@Test
public void testParagraphTextNoLocalProperties() {
ParagraphTextParser.ParseResult parseResult = ParagraphTextParser.parse("%spark.pyspark sc.version");
assertEquals("spark.pyspark", parseResult.getIntpText());
assertEquals(0, parseResult.getLocalProperties().size());
assertEquals("sc.version", parseResult.getScriptText());
}

// no intp text and paragraph local properties
parseResult = ParagraphTextParser.parse("sc.version");
@Test
public void testParagraphNoInterpreter() {
ParagraphTextParser.ParseResult parseResult = ParagraphTextParser.parse("sc.version");
assertEquals("", parseResult.getIntpText());
assertEquals(0, parseResult.getLocalProperties().size());
assertEquals("sc.version", parseResult.getScriptText());
}

@Test
public void testParagraphInterpreterWithoutProperties() {
  // Empty round brackets: no local properties are recorded.
  final String paragraph = "%spark() sc.version";
  ParagraphTextParser.ParseResult result = ParagraphTextParser.parse(paragraph);
  assertEquals("spark", result.getIntpText());
  assertEquals("sc.version", result.getScriptText());
  assertEquals(0, result.getLocalProperties().size());
}

@Test
public void testParagraphTextQuotedPropertyValue1() {
  // Double quotes protect '=' inside a property value.
  final String paragraph = "%spark.pyspark(pool=\"value with = inside\")";
  ParagraphTextParser.ParseResult result = ParagraphTextParser.parse(paragraph);
  assertEquals("spark.pyspark", result.getIntpText());
  assertEquals("", result.getScriptText());
  assertEquals(1, result.getLocalProperties().size());
  assertEquals("value with = inside", result.getLocalProperties().get("pool"));
}

@Test
public void testParagraphTextQuotedPropertyValue2() {
  // Escaped quote (\") and escape sequence (\n) inside quoted values.
  final String paragraph =
      "%spark.pyspark(pool=\"value with \\\" inside\", p=\"eol\\ninside\" )";
  ParagraphTextParser.ParseResult result = ParagraphTextParser.parse(paragraph);
  assertEquals("spark.pyspark", result.getIntpText());
  assertEquals("", result.getScriptText());
  assertEquals(2, result.getLocalProperties().size());
  assertEquals("value with \" inside", result.getLocalProperties().get("pool"));
  assertEquals("eol\ninside", result.getLocalProperties().get("p"));
}

@Test
public void testParagraphTextQuotedPropertyKeyAndValue() {
  // Both the key and the value may be wrapped in double quotes.
  final String paragraph = "%spark.pyspark(\"po ol\"=\"value with \\\" inside\")";
  ParagraphTextParser.ParseResult result = ParagraphTextParser.parse(paragraph);
  assertEquals("spark.pyspark", result.getIntpText());
  assertEquals("", result.getScriptText());
  assertEquals(1, result.getLocalProperties().size());
  assertEquals("value with \" inside", result.getLocalProperties().get("po ol"));
}

@Rule
public ExpectedException exceptionRule = ExpectedException.none();

@Test
public void testParagraphTextUnfinishedConfig() {
  // A property list that never reaches ')' must be rejected.
  exceptionRule.expectMessage(
      "Problems by parsing paragraph. Not finished interpreter configuration");
  exceptionRule.expect(RuntimeException.class);
  ParagraphTextParser.parse("%spark.pyspark(pool=");
}

@Test
public void testParagraphTextUnfinishedQuote() {
  // An unterminated quote swallows the rest of the text, so the list never closes.
  exceptionRule.expectMessage(
      "Problems by parsing paragraph. Not finished interpreter configuration");
  exceptionRule.expect(RuntimeException.class);
  ParagraphTextParser.parse("%spark.pyspark(pool=\"2314234) sc.version");
}

@Test
public void testParagraphTextUnclosedBackslash() {
  // A trailing backslash has nothing to escape and must be rejected.
  exceptionRule.expectMessage(
      "Problems by parsing paragraph. Unfinished escape sequence");
  exceptionRule.expect(RuntimeException.class);
  ParagraphTextParser.parse("%spark.pyspark(pool=\\");
}

@Test
public void testParagraphTextEmptyKey() {
  // A blank entry between commas has no key and must be rejected.
  exceptionRule.expectMessage(
      "Problems by parsing paragraph. Local property key is empty");
  exceptionRule.expect(RuntimeException.class);
  ParagraphTextParser.parse("%spark.pyspark(pool=123, ,)");
}
}