diff --git a/bin/interpreter.sh b/bin/interpreter.sh
index 458ffc00d47..f23ca823e62 100755
--- a/bin/interpreter.sh
+++ b/bin/interpreter.sh
@@ -220,8 +220,8 @@ if [[ ! -z "$ZEPPELIN_IMPERSONATE_USER" ]] && [[ -n "${suid}" || -z "${SPARK_SUB
fi
eval $INTERPRETER_RUN_COMMAND &
-
pid=$!
+
if [[ -z "${pid}" ]]; then
exit 1;
else
diff --git a/bin/stop-interpreter.sh b/bin/stop-interpreter.sh
new file mode 100755
index 00000000000..e6ff16e9e9f
--- /dev/null
+++ b/bin/stop-interpreter.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Stop Zeppelin interpreter processes by running the StopInterpreter main class;
+# any command-line arguments are passed through to it unchanged.
+
+bin=$(dirname "${BASH_SOURCE-$0}")
+bin=$(cd "${bin}" >/dev/null && pwd)
+
+. "${bin}/common.sh"
+
+export ZEPPELIN_FORCE_STOP=1
+
+ZEPPELIN_STOP_INTERPRETER_MAIN=org.apache.zeppelin.interpreter.recovery.StopInterpreter
+ZEPPELIN_LOGFILE="${ZEPPELIN_LOG_DIR}/stop-interpreter.log"
+JAVA_OPTS+=" -Dzeppelin.log.file=${ZEPPELIN_LOGFILE}"
+
+if [[ -d "${ZEPPELIN_HOME}/zeppelin-zengine/target/classes" ]]; then
+  ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-zengine/target/classes"
+fi
+
+if [[ -d "${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes" ]]; then
+  ZEPPELIN_CLASSPATH+=":${ZEPPELIN_HOME}/zeppelin-interpreter/target/classes"
+fi
+
+addJarInDir "${ZEPPELIN_HOME}/zeppelin-interpreter/target/lib"
+addJarInDir "${ZEPPELIN_HOME}/zeppelin-server/target/lib"
+addJarInDir "${ZEPPELIN_HOME}/lib"
+addJarInDir "${ZEPPELIN_HOME}/lib/interpreter"
+# JAVA_OPTS is left unquoted on purpose so it splits into separate JVM options.
+CLASSPATH+=":${ZEPPELIN_CLASSPATH}"
+$ZEPPELIN_RUNNER $JAVA_OPTS -cp "$CLASSPATH" $ZEPPELIN_STOP_INTERPRETER_MAIN "$@"
diff --git a/bin/zeppelin-daemon.sh b/bin/zeppelin-daemon.sh
index 5982aee2e0f..e8988497513 100755
--- a/bin/zeppelin-daemon.sh
+++ b/bin/zeppelin-daemon.sh
@@ -217,18 +217,6 @@ function stop() {
action_msg "${ZEPPELIN_NAME} stop" "${SET_OK}"
fi
fi
-
- # list all pid that used in remote interpreter and kill them
- for f in ${ZEPPELIN_PID_DIR}/*.pid; do
- if [[ ! -f ${f} ]]; then
- continue;
- fi
-
- pid=$(cat ${f})
- wait_for_zeppelin_to_die $pid 20
- $(rm -f ${f})
- done
-
}
function find_zeppelin_process() {
diff --git a/conf/zeppelin-site.xml.template b/conf/zeppelin-site.xml.template
index 3c5bbeae59a..d566a717884 100755
--- a/conf/zeppelin-site.xml.template
+++ b/conf/zeppelin-site.xml.template
@@ -480,4 +480,45 @@
10000:10010
-->
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/docs/usage/interpreter/overview.md b/docs/usage/interpreter/overview.md
index dd5ed220c88..035c381b8a9 100644
--- a/docs/usage/interpreter/overview.md
+++ b/docs/usage/interpreter/overview.md
@@ -144,3 +144,11 @@ So users needs to understand the ([interpreter mode setting ](../usage/interpret
In this scenario, user need to put `ConfInterpreter` as the first paragraph as the below example. Otherwise the customized setting can not be applied (Actually it would report ERROR)
+
+## Interpreter Process Recovery
+
+Before 0.8.0, shutting down Zeppelin also meant shutting down all the running interpreter processes. Usually an admin shuts down the Zeppelin server for maintenance or an upgrade, but doesn't want to shut down the running interpreter processes.
+In such cases, interpreter process recovery is necessary. Starting from 0.8.0, users can enable interpreter process recovery by setting `zeppelin.recovery.storage.class` to
+`org.apache.zeppelin.interpreter.recovery.FileSystemRecoveryStorage` or another implementation if one becomes available in the future. By default it is `org.apache.zeppelin.interpreter.recovery.NullRecoveryStorage`,
+which means recovery is not enabled. Enabling recovery means that shutting down Zeppelin does not terminate the interpreter processes,
+and when Zeppelin is restarted, it tries to reconnect to the existing running interpreter processes. If you want to kill all the interpreter processes after terminating Zeppelin even when recovery is enabled, you can run `bin/stop-interpreter.sh`.
diff --git a/spark/src/main/resources/interpreter-setting.json b/spark/src/main/resources/interpreter-setting.json
index 485f6950df0..d656532eb07 100644
--- a/spark/src/main/resources/interpreter-setting.json
+++ b/spark/src/main/resources/interpreter-setting.json
@@ -61,7 +61,7 @@
"description": "Spark master uri. ex) spark://masterhost:7077",
"type": "string"
},
- "zeppelin.spark.unSupportedVersionCheck": {
+ "zeppelin.spark.enableSupportedVersionCheck": {
"envName": null,
"propertyName": "zeppelin.spark.enableSupportedVersionCheck",
"defaultValue": true,
diff --git a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
index 438c661f8bc..77279edcd39 100644
--- a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
+++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
@@ -355,6 +355,19 @@ public String getNotebookDir() {
return getString(ConfVars.ZEPPELIN_NOTEBOOK_DIR);
}
+  /**
+   * Recovery directory configured by {@code zeppelin.recovery.dir}, resolved
+   * through getRelativeDir.
+   */
+  public String getRecoveryDir() {
+    final String recoveryDir = getRelativeDir(ConfVars.ZEPPELIN_RECOVERY_DIR);
+    return recoveryDir;
+  }
+
+  /** Configured value of {@code zeppelin.recovery.storage.class}. */
+  public String getRecoveryStorageClass() {
+    final String storageClass = getString(ConfVars.ZEPPELIN_RECOVERY_STORAGE_CLASS);
+    return storageClass;
+  }
+
+  /**
+   * Whether interpreter process recovery is enabled, i.e. the configured recovery
+   * storage class is anything other than NullRecoveryStorage.
+   */
+  public boolean isRecoveryEnabled() {
+    final String storageClass = getString(ConfVars.ZEPPELIN_RECOVERY_STORAGE_CLASS);
+    final boolean usesNullStorage =
+        storageClass.equals("org.apache.zeppelin.interpreter.recovery.NullRecoveryStorage");
+    return !usesNullStorage;
+  }
+
public String getUser() {
return getString(ConfVars.ZEPPELIN_NOTEBOOK_S3_USER);
}
@@ -658,6 +671,10 @@ public static enum ConfVars {
ZEPPELIN_INTERPRETER_OUTPUT_LIMIT("zeppelin.interpreter.output.limit", 1024 * 100),
ZEPPELIN_ENCODING("zeppelin.encoding", "UTF-8"),
ZEPPELIN_NOTEBOOK_DIR("zeppelin.notebook.dir", "notebook"),
+ ZEPPELIN_RECOVERY_DIR("zeppelin.recovery.dir", "recovery"),
+ ZEPPELIN_RECOVERY_STORAGE_CLASS("zeppelin.recovery.storage.class",
+ "org.apache.zeppelin.interpreter.recovery.NullRecoveryStorage"),
+
// use specified notebook (id) as homescreen
ZEPPELIN_NOTEBOOK_HOMESCREEN("zeppelin.notebook.homescreen", null),
// whether homescreen notebook will be hidden from notebook list or not
diff --git a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/launcher/InterpreterClient.java b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/launcher/InterpreterClient.java
index b991079feca..813dad86881 100644
--- a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/launcher/InterpreterClient.java
+++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/launcher/InterpreterClient.java
@@ -19,8 +19,20 @@
/**
* Interface to InterpreterClient which is created by InterpreterLauncher. This is the component
- * that is used to for the communication fromzeppelin-server process to zeppelin interpreter process
+ * that is used for the communication from zeppelin-server process to zeppelin interpreter
+ * process.
*/
public interface InterpreterClient {
+ /** Name of the interpreter setting this client is associated with. */
+ String getInterpreterSettingName();
+
+ /**
+ * Start this client on behalf of the given user.
+ * NOTE(review): what "start" launches or connects to is defined by the implementations,
+ * which are not visible here; confirm before relying on it.
+ *
+ * @param userName user the client is started for
+ * @param isUserImpersonate whether user impersonation is requested
+ */
+ void start(String userName, Boolean isUserImpersonate);
+
+ /** Stop this client. */
+ void stop();
+
+ /** Host of this client; presumably where the interpreter process is reachable (confirm). */
+ String getHost();
+
+ /** Port of this client; presumably the interpreter process port (confirm). */
+ int getPort();
+
+ /** Whether this client is currently running. */
+ boolean isRunning();
}
diff --git a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/launcher/InterpreterLaunchContext.java b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/launcher/InterpreterLaunchContext.java
index 9e253555a90..6901e2c7a62 100644
--- a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/launcher/InterpreterLaunchContext.java
+++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/launcher/InterpreterLaunchContext.java
@@ -30,6 +30,7 @@ public class InterpreterLaunchContext {
private Properties properties;
private InterpreterOption option;
private InterpreterRunner runner;
+ private String interpreterGroupId;
private String interpreterSettingId;
private String interpreterSettingGroup;
private String interpreterSettingName;
@@ -37,12 +38,14 @@ public class InterpreterLaunchContext {
public InterpreterLaunchContext(Properties properties,
InterpreterOption option,
InterpreterRunner runner,
+ String interpreterGroupId,
String interpreterSettingId,
String interpreterSettingGroup,
String interpreterSettingName) {
this.properties = properties;
this.option = option;
this.runner = runner;
+ this.interpreterGroupId = interpreterGroupId;
this.interpreterSettingId = interpreterSettingId;
this.interpreterSettingGroup = interpreterSettingGroup;
this.interpreterSettingName = interpreterSettingName;
@@ -60,6 +63,10 @@ public InterpreterRunner getRunner() {
return runner;
}
+ public String getInterpreterGroupId() {
+ return interpreterGroupId;
+ }
+
public String getInterpreterSettingId() {
return interpreterSettingId;
}
diff --git a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/launcher/InterpreterLauncher.java b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/launcher/InterpreterLauncher.java
index 5d0acf3515a..1cee20e7a04 100644
--- a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/launcher/InterpreterLauncher.java
+++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/launcher/InterpreterLauncher.java
@@ -18,6 +18,7 @@
package org.apache.zeppelin.interpreter.launcher;
import org.apache.zeppelin.conf.ZeppelinConfiguration;
+import org.apache.zeppelin.interpreter.recovery.RecoveryStorage;
import java.io.IOException;
import java.util.Properties;
@@ -29,9 +30,11 @@ public abstract class InterpreterLauncher {
protected ZeppelinConfiguration zConf;
protected Properties properties;
+ protected RecoveryStorage recoveryStorage;
- public InterpreterLauncher(ZeppelinConfiguration zConf) {
+ public InterpreterLauncher(ZeppelinConfiguration zConf, RecoveryStorage recoveryStorage) {
this.zConf = zConf;
+ this.recoveryStorage = recoveryStorage;
}
public abstract InterpreterClient launch(InterpreterLaunchContext context) throws IOException;
diff --git a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/recovery/RecoveryStorage.java b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/recovery/RecoveryStorage.java
new file mode 100644
index 00000000000..8bbe8302fcf
--- /dev/null
+++ b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/recovery/RecoveryStorage.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zeppelin.interpreter.recovery;
+
+import org.apache.zeppelin.conf.ZeppelinConfiguration;
+import org.apache.zeppelin.interpreter.launcher.InterpreterClient;
+
+import java.io.IOException;
+import java.util.Map;
+
+
+/**
+ * Base class for storing interpreter process recovery metadata.
+ * Call {@link #init()} after construction to load previously stored clients.
+ */
+public abstract class RecoveryStorage {
+
+  protected ZeppelinConfiguration zConf;
+  // Clients returned by restore(), keyed by interpreter group id; null until init() runs.
+  protected Map<String, InterpreterClient> restoredClients;
+
+  public RecoveryStorage(ZeppelinConfiguration zConf) throws IOException {
+    this.zConf = zConf;
+  }
+
+  /**
+   * Update RecoveryStorage when new InterpreterClient is started.
+   * @param client the client that was started
+   * @throws IOException if the storage cannot be updated
+   */
+  public abstract void onInterpreterClientStart(InterpreterClient client) throws IOException;
+
+  /**
+   * Update RecoveryStorage when InterpreterClient is stopped.
+   * @param client the client that was stopped
+   * @throws IOException if the storage cannot be updated
+   */
+  public abstract void onInterpreterClientStop(InterpreterClient client) throws IOException;
+
+  /**
+   * Load the stored clients. It is only called when Zeppelin Server is started.
+   *
+   * @return stored clients keyed by interpreter group id
+   * @throws IOException if the stored metadata cannot be read
+   */
+  public abstract Map<String, InterpreterClient> restore() throws IOException;
+
+  /**
+   * It is called after constructor; caches the result of {@link #restore()}.
+   *
+   * @throws IOException if {@link #restore()} fails
+   */
+  public void init() throws IOException {
+    this.restoredClients = restore();
+  }
+
+  /**
+   * Look up a client loaded by {@link #restore()}.
+   * @param interpreterGroupId id of the interpreter group to look up
+   * @return the restored client, or null if none is known or init() has not run
+   */
+  public InterpreterClient getInterpreterClient(String interpreterGroupId) {
+    return restoredClients == null ? null : restoredClients.get(interpreterGroupId);
+  }
+}
diff --git a/zeppelin-server/notebook/.python.recovery.crc b/zeppelin-server/notebook/.python.recovery.crc
new file mode 100644
index 00000000000..6bd3e7ae43b
Binary files /dev/null and b/zeppelin-server/notebook/.python.recovery.crc differ
diff --git a/zeppelin-server/notebook/python.recovery b/zeppelin-server/notebook/python.recovery
new file mode 100644
index 00000000000..eaf4938fdad
--- /dev/null
+++ b/zeppelin-server/notebook/python.recovery
@@ -0,0 +1 @@
+2CZA1DVUG:shared_process 192.168.3.2:55410
\ No newline at end of file
diff --git a/zeppelin-server/pom.xml b/zeppelin-server/pom.xml
index 08ede293e4d..925c637fcfc 100644
--- a/zeppelin-server/pom.xml
+++ b/zeppelin-server/pom.xml
@@ -349,6 +349,21 @@
+
+ maven-surefire-plugin
+ ${plugin.surefire.version}
+
+ -Xmx2g -Xms1g -Dfile.encoding=UTF-8
+
+ ${tests.to.exclude}
+
+
+ 1
+
+
+
+
+
org.scala-tools
maven-scala-plugin
diff --git a/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java b/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java
index 0b66a437d5b..f8625c2357c 100644
--- a/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java
+++ b/zeppelin-server/src/main/java/org/apache/zeppelin/server/ZeppelinServer.java
@@ -162,7 +162,7 @@ public ZeppelinServer() throws Exception {
public static void main(String[] args) throws InterruptedException {
- ZeppelinConfiguration conf = ZeppelinConfiguration.create();
+ final ZeppelinConfiguration conf = ZeppelinConfiguration.create();
conf.setProperty("args", args);
jettyWebServer = setupJettyServer(conf);
@@ -199,7 +199,9 @@ public static void main(String[] args) throws InterruptedException {
LOG.info("Shutting down Zeppelin Server ... ");
try {
jettyWebServer.stop();
- notebook.getInterpreterSettingManager().close();
+ if (!conf.isRecoveryEnabled()) {
+ ZeppelinServer.notebook.getInterpreterSettingManager().close();
+ }
notebook.close();
Thread.sleep(3000);
} catch (Exception e) {
@@ -222,7 +224,9 @@ public static void main(String[] args) throws InterruptedException {
}
jettyWebServer.join();
- ZeppelinServer.notebook.getInterpreterSettingManager().close();
+ if (!conf.isRecoveryEnabled()) {
+ ZeppelinServer.notebook.getInterpreterSettingManager().close();
+ }
}
private static Server setupJettyServer(ZeppelinConfiguration conf) {
diff --git a/zeppelin-server/src/test/java/org/apache/zeppelin/recovery/RecoveryTest.java b/zeppelin-server/src/test/java/org/apache/zeppelin/recovery/RecoveryTest.java
new file mode 100644
index 00000000000..37277ee0c36
--- /dev/null
+++ b/zeppelin-server/src/test/java/org/apache/zeppelin/recovery/RecoveryTest.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zeppelin.recovery;
+
+import com.google.common.io.Files;
+import com.google.gson.Gson;
+import com.google.gson.reflect.TypeToken;
+import org.apache.commons.httpclient.methods.PostMethod;
+import org.apache.commons.io.FileUtils;
+import org.apache.zeppelin.conf.ZeppelinConfiguration;
+import org.apache.zeppelin.interpreter.ManagedInterpreterGroup;
+import org.apache.zeppelin.interpreter.recovery.FileSystemRecoveryStorage;
+import org.apache.zeppelin.interpreter.recovery.StopInterpreter;
+import org.apache.zeppelin.notebook.Note;
+import org.apache.zeppelin.notebook.Paragraph;
+import org.apache.zeppelin.rest.AbstractTestRestApi;
+import org.apache.zeppelin.scheduler.Job;
+import org.apache.zeppelin.server.ZeppelinServer;
+import org.apache.zeppelin.user.AuthenticationInfo;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.File;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThat;
+
+public class RecoveryTest extends AbstractTestRestApi {
+
+ private Gson gson = new Gson();
+ private static File recoveryDir = null;
+
+  /**
+   * Enable FileSystemRecoveryStorage backed by a fresh temporary directory, then start
+   * the test server.
+   */
+  @BeforeClass
+  public static void init() throws Exception {
+    final String storageClassKey =
+        ZeppelinConfiguration.ConfVars.ZEPPELIN_RECOVERY_STORAGE_CLASS.getVarName();
+    System.setProperty(storageClassKey, FileSystemRecoveryStorage.class.getName());
+
+    recoveryDir = Files.createTempDir();
+    final String recoveryDirKey =
+        ZeppelinConfiguration.ConfVars.ZEPPELIN_RECOVERY_DIR.getVarName();
+    System.setProperty(recoveryDirKey, recoveryDir.getAbsolutePath());
+
+    startUp(RecoveryTest.class.getSimpleName());
+  }
+
+  /**
+   * Shut down the test server, then clear the recovery system properties set in init()
+   * and delete the temporary recovery directory. The cleanup runs even if shutDown()
+   * throws, so the settings do not leak into other test classes in the same JVM.
+   */
+  @AfterClass
+  public static void destroy() throws Exception {
+    try {
+      shutDown();
+    } finally {
+      System.clearProperty(
+          ZeppelinConfiguration.ConfVars.ZEPPELIN_RECOVERY_STORAGE_CLASS.getVarName());
+      System.clearProperty(ZeppelinConfiguration.ConfVars.ZEPPELIN_RECOVERY_DIR.getVarName());
+      // recoveryDir is still null if init() failed before creating it
+      if (recoveryDir != null) {
+        FileUtils.deleteDirectory(recoveryDir);
+      }
+    }
+  }
+
+ @Test
+ public void testRecovery() throws Exception {
+ Note note1 = ZeppelinServer.notebook.createNote(AuthenticationInfo.ANONYMOUS);
+
+ // run python interpreter and create new variable `user`
+ Paragraph p1 = note1.addNewParagraph(AuthenticationInfo.ANONYMOUS);
+ p1.setText("%python user='abc'");
+ PostMethod post = httpPost("/notebook/job/" + note1.getId(), "");
+ assertThat(post, isAllowed());
+ Map resp = gson.fromJson(post.getResponseBodyAsString(), new TypeToken