diff --git a/docs/interpreter/python.md b/docs/interpreter/python.md index c4fe7569f4b..0476c680b7f 100644 --- a/docs/interpreter/python.md +++ b/docs/interpreter/python.md @@ -68,8 +68,8 @@ print("".join(z.checkbox("f3", [("o1","1"), ("o2","2")],["1"]))) * Code-completion is currently not implemented. ## Matplotlib integration - The python interpreter can display matplotlib graph with the function **_zeppelin_show()_** - You need to already have matplotlib module installed and a running XServer to use this functionality ! + The python interpreter can display matplotlib graph with the function `zeppelin_show()`. + You need to have matplotlib module installed and a XServer running to use this functionality ! ```python %python @@ -90,28 +90,6 @@ zeppelin_show(plt,height='150px') [![pythonmatplotlib](../interpreter/screenshots/pythonMatplotlib.png)](/docs/interpreter/screenshots/pythonMatplotlib.png) -## Technical description - Interpreter architecture +## Technical description -### Dev prerequisites - -* Python 2 and 3 installed with py4j (0.9.2) and matplotlib (1.31 or later) installed on each - -* Tests only checks the interpreter logic and starts any Python process ! Python process is mocked with a class that simply output it input. - -* Make sure the code wrote in bootstrap.py and bootstrap_input.py is Python2 and 3 compliant. - -* Use PEP8 convention for python code. - -### Technical overview - - * When interpreter is starting it launches a python process inside a Java ProcessBuilder. Python is started with -i (interactive mode) and -u (unbuffered stdin, stdout and stderr) options. Thus the interpreter has a "sleeping" python process. - - * Interpreter sends command to python with a Java `outputStreamWiter` and read from an `InputStreamReader`. To know when stop reading stdout, interpreter sends `print "*!?flush reader!?*"`after each command and reads stdout until he receives back the `*!?flush reader!?*`. 
- - * When interpreter is starting, it sends some Python code (bootstrap.py and bootstrap_input.py) to initialize default behavior and functions (`help(), z.input()...`). bootstrap_input.py is sent only if py4j library is detected inside Python process. - - * [Py4J](https://www.py4j.org/) python and java libraries is used to load Input zeppelin Java class into the python process (make java code with python code !). Therefore the interpreter can directly create Zeppelin input form inside the Python process (and eventually with some python variable already defined). JVM opens a random open port to be accessible from python process. - - * JavaBuilder can't send SIGINT signal to interrupt paragraph execution. Therefore interpreter directly send a `kill SIGINT PID` to python process to interrupt execution. Python process catch SIGINT signal with some code defined in bootstrap.py - - * Matplotlib display feature is made with SVG export (in string) and then displays it with html code. +For in-depth technical details on the current implementation, please refer to [python/README.md](https://github.com/apache/zeppelin/blob/master/python/README.md) diff --git a/python/README.md b/python/README.md new file mode 100644 index 00000000000..4cc58d6cb2b --- /dev/null +++ b/python/README.md @@ -0,0 +1,42 @@ +# Overview +Python interpreter for Apache Zeppelin + +# Architecture +The current interpreter implementation spawns a new system Python process through `ProcessBuilder` and redirects its stdin/stdout to Zeppelin. + +# Details + + - **Py4j support** + + [Py4j](https://www.py4j.org/) enables Python programs to dynamically access Java objects in a JVM. + It is required in order to use the Zeppelin [dynamic forms](http://zeppelin.apache.org/docs/0.6.0-SNAPSHOT/manual/dynamicform.html) feature. 
+ + - bootstrap process + + The interpreter environment is set up with [bootstrap.py](https://github.com/apache/zeppelin/blob/master/python/src/main/resources/bootstrap.py). + It defines the `help()` and `z` convenience functions. + + +### Dev prerequisites + + * Python 2 or 3 installed with py4j (0.9.2) and matplotlib (1.31 or later) installed on each + + * Tests only check the interpreter logic and do not start any Python process! The Python process is mocked with a class that simply outputs its input. + + * Code written in `bootstrap.py` and `bootstrap_input.py` should always be Python 2 and 3 compliant. + +* Use PEP8 convention for python code. + +### Technical overview + + * When the interpreter is starting, it launches a python process inside a Java ProcessBuilder. Python is started with -i (interactive mode) and -u (unbuffered stdin, stdout and stderr) options. Thus the interpreter has a "sleeping" python process. + + * The interpreter sends commands to python with a Java `OutputStreamWriter` and reads from an `InputStreamReader`. To know when to stop reading stdout, the interpreter sends `print "*!?flush reader!?*"` after each command and reads stdout until it receives back the `*!?flush reader!?*`. + + * When the interpreter is starting, it sends some Python code (bootstrap.py and bootstrap_input.py) to initialize default behavior and functions (`help(), z.input()...`). bootstrap_input.py is sent only if the py4j library is detected inside the Python process. + + * [Py4J](https://www.py4j.org/) python and java libraries are used to load the Input zeppelin Java class into the python process (calling Java code from Python code!). Therefore the interpreter can directly create Zeppelin input forms inside the Python process (and eventually with some python variable already defined). The JVM opens a random open port to be accessible from the python process. + + * JavaBuilder can't send SIGINT signal to interrupt paragraph execution. 
Therefore interpreter directly send a `kill SIGINT PID` to python process to interrupt execution. Python process catch SIGINT signal with some code defined in bootstrap.py + + * Matplotlib display feature is made with SVG export (in string) and then displays it with html code. diff --git a/python/src/main/java/org/apache/zeppelin/python/PythonInterpreter.java b/python/src/main/java/org/apache/zeppelin/python/PythonInterpreter.java index e80548f0eaf..0253501db4c 100644 --- a/python/src/main/java/org/apache/zeppelin/python/PythonInterpreter.java +++ b/python/src/main/java/org/apache/zeppelin/python/PythonInterpreter.java @@ -52,11 +52,12 @@ public class PythonInterpreter extends Interpreter { private Integer port; private GatewayServer gatewayServer; - PythonProcess process = null; private long pythonPid; private Boolean py4J = false; private InterpreterContext context; + PythonProcess process = null; + static { Interpreter.register( "python", @@ -69,17 +70,13 @@ public class PythonInterpreter extends Interpreter { ); } - public PythonInterpreter(Properties property) { super(property); } @Override public void open() { - logger.info("Starting Python interpreter ....."); - - logger.info("Python path is set to:" + property.getProperty(ZEPPELIN_PYTHON)); process = getPythonProcess(); @@ -116,13 +113,10 @@ public void open() { "initialize Zeppelin inputs in python process", e); } } - - } @Override public void close() { - logger.info("closing Python interpreter ....."); try { if (process != null) { @@ -134,13 +128,10 @@ public void close() { } catch (IOException e) { logger.error("Can't close the interpreter", e); } - } - @Override public InterpreterResult interpret(String cmd, InterpreterContext contextInterpreter) { - this.context = contextInterpreter; String output = sendCommandToPython(cmd); @@ -148,7 +139,6 @@ public InterpreterResult interpret(String cmd, InterpreterContext contextInterpr .replaceAll("\\.\\.\\.", "").trim()); } - @Override public void 
cancel(InterpreterContext context) { try { @@ -180,10 +170,11 @@ public List completion(String buf, int cursor) { } public PythonProcess getPythonProcess() { - if (process == null) + if (process == null) { return new PythonProcess(getProperty(ZEPPELIN_PYTHON)); - else + } else { return process; + } } private Job getRunningJob(String paragraphId) { @@ -192,6 +183,7 @@ private Job getRunningJob(String paragraphId) { for (Job job : jobsRunning) { if (job.getId().equals(paragraphId)) { foundJob = job; + break; } } return foundJob; @@ -199,21 +191,17 @@ private Job getRunningJob(String paragraphId) { private String sendCommandToPython(String cmd) { - String output = ""; - logger.info("Sending : \n " + cmd); + logger.info("Sending : \n" + (cmd.length() > 200 ? cmd.substring(0, 120) + "..." : cmd)); try { output = process.sendAndGetResult(cmd); } catch (IOException e) { logger.error("Error when sending commands to python process", e); } - return output; } - private void bootStrapInterpreter(String file) throws IOException { - BufferedReader bootstrapReader = new BufferedReader( new InputStreamReader( PythonInterpreter.class.getResourceAsStream(file))); @@ -226,30 +214,25 @@ private void bootStrapInterpreter(String file) throws IOException { if (py4J && port != null && port != -1) { bootstrapCode = bootstrapCode.replaceAll("\\%PORT\\%", port.toString()); } - logger.info("Bootstrap python interpreter with \n " + bootstrapCode); + logger.info("Bootstrap python interpreter with code from \n " + file); sendCommandToPython(bootstrapCode); } - public GUI getGui() { - return context.getGui(); - } public Integer getPy4JPort() { - return port; - } public Boolean isPy4jInstalled() { - String output = sendCommandToPython("\n\nimport py4j\n"); - if (output.contains("ImportError")) + if (output.contains("ImportError")) { return false; - else return true; - + } else { + return true; + } } private int findRandomOpenPortOnAllLocalInterfaces() { diff --git 
a/python/src/main/java/org/apache/zeppelin/python/PythonProcess.java b/python/src/main/java/org/apache/zeppelin/python/PythonProcess.java index a6712240f46..364d372f366 100644 --- a/python/src/main/java/org/apache/zeppelin/python/PythonProcess.java +++ b/python/src/main/java/org/apache/zeppelin/python/PythonProcess.java @@ -33,15 +33,15 @@ * Object encapsulated interactive * Python process (REPL) used by python interpreter */ - public class PythonProcess { - Logger logger = LoggerFactory.getLogger(PythonProcess.class); + InputStream stdout; OutputStream stdin; BufferedWriter writer; BufferedReader reader; Process process; + private String binPath; private long pid; @@ -64,22 +64,17 @@ public void open() throws IOException { logger.warn("Can't find python pid process", e); pid = -1; } - - } public void close() throws IOException { - process.destroy(); reader.close(); writer.close(); stdin.close(); stdout.close(); - } public void interrupt() throws IOException { - if (pid > -1) { logger.info("Sending SIGINT signal to PID : " + pid); Runtime.getRuntime().exec("kill -SIGINT " + pid); @@ -87,12 +82,9 @@ public void interrupt() throws IOException { logger.warn("Non UNIX/Linux system, close the interpreter"); close(); } - - } public String sendAndGetResult(String cmd) throws IOException { - writer.write(cmd + "\n\n"); writer.write("print (\"*!?flush reader!?*\")\n\n"); writer.flush(); @@ -106,18 +98,13 @@ public String sendAndGetResult(String cmd) throws IOException { output += "Syntax error ! 
"; break; } - output += "\r" + line + "\n"; } - return output; - } - private long findPid() throws NoSuchFieldException, IllegalAccessException { long pid = -1; - if (process.getClass().getName().equals("java.lang.UNIXProcess")) { Field f = process.getClass().getDeclaredField("pid"); f.setAccessible(true); @@ -130,4 +117,5 @@ private long findPid() throws NoSuchFieldException, IllegalAccessException { public long getPid() { return pid; } + } diff --git a/python/src/main/resources/bootstrap.py b/python/src/main/resources/bootstrap.py index ee199f4710c..4f0dc5e09f4 100644 --- a/python/src/main/resources/bootstrap.py +++ b/python/src/main/resources/bootstrap.py @@ -14,11 +14,12 @@ # limitations under the License. # PYTHON 2 / 3 comptability : -# bootstrap.py must be runnable with Python 2 and 3 +# bootstrap.py must be runnable with Python 2 or 3 # Remove interactive mode displayhook import sys import signal + try: import StringIO as io except ImportError: @@ -26,14 +27,12 @@ sys.displayhook = lambda x: None - def intHandler(signum, frame): # Set the signal handler print ("Paragraph interrupted") raise KeyboardInterrupt() signal.signal(signal.SIGINT, intHandler) - def help(): print ('%html') print ('

Python Interpreter help

') @@ -72,9 +71,7 @@ def help(): print('''
zeppelin_show(plt,width='50px')
 zeppelin_show(plt,height='150px') 
''') - # Matplotlib show function - def zeppelin_show(p, width="0", height="0"): img = io.StringIO() p.savefig(img, format='svg') @@ -88,10 +85,8 @@ def zeppelin_show(p, width="0", height="0"): style += 'height:'+height print("%html
" + img.read() + "
") - # If py4j is detected, these class will be override # with the implementation in bootstrap_input.py - class PyZeppelinContext(): errorMsg = "You must install py4j Python module " \ "(pip install py4j) to use Zeppelin dynamic forms features" diff --git a/python/src/main/resources/bootstrap_input.py b/python/src/main/resources/bootstrap_input.py index 822434bb035..402df7e9a6c 100644 --- a/python/src/main/resources/bootstrap_input.py +++ b/python/src/main/resources/bootstrap_input.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - from py4j.java_gateway import JavaGateway from py4j.java_gateway import java_import, JavaGateway, GatewayClient @@ -21,14 +20,16 @@ gateway = JavaGateway(client) java_import(gateway.jvm, "org.apache.zeppelin.display.Input") - class PyZeppelinContext(): paramOption = gateway.jvm.org.apache.zeppelin.display.Input.ParamOption javaList = gateway.jvm.java.util.ArrayList + def __init__(self, zc): self.z = zc + def input(self, name, defaultValue=""): return self.z.getGui().input(name, defaultValue) + def select(self, name, options, defaultValue=""): javaOptions = gateway.new_array(self.paramOption, len(options)) i = 0 @@ -36,6 +37,7 @@ def select(self, name, options, defaultValue=""): javaOptions[i] = self.paramOption(tuple[0], tuple[1]) i += 1 return self.z.getGui().select(name, defaultValue, javaOptions) + def checkbox(self, name, options, defaultChecked=[]): javaOptions = gateway.new_array(self.paramOption, len(options)) i = 0 @@ -47,5 +49,4 @@ def checkbox(self, name, options, defaultChecked=[]): javaDefaultCheck.append(check) return self.z.getGui().checkbox(name, javaDefaultCheck, javaOptions) - z = PyZeppelinContext(gateway.entry_point)