29 commits
- 0e7ee1c: minor ui improvement (Oct 14, 2016)
- 188eeae: update screenshot (Oct 14, 2016)
- 465c51a: ZEPPELIN-335. Pig Interpreter (zjffdu, Oct 11, 2016)
- 3afd444: [Zeppelin-1540] fix note url input placeholder (khalidhuseynov, Oct 13, 2016)
- e5f211b: [ZEPPELIN-1550] fixed 'add from URL' button (yunho0130, Oct 14, 2016)
- 201d601: [ZEPPELIN-1552] Search button goes to next line when display's width … (hyonzin, Oct 15, 2016)
- 58cdba2: [ZEPPELIN-916] Apply new mechanism to KylinInterpreter (jongyoul, Oct 14, 2016)
- c67bd6d: ZEPPELIN-1423 - Allow users to specify pre/post-execute hooks for int… (agoodm, Oct 6, 2016)
- 8ea16ab: [ZEPPELIN-1532] Remove map visualization from source tree, because of… (Leemoonsoo, Oct 10, 2016)
- b6e3c8e: [Zeppelin-1557] Graceful storage exception handling (khalidhuseynov, Oct 17, 2016)
- b77f9ea: [ZEPPELIN-1437, 1438] Multi-user note management - user aware reload … (khalidhuseynov, Sep 27, 2016)
- 6e3a1d2: [ZEPPELIN-1487] Resolve possible NPEs in storage sync mechanism (khalidhuseynov, Sep 22, 2016)
- 1214463: [ZEPPELIN-1542] Cleanup of Note created during jUnit test in zeppeli… (raja-imaginea, Oct 14, 2016)
- abf0470: [ZEPPELIN-1534] Does not load dependency library when creating new in… (Oct 11, 2016)
- fab3e5f: [ZEPPELIN-1537] Elasticsearch improvement for results of aggregations (bbonnin, Oct 12, 2016)
- ab44633: [ZEPPELIN-1314] dump out the R command (snowch, Oct 18, 2016)
- 26e67d5: [ZEPPELIN-1544] upgrade spark version to 2.0.1 as profile spark-2.0 (LantaoJin, Oct 16, 2016)
- a3a2e4d: [ZEPPELIN-1559] Code Editor slow performance resolve of the many No… (cloverhearts, Oct 18, 2016)
- 908b2a7: [ZEPPELIN-1210] Run interpreter per user (cloverhearts, Oct 19, 2016)
- 9e9ea3a: [ZEPPELIN-1483] Zeppelin home page list notebooks doesn't show notebo… (prabhjyotsingh, Oct 20, 2016)
- 7ef0816: improvement minor ui (Oct 20, 2016)
- 7e771ed: ui fix further improvement (Oct 20, 2016)
- 58893ab: Merge branch 'minor_ui_fix' of github.com:baekhoseok/zeppelin into mi… (Oct 20, 2016)
- dd2ba6e: minor ui improvement (Oct 14, 2016)
- 5182abf: update screenshot (Oct 14, 2016)
- 205ea6a: improvement minor ui (Oct 20, 2016)
- 20b68f8: ui fix further improvement (Oct 20, 2016)
- a127854: Merge branch 'minor_ui_fix' of github.com:baekhoseok/zeppelin into mi… (Oct 21, 2016)
- 572623e: Repository button name change (Oct 23, 2016)
22 changes: 22 additions & 0 deletions bin/interpreter.sh
@@ -149,6 +149,28 @@ elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then
else
echo "HBASE_HOME and HBASE_CONF_DIR are not set, configuration might not be loaded"
fi
elif [[ "${INTERPRETER_ID}" == "pig" ]]; then
# autodetect HADOOP_CONF_DIR by heuristic
if [[ -n "${HADOOP_HOME}" ]] && [[ -z "${HADOOP_CONF_DIR}" ]]; then
if [[ -d "${HADOOP_HOME}/etc/hadoop" ]]; then
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
elif [[ -d "/etc/hadoop/conf" ]]; then
export HADOOP_CONF_DIR="/etc/hadoop/conf"
fi
fi

if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}"
fi

# autodetect TEZ_CONF_DIR
if [[ -n "${TEZ_CONF_DIR}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${TEZ_CONF_DIR}"
elif [[ -d "/etc/tez/conf" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":/etc/tez/conf"
else
echo "TEZ_CONF_DIR is not set, configuration might not be loaded"
fi
fi

addJarInDirForIntp "${LOCAL_INTERPRETER_REPO}"
1 change: 1 addition & 0 deletions conf/interpreter-list
@@ -32,6 +32,7 @@ kylin org.apache.zeppelin:zeppelin-kylin:0.6.1 Kylin in
lens org.apache.zeppelin:zeppelin-lens:0.6.1 Lens interpreter
livy org.apache.zeppelin:zeppelin-livy:0.6.1 Livy interpreter
md org.apache.zeppelin:zeppelin-markdown:0.6.1 Markdown support
pig org.apache.zeppelin:zeppelin-pig:0.6.1 Pig interpreter
postgresql org.apache.zeppelin:zeppelin-postgresql:0.6.1 Postgresql interpreter
python org.apache.zeppelin:zeppelin-python:0.6.1 Python interpreter
shell org.apache.zeppelin:zeppelin-shell:0.6.1 Shell command
2 changes: 1 addition & 1 deletion conf/zeppelin-site.xml.template
@@ -190,7 +190,7 @@

<property>
<name>zeppelin.interpreters</name>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter,org.apache.zeppelin.beam.BeamInterpreter</value>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter,org.apache.zeppelin.beam.BeamInterpreter,org.apache.zeppelin.pig.PigInterpreter,org.apache.zeppelin.pig.PigQueryInterpreter</value>
<description>Comma separated interpreter configurations. The first interpreter becomes the default</description>
</property>

3 changes: 1 addition & 2 deletions docs/_includes/themes/zeppelin/_navigation.html
@@ -62,6 +62,7 @@
<li><a href="{{BASE_PATH}}/interpreter/lens.html">Lens</a></li>
<li><a href="{{BASE_PATH}}/interpreter/livy.html">Livy</a></li>
<li><a href="{{BASE_PATH}}/interpreter/markdown.html">Markdown</a></li>
<li><a href="{{BASE_PATH}}/interpreter/pig.html">Pig</a></li>
<li><a href="{{BASE_PATH}}/interpreter/python.html">Python</a></li>
<li><a href="{{BASE_PATH}}/interpreter/postgresql.html">Postgresql, HAWQ</a></li>
<li><a href="{{BASE_PATH}}/interpreter/r.html">R</a></li>
@@ -118,8 +119,6 @@
<li><a href="{{BASE_PATH}}/development/howtocontributewebsite.html">How to contribute (website)</a></li>
</ul>
</li>
</ul>
<ul class="nav navbar-nav">
<li>
<a href="{{BASE_PATH}}/search.html" class="nav-search-link">
<span class="fa fa-search nav-search-icon"></span>
4 changes: 4 additions & 0 deletions docs/assets/themes/zeppelin/css/style.css
@@ -619,6 +619,10 @@ and (max-width: 1024px) {
.navbar-collapse.collapse {
padding-right: 0;
}

.navbar-fixed-top > .container {
width: 800px;
}
}

/* master branch docs dropdown menu */
Binary file modified docs/assets/themes/zeppelin/img/ui-img/configuration_menu.png
Binary file modified docs/assets/themes/zeppelin/img/ui-img/interpreter_menu.png
97 changes: 97 additions & 0 deletions docs/interpreter/pig.md
@@ -0,0 +1,97 @@
---
layout: page
title: "Pig Interpreter for Apache Zeppelin"
description: "Apache Pig is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs."
group: manual
---
{% include JB/setup %}


# Pig Interpreter for Apache Zeppelin

<div id="toc"></div>

## Overview
[Apache Pig](https://pig.apache.org/) is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs. The salient property of Pig programs is that their structure is amenable to substantial parallelization, which in turn enables them to handle very large data sets.

## Supported interpreter type
- `%pig.script` (default)

Any Pig script can run in this type of interpreter, and the display type is plain text.

- `%pig.query`

Almost the same as `%pig.script`. The only difference is that you don't need to add an alias in the last statement, and the display type is table.

## Supported runtime mode
- Local
- MapReduce
- Tez (Only Tez 0.7 is supported)

## How to use

### How to setup Pig

- Local Mode

Nothing needs to be done for local mode.

- MapReduce Mode

HADOOP\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.

- Tez Mode

HADOOP\_CONF\_DIR and TEZ\_CONF\_DIR need to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
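
For example, a minimal `zeppelin-env.sh` sketch (the paths below are illustrative assumptions for a typical layout; adjust them to your distribution):

```
# ZEPPELIN_HOME/conf/zeppelin-env.sh -- example paths, not defaults
export HADOOP_CONF_DIR=/etc/hadoop/conf
export TEZ_CONF_DIR=/etc/tez/conf
```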

### How to configure interpreter

In the Interpreter menu, you have to create a new Pig interpreter. The Pig interpreter has the properties below by default.

<table class="table-configuration">
<tr>
<th>Property</th>
<th>Default</th>
<th>Description</th>
</tr>
<tr>
<td>zeppelin.pig.execType</td>
<td>mapreduce</td>
<td>Execution mode of the Pig runtime: local | mapreduce | tez</td>
</tr>
<tr>
<td>zeppelin.pig.includeJobStats</td>
<td>false</td>
<td>Whether to display jobStats info in <code>%pig.script</code></td>
</tr>
<tr>
<td>zeppelin.pig.maxResult</td>
<td>1000</td>
<td>Maximum number of rows displayed in <code>%pig.query</code></td>
</tr>
</table>

### Example

##### pig

```
%pig

raw_data = load 'dataset/sf_crime/train.csv' using PigStorage(',') as (Dates,Category,Descript,DayOfWeek,PdDistrict,Resolution,Address,X,Y);
b = group raw_data all;
c = foreach b generate COUNT($1);
dump c;
```

##### pig.query

```
%pig.query

b = foreach raw_data generate Category;
c = group b by Category;
foreach c generate group as category, COUNT($1) as count;
```

Data is shared between `%pig` and `%pig.query`, so you can do common preparation work in `%pig` and then run different kinds of queries on that data in `%pig.query`.
46 changes: 46 additions & 0 deletions docs/manual/interpreters.md
@@ -82,3 +82,49 @@ interpreter.start()
The above code will start an interpreter thread inside your process. Once the interpreter is started, you can configure Zeppelin to connect to the RemoteInterpreter by checking the **Connect to existing process** checkbox and then providing the **Host** and **Port** on which the interpreter process is listening, as shown in the image below:

<img src="../assets/themes/zeppelin/img/screenshots/existing_interpreter.png" width="450px">


## (Experimental) Interpreter Execution Hooks

Zeppelin allows users to specify additional code to be executed by an interpreter before and after the paragraph code runs. This is primarily useful if you need to run the same set of code for all of the paragraphs within your notebook at specific times. Currently, this feature is only available for the spark and pyspark interpreters. To specify your hook code, use `z.registerHook()`. For example, enter the following into one paragraph:

```python
%pyspark
z.registerHook("pre_exec", "print 'This code should be executed before the paragraph code!'")
z.registerHook("post_exec", "print 'This code should be executed after the paragraph code!'")
```

These calls will not take effect until the next time you run a paragraph. In another paragraph, enter
```python
%pyspark
print "This code should be entered into the paragraph by the user!"
```

The output should be:
```
This code should be executed before the paragraph code!
This code should be entered into the paragraph by the user!
This code should be executed after the paragraph code!
```

If you ever need to know the hook code, use `z.getHook()`:
```python
%pyspark
print z.getHook("post_exec")
```
```
print 'This code should be executed after the paragraph code!'
```
Any call to `z.registerHook()` will automatically overwrite what was previously registered. To completely unregister a hook event, use `z.unregisterHook(eventCode)`. Currently only `"post_exec"` and `"pre_exec"` are valid event codes for the Zeppelin Hook Registry system.
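
For example, to remove the post-execute hook registered above:

```python
%pyspark
z.unregisterHook("post_exec")
```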

Finally, the hook registry is internally shared by the other interpreters in the same group. This allows hook code for one interpreter REPL to be set or unset by another, as follows:

```scala
%spark
z.unregisterHook("post_exec", "pyspark")
```
The API is identical for both the spark (scala) and pyspark (python) implementations.
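
Similarly, the scala side can register hooks of its own; a minimal sketch (the hook body here is an assumed example, written as scala code since it runs inside the spark REPL):

```scala
%spark
// hypothetical example: run a scala statement before every spark paragraph
z.registerHook("pre_exec", """println("Before each spark paragraph")""")
```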

### Caveats
Calls to `z.registerHook("pre_exec", ...)` should be made with care. If there are errors in your specified hook code, the interpreter REPL will be unable to execute any code past the pre-execute stage, making it impossible for direct calls to `z.unregisterHook()` to take effect. Current workarounds include calling `z.unregisterHook()` from a different interpreter REPL in the same interpreter group (see above) or manually restarting the interpreter group in the UI.

elasticsearch/src/main/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreter.java
@@ -22,6 +22,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@@ -35,7 +36,6 @@
import org.apache.commons.lang.StringUtils;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterPropertyBuilder;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.elasticsearch.action.delete.DeleteResponse;
@@ -48,6 +48,8 @@
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
@@ -437,14 +439,37 @@ else if (agg instanceof InternalSingleBucketAggregation) {
resMsg = XContentHelper.toString((InternalSingleBucketAggregation) agg).toString();
}
else if (agg instanceof InternalMultiBucketAggregation) {
final StringBuffer buffer = new StringBuffer("key\tdoc_count");

final Set<String> headerKeys = new HashSet<>();
final List<Map<String, Object>> buckets = new LinkedList<>();
final InternalMultiBucketAggregation multiBucketAgg = (InternalMultiBucketAggregation) agg;

for (MultiBucketsAggregation.Bucket bucket : multiBucketAgg.getBuckets()) {
buffer.append("\n")
.append(bucket.getKeyAsString())
.append("\t")
.append(bucket.getDocCount());
try {
final XContentBuilder builder = XContentFactory.jsonBuilder();
bucket.toXContent(builder, null);
final Map<String, Object> bucketMap = JsonFlattener.flattenAsMap(builder.string());
headerKeys.addAll(bucketMap.keySet());
buckets.add(bucketMap);
}
catch (IOException e) {
logger.error("Processing bucket: " + e.getMessage(), e);
}
}

final StringBuffer buffer = new StringBuffer();
final String[] keys = headerKeys.toArray(new String[0]);
for (String key: keys) {
buffer.append("\t" + key);
}
buffer.deleteCharAt(0);

for (Map<String, Object> bucket : buckets) {
buffer.append("\n");

for (String key: keys) {
buffer.append(bucket.get(key)).append("\t");
}
buffer.deleteCharAt(buffer.length() - 1);
}

resType = InterpreterResult.Type.TABLE;
elasticsearch/src/test/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreterTest.java
@@ -21,7 +21,12 @@
import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import java.util.UUID;

import org.apache.commons.lang.math.RandomUtils;
import org.apache.zeppelin.interpreter.InterpreterResult;
@@ -178,6 +183,11 @@ public void testAgg() {
res = interpreter.interpret("search /logs { \"aggs\" : { \"status_count\" : " +
" { \"terms\" : { \"field\" : \"status\" } } } }", null);
assertEquals(Code.SUCCESS, res.code());

res = interpreter.interpret("search /logs { \"aggs\" : { " +
" \"length\" : { \"terms\": { \"field\": \"status\" }, " +
" \"aggs\" : { \"sum_length\" : { \"sum\" : { \"field\" : \"content_length\" } }, \"sum_status\" : { \"sum\" : { \"field\" : \"status\" } } } } } }", null);
assertEquals(Code.SUCCESS, res.code());
}

@Test
kylin/src/main/java/org/apache/zeppelin/kylin/KylinInterpreter.java
@@ -56,24 +56,6 @@ public class KylinInterpreter extends Interpreter {
static final Pattern KYLIN_TABLE_FORMAT_REGEX_LABEL = Pattern.compile("\"label\":\"(.*?)\"");
static final Pattern KYLIN_TABLE_FORMAT_REGEX = Pattern.compile("\"results\":\\[\\[\"(.*?)\"]]");

static {
Interpreter.register(
"kylin",
"kylin",
KylinInterpreter.class.getName(),
new InterpreterPropertyBuilder()
.add(KYLIN_USERNAME, "ADMIN", "username for kylin user")
.add(KYLIN_PASSWORD, "KYLIN", "password for kylin user")
.add(KYLIN_QUERY_API_URL, "http://<host>:<port>/kylin/api/query", "Kylin API.")
.add(KYLIN_QUERY_PROJECT, "default", "kylin project name")
.add(KYLIN_QUERY_OFFSET, "0", "kylin query offset")
.add(KYLIN_QUERY_LIMIT, "5000", "kylin query limit")
.add(KYLIN_QUERY_ACCEPT_PARTIAL, "true", "The kylin query partial flag").build());
}




public KylinInterpreter(Properties property) {
super(property);
}
54 changes: 54 additions & 0 deletions kylin/src/main/resources/interpreter-setting.json
@@ -0,0 +1,54 @@
[
{
"group": "kylin",
"name": "kylin",
"className": "org.apache.zeppelin.kylin.KylinInterpreter",
"properties": {
"kylin.api.url": {
"envName": null,
"propertyName": "kylin.api.url",
"defaultValue": "http://localhost:7070/kylin/api/query",
"description": "Kylin API"
},
"kylin.api.user": {
"envName": null,
"propertyName": "kylin.api.user",
"defaultValue": "ADMIN",
"description": "username for kylin user"
},
"kylin.api.password": {
"envName": null,
"propertyName": "kylin.api.password",
"defaultValue": "KYLIN",
"description": "password for kylin user"
},
"kylin.query.project": {
"envName": null,
"propertyName": "kylin.query.project",
"defaultValue": "default",
"description": "kylin project name"
},
"kylin.query.offset": {
"envName": null,
"propertyName": "kylin.query.offset",
"defaultValue": "0",
"description": "kylin query offset"
},
"kylin.query.limit": {
"envName": null,
"propertyName": "kylin.query.limit",
"defaultValue": "5000",
"description": "kylin query limit"
},
"kylin.query.ispartial": {
"envName": null,
"propertyName": "kylin.query.ispartial",
"defaultValue": "true",
"description": "The kylin query partial flag"
}
},
"editor": {
"language": "sql"
}
}
]