29 commits
- 0e7ee1c: minor ui improvement (Oct 14, 2016)
- 188eeae: update screenshot (Oct 14, 2016)
- 465c51a: ZEPPELIN-335. Pig Interpreter (zjffdu, Oct 11, 2016)
- 3afd444: [Zeppelin-1540] fix note url input placeholder (khalidhuseynov, Oct 13, 2016)
- e5f211b: [ZEPPELIN-1550] fixed 'add from URL' button (yunho0130, Oct 14, 2016)
- 201d601: [ZEPPELIN-1552] Search button goes to next line when display's width … (hyonzin, Oct 15, 2016)
- 58cdba2: [ZEPPELIN-916] Apply new mechanism to KylinInterpreter (jongyoul, Oct 14, 2016)
- c67bd6d: ZEPPELIN-1423 - Allow users to specify pre/post-execute hooks for int… (agoodm, Oct 6, 2016)
- 8ea16ab: [ZEPPELIN-1532] Remove map visualization from source tree, because of… (Leemoonsoo, Oct 10, 2016)
- b6e3c8e: [Zeppelin-1557] Graceful storage exception handling (khalidhuseynov, Oct 17, 2016)
- b77f9ea: [ZEPPELIN-1437, 1438] Multi-user note management - user aware reload … (khalidhuseynov, Sep 27, 2016)
- 6e3a1d2: [ZEPPELIN-1487] Resolve possible NPEs in storage sync mechanism (khalidhuseynov, Sep 22, 2016)
- 1214463: [ZEPPELIN-1542] Cleanup of Note created during jUnit test in zeppeli… (raja-imaginea, Oct 14, 2016)
- abf0470: [ZEPPELIN-1534] Does not load dependency library when creating new in… (Oct 11, 2016)
- fab3e5f: [ZEPPELIN-1537] Elasticsearch improvement for results of aggregations (bbonnin, Oct 12, 2016)
- ab44633: [ZEPPELIN-1314] dump out the R command (snowch, Oct 18, 2016)
- 26e67d5: [ZEPPELIN-1544] upgrade spark version to 2.0.1 as profile spark-2.0 (LantaoJin, Oct 16, 2016)
- a3a2e4d: [ZEPPELIN-1559] Code Editor slow performance resolve of the many No… (cloverhearts, Oct 18, 2016)
- 908b2a7: [ZEPPELIN-1210] Run interpreter per user (cloverhearts, Oct 19, 2016)
- 9e9ea3a: [ZEPPELIN-1483] Zeppelin home page list notebooks doesn't show notebo… (prabhjyotsingh, Oct 20, 2016)
- 7ef0816: improvement minor ui (Oct 20, 2016)
- 7e771ed: ui fix further improvement (Oct 20, 2016)
- 58893ab: Merge branch 'minor_ui_fix' of github.com:baekhoseok/zeppelin into mi… (Oct 20, 2016)
- dd2ba6e: minor ui improvement (Oct 14, 2016)
- 5182abf: update screenshot (Oct 14, 2016)
- 205ea6a: improvement minor ui (Oct 20, 2016)
- 20b68f8: ui fix further improvement (Oct 20, 2016)
- a127854: Merge branch 'minor_ui_fix' of github.com:baekhoseok/zeppelin into mi… (Oct 21, 2016)
- 572623e: Repository button name change (Oct 23, 2016)
22 changes: 22 additions & 0 deletions bin/interpreter.sh
@@ -149,6 +149,28 @@ elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then
else
echo "HBASE_HOME and HBASE_CONF_DIR are not set, configuration might not be loaded"
fi
elif [[ "${INTERPRETER_ID}" == "pig" ]]; then
# autodetect HADOOP_CONF_DIR by heuristic
if [[ -n "${HADOOP_HOME}" ]] && [[ -z "${HADOOP_CONF_DIR}" ]]; then
if [[ -d "${HADOOP_HOME}/etc/hadoop" ]]; then
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
elif [[ -d "/etc/hadoop/conf" ]]; then
export HADOOP_CONF_DIR="/etc/hadoop/conf"
fi
fi

if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}"
fi

# autodetect TEZ_CONF_DIR
if [[ -n "${TEZ_CONF_DIR}" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":${TEZ_CONF_DIR}"
elif [[ -d "/etc/tez/conf" ]]; then
ZEPPELIN_INTP_CLASSPATH+=":/etc/tez/conf"
else
echo "TEZ_CONF_DIR is not set, configuration might not be loaded"
fi
fi

addJarInDirForIntp "${LOCAL_INTERPRETER_REPO}"
1 change: 1 addition & 0 deletions conf/interpreter-list
@@ -32,6 +32,7 @@ kylin org.apache.zeppelin:zeppelin-kylin:0.6.1 Kylin in
lens org.apache.zeppelin:zeppelin-lens:0.6.1 Lens interpreter
livy org.apache.zeppelin:zeppelin-livy:0.6.1 Livy interpreter
md org.apache.zeppelin:zeppelin-markdown:0.6.1 Markdown support
pig org.apache.zeppelin:zeppelin-pig:0.6.1 Pig interpreter
postgresql org.apache.zeppelin:zeppelin-postgresql:0.6.1 Postgresql interpreter
python org.apache.zeppelin:zeppelin-python:0.6.1 Python interpreter
shell org.apache.zeppelin:zeppelin-shell:0.6.1 Shell command
2 changes: 1 addition & 1 deletion conf/zeppelin-site.xml.template
@@ -190,7 +190,7 @@

<property>
<name>zeppelin.interpreters</name>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter,org.apache.zeppelin.beam.BeamInterpreter</value>
<value>org.apache.zeppelin.spark.SparkInterpreter,org.apache.zeppelin.spark.PySparkInterpreter,org.apache.zeppelin.rinterpreter.RRepl,org.apache.zeppelin.rinterpreter.KnitR,org.apache.zeppelin.spark.SparkRInterpreter,org.apache.zeppelin.spark.SparkSqlInterpreter,org.apache.zeppelin.spark.DepInterpreter,org.apache.zeppelin.markdown.Markdown,org.apache.zeppelin.angular.AngularInterpreter,org.apache.zeppelin.shell.ShellInterpreter,org.apache.zeppelin.file.HDFSFileInterpreter,org.apache.zeppelin.flink.FlinkInterpreter,,org.apache.zeppelin.python.PythonInterpreter,org.apache.zeppelin.lens.LensInterpreter,org.apache.zeppelin.ignite.IgniteInterpreter,org.apache.zeppelin.ignite.IgniteSqlInterpreter,org.apache.zeppelin.cassandra.CassandraInterpreter,org.apache.zeppelin.geode.GeodeOqlInterpreter,org.apache.zeppelin.postgresql.PostgreSqlInterpreter,org.apache.zeppelin.jdbc.JDBCInterpreter,org.apache.zeppelin.kylin.KylinInterpreter,org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter,org.apache.zeppelin.scalding.ScaldingInterpreter,org.apache.zeppelin.alluxio.AlluxioInterpreter,org.apache.zeppelin.hbase.HbaseInterpreter,org.apache.zeppelin.livy.LivySparkInterpreter,org.apache.zeppelin.livy.LivyPySparkInterpreter,org.apache.zeppelin.livy.LivySparkRInterpreter,org.apache.zeppelin.livy.LivySparkSQLInterpreter,org.apache.zeppelin.bigquery.BigQueryInterpreter,org.apache.zeppelin.beam.BeamInterpreter,org.apache.zeppelin.pig.PigInterpreter,org.apache.zeppelin.pig.PigQueryInterpreter</value>
<description>Comma separated interpreter configurations. The first interpreter becomes the default</description>
</property>

3 changes: 1 addition & 2 deletions docs/_includes/themes/zeppelin/_navigation.html
@@ -62,6 +62,7 @@
<li><a href="{{BASE_PATH}}/interpreter/lens.html">Lens</a></li>
<li><a href="{{BASE_PATH}}/interpreter/livy.html">Livy</a></li>
<li><a href="{{BASE_PATH}}/interpreter/markdown.html">Markdown</a></li>
<li><a href="{{BASE_PATH}}/interpreter/pig.html">Pig</a></li>
<li><a href="{{BASE_PATH}}/interpreter/python.html">Python</a></li>
<li><a href="{{BASE_PATH}}/interpreter/postgresql.html">Postgresql, HAWQ</a></li>
<li><a href="{{BASE_PATH}}/interpreter/r.html">R</a></li>
@@ -118,8 +119,6 @@
<li><a href="{{BASE_PATH}}/development/howtocontributewebsite.html">How to contribute (website)</a></li>
</ul>
</li>
</ul>
<ul class="nav navbar-nav">
<li>
<a href="{{BASE_PATH}}/search.html" class="nav-search-link">
<span class="fa fa-search nav-search-icon"></span>
4 changes: 4 additions & 0 deletions docs/assets/themes/zeppelin/css/style.css
@@ -619,6 +619,10 @@ and (max-width: 1024px) {
.navbar-collapse.collapse {
padding-right: 0;
}

.navbar-fixed-top > .container {
width: 800px;
}
}

/* master branch docs dropdown menu */
Binary file modified docs/assets/themes/zeppelin/img/ui-img/configuration_menu.png
Binary file modified docs/assets/themes/zeppelin/img/ui-img/interpreter_menu.png
97 changes: 97 additions & 0 deletions docs/interpreter/pig.md
@@ -0,0 +1,97 @@
---
layout: page
title: "Pig Interpreter for Apache Zeppelin"
description: "Apache Pig is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs."
group: manual
---
{% include JB/setup %}


# Pig Interpreter for Apache Zeppelin

<div id="toc"></div>

## Overview
[Apache Pig](https://pig.apache.org/) is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs. The salient property of Pig programs is that their structure is amenable to substantial parallelization, which in turn enables them to handle very large data sets.

## Supported interpreter type
- `%pig.script` (default)

Any Pig script can run in this type of interpreter, and the display type is plain text.

- `%pig.query`

Almost the same as `%pig.script`. The only difference is that you don't need to add an alias in the last statement, and the display type is table.

## Supported runtime mode
- Local
- MapReduce
- Tez (Only Tez 0.7 is supported)

## How to use

### How to setup Pig

- Local Mode

Nothing needs to be done for local mode.

- MapReduce Mode

HADOOP\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.

- Tez Mode

HADOOP\_CONF\_DIR and TEZ\_CONF\_DIR need to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`.
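
For example, a minimal `zeppelin-env.sh` sketch (the paths below are illustrative assumptions for a typical layout; adjust them to your distribution):

```
# ZEPPELIN_HOME/conf/zeppelin-env.sh -- example paths, not defaults
export HADOOP_CONF_DIR=/etc/hadoop/conf
export TEZ_CONF_DIR=/etc/tez/conf
```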

### How to configure interpreter

In the Interpreter menu, you have to create a new Pig interpreter. The Pig interpreter has the properties below by default.

<table class="table-configuration">
<tr>
<th>Property</th>
<th>Default</th>
<th>Description</th>
</tr>
<tr>
<td>zeppelin.pig.execType</td>
<td>mapreduce</td>
<td>Execution mode of the Pig runtime: local | mapreduce | tez</td>
</tr>
<tr>
<td>zeppelin.pig.includeJobStats</td>
<td>false</td>
<td>Whether to display jobStats info in <code>%pig.script</code></td>
</tr>
<tr>
<td>zeppelin.pig.maxResult</td>
<td>1000</td>
<td>Maximum number of rows displayed in <code>%pig.query</code></td>
</tr>
</table>

### Example

##### pig

```
%pig

raw_data = load 'dataset/sf_crime/train.csv' using PigStorage(',') as (Dates,Category,Descript,DayOfWeek,PdDistrict,Resolution,Address,X,Y);
b = group raw_data all;
c = foreach b generate COUNT($1);
dump c;
```

##### pig.query

```
%pig.query

b = foreach raw_data generate Category;
c = group b by Category;
foreach c generate group as category, COUNT($1) as count;
```

Data is shared between `%pig` and `%pig.query`, so you can do common preparation work in `%pig` and then run different kinds of queries on that data in `%pig.query`.
46 changes: 46 additions & 0 deletions docs/manual/interpreters.md
@@ -82,3 +82,49 @@ interpreter.start()
The above code will start an interpreter thread inside your process. Once the interpreter is started, you can configure Zeppelin to connect to the RemoteInterpreter by checking the **Connect to existing process** checkbox and then providing the **Host** and **Port** on which the interpreter process is listening, as shown in the image below:

<img src="../assets/themes/zeppelin/img/screenshots/existing_interpreter.png" width="450px">


## (Experimental) Interpreter Execution Hooks

Zeppelin allows users to specify additional code to be executed by an interpreter before and after the paragraph code runs. This is primarily useful if you need to run the same set of code for all of the paragraphs within your notebook at specific times. Currently, this feature is only available for the spark and pyspark interpreters. To specify your hook code, use `z.registerHook()`. For example, enter the following into one paragraph:

```python
%pyspark
z.registerHook("pre_exec", "print 'This code should be executed before the paragraph code!'")
z.registerHook("post_exec", "print 'This code should be executed after the paragraph code!'")
```

These calls will not take effect until the next time you run a paragraph. In another paragraph, enter
```python
%pyspark
print "This code should be entered into the paragraph by the user!"
```

The output should be:
```
This code should be executed before the paragraph code!
This code should be entered into the paragraph by the user!
This code should be executed after the paragraph code!
```

If you ever need to know the hook code, use `z.getHook()`:
```python
%pyspark
print z.getHook("post_exec")
```
```
print 'This code should be executed after the paragraph code!'
```
Any call to `z.registerHook()` will automatically overwrite what was previously registered. To completely unregister a hook event, use `z.unregisterHook(eventCode)`. Currently only `"post_exec"` and `"pre_exec"` are valid event codes for the Zeppelin Hook Registry system.
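
For example, to remove the post-execute hook registered above:

```python
%pyspark
z.unregisterHook("post_exec")
```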

Finally, the hook registry is internally shared by the other interpreters in the same group. This allows hook code for one interpreter REPL to be set or unset by another, as follows:

```scala
%spark
z.unregisterHook("post_exec", "pyspark")
```
The API is identical for both the spark (scala) and pyspark (python) implementations.
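
Similarly, the scala side can register hooks of its own; a minimal sketch (the hook body here is an assumed example, written as scala code since it runs inside the spark REPL):

```scala
%spark
// hypothetical example: run a scala statement before every spark paragraph
z.registerHook("pre_exec", """println("Before each spark paragraph")""")
```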

### Caveats
Calls to `z.registerHook("pre_exec", ...)` should be made with care. If there are errors in your specified hook code, the interpreter REPL will be unable to execute any code past the pre-execute stage, making it impossible for direct calls to `z.unregisterHook()` to take effect. Current workarounds include calling `z.unregisterHook()` from a different interpreter REPL in the same interpreter group (see above) or manually restarting the interpreter group in the UI.

elasticsearch/src/main/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreter.java
@@ -22,6 +22,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@@ -35,7 +36,6 @@
import org.apache.commons.lang.StringUtils;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterPropertyBuilder;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.elasticsearch.action.delete.DeleteResponse;
@@ -48,6 +48,8 @@
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
@@ -437,14 +439,37 @@ else if (agg instanceof InternalSingleBucketAggregation) {
resMsg = XContentHelper.toString((InternalSingleBucketAggregation) agg).toString();
}
else if (agg instanceof InternalMultiBucketAggregation) {
final StringBuffer buffer = new StringBuffer("key\tdoc_count");

final Set<String> headerKeys = new HashSet<>();
final List<Map<String, Object>> buckets = new LinkedList<>();
final InternalMultiBucketAggregation multiBucketAgg = (InternalMultiBucketAggregation) agg;

for (MultiBucketsAggregation.Bucket bucket : multiBucketAgg.getBuckets()) {
buffer.append("\n")
.append(bucket.getKeyAsString())
.append("\t")
.append(bucket.getDocCount());
try {
final XContentBuilder builder = XContentFactory.jsonBuilder();
bucket.toXContent(builder, null);
final Map<String, Object> bucketMap = JsonFlattener.flattenAsMap(builder.string());
headerKeys.addAll(bucketMap.keySet());
buckets.add(bucketMap);
}
catch (IOException e) {
logger.error("Processing bucket: " + e.getMessage(), e);
}
}

final StringBuffer buffer = new StringBuffer();
final String[] keys = headerKeys.toArray(new String[0]);
for (String key: keys) {
buffer.append("\t" + key);
}
buffer.deleteCharAt(0);

for (Map<String, Object> bucket : buckets) {
buffer.append("\n");

for (String key: keys) {
buffer.append(bucket.get(key)).append("\t");
}
buffer.deleteCharAt(buffer.length() - 1);
}

resType = InterpreterResult.Type.TABLE;
elasticsearch/src/test/java/org/apache/zeppelin/elasticsearch/ElasticsearchInterpreterTest.java
@@ -21,7 +21,12 @@
import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import java.util.UUID;

import org.apache.commons.lang.math.RandomUtils;
import org.apache.zeppelin.interpreter.InterpreterResult;
@@ -178,6 +183,11 @@ public void testAgg() {
res = interpreter.interpret("search /logs { \"aggs\" : { \"status_count\" : " +
" { \"terms\" : { \"field\" : \"status\" } } } }", null);
assertEquals(Code.SUCCESS, res.code());

res = interpreter.interpret("search /logs { \"aggs\" : { " +
" \"length\" : { \"terms\": { \"field\": \"status\" }, " +
" \"aggs\" : { \"sum_length\" : { \"sum\" : { \"field\" : \"content_length\" } }, \"sum_status\" : { \"sum\" : { \"field\" : \"status\" } } } } } }", null);
assertEquals(Code.SUCCESS, res.code());
}

@Test
kylin/src/main/java/org/apache/zeppelin/kylin/KylinInterpreter.java
@@ -56,24 +56,6 @@ public class KylinInterpreter extends Interpreter {
static final Pattern KYLIN_TABLE_FORMAT_REGEX_LABEL = Pattern.compile("\"label\":\"(.*?)\"");
static final Pattern KYLIN_TABLE_FORMAT_REGEX = Pattern.compile("\"results\":\\[\\[\"(.*?)\"]]");

static {
Interpreter.register(
"kylin",
"kylin",
KylinInterpreter.class.getName(),
new InterpreterPropertyBuilder()
.add(KYLIN_USERNAME, "ADMIN", "username for kylin user")
.add(KYLIN_PASSWORD, "KYLIN", "password for kylin user")
.add(KYLIN_QUERY_API_URL, "http://<host>:<port>/kylin/api/query", "Kylin API.")
.add(KYLIN_QUERY_PROJECT, "default", "kylin project name")
.add(KYLIN_QUERY_OFFSET, "0", "kylin query offset")
.add(KYLIN_QUERY_LIMIT, "5000", "kylin query limit")
.add(KYLIN_QUERY_ACCEPT_PARTIAL, "true", "The kylin query partial flag").build());
}




public KylinInterpreter(Properties property) {
super(property);
}
54 changes: 54 additions & 0 deletions kylin/src/main/resources/interpreter-setting.json
@@ -0,0 +1,54 @@
[
{
"group": "kylin",
"name": "kylin",
"className": "org.apache.zeppelin.kylin.KylinInterpreter",
"properties": {
"kylin.api.url": {
"envName": null,
"propertyName": "kylin.api.url",
"defaultValue": "http://localhost:7070/kylin/api/query",
"description": "Kylin API"
},
"kylin.api.user": {
"envName": null,
"propertyName": "kylin.api.user",
"defaultValue": "ADMIN",
"description": "username for kylin user"
},
"kylin.api.password": {
"envName": null,
"propertyName": "kylin.api.password",
"defaultValue": "KYLIN",
"description": "password for kylin user"
},
"kylin.query.project": {
"envName": null,
"propertyName": "kylin.query.project",
"defaultValue": "default",
"description": "kylin project name"
},
"kylin.query.offset": {
"envName": null,
"propertyName": "kylin.query.offset",
"defaultValue": "0",
"description": "kylin query offset"
},
"kylin.query.limit": {
"envName": null,
"propertyName": "kylin.query.limit",
"defaultValue": "5000",
"description": "kylin query limit"
},
"kylin.query.ispartial": {
"envName": null,
"propertyName": "kylin.query.ispartial",
"defaultValue": "true",
"description": "The kylin query partial flag"
}
},
"editor": {
"language": "sql"
}
}
]