335 changes: 219 additions & 116 deletions docs/interpreter/spark.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/usage/interpreter/overview.md
@@ -132,7 +132,7 @@ Before 0.8.0, Zeppelin didn't have lifecycle management for interpreters. Users
Users can change this threshold via the `zeppelin.interpreter.lifecyclemanager.timeout.threshold` setting. `TimeoutLifecycleManager` is the default lifecycle manager, and users can change it via `zeppelin.interpreter.lifecyclemanager.class`.
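
For example, the idle timeout could be raised to two hours in `conf/zeppelin-site.xml`. This is a minimal sketch: the property name comes from this doc, while the millisecond unit and the values shown are assumptions.

```xml
<!-- conf/zeppelin-site.xml: keep idle interpreters alive longer -->
<property>
  <name>zeppelin.interpreter.lifecyclemanager.timeout.threshold</name>
  <!-- assumed to be milliseconds; 2 hours = 7200000 ms -->
  <value>7200000</value>
</property>
```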


## Generic ConfInterpreter
## Inline Generic ConfInterpreter

Zeppelin's interpreter setting is shared by all users and notes. If you want different settings, you have to create a new interpreter, e.g. you can create `spark_jar1` for running Spark with dependency jar1 and `spark_jar2` for running Spark with dependency jar2.
This approach works, but it is not particularly convenient. `ConfInterpreter` provides more fine-grained control over interpreter settings and more flexibility.
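
For instance, a note can customize the Spark interpreter by running a `%spark.conf` paragraph with one property per line, as sketched below; the path and jar coordinate are illustrative.

```
%spark.conf

SPARK_HOME /path/to/spark_home
master yarn-client
spark.jars.packages com.databricks:spark-csv_2.11:1.2.0
```

Note that the `%spark.conf` paragraph has to run before the interpreter process is launched; once the process is running, these settings cannot be changed for that session.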
125 changes: 81 additions & 44 deletions spark/interpreter/src/main/resources/interpreter-setting.json
@@ -5,62 +5,99 @@
"className": "org.apache.zeppelin.spark.SparkInterpreter",
"defaultInterpreter": true,
"properties": {
"SPARK_HOME": {
"envName": "SPARK_HOME",
"propertyName": "SPARK_HOME",
"defaultValue": "",
"description": "Location of spark distribution",
"type": "string"
},
"master": {
"envName": "",
"propertyName": "spark.master",
"defaultValue": "local[*]",
"description": "Spark master uri. ex) spark://master_host:7077",
"type": "string"
},
"spark.app.name": {
"envName": "",
"propertyName": "spark.app.name",
"defaultValue": "Zeppelin",
"description": "The name of spark application.",
"type": "string"
},
"spark.driver.cores": {
"envName": "",
"propertyName": "spark.driver.cores",
"defaultValue": "1",
"description": "Number of cores to use for the driver process, only in cluster mode.",
"type": "int"
},
"spark.driver.memory": {
"envName": "",
"propertyName": "spark.driver.memory",
"defaultValue": "1g",
"description": "Amount of memory to use for the driver process, i.e. where SparkContext is initialized, in the same format as JVM memory strings with a size unit suffix (\"k\", \"m\", \"g\" or \"t\") (e.g. 512m, 2g).",
"type": "string"
},
"spark.executor.cores": {
"envName": null,
"propertyName": "spark.executor.cores",
"defaultValue": "1",
"description": "The number of cores to use on each executor",
"type": "int"
},
"spark.executor.memory": {
"envName": null,
"propertyName": "spark.executor.memory",
"defaultValue": "",
"description": "Executor memory per worker instance. ex) 512m, 32g",
"type": "string"
},
"args": {
"spark.files": {
"envName": null,
"propertyName": null,
"propertyName": "spark.files",
"defaultValue": "",
"description": "spark commandline args",
"type": "textarea"
"description": "Comma-separated list of files to be placed in the working directory of each executor. Globs are allowed.",
"type": "string"
},
"spark.jars": {
"envName": null,
"propertyName": "spark.jars",
"defaultValue": "",
"description": "Comma-separated list of jars to include on the driver and executor classpaths. Globs are allowed.",
"type": "string"
},
"spark.jars.packages": {
"envName": null,
"propertyName": "spark.jars.packages",
"defaultValue": "",
"description": "Comma-separated list of Maven coordinates of jars to include on the driver and executor classpaths. The coordinates should be groupId:artifactId:version. If spark.jars.ivySettings is given artifacts will be resolved according to the configuration in the file, otherwise artifacts will be searched for in the local maven repo, then maven central and finally any additional remote repositories given by the command-line option --repositories.",
"type": "string"
},
"zeppelin.spark.useHiveContext": {
"envName": "ZEPPELIN_SPARK_USEHIVECONTEXT",
"envName": null,
"propertyName": "zeppelin.spark.useHiveContext",
"defaultValue": true,
"description": "Use HiveContext instead of SQLContext if it is true.",
"description": "Use HiveContext instead of SQLContext if it is true. Enable hive for SparkSession.",
"type": "checkbox"
},
"spark.app.name": {
"envName": "SPARK_APP_NAME",
"propertyName": "spark.app.name",
"defaultValue": "Zeppelin",
"description": "The name of spark application.",
"type": "string"
},

"zeppelin.spark.printREPLOutput": {
"envName": null,
"propertyName": "zeppelin.spark.printREPLOutput",
"defaultValue": true,
"description": "Print REPL output",
"description": "Print scala REPL output",
"type": "checkbox"
},
"spark.cores.max": {
"envName": null,
"propertyName": "spark.cores.max",
"defaultValue": "",
"description": "Total number of cores to use. Empty value uses all available core.",
"type": "number"
},
"zeppelin.spark.maxResult": {
"envName": "ZEPPELIN_SPARK_MAXRESULT",
"envName": null,
"propertyName": "zeppelin.spark.maxResult",
"defaultValue": "1000",
"description": "Max number of Spark SQL result to display.",
"type": "number"
},
"master": {
"envName": "MASTER",
"propertyName": "spark.master",
"defaultValue": "local[*]",
"description": "Spark master uri. ex) spark://masterhost:7077",
"type": "string"
},

"zeppelin.spark.enableSupportedVersionCheck": {
"envName": null,
"propertyName": "zeppelin.spark.enableSupportedVersionCheck",
@@ -110,21 +147,21 @@
"className": "org.apache.zeppelin.spark.SparkSqlInterpreter",
"properties": {
"zeppelin.spark.concurrentSQL": {
"envName": "ZEPPELIN_SPARK_CONCURRENTSQL",
"envName": null,
"propertyName": "zeppelin.spark.concurrentSQL",
"defaultValue": false,
"description": "Execute multiple SQL concurrently if set true.",
"type": "checkbox"
},
"zeppelin.spark.concurrentSQL.max": {
"envName": "ZEPPELIN_SPARK_CONCURRENTSQL_MAX",
"envName": null,
"propertyName": "zeppelin.spark.concurrentSQL.max",
"defaultValue": 10,
"description": "Max number of SQL concurrently executed",
"type": "number"
},
"zeppelin.spark.sql.stacktrace": {
"envName": "ZEPPELIN_SPARK_SQL_STACKTRACE",
"envName": null,
"propertyName": "zeppelin.spark.sql.stacktrace",
"defaultValue": false,
"description": "Show full exception stacktrace for SQL queries if set to true.",
@@ -134,18 +171,18 @@
"envName": null,
"propertyName": "zeppelin.spark.sql.interpolation",
"defaultValue": false,
"description": "Enable ZeppelinContext variable interpolation into paragraph text",
"description": "Enable ZeppelinContext variable interpolation into spark sql",
"type": "checkbox"
},
"zeppelin.spark.maxResult": {
"envName": "ZEPPELIN_SPARK_MAXRESULT",
"envName": null,
"propertyName": "zeppelin.spark.maxResult",
"defaultValue": "1000",
"description": "Max number of Spark SQL result to display.",
"type": "number"
},
"zeppelin.spark.importImplicit": {
"envName": "ZEPPELIN_SPARK_IMPORTIMPLICIT",
"envName": null,
"propertyName": "zeppelin.spark.importImplicit",
"defaultValue": true,
"description": "Import implicits, UDF collection, and sql if set true. true by default.",
@@ -168,21 +205,21 @@
"envName": "PYSPARK_PYTHON",
"propertyName": "PYSPARK_PYTHON",
"defaultValue": "python",
"description": "Python command to run pyspark with",
"description": "Python binary executable to use for PySpark in driver only (default is `PYSPARK_PYTHON`). Property <code>spark.pyspark.driver.python</code> take precedence if it is set",
"type": "string"
},
"PYSPARK_DRIVER_PYTHON": {
"envName": "PYSPARK_DRIVER_PYTHON",
"propertyName": "PYSPARK_DRIVER_PYTHON",
"defaultValue": "python",
"description": "Python command to run pyspark with",
"description": "Python binary executable to use for PySpark in driver only (default is `PYSPARK_PYTHON`). Property <code>spark.pyspark.driver.python</code> take precedence if it is set",
"type": "string"
},
"zeppelin.pyspark.useIPython": {
"envName": null,
"propertyName": "zeppelin.pyspark.useIPython",
"defaultValue": true,
"description": "whether use IPython when it is available",
"description": "Whether use IPython when it is available",
"type": "checkbox"
}
},
@@ -210,28 +247,28 @@
"className": "org.apache.zeppelin.spark.SparkRInterpreter",
"properties": {
"zeppelin.R.knitr": {
"envName": "ZEPPELIN_R_KNITR",
"envName": null,
"propertyName": "zeppelin.R.knitr",
"defaultValue": true,
"description": "whether use knitr or not",
"description": "Whether use knitr or not",
"type": "checkbox"
},
"zeppelin.R.cmd": {
"envName": "ZEPPELIN_R_CMD",
"envName": null,
"propertyName": "zeppelin.R.cmd",
"defaultValue": "R",
"description": "R repl path",
"description": "R binary executable path",
"type": "string"
},
"zeppelin.R.image.width": {
"envName": "ZEPPELIN_R_IMAGE_WIDTH",
"envName": null,
"propertyName": "zeppelin.R.image.width",
"defaultValue": "100%",
"description": "",
"type": "number"
},
"zeppelin.R.render.options": {
"envName": "ZEPPELIN_R_RENDER_OPTIONS",
"envName": null,
"propertyName": "zeppelin.R.render.options",
"defaultValue": "out.format = 'html', comment = NA, echo = FALSE, results = 'asis', message = F, warning = F, fig.retina = 2",
"description": "",