From 71307508928a0e0706c1665df9951f909bf7b214 Mon Sep 17 00:00:00 2001 From: Cheng Lian Date: Wed, 29 Jun 2016 18:37:32 +0800 Subject: [PATCH 1/3] Add labelling support for include_example Jekyll plugin --- docs/_plugins/include_example.rb | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/docs/_plugins/include_example.rb b/docs/_plugins/include_example.rb index f7485826a762d..9bb744fb13389 100644 --- a/docs/_plugins/include_example.rb +++ b/docs/_plugins/include_example.rb @@ -32,8 +32,18 @@ def render(context) @code_dir = File.join(site.source, config_dir) clean_markup = @markup.strip - @file = File.join(@code_dir, clean_markup) - @lang = clean_markup.split('.').last + + parts = clean_markup.strip.split(' ') + if parts.length > 1 then + @snippet_label = ':' + parts[0] + snippet_file = parts[1] + else + @snippet_label = '' + snippet_file = parts[0] + end + + @file = File.join(@code_dir, snippet_file) + @lang = snippet_file.split('.').last code = File.open(@file).read.encode("UTF-8") code = select_lines(code) @@ -41,7 +51,7 @@ def render(context) rendered_code = Pygments.highlight(code, :lexer => @lang) hint = "
<br /><small>Find full example code at " \
-      "\"examples/src/main/#{clean_markup}\" in the Spark repo.</small>"
+      "\"examples/src/main/#{snippet_file}\" in the Spark repo.</small>"
 
     rendered_code + hint
   end
 
@@ -66,13 +76,13 @@ def select_lines(code)
     # Select the array of start labels from code.
     startIndices = lines
       .each_with_index
-      .select { |l, i| l.include? "$example on$" }
+      .select { |l, i| l.include? "$example on#{@snippet_label}$" }
       .map { |l, i| i }
 
     # Select the array of end labels from code.
     endIndices = lines
       .each_with_index
-      .select { |l, i| l.include? "$example off$" }
+      .select { |l, i| l.include? "$example off#{@snippet_label}$" }
       .map { |l, i| i }
 
     raise "Start indices amount is not equal to end indices amount, see #{@file}." \
@@ -88,7 +98,7 @@ def select_lines(code)
     startIndices.zip(endIndices).each do |start, endline|
       raise "Overlapping between two example code blocks are not allowed, see #{@file}." \
         if start <= lastIndex
-      raise "$example on$ should not be in the same line with $example off$, see #{@file}." \
+      raise "$example on[:tag]$ should not be in the same line with $example off[:tag]$, see #{@file}." \
        if start == endline
       lastIndex = endline
       range = Range.new(start + 1, endline - 1)

From 93f33986e2d6c31200c53f4887893a275948a65a Mon Sep 17 00:00:00 2001
From: Cheng Lian
Date: Wed, 29 Jun 2016 19:39:30 +0800
Subject: [PATCH 2/3] Update SQL programming guide and example code to illustrate the new labelling feature

---
 docs/_plugins/include_example.rb          |  2 +-
 docs/sql-programming-guide.md             | 41 +++----------------
 .../spark/examples/sql/JavaSparkSQL.java  |  5 +++
 examples/src/main/python/sql.py           |  5 +++
 .../spark/examples/sql/RDDRelation.scala  | 10 ++++-
 5 files changed, 25 insertions(+), 38 deletions(-)

diff --git a/docs/_plugins/include_example.rb b/docs/_plugins/include_example.rb
index 9bb744fb13389..0acd4618bba93 100644
--- a/docs/_plugins/include_example.rb
+++ b/docs/_plugins/include_example.rb
@@ -98,7 +98,7 @@ def select_lines(code)
     startIndices.zip(endIndices).each do |start, endline|
       raise "Overlapping between two example code blocks are not allowed, see #{@file}." \
         if start <= lastIndex
-      raise "$example on[:tag]$ should not be in the same line with $example off[:tag]$, see #{@file}." \
+      raise "$example on$ should not be in the same line with $example off$, see #{@file}." \
        if start == endline
       lastIndex = endline
       range = Range.new(start + 1, endline - 1)

diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 4b52c942e5449..7ffebd78eafba 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -57,52 +57,23 @@ Throughout this document, we will often refer to Scala/Java Datasets of `Row`s a
 <div class="codetabs">
 <div data-lang="scala"  markdown="1">
 
-The entry point into all functionality in Spark is the [`SparkSession`](api/scala/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.build()`:
-
-{% highlight scala %}
-import org.apache.spark.sql.SparkSession
-
-val spark = SparkSession.build()
-  .master("local")
-  .appName("Word Count")
-  .config("spark.some.config.option", "some-value")
-  .getOrCreate()
-
-// this is used to implicitly convert an RDD to a DataFrame.
-import spark.implicits._
-{% endhighlight %}
+The entry point into all functionality in Spark is the [`SparkSession`](api/scala/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.builder()`:
+
+{% include_example init_session scala/org/apache/spark/examples/sql/RDDRelation.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
 
-The entry point into all functionality in Spark is the [`SparkSession`](api/java/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.build()`:
-
-{% highlight java %}
-import org.apache.spark.sql.SparkSession
-
-SparkSession spark = SparkSession.build()
-  .master("local")
-  .appName("Word Count")
-  .config("spark.some.config.option", "some-value")
-  .getOrCreate();
-{% endhighlight %}
+The entry point into all functionality in Spark is the [`SparkSession`](api/java/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.builder()`:
+
+{% include_example init_session java/org/apache/spark/examples/sql/JavaSparkSQL.java %}
 </div>
 
 <div data-lang="python"  markdown="1">
 
-The entry point into all functionality in Spark is the [`SparkSession`](api/python/pyspark.sql.html#pyspark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.build`:
-
-{% highlight python %}
-from pyspark.sql import SparkSession
-
-spark = SparkSession.build \
-    .master("local") \
-    .appName("Word Count") \
-    .config("spark.some.config.option", "some-value") \
-    .getOrCreate()
-{% endhighlight %}
+The entry point into all functionality in Spark is the [`SparkSession`](api/python/pyspark.sql.html#pyspark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.builder`:
+
+{% include_example init_session python/sql.py %}
 </div>
diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java index e512979ac71b0..7fc6c007b6843 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java @@ -26,7 +26,9 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; +// $example on:init_session$ import org.apache.spark.sql.SparkSession; +// $example off:init_session$ public class JavaSparkSQL { public static class Person implements Serializable { @@ -51,10 +53,13 @@ public void setAge(int age) { } public static void main(String[] args) throws Exception { + // $example on:init_session$ SparkSession spark = SparkSession .builder() .appName("JavaSparkSQL") + .config("spark.some.config.option", "some-value") .getOrCreate(); + // $example off:init_session$ System.out.println("=== Data source: RDD ==="); // Load a text file and convert each line to a Java Bean. diff --git a/examples/src/main/python/sql.py b/examples/src/main/python/sql.py index ac7246938d3b4..ea11d2c4c7b33 100644 --- a/examples/src/main/python/sql.py +++ b/examples/src/main/python/sql.py @@ -20,15 +20,20 @@ import os import sys +# $example on:init_session$ from pyspark.sql import SparkSession +# $example off:init_session$ from pyspark.sql.types import Row, StructField, StructType, StringType, IntegerType if __name__ == "__main__": + # $example on:init_session$ spark = SparkSession\ .builder\ .appName("PythonSQL")\ + .config("spark.some.config.option", "some-value")\ .getOrCreate() + # $example off:init_session$ # A list of Rows. Infer schema from the first row, create a DataFrame and print the schema rows = [Row(name="John", age=19), Row(name="Smith", age=23), Row(name="Sarah", age=18)] diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala index 1b019fbb51771..deaa9f252b9b0 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala @@ -18,7 +18,10 @@ // scalastyle:off println package org.apache.spark.examples.sql -import org.apache.spark.sql.{SaveMode, SparkSession} +import org.apache.spark.sql.SaveMode +// $example on:init_session$ +import org.apache.spark.sql.SparkSession +// $example off:init_session$ // One method for defining the schema of an RDD is to make a case class with the desired column // names and types. @@ -26,13 +29,16 @@ case class Record(key: Int, value: String) object RDDRelation { def main(args: Array[String]) { + // $example on:init_session$ val spark = SparkSession .builder - .appName("RDDRelation") + .appName("Spark Examples") + .config("spark.some.config.option", "some-value") .getOrCreate() // Importing the SparkSession gives access to all the SQL functions and implicit conversions. import spark.implicits._ + // $example off:init_session$ val df = spark.createDataFrame((1 to 100).map(i => Record(i, s"val_$i"))) // Any RDD containing case classes can be used to create a temporary view. 
The schema of the

From 7ea9c753fc8b490f2b0549b6dbb303bd0b8a573f Mon Sep 17 00:00:00 2001
From: Cheng Lian
Date: Thu, 30 Jun 2016 10:24:31 +0800
Subject: [PATCH 3/3] Support overlapping labels

---
 docs/_plugins/include_example.rb | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/_plugins/include_example.rb b/docs/_plugins/include_example.rb
index 0acd4618bba93..306888801df21 100644
--- a/docs/_plugins/include_example.rb
+++ b/docs/_plugins/include_example.rb
@@ -102,7 +102,10 @@ def select_lines(code)
         if start == endline
       lastIndex = endline
       range = Range.new(start + 1, endline - 1)
-      result += trim_codeblock(lines[range]).join
+      trimmed = trim_codeblock(lines[range])
+      # Filter out example tags from overlapping labels.
+      tags_filtered = trimmed.select { |l| !l.include? '$example ' }
+      result += tags_filtered.join
       result += "\n"
     end
     result
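
Usage note: taken together, these patches let a single example file serve multiple documentation snippets. Below is a minimal sketch of the round trip, using only the `init_session` label and the file paths that patch 2 already introduces. In the example source, the snippet is fenced by labelled markers:

    // scala/org/apache/spark/examples/sql/RDDRelation.scala
    // $example on:init_session$
    val spark = SparkSession
      .builder
      .appName("Spark Examples")
      .config("spark.some.config.option", "some-value")
      .getOrCreate()

    // Importing the SparkSession gives access to all the SQL functions and implicit conversions.
    import spark.implicits._
    // $example off:init_session$

and the Markdown guide pulls exactly that region in by prefixing the label to the path:

    {% include_example init_session scala/org/apache/spark/examples/sql/RDDRelation.scala %}

If the label is omitted, `parts` has length one, `@snippet_label` stays empty, and the plugin falls back to matching the plain `$example on$` / `$example off$` markers, so existing pages keep working unchanged.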
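
Overlap note: patch 3 allows labelled regions to nest or overlap within one file. A small sketch with two hypothetical labels, `a` and `b`, which appear nowhere in this series:

    // $example on:a$
    val x = 1
    // $example on:b$
    val y = 2
    // $example off:b$
    // $example off:a$

Including label `a` selects the four lines between its markers, and the `tags_filtered` step then drops the two lines containing `$example `, so the rendered snippet is only `val x = 1` and `val y = 2`. Without that filter, every enclosing label would render the raw marker comments of the labels it overlaps.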