From 71307508928a0e0706c1665df9951f909bf7b214 Mon Sep 17 00:00:00 2001 From: Cheng Lian Date: Wed, 29 Jun 2016 18:37:32 +0800 Subject: [PATCH 1/3] Add labelling support for include_example Jekyll plugin --- docs/_plugins/include_example.rb | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/docs/_plugins/include_example.rb b/docs/_plugins/include_example.rb index f7485826a762d..9bb744fb13389 100644 --- a/docs/_plugins/include_example.rb +++ b/docs/_plugins/include_example.rb @@ -32,8 +32,18 @@ def render(context) @code_dir = File.join(site.source, config_dir) clean_markup = @markup.strip - @file = File.join(@code_dir, clean_markup) - @lang = clean_markup.split('.').last + + parts = clean_markup.strip.split(' ') + if parts.length > 1 then + @snippet_label = ':' + parts[0] + snippet_file = parts[1] + else + @snippet_label = '' + snippet_file = parts[0] + end + + @file = File.join(@code_dir, snippet_file) + @lang = snippet_file.split('.').last code = File.open(@file).read.encode("UTF-8") code = select_lines(code) @@ -41,7 +51,7 @@ def render(context) rendered_code = Pygments.highlight(code, :lexer => @lang) hint = "
<br /><small>Find full example code at " \
-      "\"examples/src/main/#{clean_markup}\" in the Spark repo.</small>"
+      "\"examples/src/main/#{snippet_file}\" in the Spark repo.</small>"
 
     rendered_code + hint
   end
 
@@ -66,13 +76,13 @@ def select_lines(code)
     # Select the array of start labels from code.
     startIndices = lines
       .each_with_index
-      .select { |l, i| l.include? "$example on$" }
+      .select { |l, i| l.include? "$example on#{@snippet_label}$" }
       .map { |l, i| i }
 
     # Select the array of end labels from code.
     endIndices = lines
       .each_with_index
-      .select { |l, i| l.include? "$example off$" }
+      .select { |l, i| l.include? "$example off#{@snippet_label}$" }
       .map { |l, i| i }
 
     raise "Start indices amount is not equal to end indices amount, see #{@file}." \
@@ -88,7 +98,7 @@ def select_lines(code)
     startIndices.zip(endIndices).each do |start, endline|
       raise "Overlapping between two example code blocks are not allowed, see #{@file}." \
         if start <= lastIndex
-      raise "$example on$ should not be in the same line with $example off$, see #{@file}." \
+      raise "$example on[:tag]$ should not be in the same line with $example off[:tag]$, see #{@file}." \
        if start == endline
       lastIndex = endline
       range = Range.new(start + 1, endline - 1)

From 93f33986e2d6c31200c53f4887893a275948a65a Mon Sep 17 00:00:00 2001
From: Cheng Lian
Date: Wed, 29 Jun 2016 19:39:30 +0800
Subject: [PATCH 2/3] Update SQL programming guide and example code to illustrate the new labelling feature

---
 docs/_plugins/include_example.rb          |  2 +-
 docs/sql-programming-guide.md             | 41 +++----------------
 .../spark/examples/sql/JavaSparkSQL.java  |  5 +++
 examples/src/main/python/sql.py           |  5 +++
 .../spark/examples/sql/RDDRelation.scala  | 10 ++++-
 5 files changed, 25 insertions(+), 38 deletions(-)

diff --git a/docs/_plugins/include_example.rb b/docs/_plugins/include_example.rb
index 9bb744fb13389..0acd4618bba93 100644
--- a/docs/_plugins/include_example.rb
+++ b/docs/_plugins/include_example.rb
@@ -98,7 +98,7 @@ def select_lines(code)
     startIndices.zip(endIndices).each do |start, endline|
       raise "Overlapping between two example code blocks are not allowed, see #{@file}." \
         if start <= lastIndex
-      raise "$example on[:tag]$ should not be in the same line with $example off[:tag]$, see #{@file}." \
+      raise "$example on$ should not be in the same line with $example off$, see #{@file}." \
        if start == endline
       lastIndex = endline
       range = Range.new(start + 1, endline - 1)

diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 4b52c942e5449..7ffebd78eafba 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -57,52 +57,23 @@ Throughout this document, we will often refer to Scala/Java Datasets of `Row`s a
 <div class="codetabs">
 <div data-lang="scala"  markdown="1">
 
-The entry point into all functionality in Spark is the [`SparkSession`](api/scala/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.build()`:
-
-{% highlight scala %}
-import org.apache.spark.sql.SparkSession
-
-val spark = SparkSession.build()
-  .master("local")
-  .appName("Word Count")
-  .config("spark.some.config.option", "some-value")
-  .getOrCreate()
-
-// this is used to implicitly convert an RDD to a DataFrame.
-import spark.implicits._
-{% endhighlight %}
+The entry point into all functionality in Spark is the [`SparkSession`](api/scala/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.builder()`:
+
+{% include_example init_session scala/org/apache/spark/examples/sql/RDDRelation.scala %}
 </div>
 
 <div data-lang="java" markdown="1">
 
-The entry point into all functionality in Spark is the [`SparkSession`](api/java/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.build()`:
-
-{% highlight java %}
-import org.apache.spark.sql.SparkSession
-
-SparkSession spark = SparkSession.build()
-  .master("local")
-  .appName("Word Count")
-  .config("spark.some.config.option", "some-value")
-  .getOrCreate();
-{% endhighlight %}
+The entry point into all functionality in Spark is the [`SparkSession`](api/java/index.html#org.apache.spark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.builder()`:
+
+{% include_example init_session java/org/apache/spark/examples/sql/JavaSparkSQL.java %}
 </div>
 
 <div data-lang="python"  markdown="1">
 
-The entry point into all functionality in Spark is the [`SparkSession`](api/python/pyspark.sql.html#pyspark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.build`:
-
-{% highlight python %}
-from pyspark.sql import SparkSession
-
-spark = SparkSession.build \
-    .master("local") \
-    .appName("Word Count") \
-    .config("spark.some.config.option", "some-value") \
-    .getOrCreate()
-{% endhighlight %}
+The entry point into all functionality in Spark is the [`SparkSession`](api/python/pyspark.sql.html#pyspark.sql.SparkSession) class. To create a basic `SparkSession`, just use `SparkSession.builder`:
+
+{% include_example init_session python/sql.py %}
 </div>
diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java index e512979ac71b0..7fc6c007b6843 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java @@ -26,7 +26,9 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; +// $example on:init_session$ import org.apache.spark.sql.SparkSession; +// $example off:init_session$ public class JavaSparkSQL { public static class Person implements Serializable { @@ -51,10 +53,13 @@ public void setAge(int age) { } public static void main(String[] args) throws Exception { + // $example on:init_session$ SparkSession spark = SparkSession .builder() .appName("JavaSparkSQL") + .config("spark.some.config.option", "some-value") .getOrCreate(); + // $example off:init_session$ System.out.println("=== Data source: RDD ==="); // Load a text file and convert each line to a Java Bean. diff --git a/examples/src/main/python/sql.py b/examples/src/main/python/sql.py index ac7246938d3b4..ea11d2c4c7b33 100644 --- a/examples/src/main/python/sql.py +++ b/examples/src/main/python/sql.py @@ -20,15 +20,20 @@ import os import sys +# $example on:init_session$ from pyspark.sql import SparkSession +# $example off:init_session$ from pyspark.sql.types import Row, StructField, StructType, StringType, IntegerType if __name__ == "__main__": + # $example on:init_session$ spark = SparkSession\ .builder\ .appName("PythonSQL")\ + .config("spark.some.config.option", "some-value")\ .getOrCreate() + # $example off:init_session$ # A list of Rows. Infer schema from the first row, create a DataFrame and print the schema rows = [Row(name="John", age=19), Row(name="Smith", age=23), Row(name="Sarah", age=18)] diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala index 1b019fbb51771..deaa9f252b9b0 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala @@ -18,7 +18,10 @@ // scalastyle:off println package org.apache.spark.examples.sql -import org.apache.spark.sql.{SaveMode, SparkSession} +import org.apache.spark.sql.SaveMode +// $example on:init_session$ +import org.apache.spark.sql.SparkSession +// $example off:init_session$ // One method for defining the schema of an RDD is to make a case class with the desired column // names and types. @@ -26,13 +29,16 @@ case class Record(key: Int, value: String) object RDDRelation { def main(args: Array[String]) { + // $example on:init_session$ val spark = SparkSession .builder - .appName("RDDRelation") + .appName("Spark Examples") + .config("spark.some.config.option", "some-value") .getOrCreate() // Importing the SparkSession gives access to all the SQL functions and implicit conversions. import spark.implicits._ + // $example off:init_session$ val df = spark.createDataFrame((1 to 100).map(i => Record(i, s"val_$i"))) // Any RDD containing case classes can be used to create a temporary view. 
The schema of the

From 7ea9c753fc8b490f2b0549b6dbb303bd0b8a573f Mon Sep 17 00:00:00 2001
From: Cheng Lian
Date: Thu, 30 Jun 2016 10:24:31 +0800
Subject: [PATCH 3/3] Support overlapping labels

---
 docs/_plugins/include_example.rb | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/_plugins/include_example.rb b/docs/_plugins/include_example.rb
index 0acd4618bba93..306888801df21 100644
--- a/docs/_plugins/include_example.rb
+++ b/docs/_plugins/include_example.rb
@@ -102,7 +102,10 @@ def select_lines(code)
         if start == endline
       lastIndex = endline
       range = Range.new(start + 1, endline - 1)
-      result += trim_codeblock(lines[range]).join
+      trimmed = trim_codeblock(lines[range])
+      # Filter out example tags from overlapping labels.
+      tags_filtered = trimmed.select { |l| !l.include? '$example ' }
+      result += tags_filtered.join
       result += "\n"
     end
     result
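
Usage note: taken together, these patches let a single example file serve multiple documentation snippets. Below is a minimal sketch of the round trip, using only the `init_session` label and the file paths that patch 2 already introduces. In the example source, the snippet is fenced by labelled markers:

    // scala/org/apache/spark/examples/sql/RDDRelation.scala
    // $example on:init_session$
    val spark = SparkSession
      .builder
      .appName("Spark Examples")
      .config("spark.some.config.option", "some-value")
      .getOrCreate()

    // Importing the SparkSession gives access to all the SQL functions and implicit conversions.
    import spark.implicits._
    // $example off:init_session$

and the Markdown guide pulls exactly that region in by prefixing the label to the path:

    {% include_example init_session scala/org/apache/spark/examples/sql/RDDRelation.scala %}

If the label is omitted, `parts` has length one, `@snippet_label` stays empty, and the plugin falls back to matching the plain `$example on$` / `$example off$` markers, so existing pages keep working unchanged.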
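
Overlap note: patch 3 allows labelled regions to nest or overlap within one file. A small sketch with two hypothetical labels, `a` and `b`, which appear nowhere in this series:

    // $example on:a$
    val x = 1
    // $example on:b$
    val y = 2
    // $example off:b$
    // $example off:a$

Including label `a` selects the four lines between its markers, and the `tags_filtered` step then drops the two lines containing `$example `, so the rendered snippet is only `val x = 1` and `val y = 2`. Without that filter, every enclosing label would render the raw marker comments of the labels it overlaps.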