From 24bfa382a43c2fbbf54b24bb8f03766910216490 Mon Sep 17 00:00:00 2001
From: CodingCat
Date: Mon, 7 Mar 2016 09:37:37 -0500
Subject: [PATCH 1/4] improve the doc for "spark.memory.offHeap.size"

---
 docs/configuration.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index 0017219e0726..bab7628d2dc2 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -901,7 +901,7 @@ Apart from these, the following properties are also available, and may be useful
   <td><code>spark.memory.offHeap.size</code></td>
   <td>0</td>
   <td>
-    The absolute amount of memory in bytes which can be used for off-heap allocation.
+    The absolute amount of memory (in terms by bytes) which can be used for off-heap allocation.
     This setting has no impact on heap memory usage, so if your executors' total memory consumption
     must fit within some hard limit then be sure to shrink your JVM heap size accordingly.
     This must be set to a positive value when <code>spark.memory.offHeap.enabled=true</code>.

From 2209e345df4636f8fa881b3ad45084b75f9fe3eb Mon Sep 17 00:00:00 2001
From: CodingCat
Date: Mon, 7 Mar 2016 14:00:16 -0500
Subject: [PATCH 2/4] fix

---
 docs/configuration.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index bab7628d2dc2..0017219e0726 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -901,7 +901,7 @@ Apart from these, the following properties are also available, and may be useful
   <td><code>spark.memory.offHeap.size</code></td>
   <td>0</td>
   <td>
-    The absolute amount of memory (in terms by bytes) which can be used for off-heap allocation.
+    The absolute amount of memory in bytes which can be used for off-heap allocation.
     This setting has no impact on heap memory usage, so if your executors' total memory consumption
     must fit within some hard limit then be sure to shrink your JVM heap size accordingly.
     This must be set to a positive value when <code>spark.memory.offHeap.enabled=true</code>.
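The two settings touched by patches 1 and 2 work as a pair: spark.memory.offHeap.size only
takes effect when spark.memory.offHeap.enabled is true. The following is a minimal sketch,
assuming the Spark 2.x SparkSession API, of how a job might set them; the object name, the
app name, and the 2 GiB figure are arbitrary illustrative values, not recommendations.

    import org.apache.spark.sql.SparkSession

    object OffHeapExample {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .appName("offheap-sketch")  // hypothetical app name
          .config("spark.memory.offHeap.enabled", true)
          // Absolute off-heap budget in bytes (2 GiB here, an arbitrary example).
          .config("spark.memory.offHeap.size", 2L * 1024 * 1024 * 1024)
          .getOrCreate()
        // ... run the job ...
        spark.stop()
      }
    }

As the documentation above notes, this budget is allocated outside the JVM heap, so an
executor that must fit a hard memory limit should shrink its heap size accordingly to
make room for it.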
From d9222d7b0a1f41be0a77c80f2229a3714032f0fb Mon Sep 17 00:00:00 2001
From: CodingCat
Date: Tue, 7 Feb 2017 10:43:27 -0800
Subject: [PATCH 3/4] add notes

---
 .../org/apache/spark/sql/execution/streaming/Sink.scala | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala
index 2571b59be54f..fcb5241bd2f1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala
@@ -31,8 +31,11 @@ trait Sink {
    * this method is called more than once with the same batchId (which will happen in the case of
    * failures), then `data` should only be added once.
    *
-   * Note: You cannot apply any operators on `data` except consuming it (e.g., `collect/foreach`).
+   * Note 1: You cannot apply any operators on `data` except consuming it (e.g., `collect/foreach`).
    * Otherwise, you may get a wrong result.
+   *
+   * Note 2: The method is supposed to be executed synchronously, i.e. the method should only return
+   * after data is added to sink successfully.
    */
   def addBatch(batchId: Long, data: DataFrame): Unit
 }

From a58e1c81272d3649a8f6c2f1f69c8c7c5492a95a Mon Sep 17 00:00:00 2001
From: CodingCat
Date: Tue, 7 Feb 2017 12:21:21 -0800
Subject: [PATCH 4/4] address the comments

---
 .../scala/org/apache/spark/sql/execution/streaming/Sink.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala
index fcb5241bd2f1..d10cd3044ecd 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala
@@ -35,7 +35,7 @@ trait Sink {
    * Otherwise, you may get a wrong result.
    *
    * Note 2: The method is supposed to be executed synchronously, i.e. the method should only return
-   * after data is added to sink successfully.
+   * after data is consumed by sink successfully.
    */
   def addBatch(batchId: Long, data: DataFrame): Unit
 }
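The contract that patches 3 and 4 spell out has three parts: add each batchId at most once
(retries after failures re-deliver the same batchId), only consume `data` (e.g., via
collect/foreach) rather than applying further operators to it, and return only once the
batch has been fully consumed. A minimal sketch of a Sink honoring all three follows; the
class name ConsoleLikeSink, the println target, and the lastCommittedBatchId bookkeeping
are illustrative inventions, not part of Spark's API.

    import org.apache.spark.sql.DataFrame
    import org.apache.spark.sql.execution.streaming.Sink

    // Hypothetical sink illustrating the addBatch contract documented above.
    class ConsoleLikeSink extends Sink {
      // Newest batch already added; -1 means "nothing consumed yet".
      @volatile private var lastCommittedBatchId: Long = -1L

      override def addBatch(batchId: Long, data: DataFrame): Unit = {
        if (batchId > lastCommittedBatchId) {
          // Note 1: only consume `data` (collect/foreach); applying other
          // operators to it may produce a wrong result.
          data.collect().foreach(println)
          // Note 2: we record the batch and return only after it has been
          // fully consumed, so the method stays synchronous.
          lastCommittedBatchId = batchId
        }
        // Otherwise this batchId was already added (a retry after a failure);
        // skipping it keeps the sink's output exactly-once as required.
      }
    }

A production sink would persist its committed-batch marker somewhere durable; the
in-memory field here is only enough to illustrate the idempotence requirement.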