2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/ContextCleaner.scala
@@ -139,7 +139,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging {
periodicGCService.shutdown()
}

/** Register a RDD for cleanup when it is garbage collected. */
/** Register an RDD for cleanup when it is garbage collected. */
def registerRDDForCleanup(rdd: RDD[_]): Unit = {
registerForCleanup(rdd, CleanRDD(rdd.id))
}
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/Partitioner.scala
@@ -101,7 +101,7 @@ class HashPartitioner(partitions: Int) extends Partitioner {
* A [[org.apache.spark.Partitioner]] that partitions sortable records by range into roughly
* equal ranges. The ranges are determined by sampling the content of the RDD passed in.
*
* Note that the actual number of partitions created by the RangePartitioner might not be the same
* @note The actual number of partitions created by the RangePartitioner might not be the same
[Review comment (Member Author): Before/After screenshots of the rendered Scala and Java API docs for this note.]
* as the `partitions` parameter, in the case where the number of sampled records is less than
* the value of `partitions`.
*/
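A minimal sketch of the behavior the reworded note describes: when the sampled RDD yields fewer records than the requested number of partitions, RangePartitioner creates fewer partitions. The data is illustrative and an existing SparkContext `sc` is assumed.

```scala
import org.apache.spark.RangePartitioner

// Tiny RDD: far fewer records than the requested number of partitions.
val pairs = sc.parallelize(Seq((1, "a"), (2, "b"), (3, "c")))
val partitioner = new RangePartitioner(100, pairs)

// The actual number of partitions can be smaller than the `partitions` argument.
println(partitioner.numPartitions)  // far less than 100 for this tiny input
```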
6 changes: 3 additions & 3 deletions core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -42,10 +42,10 @@ import org.apache.spark.util.Utils
* All setter methods in this class support chaining. For example, you can write
* `new SparkConf().setMaster("local").setAppName("My app")`.
*
* Note that once a SparkConf object is passed to Spark, it is cloned and can no longer be modified
* by the user. Spark does not support modifying the configuration at runtime.
*
* @param loadDefaults whether to also load values from Java system properties
*
* @note Once a SparkConf object is passed to Spark, it is cloned and can no longer be modified
* by the user. Spark does not support modifying the configuration at runtime.
*/
class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Serializable {

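A short sketch of the two points in the SparkConf Scaladoc above: setters chain, and the conf is cloned once it is passed to Spark, so later changes do not affect the running context. A local master is assumed for illustration.

```scala
import org.apache.spark.{SparkConf, SparkContext}

val conf = new SparkConf()
  .setMaster("local[2]")     // setters return the conf, so calls chain
  .setAppName("My app")

val sc = new SparkContext(conf)  // Spark clones the conf here

// Mutating the original conf afterwards does not change the running context.
conf.set("spark.app.name", "Renamed app")
println(sc.getConf.get("spark.app.name"))  // still "My app"
```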
47 changes: 25 additions & 22 deletions core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -281,7 +281,7 @@ class SparkContext(config: SparkConf) extends Logging {
/**
* A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse.
*
* '''Note:''' As it will be reused in all Hadoop RDDs, it's better not to modify it unless you
* @note As it will be reused in all Hadoop RDDs, it's better not to modify it unless you
* plan to set some global configurations for all Hadoop RDDs.
*/
def hadoopConfiguration: Configuration = _hadoopConfiguration
@@ -700,7 +700,7 @@ class SparkContext(config: SparkConf) extends Logging {
* Execute a block of code in a scope such that all new RDDs created in this body will
* be part of the same scope. For more detail, see {{org.apache.spark.rdd.RDDOperationScope}}.
*
* Note: Return statements are NOT allowed in the given body.
* @note Return statements are NOT allowed in the given body.
*/
private[spark] def withScope[U](body: => U): U = RDDOperationScope.withScope[U](this)(body)

@@ -927,7 +927,7 @@ class SparkContext(config: SparkConf) extends Logging {
/**
* Load data from a flat binary file, assuming the length of each record is constant.
*
* '''Note:''' We ensure that the byte array for each record in the resulting RDD
* @note We ensure that the byte array for each record in the resulting RDD
* has the provided record length.
*
* @param path Directory to the input data files, the path can be comma separated paths as the
@@ -970,7 +970,7 @@ class SparkContext(config: SparkConf) extends Logging {
* @param valueClass Class of the values
* @param minPartitions Minimum number of Hadoop Splits to generate.
*
* '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
* @note Because Hadoop's RecordReader class re-uses the same Writable object for each
[Review comment (Member Author): Before/After screenshots of the rendered Scala and Java API docs for this note.]
* record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
* operation will create many references to the same object.
* If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
@@ -995,7 +995,7 @@ class SparkContext(config: SparkConf) extends Logging {

/** Get an RDD for a Hadoop file with an arbitrary InputFormat
*
* '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
* @note Because Hadoop's RecordReader class re-uses the same Writable object for each
* record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
* operation will create many references to the same object.
* If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
@@ -1034,7 +1034,7 @@ class SparkContext(config: SparkConf) extends Logging {
* val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path, minPartitions)
* }}}
*
* '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
* @note Because Hadoop's RecordReader class re-uses the same Writable object for each
* record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
* operation will create many references to the same object.
* If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
@@ -1058,7 +1058,7 @@ class SparkContext(config: SparkConf) extends Logging {
* val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path)
* }}}
*
* '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
* @note Because Hadoop's RecordReader class re-uses the same Writable object for each
* record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
* operation will create many references to the same object.
* If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
@@ -1084,7 +1084,7 @@ class SparkContext(config: SparkConf) extends Logging {
* Get an RDD for a given Hadoop file with an arbitrary new API InputFormat
* and extra configuration options to pass to the input format.
*
* '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
* @note Because Hadoop's RecordReader class re-uses the same Writable object for each
* record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
* operation will create many references to the same object.
* If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
@@ -1124,7 +1124,7 @@ class SparkContext(config: SparkConf) extends Logging {
* @param kClass Class of the keys
* @param vClass Class of the values
*
* '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
* @note Because Hadoop's RecordReader class re-uses the same Writable object for each
* record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
* operation will create many references to the same object.
* If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
@@ -1150,7 +1150,7 @@ class SparkContext(config: SparkConf) extends Logging {
/**
* Get an RDD for a Hadoop SequenceFile with given key and value types.
*
* '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
* @note Because Hadoop's RecordReader class re-uses the same Writable object for each
* record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
* operation will create many references to the same object.
* If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
@@ -1169,7 +1169,7 @@ class SparkContext(config: SparkConf) extends Logging {
/**
* Get an RDD for a Hadoop SequenceFile with given key and value types.
*
* '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
* @note Because Hadoop's RecordReader class re-uses the same Writable object for each
* record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
* operation will create many references to the same object.
* If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
@@ -1199,7 +1199,7 @@ class SparkContext(config: SparkConf) extends Logging {
* for the appropriate type. In addition, we pass the converter a ClassTag of its type to
* allow it to figure out the Writable class to use in the subclass case.
*
* '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each
* @note Because Hadoop's RecordReader class re-uses the same Writable object for each
* record, directly caching the returned RDD or directly passing it to an aggregation or shuffle
* operation will create many references to the same object.
* If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first
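The note repeated through these hadoopFile/sequenceFile methods (its tail is truncated in this diff view) goes on to recommend copying the reused Writable objects, typically with a map, before caching or aggregating. A minimal sketch of that copy; the input path is hypothetical and an existing SparkContext `sc` is assumed.

```scala
import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapred.TextInputFormat

// Hypothetical input path.
val raw = sc.hadoopFile[LongWritable, Text, TextInputFormat]("hdfs:///tmp/input", 4)

// The RecordReader reuses the same LongWritable/Text instances for every record,
// so copy each record into immutable values before caching or shuffling.
val safe = raw.map { case (offset, line) => (offset.get, line.toString) }
safe.cache()
```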
@@ -1330,16 +1330,18 @@ class SparkContext(config: SparkConf) extends Logging {
}

/**
* Register the given accumulator. Note that accumulators must be registered before use, or it
* will throw exception.
* Register the given accumulator.
*
* @note Accumulators must be registered before use, or it will throw exception.
*/
def register(acc: AccumulatorV2[_, _]): Unit = {
acc.register(this)
}

/**
* Register the given accumulator with given name. Note that accumulators must be registered
* before use, or it will throw exception.
* Register the given accumulator with given name.
*
* @note Accumulators must be registered before use, or it will throw exception.
*/
def register(acc: AccumulatorV2[_, _], name: String): Unit = {
acc.register(this, name = Some(name))
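A brief sketch of the registration requirement stated in the new @note, using the AccumulatorV2 API; the accumulator name is illustrative and an existing SparkContext `sc` is assumed.

```scala
import org.apache.spark.util.LongAccumulator

val acc = new LongAccumulator
sc.register(acc, "records")   // must be registered before use, or Spark throws an exception

sc.parallelize(1 to 100).foreach(_ => acc.add(1))
println(acc.value)            // 100
```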
@@ -1550,7 +1552,7 @@ class SparkContext(config: SparkConf) extends Logging {
* :: DeveloperApi ::
* Request that the cluster manager kill the specified executors.
*
* Note: This is an indication to the cluster manager that the application wishes to adjust
* @note This is an indication to the cluster manager that the application wishes to adjust
* its resource usage downwards. If the application wishes to replace the executors it kills
* through this method with new ones, it should follow up explicitly with a call to
* {{SparkContext#requestExecutors}}.
@@ -1572,7 +1574,7 @@ class SparkContext(config: SparkConf) extends Logging {
* :: DeveloperApi ::
* Request that the cluster manager kill the specified executor.
*
* Note: This is an indication to the cluster manager that the application wishes to adjust
* @note This is an indication to the cluster manager that the application wishes to adjust
* its resource usage downwards. If the application wishes to replace the executor it kills
* through this method with a new one, it should follow up explicitly with a call to
* {{SparkContext#requestExecutors}}.
@@ -1590,7 +1592,7 @@ class SparkContext(config: SparkConf) extends Logging {
* this request. This assumes the cluster manager will automatically and eventually
* fulfill all missing application resource requests.
*
* Note: The replace is by no means guaranteed; another application on the same cluster
* @note The replace is by no means guaranteed; another application on the same cluster
* can steal the window of opportunity and acquire this application's resources in the
* mean time.
*
@@ -1639,7 +1641,8 @@ class SparkContext(config: SparkConf) extends Logging {

/**
* Returns an immutable map of RDDs that have marked themselves as persistent via cache() call.
* Note that this does not necessarily mean the caching or computation was successful.
*
* @note This does not necessarily mean the caching or computation was successful.
*/
def getPersistentRDDs: Map[Int, RDD[_]] = persistentRdds.toMap
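A small sketch of the @note above: cache() only marks the RDD as persistent, so it shows up in getPersistentRDDs even before any job has materialized the cached data. Assumes an existing SparkContext `sc`.

```scala
val rdd = sc.parallelize(1 to 10).cache()   // marks the RDD as persistent

// Listed immediately, even though nothing has been computed or cached yet.
println(sc.getPersistentRDDs.contains(rdd.id))  // true
```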

@@ -2298,7 +2301,7 @@ object SparkContext extends Logging {
* singleton object. Because we can only have one active SparkContext per JVM,
* this is useful when applications may wish to share a SparkContext.
*
* Note: This function cannot be used to create multiple SparkContext instances
* @note This function cannot be used to create multiple SparkContext instances
* even if multiple contexts are allowed.
*/
def getOrCreate(config: SparkConf): SparkContext = {
@@ -2323,7 +2326,7 @@ object SparkContext extends Logging {
*
* This method allows not passing a SparkConf (useful if just retrieving).
*
* Note: This function cannot be used to create multiple SparkContext instances
* @note This function cannot be used to create multiple SparkContext instances
* even if multiple contexts are allowed.
*/
def getOrCreate(): SparkContext = {
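A sketch of the getOrCreate behavior documented in the hunks above: the second call returns the already-active context rather than creating a new one. A local master is assumed for illustration.

```scala
import org.apache.spark.{SparkConf, SparkContext}

val conf = new SparkConf().setMaster("local[*]").setAppName("shared")
val sc1 = SparkContext.getOrCreate(conf)
val sc2 = SparkContext.getOrCreate()   // no conf needed when just retrieving

println(sc1 eq sc2)  // true: both refer to the single active SparkContext
```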
core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala
@@ -153,7 +153,7 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double])
* Return the intersection of this RDD and another one. The output will not contain any duplicate
* elements, even if the input RDDs did.
*
* Note that this method performs a shuffle internally.
* @note This method performs a shuffle internally.
*/
def intersection(other: JavaDoubleRDD): JavaDoubleRDD = fromRDD(srdd.intersection(other.srdd))

@@ -256,7 +256,7 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double])
* e.g 1<=x<10 , 10<=x<20, 20<=x<50
* And on the input of 1 and 50 we would have a histogram of 1,0,0
*
* Note: if your histogram is evenly spaced (e.g. [0, 10, 20, 30]) this can be switched
* @note If your histogram is evenly spaced (e.g. [0, 10, 20, 30]) this can be switched
* from an O(log n) insertion to O(1) per element. (where n = # buckets) if you set evenBuckets
* to true.
* buckets must be sorted and not contain any duplicates.
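A sketch of the histogram note above, using the equivalent Scala API (DoubleRDDFunctions); the data and bucket boundaries are illustrative and an existing SparkContext `sc` is assumed.

```scala
val data = sc.parallelize(Seq(1.0, 5.0, 15.0, 45.0, 50.0))

// Uneven buckets: 1 <= x < 10, 10 <= x < 20, 20 <= x <= 50 (last bucket is inclusive).
val uneven = data.histogram(Array(1.0, 10.0, 20.0, 50.0), evenBuckets = false)
// uneven: Array(2, 1, 2)

// Evenly spaced buckets allow O(1) bucket lookup per element instead of O(log n).
val even = data.histogram(Array(0.0, 20.0, 40.0, 60.0), evenBuckets = true)
// even: Array(3, 0, 2)
```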
26 changes: 16 additions & 10 deletions core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -206,7 +206,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* Return the intersection of this RDD and another one. The output will not contain any duplicate
* elements, even if the input RDDs did.
*
* Note that this method performs a shuffle internally.
* @note This method performs a shuffle internally.
*/
def intersection(other: JavaPairRDD[K, V]): JavaPairRDD[K, V] =
new JavaPairRDD[K, V](rdd.intersection(other.rdd))
@@ -223,9 +223,9 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
/**
* Generic function to combine the elements for each key using a custom set of aggregation
* functions. Turns a JavaPairRDD[(K, V)] into a result of type JavaPairRDD[(K, C)], for a
* "combined type" C. Note that V and C can be different -- for example, one might group an
* RDD of type (Int, Int) into an RDD of type (Int, List[Int]). Users provide three
* functions:
* "combined type" C.
*
* Users provide three functions:
*
* - `createCombiner`, which turns a V into a C (e.g., creates a one-element list)
* - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list)
@@ -234,6 +234,9 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* In addition, users can control the partitioning of the output RDD, the serializer that is use
* for the shuffle, and whether to perform map-side aggregation (if a mapper can produce multiple
* items with the same key).
*
* @note V and C can be different -- for example, one might group an RDD of type (Int, Int) into
* an RDD of type (Int, List[Int]).
[Review comment (Member Author): It seems fine now. (Before/After screenshots of the rendered docs.)]
[Review comment (Member): OK, it works alright as a standalone note. It could have stayed an inline sentence too. OK either way.]
*/
def combineByKey[C](createCombiner: JFunction[V, C],
mergeValue: JFunction2[C, V, C],
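A sketch of the combineByKey contract described above, using the Scala PairRDDFunctions equivalent: V (Int) and C (List[Int]) differ, which is exactly the case the relocated @note calls out. Assumes an existing SparkContext `sc`.

```scala
val pairs = sc.parallelize(Seq((1, 2), (1, 3), (2, 4)))

val grouped = pairs.combineByKey[List[Int]](
  (v: Int) => List(v),                          // createCombiner: V => C
  (c: List[Int], v: Int) => v :: c,             // mergeValue: (C, V) => C
  (c1: List[Int], c2: List[Int]) => c1 ::: c2   // mergeCombiners: (C, C) => C
)

grouped.collect().foreach(println)  // e.g. (1,List(3, 2)) and (2,List(4))
```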
@@ -255,16 +258,19 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
/**
* Generic function to combine the elements for each key using a custom set of aggregation
* functions. Turns a JavaPairRDD[(K, V)] into a result of type JavaPairRDD[(K, C)], for a
* "combined type" C. Note that V and C can be different -- for example, one might group an
* RDD of type (Int, Int) into an RDD of type (Int, List[Int]). Users provide three
* functions:
* "combined type" C.
*
* Users provide three functions:
*
* - `createCombiner`, which turns a V into a C (e.g., creates a one-element list)
* - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list)
* - `mergeCombiners`, to combine two C's into a single one.
*
* In addition, users can control the partitioning of the output RDD. This method automatically
* uses map-side aggregation in shuffling the RDD.
*
* @note V and C can be different -- for example, one might group an RDD of type (Int, Int) into
* an RDD of type (Int, List[Int]).
*/
def combineByKey[C](createCombiner: JFunction[V, C],
mergeValue: JFunction2[C, V, C],
@@ -398,7 +404,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* Group the values for each key in the RDD into a single sequence. Allows controlling the
* partitioning of the resulting key-value pair RDD by passing a Partitioner.
*
* Note: If you are grouping in order to perform an aggregation (such as a sum or average) over
* @note If you are grouping in order to perform an aggregation (such as a sum or average) over
* each key, using [[JavaPairRDD.reduceByKey]] or [[JavaPairRDD.combineByKey]]
* will provide much better performance.
*/
@@ -409,7 +415,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* Group the values for each key in the RDD into a single sequence. Hash-partitions the
* resulting RDD with into `numPartitions` partitions.
*
* Note: If you are grouping in order to perform an aggregation (such as a sum or average) over
* @note If you are grouping in order to perform an aggregation (such as a sum or average) over
* each key, using [[JavaPairRDD.reduceByKey]] or [[JavaPairRDD.combineByKey]]
* will provide much better performance.
*/
@@ -539,7 +545,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* Group the values for each key in the RDD into a single sequence. Hash-partitions the
* resulting RDD with the existing partitioner/parallelism level.
*
* Note: If you are grouping in order to perform an aggregation (such as a sum or average) over
* @note If you are grouping in order to perform an aggregation (such as a sum or average) over
* each key, using [[JavaPairRDD.reduceByKey]] or [[JavaPairRDD.combineByKey]]
* will provide much better performance.
*/
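A closing sketch of why the groupByKey notes above steer aggregations toward reduceByKey or combineByKey, shown with the Scala equivalents; an existing SparkContext `sc` is assumed.

```scala
val pairs = sc.parallelize(Seq(("a", 1), ("a", 2), ("b", 3)))

// groupByKey shuffles every value across the network before summing.
val viaGroup = pairs.groupByKey().mapValues(_.sum)

// reduceByKey combines values map-side first, so far less data is shuffled.
val viaReduce = pairs.reduceByKey(_ + _)

viaReduce.collect().foreach(println)  // (a,3) and (b,3)
```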