Skip to content

Commit 322e7e7

Browse files
committed
[SQL] JavaDoc update for various DataFrame functions.
Author: Reynold Xin <[email protected]> Closes #5935 from rxin/df-doc1 and squashes the following commits: aaeaadb [Reynold Xin] [SQL] JavaDoc update for various DataFrame functions.
1 parent 32cdc81 commit 322e7e7

File tree

4 files changed

+32
-21
lines changed

4 files changed

+32
-21
lines changed

sql/core/src/main/scala/org/apache/spark/sql/Column.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,14 @@ class Column(protected[sql] val expr: Expression) extends Logging {
8484

8585
/**
8686
* Inversion of boolean expression, i.e. NOT.
87-
* {{
87+
* {{{
8888
* // Scala: select rows that are not active (isActive === false)
8989
* df.filter( !df("isActive") )
9090
*
9191
* // Java:
9292
* import static org.apache.spark.sql.functions.*;
9393
* df.filter( not(df.col("isActive")) );
94-
* }}
94+
* }}}
9595
*
9696
* @group expr_ops
9797
*/

sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -681,11 +681,11 @@ class DataFrame private[sql](
681681

682682
/**
683683
* (Scala-specific) Aggregates on the entire [[DataFrame]] without groups.
684-
* {{
684+
* {{{
685685
* // df.agg(...) is a shorthand for df.groupBy().agg(...)
686686
* df.agg("age" -> "max", "salary" -> "avg")
687687
* df.groupBy().agg("age" -> "max", "salary" -> "avg")
688-
* }}
688+
* }}}
689689
* @group dfops
690690
*/
691691
def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = {
@@ -694,33 +694,33 @@ class DataFrame private[sql](
694694

695695
/**
696696
* (Scala-specific) Aggregates on the entire [[DataFrame]] without groups.
697-
* {{
697+
* {{{
698698
* // df.agg(...) is a shorthand for df.groupBy().agg(...)
699699
* df.agg(Map("age" -> "max", "salary" -> "avg"))
700700
* df.groupBy().agg(Map("age" -> "max", "salary" -> "avg"))
701-
* }}
701+
* }}}
702702
* @group dfops
703703
*/
704704
def agg(exprs: Map[String, String]): DataFrame = groupBy().agg(exprs)
705705

706706
/**
707707
* (Java-specific) Aggregates on the entire [[DataFrame]] without groups.
708-
* {{
708+
* {{{
709709
* // df.agg(...) is a shorthand for df.groupBy().agg(...)
710710
* df.agg(Map("age" -> "max", "salary" -> "avg"))
711711
* df.groupBy().agg(Map("age" -> "max", "salary" -> "avg"))
712-
* }}
712+
* }}}
713713
* @group dfops
714714
*/
715715
def agg(exprs: java.util.Map[String, String]): DataFrame = groupBy().agg(exprs)
716716

717717
/**
718718
* Aggregates on the entire [[DataFrame]] without groups.
719-
* {{
719+
* {{{
720720
* // df.agg(...) is a shorthand for df.groupBy().agg(...)
721721
* df.agg(max($"age"), avg($"salary"))
722722
* df.groupBy().agg(max($"age"), avg($"salary"))
723-
* }}
723+
* }}}
724724
* @group dfops
725725
*/
726726
@scala.annotation.varargs

sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
3737
StatFunctions.calculateCov(df, Seq(col1, col2))
3838
}
3939

40-
/*
40+
/**
4141
* Calculates the correlation of two columns of a DataFrame. Currently only supports the Pearson
4242
* Correlation Coefficient. For Spearman Correlation, consider using RDD methods found in
4343
* MLlib's Statistics.
@@ -75,7 +75,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
7575
* each row.
7676
* @param col2 The name of the second column. Distinct items will make the column names
7777
* of the DataFrame.
78-
* @return A Local DataFrame containing the table
78+
* @return A DataFrame containing the contingency table.
7979
*/
8080
def crosstab(col1: String, col2: String): DataFrame = {
8181
StatFunctions.crossTabulate(df, col1, col2)
@@ -110,14 +110,25 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
110110
}
111111

112112
/**
113-
* Python friendly implementation for `freqItems`
113+
* (Scala-specific) Finds frequent items for columns, possibly with false positives, using the
114+
* frequent element count algorithm described in
115+
* [[http://dx.doi.org/10.1145/762471.762473 proposed by Karp, Schenker, and Papadimitriou]].
116+
*
117+
* @param cols the names of the columns to search frequent items in.
118+
* @return A Local DataFrame with the Array of frequent items for each column.
114119
*/
115120
def freqItems(cols: Seq[String], support: Double): DataFrame = {
116121
FrequentItems.singlePassFreqItems(df, cols, support)
117122
}
118123

119124
/**
120-
* Python friendly implementation for `freqItems` with a default `support` of 1%.
125+
* (Scala-specific) Finds frequent items for columns, possibly with false positives, using the
126+
* frequent element count algorithm described in
127+
* [[http://dx.doi.org/10.1145/762471.762473 proposed by Karp, Schenker, and Papadimitriou]].
128+
* Uses a default `support` of 1%.
129+
*
130+
* @param cols the names of the columns to search frequent items in.
131+
* @return A Local DataFrame with the Array of frequent items for each column.
121132
*/
122133
def freqItems(cols: Seq[String]): DataFrame = {
123134
FrequentItems.singlePassFreqItems(df, cols, 0.01)

sql/core/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import scala.reflect.runtime.universe.{TypeTag, typeTag}
2222

2323
import org.apache.spark.annotation.Experimental
2424
import org.apache.spark.sql.catalyst.ScalaReflection
25-
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, Star}
25+
import org.apache.spark.sql.catalyst.analysis.{UnresolvedFunction, Star}
2626
import org.apache.spark.sql.catalyst.expressions._
2727
import org.apache.spark.sql.catalyst.expressions.mathfuncs._
2828
import org.apache.spark.sql.types._
@@ -86,21 +86,21 @@ object functions {
8686

8787
/**
8888
* Returns a sort expression based on ascending order of the column.
89-
* {{
89+
* {{{
9090
* // Sort by dept in ascending order, and then age in descending order.
9191
* df.sort(asc("dept"), desc("age"))
92-
* }}
92+
* }}}
9393
*
9494
* @group sort_funcs
9595
*/
9696
def asc(columnName: String): Column = Column(columnName).asc
9797

9898
/**
9999
* Returns a sort expression based on the descending order of the column.
100-
* {{
100+
* {{{
101101
* // Sort by dept in ascending order, and then age in descending order.
102102
* df.sort(asc("dept"), desc("age"))
103-
* }}
103+
* }}}
104104
*
105105
* @group sort_funcs
106106
*/
@@ -353,13 +353,13 @@ object functions {
353353

354354
/**
355355
* Inversion of boolean expression, i.e. NOT.
356-
* {{
356+
* {{{
357357
* // Scala: select rows that are not active (isActive === false)
358358
* df.filter( !df("isActive") )
359359
*
360360
* // Java:
361361
* df.filter( not(df.col("isActive")) );
362-
* }}
362+
* }}}
363363
*
364364
* @group normal_funcs
365365
*/

0 commit comments

Comments
 (0)