Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,43 +25,50 @@

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.KeyValueGroupedDataset;
import org.apache.spark.sql.expressions.javalang.typed;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggest to use rename a class on import, e.g. import org.apache.spark.sql.expressions.javalang.{typed => javaTyped}
instead of use full class name in rest of code

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is Java not Scala. What do you propose doesn't work here.


/**
* Suite that replicates tests in JavaDatasetAggregatorSuite using lambda syntax.
*/
public class Java8DatasetAggregatorSuite extends JavaDatasetAggregatorSuiteBase {
@SuppressWarnings("deprecation")
@Test
public void testTypedAggregationAverage() {
KeyValueGroupedDataset<String, Tuple2<String, Integer>> grouped = generateGroupedDataset();
Dataset<Tuple2<String, Double>> agged = grouped.agg(typed.avg(v -> (double)(v._2() * 2)));
Dataset<Tuple2<String, Double>> agged = grouped.agg(
org.apache.spark.sql.expressions.javalang.typed.avg(v -> (double)(v._2() * 2)));
Copy link

@seanli-rallyhealth seanli-rallyhealth Jan 2, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't think it's a good idea to use full class name. you may use rename a class on import
import org.apache.spark.sql.expressions.javalang.{typed => javaTyped}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The purpose of using full path to deprecated methods is to put any references to deprecated class under the annotation @SuppressWarnings("deprecation"). Even if we forget that your code doesn't compile in Java, it still refers to deprecated class.

Assert.assertEquals(
Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 6.0)),
agged.collectAsList());
}

@SuppressWarnings("deprecation")
@Test
public void testTypedAggregationCount() {
KeyValueGroupedDataset<String, Tuple2<String, Integer>> grouped = generateGroupedDataset();
Dataset<Tuple2<String, Long>> agged = grouped.agg(typed.count(v -> v));
Dataset<Tuple2<String, Long>> agged = grouped.agg(
org.apache.spark.sql.expressions.javalang.typed.count(v -> v));
Assert.assertEquals(
Arrays.asList(new Tuple2<>("a", 2L), new Tuple2<>("b", 1L)),
agged.collectAsList());
}

@SuppressWarnings("deprecation")
@Test
public void testTypedAggregationSumDouble() {
KeyValueGroupedDataset<String, Tuple2<String, Integer>> grouped = generateGroupedDataset();
Dataset<Tuple2<String, Double>> agged = grouped.agg(typed.sum(v -> (double)v._2()));
Dataset<Tuple2<String, Double>> agged = grouped.agg(
org.apache.spark.sql.expressions.javalang.typed.sum(v -> (double)v._2()));
Assert.assertEquals(
Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 3.0)),
agged.collectAsList());
}

@SuppressWarnings("deprecation")
@Test
public void testTypedAggregationSumLong() {
KeyValueGroupedDataset<String, Tuple2<String, Integer>> grouped = generateGroupedDataset();
Dataset<Tuple2<String, Long>> agged = grouped.agg(typed.sumLong(v -> (long)v._2()));
Dataset<Tuple2<String, Long>> agged = grouped.agg(
org.apache.spark.sql.expressions.javalang.typed.sumLong(v -> (long)v._2()));
Assert.assertEquals(
Arrays.asList(new Tuple2<>("a", 3L), new Tuple2<>("b", 3L)),
agged.collectAsList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@ public void testBeanWithoutGetter() {
Assert.assertEquals(1, df.collectAsList().size());
}

@SuppressWarnings("deprecation")
@Test
public void testJsonRDDToDataFrame() {
// This is a test for the deprecated API in SPARK-15615.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.KeyValueGroupedDataset;
import org.apache.spark.sql.expressions.Aggregator;
import org.apache.spark.sql.expressions.javalang.typed;

/**
* Suite for testing the aggregate functionality of Datasets in Java.
Expand Down Expand Up @@ -85,37 +84,45 @@ public Encoder<Integer> outputEncoder() {
}
}

@SuppressWarnings("deprecation")
@Test
public void testTypedAggregationAverage() {
KeyValueGroupedDataset<String, Tuple2<String, Integer>> grouped = generateGroupedDataset();
Dataset<Tuple2<String, Double>> agged = grouped.agg(typed.avg(value -> value._2() * 2.0));
Dataset<Tuple2<String, Double>> agged = grouped.agg(
org.apache.spark.sql.expressions.javalang.typed.avg(value -> value._2() * 2.0));
Assert.assertEquals(
Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 6.0)),
agged.collectAsList());
}

@SuppressWarnings("deprecation")
@Test
public void testTypedAggregationCount() {
KeyValueGroupedDataset<String, Tuple2<String, Integer>> grouped = generateGroupedDataset();
Dataset<Tuple2<String, Long>> agged = grouped.agg(typed.count(value -> value));
Dataset<Tuple2<String, Long>> agged = grouped.agg(
org.apache.spark.sql.expressions.javalang.typed.count(value -> value));
Assert.assertEquals(
Arrays.asList(new Tuple2<>("a", 2L), new Tuple2<>("b", 1L)),
agged.collectAsList());
}

@SuppressWarnings("deprecation")
@Test
public void testTypedAggregationSumDouble() {
KeyValueGroupedDataset<String, Tuple2<String, Integer>> grouped = generateGroupedDataset();
Dataset<Tuple2<String, Double>> agged = grouped.agg(typed.sum(value -> (double) value._2()));
Dataset<Tuple2<String, Double>> agged = grouped.agg(
org.apache.spark.sql.expressions.javalang.typed.sum(value -> (double) value._2()));
Assert.assertEquals(
Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 3.0)),
agged.collectAsList());
}

@SuppressWarnings("deprecation")
@Test
public void testTypedAggregationSumLong() {
KeyValueGroupedDataset<String, Tuple2<String, Integer>> grouped = generateGroupedDataset();
Dataset<Tuple2<String, Long>> agged = grouped.agg(typed.sumLong(value -> (long) value._2()));
Dataset<Tuple2<String, Long>> agged = grouped.agg(
org.apache.spark.sql.expressions.javalang.typed.sumLong(value -> (long) value._2()));
Assert.assertEquals(
Arrays.asList(new Tuple2<>("a", 3L), new Tuple2<>("b", 3L)),
agged.collectAsList());
Expand Down