Skip to content

Commit 19dcb57

Browse files
varadharajan authored and mengxr committed
[SPARK-4047] - Generate runtime warnings for example implementation of PageRank
Based on SPARK-2434, this PR generates runtime warnings for example implementations (Python, Scala) of PageRank. Author: Varadharajan Mukundan <[email protected]> Closes #2894 from varadharajan/SPARK-4047 and squashes the following commits: 5f9406b [Varadharajan Mukundan] [SPARK-4047] - Point users to LogisticRegressionWithSGD and LogisticRegressionWithLBFGS instead of LogisticRegressionModel 252f595 [Varadharajan Mukundan] a. Generate runtime warnings for 05a018b [Varadharajan Mukundan] Fix PageRank implementation's package reference 5c2bf54 [Varadharajan Mukundan] [SPARK-4047] - Generate runtime warnings for example implementation of PageRank (cherry picked from commit 974d334) Signed-off-by: Xiangrui Meng <[email protected]>
1 parent dd1b2a0 commit 19dcb57

File tree

9 files changed

+83
-8
lines changed

9 files changed

+83
-8
lines changed

examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,25 @@
3030

3131
/**
3232
* Logistic regression based classification.
33+
*
34+
* This is an example implementation for learning how to use Spark. For more conventional use,
35+
* please refer to either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
36+
* org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS based on your needs.
3337
*/
3438
public final class JavaHdfsLR {
3539

3640
private static final int D = 10; // Number of dimensions
3741
private static final Random rand = new Random(42);
3842

43+
static void showWarning() {
44+
String warning = "WARN: This is a naive implementation of Logistic Regression " +
45+
"and is given as an example!\n" +
46+
"Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD " +
47+
"or org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS " +
48+
"for more conventional use.";
49+
System.err.println(warning);
50+
}
51+
3952
static class DataPoint implements Serializable {
4053
DataPoint(double[] x, double y) {
4154
this.x = x;
@@ -109,6 +122,8 @@ public static void main(String[] args) {
109122
System.exit(1);
110123
}
111124

125+
showWarning();
126+
112127
SparkConf sparkConf = new SparkConf().setAppName("JavaHdfsLR");
113128
JavaSparkContext sc = new JavaSparkContext(sparkConf);
114129
JavaRDD<String> lines = sc.textFile(args[0]);

examples/src/main/java/org/apache/spark/examples/JavaPageRank.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,21 @@
4545
* URL neighbor URL
4646
* ...
4747
* where URL and their neighbors are separated by space(s).
48+
*
49+
* This is an example implementation for learning how to use Spark. For more conventional use,
50+
* please refer to org.apache.spark.graphx.lib.PageRank
4851
*/
4952
public final class JavaPageRank {
5053
private static final Pattern SPACES = Pattern.compile("\\s+");
5154

55+
static void showWarning() {
56+
String warning = "WARN: This is a naive implementation of PageRank " +
57+
"and is given as an example! \n" +
58+
"Please use the PageRank implementation found in " +
59+
"org.apache.spark.graphx.lib.PageRank for more conventional use.";
60+
System.err.println(warning);
61+
}
62+
5263
private static class Sum implements Function2<Double, Double, Double> {
5364
@Override
5465
public Double call(Double a, Double b) {
@@ -62,6 +73,8 @@ public static void main(String[] args) throws Exception {
6273
System.exit(1);
6374
}
6475

76+
showWarning();
77+
6578
SparkConf sparkConf = new SparkConf().setAppName("JavaPageRank");
6679
JavaSparkContext ctx = new JavaSparkContext(sparkConf);
6780

examples/src/main/python/pagerank.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@
1515
# limitations under the License.
1616
#
1717

18+
"""
19+
This is an example implementation of PageRank. For more conventional use,
20+
please refer to the PageRank implementation provided by GraphX.
21+
"""
22+
1823
import re
1924
import sys
2025
from operator import add
@@ -40,6 +45,9 @@ def parseNeighbors(urls):
4045
print >> sys.stderr, "Usage: pagerank <file> <iterations>"
4146
exit(-1)
4247

48+
print >> sys.stderr, """WARN: This is a naive implementation of PageRank and is
49+
given as an example! Please refer to PageRank implementation provided by graphx"""
50+
4351
# Initialize the spark context.
4452
sc = SparkContext(appName="PythonPageRank")
4553

examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ import breeze.linalg.{Vector, DenseVector}
2525
* Logistic regression based classification.
2626
*
2727
* This is an example implementation for learning how to use Spark. For more conventional use,
28-
* please refer to org.apache.spark.mllib.classification.LogisticRegression
28+
* please refer to either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
29+
* org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS based on your needs.
2930
*/
3031
object LocalFileLR {
3132
val D = 10 // Number of dimensions
@@ -41,7 +42,8 @@ object LocalFileLR {
4142
def showWarning() {
4243
System.err.println(
4344
"""WARN: This is a naive implementation of Logistic Regression and is given as an example!
44-
|Please use the LogisticRegression method found in org.apache.spark.mllib.classification
45+
|Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
46+
|org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
4547
|for more conventional use.
4648
""".stripMargin)
4749
}

examples/src/main/scala/org/apache/spark/examples/LocalLR.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ import breeze.linalg.{Vector, DenseVector}
2525
* Logistic regression based classification.
2626
*
2727
* This is an example implementation for learning how to use Spark. For more conventional use,
28-
* please refer to org.apache.spark.mllib.classification.LogisticRegression
28+
* please refer to either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
29+
* org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS based on your needs.
2930
*/
3031
object LocalLR {
3132
val N = 10000 // Number of data points
@@ -48,7 +49,8 @@ object LocalLR {
4849
def showWarning() {
4950
System.err.println(
5051
"""WARN: This is a naive implementation of Logistic Regression and is given as an example!
51-
|Please use the LogisticRegression method found in org.apache.spark.mllib.classification
52+
|Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
53+
|org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
5254
|for more conventional use.
5355
""".stripMargin)
5456
}

examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ import org.apache.spark.scheduler.InputFormatInfo
3232
* Logistic regression based classification.
3333
*
3434
* This is an example implementation for learning how to use Spark. For more conventional use,
35-
* please refer to org.apache.spark.mllib.classification.LogisticRegression
35+
* please refer to either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
36+
* org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS based on your needs.
3637
*/
3738
object SparkHdfsLR {
3839
val D = 10 // Number of dimensions
@@ -54,7 +55,8 @@ object SparkHdfsLR {
5455
def showWarning() {
5556
System.err.println(
5657
"""WARN: This is a naive implementation of Logistic Regression and is given as an example!
57-
|Please use the LogisticRegression method found in org.apache.spark.mllib.classification
58+
|Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
59+
|org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
5860
|for more conventional use.
5961
""".stripMargin)
6062
}

examples/src/main/scala/org/apache/spark/examples/SparkLR.scala

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ import org.apache.spark._
3030
* Usage: SparkLR [slices]
3131
*
3232
* This is an example implementation for learning how to use Spark. For more conventional use,
33-
* please refer to org.apache.spark.mllib.classification.LogisticRegression
33+
* please refer to either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
34+
* org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS based on your needs.
3435
*/
3536
object SparkLR {
3637
val N = 10000 // Number of data points
@@ -53,7 +54,8 @@ object SparkLR {
5354
def showWarning() {
5455
System.err.println(
5556
"""WARN: This is a naive implementation of Logistic Regression and is given as an example!
56-
|Please use the LogisticRegression method found in org.apache.spark.mllib.classification
57+
|Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
58+
|org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
5759
|for more conventional use.
5860
""".stripMargin)
5961
}

examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,28 @@ import org.apache.spark.{SparkConf, SparkContext}
2828
* URL neighbor URL
2929
* ...
3030
* where URL and their neighbors are separated by space(s).
31+
*
32+
* This is an example implementation for learning how to use Spark. For more conventional use,
33+
* please refer to org.apache.spark.graphx.lib.PageRank
3134
*/
3235
object SparkPageRank {
36+
37+
def showWarning() {
38+
System.err.println(
39+
"""WARN: This is a naive implementation of PageRank and is given as an example!
40+
|Please use the PageRank implementation found in org.apache.spark.graphx.lib.PageRank
41+
|for more conventional use.
42+
""".stripMargin)
43+
}
44+
3345
def main(args: Array[String]) {
3446
if (args.length < 1) {
3547
System.err.println("Usage: SparkPageRank <file> <iter>")
3648
System.exit(1)
3749
}
50+
51+
showWarning()
52+
3853
val sparkConf = new SparkConf().setAppName("PageRank")
3954
val iters = if (args.length > 0) args(1).toInt else 10
4055
val ctx = new SparkContext(sparkConf)

examples/src/main/scala/org/apache/spark/examples/SparkTachyonHdfsLR.scala

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,24 @@ import org.apache.spark.storage.StorageLevel
3232
/**
3333
* Logistic regression based classification.
3434
* This example uses Tachyon to persist rdds during computation.
35+
*
36+
* This is an example implementation for learning how to use Spark. For more conventional use,
37+
* please refer to either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
38+
* org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS based on your needs.
3539
*/
3640
object SparkTachyonHdfsLR {
3741
val D = 10 // Number of dimensions
3842
val rand = new Random(42)
3943

44+
def showWarning() {
45+
System.err.println(
46+
"""WARN: This is a naive implementation of Logistic Regression and is given as an example!
47+
|Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
48+
|org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
49+
|for more conventional use.
50+
""".stripMargin)
51+
}
52+
4053
case class DataPoint(x: Vector[Double], y: Double)
4154

4255
def parsePoint(line: String): DataPoint = {
@@ -51,6 +64,9 @@ object SparkTachyonHdfsLR {
5164
}
5265

5366
def main(args: Array[String]) {
67+
68+
showWarning()
69+
5470
val inputPath = args(0)
5571
val sparkConf = new SparkConf().setAppName("SparkTachyonHdfsLR")
5672
val conf = new Configuration()

0 commit comments

Comments
 (0)