Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 43 additions & 8 deletions docs/ml-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -408,31 +408,31 @@ import org.apache.spark.sql.SQLContext;
// Labeled and unlabeled instance types.
// Spark SQL can infer schema from Java Beans.
/**
 * Unlabeled document bean exposing an {@code id} and a {@code text} property
 * through getter/setter pairs.
 *
 * NOTE(review): this span is a rendered diff without +/- markers, so every changed
 * declaration appears twice: first with the boxed {@code Long} type, then with the
 * primitive {@code long} type. Presumably the boxed lines are the removed ones
 * (the diff header reports 8 deletions and the hunk contains 8 boxed-type lines);
 * confirm against the commit before copying this code.
 */
public class Document implements Serializable {
// id field: boxed variant, then primitive variant.
private Long id;
private long id;
private String text;

// Constructor header: boxed variant, then primitive variant; both share the body below.
public Document(Long id, String text) {
public Document(long id, String text) {
this.id = id;
this.text = text;
}

// id accessors: boxed getter/setter pair first, primitive pair second.
public Long getId() { return this.id; }
public void setId(Long id) { this.id = id; }
public long getId() { return this.id; }
public void setId(long id) { this.id = id; }

// text accessors appear once; they are unchanged context lines in this hunk.
public String getText() { return this.text; }
public void setText(String text) { this.text = text; }
}

/**
 * Labeled document bean: a {@code Document} extended with a {@code label} property.
 *
 * NOTE(review): this span is a rendered diff without +/- markers, so every changed
 * declaration appears twice: first with boxed types ({@code Long}, {@code Double}),
 * then with primitives ({@code long}, {@code double}). Presumably the boxed lines
 * are the removed ones; confirm against the commit before copying this code.
 */
public class LabeledDocument extends Document implements Serializable {
// label field: boxed variant, then primitive variant.
private Double label;
private double label;

// Constructor header: boxed variant, then primitive variant; both share the body below.
public LabeledDocument(Long id, String text, Double label) {
public LabeledDocument(long id, String text, double label) {
// id and text are handed to the Document constructor; only label is stored here.
super(id, text);
this.label = label;
}

// label accessors: boxed getter/setter pair first, primitive pair second.
public Double getLabel() { return this.label; }
public void setLabel(Double label) { this.label = label; }
public double getLabel() { return this.label; }
public void setLabel(double label) { this.label = label; }
}

// Set up contexts.
Expand Down Expand Up @@ -565,6 +565,11 @@ import org.apache.spark.ml.tuning.{ParamGridBuilder, CrossValidator}
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.sql.{Row, SQLContext}

// Labeled and unlabeled instance types.
// Spark SQL can infer schema from case classes.
/** Labeled instance: a document id, its text, and the label attached to it. */
case class LabeledDocument(
  id: Long,
  text: String,
  label: Double
)
/** Unlabeled instance: a document id and its text. */
case class Document(
  id: Long,
  text: String
)

// Set up contexts: the configuration names the application, the Spark context is
// built from that configuration, and the SQL context is built from the Spark context.
val conf = new SparkConf().setAppName("CrossValidatorExample")
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)
Expand Down Expand Up @@ -655,6 +660,36 @@ import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;

// Labeled and unlabeled instance types.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, so this is intentionally duplicated from the example above? I guess that's reasonable, since the point is for each example to be self-contained, and I don't imagine there's much maintenance overhead in evolving both copies together.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's annoying when you copy/paste a bunch of code into the Spark shell and it fails because these classes are not declared.

// Spark SQL can infer schema from Java Beans.
public class Document implements Serializable {
private long id;
private String text;

public Document(long id, String text) {
this.id = id;
this.text = text;
}

public long getId() { return this.id; }
public void setId(long id) { this.id = id; }

public String getText() { return this.text; }
public void setText(String text) { this.text = text; }
}

/**
 * Labeled document: a {@code Document} that additionally carries a numeric label.
 * Follows the same Java Bean layout (private field, public getter/setter).
 */
public class LabeledDocument extends Document implements Serializable {
  private double label;

  /**
   * Creates a labeled document.
   *
   * @param id    identifier, passed to the {@code Document} constructor
   * @param text  text content, passed to the {@code Document} constructor
   * @param label label value stored on this instance
   */
  public LabeledDocument(long id, String text, double label) {
    super(id, text);
    this.label = label;
  }

  /** @return the label of this document */
  public double getLabel() {
    return label;
  }

  /** @param label new label for this document */
  public void setLabel(double label) {
    this.label = label;
  }
}

// Set up contexts: the configuration names the application, the Java Spark context
// is built from that configuration, and the SQL context is built from the Spark context.
SparkConf conf = new SparkConf().setAppName("JavaCrossValidatorExample");
JavaSparkContext jsc = new JavaSparkContext(conf);
SQLContext jsql = new SQLContext(jsc);
Expand Down