Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ private[feature] trait Word2VecBase extends Params
* Validate and transform the input schema.
*/
protected def validateAndTransformSchema(schema: StructType): StructType = {
  // The input column must be an array of strings. Both nullability variants of the
  // element type are listed so that arrays declared with containsNull = false are
  // accepted as well as the containsNull = true form.
  // NOTE(review): a single strict check against ArrayType(StringType, true) used to
  // precede this one and made the second candidate unreachable; it is removed here.
  val typeCandidates = List(new ArrayType(StringType, true), new ArrayType(StringType, false))
  // Presumably fails when the column's type matches none of the candidates --
  // SchemaUtils is defined elsewhere; confirm against its implementation.
  SchemaUtils.checkColumnTypes(schema, $(inputCol), typeCandidates)
  // The resulting schema is the input schema with the output column added as a vector column.
  SchemaUtils.appendColumn(schema, $(outputCol), new VectorUDT)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,5 +207,26 @@ class Word2VecSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
val newInstance = testDefaultReadWrite(instance)
assert(newInstance.getVectors.collect() === instance.getVectors.collect())
}

test("Word2Vec works with input that is non-nullable (NGram)") {
  val spark = this.spark
  import spark.implicits._

  // Two identical documents, each split on single spaces into a token array.
  val sentence = "a q s t q s t b b b s t m s t m q "
  val tokenized = sc.parallelize(Seq(sentence, sentence)).map(_.split(" "))
  val docDF = tokenized.toDF("text")

  // Bigrams built from the tokens; per the test name, this is the column whose
  // array type is expected to be non-nullable.
  val bigramDF = new NGram()
    .setN(2)
    .setInputCol("text")
    .setOutputCol("ngrams")
    .transform(docDF)

  // Fit a small Word2Vec model directly on the NGram output column.
  val word2Vec = new Word2Vec()
    .setVectorSize(2)
    .setInputCol("ngrams")
    .setOutputCol("result")
  val model = word2Vec.fit(bigramDF)

  // No value assertions: the test passes as long as fit/transform do not throw.
  model.transform(bigramDF).collect()
}

}