1717
1818package org .apache .spark .examples .mllib
1919
20+ import scopt .OptionParser
21+
2022import org .apache .spark .mllib .fpm .FPGrowth
2123import org .apache .spark .{SparkConf , SparkContext }
22- import scopt .OptionParser
2324
2425/**
2526 * Example for mining frequent itemsets using FP-growth.
26- * Example usage: ./bin/run-example org.apache.spark.examples. mllib.FPGrowthExample
27- * --minSupport 0.8 --numPartition 2 ./data/mllib/sample_fpgrowth.txt
27+ * Example usage: ./bin/run-example mllib.FPGrowthExample \
28+ * --minSupport 0.8 --numPartition 2 ./data/mllib/sample_fpgrowth.txt
2829 */
2930object FPGrowthExample {
3031
@@ -36,7 +37,7 @@ object FPGrowthExample {
3637 def main (args : Array [String ]) {
3738 val defaultParams = Params ()
3839
39- val parser = new OptionParser [Params ](" FPGrowth " ) {
40+ val parser = new OptionParser [Params ](" FPGrowthExample " ) {
4041 head(" FPGrowth: an example FP-growth app." )
4142 opt[Double ](" minSupport" )
4243 .text(s " minimal support level, default: ${defaultParams.minSupport}" )
@@ -45,7 +46,8 @@ object FPGrowthExample {
4546 .text(s " number of partition, default: ${defaultParams.numPartition}" )
4647 .action((x, c) => c.copy(numPartition = x))
4748 arg[String ](" <input>" )
48- .text(" input paths to input data set" )
49+ .text(" input paths to input data set, whose file format is that each line " +
50+ " contains a transaction with each item in String and separated by a space" )
4951 .required()
5052 .action((x, c) => c.copy(input = x))
5153 }
@@ -62,14 +64,14 @@ object FPGrowthExample {
6264 val sc = new SparkContext (conf)
6365 val transactions = sc.textFile(params.input).map(_.split(" " )).cache()
6466
65- println(s " Number of transactions: ${transactions.count}" )
67+ println(s " Number of transactions: ${transactions.count() }" )
6668
6769 val model = new FPGrowth ()
6870 .setMinSupport(params.minSupport)
6971 .setNumPartitions(params.numPartition)
7072 .run(transactions)
7173
72- println(s " Number of frequent itemsets: ${model.freqItemsets.count}" )
74+ println(s " Number of frequent itemsets: ${model.freqItemsets.count() }" )
7375
7476 model.freqItemsets.collect().foreach { itemset =>
7577 println(itemset.items.mkString(" [" , " ," , " ]" ) + " , " + itemset.freq)
0 commit comments