@@ -1388,9 +1388,22 @@ setMethod("groupByKey",
13881388 function (item ) {
13891389 item $ hash <- as.character(hashCode(item [[1 ]]))
13901390 updateOrCreatePair(item , keys , vals , pred ,
1391- function (vs , v ) c(vs , list (v )),
1392- function (x ) list (x ))
1391+ function (acc , x ) {
1392+ addItemToAccumulator(acc , x )
1393+ acc
1394+ },
1395+ function (x ) {
1396+ acc <- initAccumulator()
1397+ addItemToAccumulator(acc , x )
1398+ acc
1399+ })
13931400 })
1401+ # extract out data field
1402+ vals <- eapply(vals ,
1403+ function (x ) {
1404+ length(x $ data ) <- x $ counter
1405+ x $ data
1406+ })
13941407 # Every key in the environment contains a list
13951408 # Convert that to list(K, Seq[V])
13961409 convertEnvsToList(keys , vals )
@@ -1438,7 +1451,7 @@ setMethod("reduceByKey",
14381451 lapply(part ,
14391452 function (item ) {
14401453 item $ hash <- as.character(hashCode(item [[1 ]]))
1441- updateOrCreatePair(item , keys , vals , pred , combineFunc , function ( x ) x )
1454+ updateOrCreatePair(item , keys , vals , pred , combineFunc , identity )
14421455 })
14431456 convertEnvsToList(keys , vals )
14441457 }
@@ -1451,13 +1464,12 @@ setMethod("reduceByKey",
14511464# '
14521465# ' This function operates on RDDs where every element is of the form list(K, V) or c(K, V),
14531466# ' and merges the values for each key using an associative reduce function, but returns the
1454- # ' results immediately to master as R list.
1467+ # ' results immediately to the driver as an R list.
14551468# '
14561469# ' @param rdd The RDD to reduce by key. Should be an RDD where each element is
14571470# ' list(K, V) or c(K, V).
14581471# ' @param combineFunc The associative reduce function to use.
1459- # ' @return An list where each element is list(K, V') where V' is the merged
1460- # ' value
1472+ # ' @return A list of elements of type list(K, V') where V' is the merged value for each key
14611473# ' @rdname reduceByKeyLocally
14621474# ' @seealso reduceByKey
14631475# ' @export
@@ -1467,7 +1479,7 @@ setMethod("reduceByKey",
14671479# ' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4))
14681480# ' rdd <- parallelize(sc, pairs)
14691481# ' reduced <- reduceByKeyLocally(rdd, "+")
1470- # ' reduced[[1]] # Should be a list(1, 6 )
1482+ # ' reduced # list(list(1, 6), list(1.1, 3) )
14711483# '}
14721484setGeneric ("reduceByKeyLocally ",
14731485 function (rdd , combineFunc ) {
@@ -1486,7 +1498,7 @@ setMethod("reduceByKeyLocally",
14861498 lapply(part ,
14871499 function (item ) {
14881500 item $ hash <- as.character(hashCode(item [[1 ]]))
1489- updateOrCreatePair(item , keys , vals , pred , combineFunc , function ( x ) x )
1501+ updateOrCreatePair(item , keys , vals , pred , combineFunc , identity )
14901502 })
14911503 list (list (keys , vals )) # return hash to avoid re-compute in merge
14921504 }
@@ -1498,7 +1510,7 @@ setMethod("reduceByKeyLocally",
14981510 function (name ) {
14991511 item <- list (x [[1 ]][[name ]], x [[2 ]][[name ]])
15001512 item $ hash <- name
1501- updateOrCreatePair(item , accum [[1 ]], accum [[2 ]], pred , combineFunc , function ( x ) x )
1513+ updateOrCreatePair(item , accum [[1 ]], accum [[2 ]], pred , combineFunc , identity )
15021514 })
15031515 accum
15041516 }
@@ -1573,8 +1585,7 @@ setMethod("combineByKey",
15731585 lapply(part ,
15741586 function (item ) {
15751587 item $ hash <- as.character(item [[1 ]])
1576- updateOrCreatePair(item , keys , combiners , pred , mergeCombiners ,
1577- function (x ) x )
1588+ updateOrCreatePair(item , keys , combiners , pred , mergeCombiners , identity )
15781589 })
15791590 convertEnvsToList(keys , combiners )
15801591 }
0 commit comments