@@ -623,7 +623,7 @@ def getBuckets(self, bucketCount):
623623 if increment != 0 :
624624 buckets = range (min ,max , increment )
625625
626- return buckets
626+ return { "min" : min , "max" : max , " buckets" : buckets }
627627
628628 def histogram (self , bucketCount , buckets = None ):
629629 """
@@ -633,10 +633,15 @@ def histogram(self, bucketCount, buckets=None):
633633 >>> sc.parallelize([1,49, 23, 100, 12, 13, 20, 22, 75, 50]).histogram(3)
634634 defaultdict(<type 'int'>, {(67, inf): 2, (1, 33): 6, (34, 66): 2})
635635 """
636-
636+ min = float ("-inf" )
637+ max = float ("inf" )
637638 evenBuckets = False
638639 if not buckets :
639- buckets = self .getBuckets (bucketCount )
640+ b = self .getBuckets (bucketCount )
641+ buckets = b ["buckets" ]
642+ min = b ["min" ]
643+ max = b ["max" ]
644+
640645 if len (buckets ) < 2 :
641646 raise ValueError ("requires more than 1 bucket" )
642647 if len (buckets ) % 2 == 0 :
@@ -649,9 +654,9 @@ def histogramPartition(iterator):
649654 if k < len (buckets ) and k > 0 :
650655 key = (buckets [k - 1 ], buckets [k ]- 1 )
651656 elif k == len (buckets ):
652- key = (buckets [k - 1 ], float ( "inf" ) )
657+ key = (buckets [k - 1 ], max )
653658 elif k == 0 :
654- key = (float ( "-inf" ) , buckets [k ]- 1 )
659+ key = (min , buckets [k ]- 1 )
655660 counters [key ] += 1
656661 yield counters
657662
0 commit comments