File tree Expand file tree Collapse file tree 2 files changed +11
-1
lines changed Expand file tree Collapse file tree 2 files changed +11
-1
lines changed Original file line number Diff line number Diff line change @@ -2018,6 +2018,9 @@ def countApproxDistinct(self, relativeSD=0.05):
20182018 >>> n = sc.parallelize(range(1000)).map(str).countApproxDistinct()
20192019 >>> 950 < n < 1050
20202020 True
2021+ >>> n = sc.parallelize([i % 20 for i in range(1000)]).countApproxDistinct()
2022+ >>> 18 < n < 22
2023+ True
20212024 """
20222025 if relativeSD < 0.000017 :
20232026 raise ValueError ("relativeSD should be greater than 0.000017" )
Original file line number Diff line number Diff line change @@ -409,7 +409,14 @@ def test_count_approx_distinct(self):
409409 self .assertTrue (950 < rdd .countApproxDistinct (0.04 ) < 1050 )
410410 self .assertTrue (950 < rdd .map (float ).countApproxDistinct (0.04 ) < 1050 )
411411 self .assertTrue (950 < rdd .map (str ).countApproxDistinct (0.04 ) < 1050 )
412- self .assertTrue (950 < rdd .map (lambda x : (x , - x )).countApproxDistinct (0.04 ) < 1050 )
412+ self .assertTrue (950 < rdd .map (lambda x : set ([x , - x ])).countApproxDistinct (0.04 ) < 1050 )
413+
414+ rdd = self .sc .parallelize ([i % 20 for i in range (1000 )], 7 )
415+ self .assertTrue (18 < rdd .countApproxDistinct () < 22 )
416+ self .assertTrue (18 < rdd .map (float ).countApproxDistinct () < 22 )
417+ self .assertTrue (18 < rdd .map (str ).countApproxDistinct () < 22 )
418+ self .assertTrue (18 < rdd .map (lambda x : set ([x , - x ])).countApproxDistinct () < 22 )
419+
413420 self .assertRaises (ValueError , lambda : rdd .countApproxDistinct (0.00000001 ))
414421 self .assertRaises (ValueError , lambda : rdd .countApproxDistinct (0.5 ))
415422
You can’t perform that action at this time.
0 commit comments