Skip to content

Commit 5a3ad10

Browse files
mengxr authored and rxin committed
SPARK-1129: use a predefined seed when seed is zero in XORShiftRandom
If the seed is zero, XORShift generates all zeros, which would create unexpected result. JIRA: https://spark-project.atlassian.net/browse/SPARK-1129 Author: Xiangrui Meng <[email protected]> Closes #645 from mengxr/xor and squashes the following commits: 1b086ab [Xiangrui Meng] use MurmurHash3 to set seed in XORShiftRandom 45c6f16 [Xiangrui Meng] minor style change 51f4050 [Xiangrui Meng] use a predefined seed when seed is zero in XORShiftRandom
1 parent 71f69d6 commit 5a3ad10

File tree

2 files changed

+16
-3
lines changed

2 files changed

+16
-3
lines changed

core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,11 @@
1717

1818
package org.apache.spark.util.random
1919

20+
import java.nio.ByteBuffer
2021
import java.util.{Random => JavaRandom}
2122

23+
import scala.util.hashing.MurmurHash3
24+
2225
import org.apache.spark.util.Utils.timeIt
2326

2427
/**
@@ -36,8 +39,8 @@ private[spark] class XORShiftRandom(init: Long) extends JavaRandom(init) {
3639

3740
def this() = this(System.nanoTime)
3841

39-
private var seed = init
40-
42+
private var seed = XORShiftRandom.hashSeed(init)
43+
4144
// we need to just override next - this will be called by nextInt, nextDouble,
4245
// nextGaussian, nextLong, etc.
4346
override protected def next(bits: Int): Int = {
@@ -49,13 +52,19 @@ private[spark] class XORShiftRandom(init: Long) extends JavaRandom(init) {
4952
}
5053

5154
/**
 * Re-seeds this generator.
 *
 * The supplied value is not stored as-is: after this change it is passed through
 * XORShiftRandom.hashSeed first and the hashed result becomes the internal seed.
 * (The "52-" / "seed = s" pair below is the removed side of the diff.)
 *
 * @param s raw seed value supplied by the caller
 */
override def setSeed(s: Long) {
52-
seed = s
55+
// Store the hashed seed rather than the raw value.
seed = XORShiftRandom.hashSeed(s)
5356
}
5457
}
5558

5659
/** Contains benchmark method and main method to run benchmark of the RNG */
5760
private[spark] object XORShiftRandom {
5861

62+
/**
 * Hash a caller-supplied seed so that the resulting state has 0/1 bits spread across all
 * 64 bits, even for degenerate inputs such as 0.
 *
 * @param seed raw seed value
 * @return 64-bit hashed seed assembled from two chained MurmurHash3 passes over the
 *         seed's 8-byte representation
 */
private def hashSeed(seed: Long): Long = {
  // Long.SIZE is a bit count (64). A Long occupies SIZE / Byte.SIZE = 8 bytes, so allocate
  // exactly that many instead of hashing 56 bytes of zero padding as well.
  val numBytes = java.lang.Long.SIZE / java.lang.Byte.SIZE
  val bytes = ByteBuffer.allocate(numBytes).putLong(seed).array()
  // bytesHash only yields 32 bits; widening that Int straight to Long would sign-extend it
  // and leave the upper word all zeros or all ones. Chain a second pass, seeded with the
  // first result, to fill the high word.
  val lowBits = MurmurHash3.bytesHash(bytes)
  val highBits = MurmurHash3.bytesHash(bytes, lowBits)
  (highBits.toLong << 32) | (lowBits.toLong & 0xFFFFFFFFL)
}
67+
5968
/**
6069
* Main method for running benchmark
6170
* @param args takes one argument - the number of random numbers to generate

core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,8 @@ class XORShiftRandomSuite extends FunSuite with ShouldMatchers {
7272

7373
}
7474

75+
test ("XORShift with zero seed") {
  // A raw zero state makes XORShift emit zeros forever. Check that the generator is not
  // stuck at zero rather than pinning the value of one particular draw, since a healthy
  // generator may legitimately return 0 on any single call.
  val random = new XORShiftRandom(0L)
  val fromConstructor = Seq.fill(10)(random.nextInt())
  assert(fromConstructor.exists(_ != 0))

  // Re-seeding with zero goes through setSeed, which must be protected in the same way.
  random.setSeed(0L)
  val fromSetSeed = Seq.fill(10)(random.nextInt())
  assert(fromSetSeed.exists(_ != 0))
}
7579
}

0 commit comments

Comments
 (0)