-
Notifications
You must be signed in to change notification settings - Fork 3
Changes BitSubvector to use System.arraycopy #10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 28 commits
4f4bf8a
b8d2c93
100d506
74d6732
1d1d71f
3fcdeaf
bfa1819
9e7b0e9
be5b237
3c1a4d3
15b009b
d58ce40
40719af
5941577
248d0a7
0764b90
ceaf5f5
675951e
1855176
d897714
6cc1ed8
71fd54c
1597d11
92a6fa5
506ac10
2c329df
9a642b8
49a628a
c5e5480
1213f86
8f1360f
3c5060d
2db78ee
7072f91
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -219,4 +219,30 @@ class BitSet(numBits: Int) extends Serializable { | |
|
|
||
| /** Return the number of longs it would take to hold numBits. */ | ||
| private def bit2words(numBits: Int) = ((numBits - 1) >> 6) + 1 | ||
|
|
||
| /** Bit-wise OR between two BitSets where the ith bit of other is ORed against the i+offset bit of this instance. */ | ||
| private[spark] def orWithOffset(other: BitSet, offset: Int, numBits: Int): Unit = { | ||
| val numWords = bit2words(numBits) | ||
| val wordOffset = offset >> 6 // divide by 64 | ||
|
|
||
| // Bit vectors have memory layout [63..0|127..64|...] where | denotes word boundaries, so left/right within a word | ||
| // and left/right across words are flipped | ||
| val rightOffset = offset % 64 | ||
| val leftOffset = 64 - rightOffset | ||
|
|
||
| var wordIndex = 0 | ||
| while (wordIndex < numWords) { | ||
| // Fill in lowest-order bits from other's previous word's highest-order bits if available | ||
| if (rightOffset > 0 && wordIndex > 0) { | ||
| val maskedShiftedPrevWord = (other.words(wordIndex - 1) & (Long.MaxValue << leftOffset)) >> leftOffset | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is `Long.MaxValue` all-ones?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep.
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You're sure? [inline expression lost in extraction] returns 0.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, you're right! I just saw a bunch of 1s and assumed so, but it turns out there are only 63 of them.
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I should be using [inline code lost in extraction; presumably an all-ones value such as `-1L`] |
||
| words(wordIndex + wordOffset) = words(wordIndex + wordOffset) | maskedShiftedPrevWord | ||
| } | ||
|
|
||
| // Mask, shift, and OR with current word | ||
| val maskedShiftedOtherWord = (other.words(wordIndex) & (Long.MaxValue >> rightOffset)) << rightOffset | ||
| words(wordIndex + wordOffset) = words(wordIndex + wordOffset) | maskedShiftedOtherWord | ||
|
|
||
| wordIndex += 1 | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,25 +21,64 @@ import org.apache.spark.SparkFunSuite | |
|
|
||
| class BitSetSuite extends SparkFunSuite { | ||
|
|
||
| test("basic set and get") { | ||
| (() => { | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should probably avoid sharing mutable data between unit tests. Can you split these up and use `beforeAll` to initialize a non-mutated instance of the currently shared [identifier lost in extraction; presumably `bitset`]?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK. I will initialize fixtures for each test rather than use [identifier lost in extraction; presumably `beforeAll`] |
||
| val setBits = Seq(0, 9, 1, 10, 90, 96) | ||
| val bitset = new BitSet(100) | ||
|
|
||
| for (i <- 0 until 100) { | ||
| assert(!bitset.get(i)) | ||
| test("set and get") { | ||
| for (i <- 0 until 100) { | ||
| assert(!bitset.get(i)) | ||
| } | ||
|
|
||
| setBits.foreach(i => bitset.set(i)) | ||
|
|
||
| for (i <- 0 until 100) { | ||
| if (setBits.contains(i)) { | ||
| assert(bitset.get(i)) | ||
| } else { | ||
| assert(!bitset.get(i)) | ||
| } | ||
| } | ||
| assert(bitset.cardinality() === setBits.size) | ||
| } | ||
|
|
||
| setBits.foreach(i => bitset.set(i)) | ||
| test("orWithOffset offset=0") { | ||
| val copyBitset = new BitSet(100) | ||
| copyBitset.orWithOffset(bitset, 0, bitset.capacity) | ||
| for (i <- 0 until 100) { | ||
| if (setBits.contains(i)) { | ||
| assert(copyBitset.get(i)) | ||
| } else { | ||
| assert(!copyBitset.get(i)) | ||
| } | ||
| } | ||
| assert(copyBitset.cardinality() === setBits.size) | ||
| } | ||
|
|
||
| for (i <- 0 until 100) { | ||
| if (setBits.contains(i)) { | ||
| assert(bitset.get(i)) | ||
| } else { | ||
| assert(!bitset.get(i)) | ||
| test("orWithOffset offset=5") { | ||
|
||
| val copyBitset = new BitSet(100) | ||
| copyBitset.orWithOffset(bitset, 5, bitset.capacity - 5) | ||
| for (i <- 5 until 100) { | ||
| if (setBits.contains(i - 5)) { | ||
| assert(copyBitset.get(i)) | ||
| } else { | ||
| assert(!copyBitset.get(i)) | ||
| } | ||
| } | ||
| } | ||
| assert(bitset.cardinality() === setBits.size) | ||
| } | ||
|
|
||
| test("orWithOffset offset=65 (full word + 1)") { | ||
| val copyBitset = new BitSet(100) | ||
| copyBitset.orWithOffset(bitset, 65, bitset.capacity - 65) | ||
| for (i <- 65 until 100) { | ||
| if (setBits.contains(i - 65)) { | ||
| assert(copyBitset.get(i)) | ||
| } else { | ||
| assert(!copyBitset.get(i)) | ||
| } | ||
| } | ||
| } | ||
| })() | ||
|
|
||
| test("100% full bit set") { | ||
| val bitset = new BitSet(10000) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Document:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK. Modifying to calculate
`numBits` within the function rather than taking it as an argument.