-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathsuccinct-bit-vector.lisp
142 lines (133 loc) · 6.05 KB
/
succinct-bit-vector.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
(defpackage :cp/succinct-bit-vector
(:use :cl)
(:export #:succint-bit-vector #:make-sucbv! #:sucbv-ref #:sucbv-rank #:sucbv-select)
(:documentation "Provides three-layer succinct bit vector. You should use
cp/compact-bit-vector instead as it is far more efficient. I keep this module
just for my reference.
select: O(log(n))"))
(in-package :cp/succinct-bit-vector)
(defconstant +chunk-width+ (* 64 16))
;; This constant cannot be changed as the current implementation depends on the
;; assumption: +BLOCK-WIDTH+ is equal to the word size.
(defconstant +block-width+ 64)
(defconstant +block-number+ (floor +chunk-width+ +block-width+))
(eval-when (:compile-toplevel :load-toplevel :execute)
(assert (zerop (mod +chunk-width+ +block-width+)))
(assert (= sb-vm:n-word-bits 64)))
(defstruct (succinct-bit-vector (:constructor %make-sucbv (storage chunks blocks))
(:conc-name sucbv-)
(:copier nil))
(storage nil :type simple-bit-vector)
(chunks nil :type (simple-array (integer 0 #.most-positive-fixnum) (*)))
(blocks nil :type (simple-array (unsigned-byte 16) (* *))))
(defun make-sucbv! (vector)
"The consequence is undefined when VECTOR is modified after a succinct bit
vector is created."
(declare (optimize (speed 3)))
(check-type vector simple-bit-vector)
(let* ((vector (if (zerop (mod (length vector) +chunk-width+))
vector
(adjust-array vector
(* +chunk-width+ (ceiling (length vector) +chunk-width+))
:initial-element 0)))
(len (length vector))
(chunk-count (floor len +chunk-width+))
(chunks (make-array (+ 1 chunk-count)
:element-type '(integer 0 #.most-positive-fixnum)
:initial-element 0))
(blocks (make-array (list (+ 1 chunk-count) +block-number+)
:element-type '(unsigned-byte 16)
:initial-element 0))
(sum 0))
(declare (simple-bit-vector vector)
((mod #.array-dimension-limit) sum))
(dotimes (i chunk-count)
(setf (aref chunks i) sum)
(let ((block-sum 0))
(declare ((mod #.array-dimension-limit) block-sum))
(dotimes (j +block-number+)
(setf (aref blocks i j) block-sum)
(incf block-sum
(logcount (sb-kernel:%vector-raw-bits vector (+ (* i +block-number+) j)))))
(incf sum block-sum)))
(setf (aref chunks chunk-count) sum)
(%make-sucbv vector chunks blocks)))
(declaim (inline sucbv-ref))
(defun sucbv-ref (sucbv index)
(sbit (sucbv-storage sucbv) index))
;; NOTE: No error handling.
(declaim (ftype (function * (values (mod #.array-dimension-limit) &optional))
sucbv-rank))
(defun sucbv-rank (sucbv end)
"Counts the number of 1's in the range [0, END)."
(declare (optimize (speed 3))
((mod #.array-dimension-limit) end))
(let ((storage (sucbv-storage sucbv))
(chunks (sucbv-chunks sucbv))
(blocks (sucbv-blocks sucbv)))
(multiple-value-bind (cpos crem) (floor end +chunk-width+)
(multiple-value-bind (bpos brem) (floor crem +block-width+)
(let ((csum (aref chunks cpos))
(bsum (aref blocks cpos bpos))
(wordpos (floor end 64)))
(+ csum
bsum
(if (zerop brem) ; avoid out-of-bounds access
0
(logcount (ldb (byte brem 0)
(sb-kernel:%vector-raw-bits storage wordpos))))))))))
(declaim (ftype (function * (values (mod #.array-dimension-limit) &optional))
sucbv-count))
(defun sucbv-count (sucbv value end)
"Counts the number of VALUEs in the range [0, END)"
(declare (optimize (speed 3))
(bit value)
((mod #.array-dimension-limit) end))
(let ((count1 (sucbv-rank sucbv end)))
(if (= value 1)
count1
(- end count1))))
(defun sucbv-select (sucbv num)
"Detects the position of (1-based) NUM-th 1 in SUCBV. (SUCBV-SELECT 0) always
returns 0."
(declare (optimize (speed 3))
((mod #.array-dimension-limit) num))
(let* ((storage (sucbv-storage sucbv))
(chunks (sucbv-chunks sucbv))
(blocks (sucbv-blocks sucbv))
(chunk-size (length chunks)))
(unless (<= num (aref chunks (- chunk-size 1)))
;; FIXME: introduce condition class
(error "~&There aren't ~W 1's in ~W" num sucbv))
(labels ((chunk-bisect (ok ng)
(declare ((unsigned-byte 32) ok ng))
(if (<= (- ng ok) 1)
ok
(let ((mid (ash (+ ok ng) -1)))
(if (<= num (aref chunks mid))
(chunk-bisect ok mid)
(chunk-bisect mid ng))))))
(let* ((chunk-idx (chunk-bisect 0 chunk-size))
(num (- num (aref chunks chunk-idx))))
(labels ((block-bisect (ok ng)
(declare ((unsigned-byte 32) ok ng))
(if (<= (- ng ok) 1)
ok
(let ((mid (ash (+ ok ng) -1)))
(if (<= num (aref blocks chunk-idx mid))
(block-bisect ok mid)
(block-bisect mid ng))))))
(let* ((block-idx (block-bisect 0 +block-number+))
(num (- num (aref blocks chunk-idx block-idx)))
(word-pos (+ block-idx (* chunk-idx +block-number+)))
(word (sb-kernel:%vector-raw-bits storage word-pos)))
(labels ((pos-bisect (ok ng)
(declare ((integer 0 64) ok ng))
(if (<= (- ng ok) 1)
ok
(let ((mid (ash (+ ok ng) -1)))
(if (<= num (logcount (ldb (byte mid 0) word)))
(pos-bisect ok mid)
(pos-bisect mid ng))))))
(let ((pos (pos-bisect 0 64)))
(+ (* 64 word-pos) pos)))))))))