-
Notifications
You must be signed in to change notification settings - Fork 0
/
spatial_index_2d.go
252 lines (223 loc) · 8.71 KB
/
spatial_index_2d.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
package modularspatialindex
import (
"encoding/binary"
"errors"
"fmt"
"math/bits"
"sort"
)
type SpatialIndex2D struct {
hilbert
integerBits int
edgeSizeBits int
intToEightBytes func(int) []byte
eightBytesToInt func([]byte) int
}
// This is the only way to create an instance of SpatialIndex2D. integerBits must be 32 or 64.
// To use the int size of whatever processor you happen to be running on, like what golang itself does
// with the `int` type, you may simply pass in `bits.UintSize`.
func NewSpatialIndex2D(integerBits int) (*SpatialIndex2D, error) {
if integerBits > bits.UintSize {
return nil, fmt.Errorf("can't create a %d bit SpatialIndex2D on a %d bit CPU", integerBits, bits.UintSize)
}
if integerBits != 32 && integerBits != 64 {
return nil, fmt.Errorf("%d bit SpatialIndex2D is not supported, please use 32 or 64 bit", integerBits)
}
// The edges of the hilbert curve plane must have a power-of-two size,
// and for efficient arithmetic on the CPU, the length of the hilbert curve filling this plane
// has to fit within the CPU architecture's `int` (32 bit versus 64 bit).
//
// As it is a space filling curve, length == area. So the length of the curve is equal to width*height.
// therefore, the edge length of the plane should be the largest power of two less than
// - For 32 bit CPUs: sqrt(math.MaxInt32)
// - For 64 bit CPUs: sqrt(math.MaxInt64)
// (because it's a square root, in general, it will have half as many bits.
// and since 1 bit is used by the sign (-/+) of the number, its half minus 1
edgeSizeBits := (integerBits / 2) - 1
toReturn := &SpatialIndex2D{
hilbert: hilbert{
edgeLength: 1 << edgeSizeBits,
},
integerBits: integerBits,
edgeSizeBits: edgeSizeBits,
}
if integerBits == 32 {
toReturn.intToEightBytes = func(v int) []byte {
eightBytes := make([]byte, 8)
binary.BigEndian.PutUint32(eightBytes, uint32(v))
return eightBytes
}
toReturn.eightBytesToInt = func(eightBytes []byte) int {
return int(binary.BigEndian.Uint32(eightBytes[:4]))
}
} else {
toReturn.intToEightBytes = func(v int) []byte {
eightBytes := make([]byte, 8)
binary.BigEndian.PutUint64(eightBytes, uint64(v))
return eightBytes
}
toReturn.eightBytesToInt = func(eightBytes []byte) int {
return int(binary.BigEndian.Uint64(eightBytes[:8]))
}
}
return toReturn, nil
}
// returns the minimum and maximum values for x and y coordinates passed into the index.
// 64-bit SpatialIndex2D: -1073741823, 1073741823
// 32-bit SpatialIndex2D: -16383, 16383
func (index *SpatialIndex2D) GetValidInputRange() (int, int) {
halfHilbertEdgeLength := 1 << (index.edgeSizeBits - 1)
return -halfHilbertEdgeLength + 1, halfHilbertEdgeLength - 1
}
// returns two byte slices of length 8, one representing the smallest key in the index
// and the other representing the largest possible key in the index
// returns (as hex) 0000000000000000, 4000000000000000
// 32 bit SpatialIndex2Ds always leave the last 4 bytes blank.
func (index *SpatialIndex2D) GetOutputRange() ([]byte, []byte) {
return index.intToEightBytes(0), index.intToEightBytes(index.edgeLength * index.edgeLength)
}
// Returns a slice of 8 bytes which can be used as a key in a database index,
// to be spatial-range-queried by RectangleToIndexedRanges
func (index *SpatialIndex2D) GetIndexedPoint(x int, y int) ([]byte, error) {
// x and y can be negative, but the hilbert curve implementation is only defined over positive integers.
// so we have to attempt to transform x and y such that they are always positive.
// btw, `(index.edgeLength >> 1)` is just half of the edge length of the hilbert plane.
// so by adding (index.edgeLength >> 1) we are mapping from a -50%..50% range to a 0%..100% range.
// pointToDistanceAlongCurve will handle any out-of-bounds inputs & return an error for us.
curvePoint, err := index.pointToDistanceAlongCurve(x+(index.edgeLength>>1), y+(index.edgeLength>>1))
if err != nil {
return nil, err
}
return index.intToEightBytes(curvePoint), nil
}
// inverse of GetIndexedPoint. Return [x,y] position from an 8-byte spatial index key
func (index *SpatialIndex2D) GetPositionFromIndexedPoint(indexedPoint []byte) (int, int, error) {
if len(indexedPoint) < 8 {
return 0, 0, errors.New("GetPositionFromIndexedPoint requires at least 8 bytes")
}
x, y, err := index.distanceAlongCurveToPoint(index.eightBytesToInt(indexedPoint))
if err != nil {
return 0, 0, err
}
return x - (index.edgeLength >> 1), y - (index.edgeLength >> 1), nil
}
// Use this with a range query on a database index.
type ByteRange struct {
Start []byte
End []byte
}
// Returns a slice of 1 or more byte ranges (typically 1-4).
// The union of the results of database range queries over these ranges will contain AT LEAST
// all GetIndexedPoint(x,y) keys present within the rectangle defined by [x,y,width,height].
//
// The results will probably also contain records outside the rectangle, it's up to you to filter them out.
//
// iopsCostParam allows you to adjust a tradeoff between wasted I/O bandwidth and # of individual I/O operations.
// I think 1.0 is actually a very reasonable value to use for SSD & HDD
// (waste ~50% of bandwidth, save a lot of unneccessary I/O operations)
// if you have an extremely fast NVME SSD with a good driver, you might try 0.5 or 0.1, but I doubt it will make it any faster.
// 2 is probably way too much for any modern disk to benefit from, unless your data is VERY sparse
func (index *SpatialIndex2D) RectangleToIndexedRanges(x, y, width, height int, iopsCostParam float32) ([]ByteRange, error) {
// scale the universe down (rounding in such a way that the original rectangle is never cropped)
// until we reach a scale where sampling the hilbert curve over the entire area of the query rectangle
// will be quick and painless for the CPU.
reducedBits := 0
for width*height > 128 {
halfX := x / 2
if halfX != 0 {
x = halfX + (x % halfX)
} else {
x = 0
}
halfY := y / 2
if halfY != 0 {
y = halfY + (y % halfY)
} else {
y = 0
}
halfWidth := width / 2
halfHeight := width / 2
if halfWidth != 0 {
width = halfWidth + (width % halfWidth)
} else {
width = 1
}
if halfWidth != 0 {
width = halfWidth + (width % halfWidth)
} else {
width = 1
}
if halfHeight != 0 {
height = halfHeight + (height % halfHeight)
} else {
height = 1
}
reducedBits++
}
if (index.edgeSizeBits - reducedBits) < 3 {
return nil, fmt.Errorf("RectangleToIndexedRanges(): %d by %d rectangle is too large, unable to downsample it to a reasonable size.", width, height)
}
reducedHilbertPlaneEdgeLength := 1 << (index.edgeSizeBits - reducedBits)
// I noticed that this method of reducing the detail is not always accurate.
// (small sections along the edge of the rectangle can be missed by rouding errors)
// so I also expanded the rectangle on all sides by 1 "pixel" at the downscaled size,
// which seemed to eliminate about 90% of the errors.
// The remaining errors I noticed were so minor I felt like I could ignore them.
if reducedBits > 0 {
if x > 1 {
x--
}
if y > 1 {
y--
}
if x+width+1 < reducedHilbertPlaneEdgeLength {
width++
}
if x+width+1 < reducedHilbertPlaneEdgeLength {
width++
}
if y+height+1 < reducedHilbertPlaneEdgeLength {
height++
}
if y+height+1 < reducedHilbertPlaneEdgeLength {
height++
}
}
downsampledCurve := hilbert{edgeLength: reducedHilbertPlaneEdgeLength}
curvePoints := make([]int, width*height)
for i := 0; i < width; i++ {
for j := 0; j < height; j++ {
d, err := downsampledCurve.pointToDistanceAlongCurve(x+i+(downsampledCurve.edgeLength>>1), y+j+(downsampledCurve.edgeLength>>1))
if err != nil {
return nil, err
}
curvePoints[j*width+i] = d
}
}
sort.Ints(curvePoints)
ranges := [][]int{{curvePoints[0], curvePoints[0]}}
for i := 1; i < len(curvePoints); i++ {
distance := curvePoints[i] - curvePoints[i-1]
if float32(distance) > float32(width*height)*iopsCostParam {
ranges[len(ranges)-1][1] = curvePoints[i-1]
ranges = append(ranges, []int{curvePoints[i], curvePoints[i]})
}
}
ranges[len(ranges)-1][1] = curvePoints[len(curvePoints)-1]
byteRanges := make([]ByteRange, len(ranges))
for i, intRange := range ranges {
// Here is where we scale the universe back up before returning the result.
// shift the bits of the resulting curve points representing the beginning and ending
// of the segments to be queried.
//
// Note that they are shifted twice as many bits (aka, squared)
// because the units here are curve length / area.
startCurvePoint := (intRange[0] << (reducedBits * 2))
endCurvePoint := (intRange[1] << (reducedBits * 2))
byteRanges[i] = ByteRange{
Start: index.intToEightBytes(startCurvePoint),
End: index.intToEightBytes(endCurvePoint),
}
}
return byteRanges, nil
}