Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.IpFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperService;
Expand Down Expand Up @@ -125,6 +126,9 @@ boolean useTSDBDocValuesFormat(final String field) {
if (mappingLookup.getMapper(field) instanceof TimeSeriesIdFieldMapper) {
return true;
}
if (mappingLookup.getMapper(field) instanceof IpFieldMapper) {
return true;
}
}
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,35 +182,57 @@ void encode(long[] in, DataOutput out) throws IOException {
* Optimizes for encoding sorted fields where we expect a block to mostly either be the same value
* or to make a transition from one value to a second one.
* <p>
* Encodes blocks in the following format:
* The header is a vlong where the number of trailing ones defines the encoding strategy:
* <ul>
* <li>byte 0: 1/2 bits header+6/7 bits data</li>
* <li>byte 1..n: data</li>
* </ul>
* The header (first 1 or 2 bits) describes how the data is encoded:
* <ul>
* <li>?0 block has a single value (vlong), 2nd bit already contains data</li>
* <li>
* 01 block has two runs, data contains value 1 (vlong), run-length (vint) of value 1,
* and delta from first to second value (zlong)
* </li>
* <li>11 block is bit-packed</li>
* <li>0: single run</li>
* <li>1: two runs</li>
* <li>2: bit-packed</li>
* <li>3: cycle</li>
* </ul>
*/
void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOException {
assert in.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;
int numRuns = 1;
long firstValue = in[0];
long previousValue = firstValue;
boolean cyclic = false;
int cycleLength = 0;
for (int i = 1; i < in.length; ++i) {
if (in[i - 1] != in[i]) {
long currentValue = in[i];
if (previousValue != currentValue) {
numRuns++;
}
if (currentValue == firstValue && cycleLength != -1) {
if (cycleLength == 0) {
// first candidate cycle detected
cycleLength = i;
} else if (cycleLength == 1 || i % cycleLength != 0) {
// if the first two values are the same this isn't a cycle, it might be a run, though
// this also isn't a cycle if the index of the next occurrence of the first value
// isn't a multiple of the candidate cycle length
// we can stop looking for cycles now
cycleLength = -1;
}
}
previousValue = currentValue;
}
// if the cycle is too long, bit-packing may be more space efficient
int maxCycleLength = in.length / 4;
if (numRuns > 2 && cycleLength > 1 && cycleLength <= maxCycleLength) {
cyclic = true;
for (int i = cycleLength; i < in.length; ++i) {
if (in[i] != in[i - cycleLength]) {
cyclic = false;
break;
}
}
}
if (numRuns == 1 && bitsPerOrd < 63) {
long value = in[0];
// set first bit to 0 to indicate the block has a single run
// unset first bit (0 trailing ones) to indicate the block has a single run
out.writeVLong(value << 1);
} else if (numRuns == 2 && bitsPerOrd < 62) {
// set first two bits to 01 to indicate the block has two runs
// set 1 trailing bit to indicate the block has two runs
out.writeVLong((in[0] << 2) | 0b01);
int firstRunLen = in.length;
for (int i = 1; i < in.length; ++i) {
Expand All @@ -221,8 +243,15 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio
}
out.writeVInt(firstRunLen);
out.writeZLong(in[in.length - 1] - in[0]);
} else if (cyclic) {
// set 3 trailing bits to indicate the block cycles through the same values
long headerAndCycleLength = ((long) cycleLength << 4) | 0b0111;
out.writeVLong(headerAndCycleLength);
for (int i = 0; i < cycleLength; i++) {
out.writeVLong(in[i]);
}
} else {
// set first two bits to 11 to indicate the block is bit-packed
// set 2 trailing bits to indicate the block is bit-packed
out.writeVLong(0b11);
forUtil.encode(in, bitsPerOrd, out);
}
Expand All @@ -232,20 +261,32 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException
assert out.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE : out.length;

long v1 = in.readVLong();
int header = (int) (v1 & 0b11L);
if (header == 0b00 || header == 0b10) {
// first bit is zero -> single run
Arrays.fill(out, v1 >>> 1);
} else if (header == 0b01) {
// first two bits are 01 -> two runs
v1 = v1 >>> 2;
int encoding = Long.numberOfTrailingZeros(~v1);
v1 >>>= encoding + 1;
if (encoding == 0) {
// single run
Arrays.fill(out, v1);
} else if (encoding == 1) {
// two runs
int runLen = in.readVInt();
long v2 = v1 + in.readZLong();
Arrays.fill(out, 0, runLen, v1);
Arrays.fill(out, runLen, out.length, v2);
} else {
// first two bits are 11 -> bit-packed
} else if (encoding == 2) {
// bit-packed
forUtil.decode(bitsPerOrd, in, out);
} else if (encoding == 3) {
// cycle encoding
int cycleLength = (int) v1;
for (int i = 0; i < cycleLength; i++) {
out[i] = in.readVLong();
}
int length = cycleLength;
while (length < out.length) {
int copyLength = Math.min(length, out.length - length);
System.arraycopy(out, 0, out, length, copyLength);
length += copyLength;
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -260,12 +260,59 @@ public void testEncodeOrdinalsNoRepetitions() throws IOException {
doTestOrdinals(arr, 113);
}

public void testEncodeOrdinalsBitPack3Bits() throws IOException {
    // Block is all 4s, except that the first four slots hold 0, 1, 2, 3.
    final long[] ordinals = new long[blockSize];
    Arrays.fill(ordinals, 4);
    int slot = 0;
    while (slot < 4) {
        ordinals[slot] = slot;
        slot++;
    }
    // 49 is the expected number of encoded bytes for this block.
    doTestOrdinals(ordinals, 49);
}

public void testEncodeOrdinalsCycle2() throws IOException {
    // Alternating 0, 1, 0, 1, ... across the whole block.
    final long[] ordinals = new long[blockSize];
    for (int idx = 0; idx < ordinals.length; idx++) {
        ordinals[idx] = idx % 2;
    }
    // 3 is the expected number of encoded bytes for this block.
    doTestOrdinals(ordinals, 3);
}

public void testEncodeOrdinalsCycle3() throws IOException {
    // Repeating 0, 1, 2, 0, 1, 2, ... across the whole block.
    final long[] ordinals = new long[blockSize];
    for (int idx = 0; idx < ordinals.length; idx++) {
        ordinals[idx] = idx % 3;
    }
    // 4 is the expected number of encoded bytes for this block.
    doTestOrdinals(ordinals, 4);
}

public void testEncodeOrdinalsLongCycle() throws IOException {
    // Repeating 0..31 across the whole block.
    final long[] ordinals = new long[blockSize];
    for (int idx = 0; idx < ordinals.length; idx++) {
        ordinals[idx] = idx % 32;
    }
    // 34 is the expected number of encoded bytes for this block.
    doTestOrdinals(ordinals, 34);
}

public void testEncodeOrdinalsCycleTooLong() throws IOException {
    // Repeating 0..32 across the whole block.
    final long[] ordinals = new long[blockSize];
    for (int idx = 0; idx < ordinals.length; idx++) {
        ordinals[idx] = idx % 33;
    }
    // the cycle is too long, so the values are bit-packed instead
    doTestOrdinals(ordinals, 97);
}

public void testEncodeOrdinalsAlmostCycle() throws IOException {
    // Repeating 0, 1, 2, ... with the very last slot overwritten by 4, breaking the cycle.
    final long[] ordinals = new long[blockSize];
    for (int idx = 0; idx < ordinals.length; idx++) {
        ordinals[idx] = idx % 3;
    }
    final int lastSlot = ordinals.length - 1;
    ordinals[lastSlot] = 4;
    // 49 is the expected number of encoded bytes for this block.
    doTestOrdinals(ordinals, 49);
}

public void testEncodeOrdinalsDifferentCycles() throws IOException {
    // Indices 0..64 follow a period-3 pattern; indices above 64 follow a period-4 pattern.
    final long[] ordinals = new long[blockSize];
    for (int idx = 0; idx < ordinals.length; idx++) {
        if (idx > 64) {
            ordinals[idx] = idx % 4;
        } else {
            ordinals[idx] = idx % 3;
        }
    }
    // 33 is the expected number of encoded bytes for this block.
    doTestOrdinals(ordinals, 33);
}

private void doTestOrdinals(long[] arr, long expectedNumBytes) throws IOException {
long maxOrd = 0;
for (long ord : arr) {
maxOrd = Math.max(maxOrd, ord);
}
final int bitsPerOrd = PackedInts.bitsRequired(maxOrd - 1);
final int bitsPerOrd = PackedInts.bitsRequired(maxOrd);
final long[] expected = arr.clone();
try (Directory dir = newDirectory()) {
try (IndexOutput out = dir.createOutput("tests.bin", IOContext.DEFAULT)) {
Expand Down