diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java
index 0e2b50257ae37..f87806debfc7e 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java
@@ -23,6 +23,7 @@
import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.IdFieldMapper;
+import org.elasticsearch.index.mapper.IpFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperService;
@@ -125,6 +126,9 @@ boolean useTSDBDocValuesFormat(final String field) {
if (mappingLookup.getMapper(field) instanceof TimeSeriesIdFieldMapper) {
return true;
}
+ if (mappingLookup.getMapper(field) instanceof IpFieldMapper) {
+ return true;
+ }
}
return false;
}
diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java
index f293eb86141b6..4fdf04f067d06 100644
--- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java
+++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java
@@ -182,35 +182,57 @@ void encode(long[] in, DataOutput out) throws IOException {
* Optimizes for encoding sorted fields where we expect a block to mostly either be the same value
* or to make a transition from one value to a second one.
*
- * Encodes blocks in the following format:
+ * The header is a vlong where the number of trailing 1-bits defines the encoding strategy (the remaining high bits, when used, hold the first run value or the cycle length):
*
- * - byte 0: 1/2 bits header+6/7 bits data
- * - byte 1..n: data
- *
- * The header (first 1 or 2 bits) describes how the data is encoded:
- *
- * - ?0 block has a single value (vlong), 2nd bit already contains data
- * -
- * 01 block has two runs, data contains value 1 (vlong), run-length (vint) of value 1,
- * and delta from first to second value (zlong)
- *
- * - 11 block is bit-packed
+ * - 0: single run
+ * - 1: two runs
+ * - 2: bit-packed
+ * - 3: cycle
*
*/
void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOException {
assert in.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE;
int numRuns = 1;
+ long firstValue = in[0];
+ long previousValue = firstValue;
+ boolean cyclic = false;
+ int cycleLength = 0;
for (int i = 1; i < in.length; ++i) {
- if (in[i - 1] != in[i]) {
+ long currentValue = in[i];
+ if (previousValue != currentValue) {
numRuns++;
}
+ if (currentValue == firstValue && cycleLength != -1) {
+ if (cycleLength == 0) {
+ // first candidate cycle detected
+ cycleLength = i;
+ } else if (cycleLength == 1 || i % cycleLength != 0) {
+ // a candidate length of 1 means the first two values are equal: that isn't a cycle, though it might be a run.
+ // it also isn't a cycle if a later occurrence of the first value sits at an index that
+ // isn't a multiple of the candidate cycle length.
+ // in either case we can stop looking for cycles now
+ cycleLength = -1;
+ }
+ }
+ previousValue = currentValue;
+ }
+ // if the cycle is too long, bit-packing may be more space efficient
+ int maxCycleLength = in.length / 4;
+ if (numRuns > 2 && cycleLength > 1 && cycleLength <= maxCycleLength) {
+ cyclic = true;
+ for (int i = cycleLength; i < in.length; ++i) {
+ if (in[i] != in[i - cycleLength]) {
+ cyclic = false;
+ break;
+ }
+ }
}
if (numRuns == 1 && bitsPerOrd < 63) {
long value = in[0];
- // set first bit to 0 to indicate the block has a single run
+ // unset first bit (0 trailing ones) to indicate the block has a single run
out.writeVLong(value << 1);
} else if (numRuns == 2 && bitsPerOrd < 62) {
- // set first two bits to 01 to indicate the block has two runs
+ // set 1 trailing bit to indicate the block has two runs
out.writeVLong((in[0] << 2) | 0b01);
int firstRunLen = in.length;
for (int i = 1; i < in.length; ++i) {
@@ -221,8 +243,15 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio
}
out.writeVInt(firstRunLen);
out.writeZLong(in[in.length - 1] - in[0]);
+ } else if (cyclic) {
+ // set 3 trailing bits to indicate the block cycles through the same values
+ long headerAndCycleLength = ((long) cycleLength << 4) | 0b0111;
+ out.writeVLong(headerAndCycleLength);
+ for (int i = 0; i < cycleLength; i++) {
+ out.writeVLong(in[i]);
+ }
} else {
- // set first two bits to 11 to indicate the block is bit-packed
+ // set 2 trailing bits to indicate the block is bit-packed
out.writeVLong(0b11);
forUtil.encode(in, bitsPerOrd, out);
}
@@ -232,20 +261,32 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException
assert out.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE : out.length;
long v1 = in.readVLong();
- int header = (int) (v1 & 0b11L);
- if (header == 0b00 || header == 0b10) {
- // first bit is zero -> single run
- Arrays.fill(out, v1 >>> 1);
- } else if (header == 0b01) {
- // first two bits are 01 -> two runs
- v1 = v1 >>> 2;
+ int encoding = Long.numberOfTrailingZeros(~v1);
+ v1 >>>= encoding + 1;
+ if (encoding == 0) {
+ // single run
+ Arrays.fill(out, v1);
+ } else if (encoding == 1) {
+ // two runs
int runLen = in.readVInt();
long v2 = v1 + in.readZLong();
Arrays.fill(out, 0, runLen, v1);
Arrays.fill(out, runLen, out.length, v2);
- } else {
- // first two bits are 11 -> bit-packed
+ } else if (encoding == 2) {
+ // bit-packed
forUtil.decode(bitsPerOrd, in, out);
+ } else if (encoding == 3) {
+ // cycle encoding
+ int cycleLength = (int) v1;
+ for (int i = 0; i < cycleLength; i++) {
+ out[i] = in.readVLong();
+ }
+ int length = cycleLength;
+ while (length < out.length) {
+ int copyLength = Math.min(length, out.length - length);
+ System.arraycopy(out, 0, out, length, copyLength);
+ length += copyLength;
+ }
}
}
diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java
index bf0737ebda47b..570b9789e04dd 100644
--- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java
+++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java
@@ -260,12 +260,59 @@ public void testEncodeOrdinalsNoRepetitions() throws IOException {
doTestOrdinals(arr, 113);
}
+ public void testEncodeOrdinalsBitPack3Bits() throws IOException {
+ long[] arr = new long[blockSize];
+ Arrays.fill(arr, 4);
+ for (int i = 0; i < 4; i++) {
+ arr[i] = i;
+ }
+ doTestOrdinals(arr, 49);
+ }
+
+ public void testEncodeOrdinalsCycle2() throws IOException {
+ long[] arr = new long[blockSize];
+ Arrays.setAll(arr, i -> i % 2);
+ doTestOrdinals(arr, 3);
+ }
+
+ public void testEncodeOrdinalsCycle3() throws IOException {
+ long[] arr = new long[blockSize];
+ Arrays.setAll(arr, i -> i % 3);
+ doTestOrdinals(arr, 4);
+ }
+
+ public void testEncodeOrdinalsLongCycle() throws IOException {
+ long[] arr = new long[blockSize];
+ Arrays.setAll(arr, i -> i % 32);
+ doTestOrdinals(arr, 34);
+ }
+
+ public void testEncodeOrdinalsCycleTooLong() throws IOException {
+ long[] arr = new long[blockSize];
+ Arrays.setAll(arr, i -> i % 33);
+ // the cycle is too long and the values are bit-packed
+ doTestOrdinals(arr, 97);
+ }
+
+ public void testEncodeOrdinalsAlmostCycle() throws IOException {
+ long[] arr = new long[blockSize];
+ Arrays.setAll(arr, i -> i % 3);
+ arr[arr.length - 1] = 4;
+ doTestOrdinals(arr, 49);
+ }
+
+ public void testEncodeOrdinalsDifferentCycles() throws IOException {
+ long[] arr = new long[blockSize];
+ Arrays.setAll(arr, i -> i > 64 ? i % 4 : i % 3);
+ doTestOrdinals(arr, 33);
+ }
+
private void doTestOrdinals(long[] arr, long expectedNumBytes) throws IOException {
long maxOrd = 0;
for (long ord : arr) {
maxOrd = Math.max(maxOrd, ord);
}
- final int bitsPerOrd = PackedInts.bitsRequired(maxOrd - 1);
+ final int bitsPerOrd = PackedInts.bitsRequired(maxOrd);
final long[] expected = arr.clone();
try (Directory dir = newDirectory()) {
try (IndexOutput out = dir.createOutput("tests.bin", IOContext.DEFAULT)) {