diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java index 0e2b50257ae37..f87806debfc7e 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java @@ -23,6 +23,7 @@ import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat; import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.IpFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperService; @@ -125,6 +126,9 @@ boolean useTSDBDocValuesFormat(final String field) { if (mappingLookup.getMapper(field) instanceof TimeSeriesIdFieldMapper) { return true; } + if (mappingLookup.getMapper(field) instanceof IpFieldMapper) { + return true; + } } return false; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java index f293eb86141b6..4fdf04f067d06 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java @@ -182,35 +182,57 @@ void encode(long[] in, DataOutput out) throws IOException { * Optimizes for encoding sorted fields where we expect a block to mostly either be the same value * or to make a transition from one value to a second one. *

- * Encodes blocks in the following format: + * The header is a vlong where the number of trailing ones defines the encoding strategy: *

- * The header (first 1 or 2 bits) describes how the data is encoded: - * */ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOException { assert in.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; int numRuns = 1; + long firstValue = in[0]; + long previousValue = firstValue; + boolean cyclic = false; + int cycleLength = 0; for (int i = 1; i < in.length; ++i) { - if (in[i - 1] != in[i]) { + long currentValue = in[i]; + if (previousValue != currentValue) { numRuns++; } + if (currentValue == firstValue && cycleLength != -1) { + if (cycleLength == 0) { + // first candidate cycle detected + cycleLength = i; + } else if (cycleLength == 1 || i % cycleLength != 0) { + // if the first two values are the same this isn't a cycle, it might be a run, though + // this also isn't a cycle if the index of the next occurrence of the first value + // isn't a multiple of the candidate cycle length + // we can stop looking for cycles now + cycleLength = -1; + } + } + previousValue = currentValue; + } + // if the cycle is too long, bit-packing may be more space efficient + int maxCycleLength = in.length / 4; + if (numRuns > 2 && cycleLength > 1 && cycleLength <= maxCycleLength) { + cyclic = true; + for (int i = cycleLength; i < in.length; ++i) { + if (in[i] != in[i - cycleLength]) { + cyclic = false; + break; + } + } } if (numRuns == 1 && bitsPerOrd < 63) { long value = in[0]; - // set first bit to 0 to indicate the block has a single run + // unset first bit (0 trailing ones) to indicate the block has a single run out.writeVLong(value << 1); } else if (numRuns == 2 && bitsPerOrd < 62) { - // set first two bits to 01 to indicate the block has two runs + // set 1 trailing bit to indicate the block has two runs out.writeVLong((in[0] << 2) | 0b01); int firstRunLen = in.length; for (int i = 1; i < in.length; ++i) { @@ -221,8 +243,15 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio } out.writeVInt(firstRunLen); 
out.writeZLong(in[in.length - 1] - in[0]); + } else if (cyclic) { + // set 3 trailing bits to indicate the block cycles through the same values + long headerAndCycleLength = ((long) cycleLength << 4) | 0b0111; + out.writeVLong(headerAndCycleLength); + for (int i = 0; i < cycleLength; i++) { + out.writeVLong(in[i]); + } } else { - // set first two bits to 11 to indicate the block is bit-packed + // set 2 trailing bits to indicate the block is bit-packed out.writeVLong(0b11); forUtil.encode(in, bitsPerOrd, out); } @@ -232,20 +261,32 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException assert out.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE : out.length; long v1 = in.readVLong(); - int header = (int) (v1 & 0b11L); - if (header == 0b00 || header == 0b10) { - // first bit is zero -> single run - Arrays.fill(out, v1 >>> 1); - } else if (header == 0b01) { - // first two bits are 01 -> two runs - v1 = v1 >>> 2; + int encoding = Long.numberOfTrailingZeros(~v1); + v1 >>>= encoding + 1; + if (encoding == 0) { + // single run + Arrays.fill(out, v1); + } else if (encoding == 1) { + // two runs int runLen = in.readVInt(); long v2 = v1 + in.readZLong(); Arrays.fill(out, 0, runLen, v1); Arrays.fill(out, runLen, out.length, v2); - } else { - // first two bits are 11 -> bit-packed + } else if (encoding == 2) { + // bit-packed forUtil.decode(bitsPerOrd, in, out); + } else if (encoding == 3) { + // cycle encoding + int cycleLength = (int) v1; + for (int i = 0; i < cycleLength; i++) { + out[i] = in.readVLong(); + } + int length = cycleLength; + while (length < out.length) { + int copyLength = Math.min(length, out.length - length); + System.arraycopy(out, 0, out, length, copyLength); + length += copyLength; + } } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java index bf0737ebda47b..570b9789e04dd 
100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java @@ -260,12 +260,59 @@ public void testEncodeOrdinalsNoRepetitions() throws IOException { doTestOrdinals(arr, 113); } + public void testEncodeOrdinalsBitPack3Bits() throws IOException { + long[] arr = new long[blockSize]; + Arrays.fill(arr, 4); + for (int i = 0; i < 4; i++) { + arr[i] = i; + } + doTestOrdinals(arr, 49); + } + + public void testEncodeOrdinalsCycle2() throws IOException { + long[] arr = new long[blockSize]; + Arrays.setAll(arr, i -> i % 2); + doTestOrdinals(arr, 3); + } + + public void testEncodeOrdinalsCycle3() throws IOException { + long[] arr = new long[blockSize]; + Arrays.setAll(arr, i -> i % 3); + doTestOrdinals(arr, 4); + } + + public void testEncodeOrdinalsLongCycle() throws IOException { + long[] arr = new long[blockSize]; + Arrays.setAll(arr, i -> i % 32); + doTestOrdinals(arr, 34); + } + + public void testEncodeOrdinalsCycleTooLong() throws IOException { + long[] arr = new long[blockSize]; + Arrays.setAll(arr, i -> i % 33); + // the cycle is too long and the values are bit-packed + doTestOrdinals(arr, 97); + } + + public void testEncodeOrdinalsAlmostCycle() throws IOException { + long[] arr = new long[blockSize]; + Arrays.setAll(arr, i -> i % 3); + arr[arr.length - 1] = 4; + doTestOrdinals(arr, 49); + } + + public void testEncodeOrdinalsDifferentCycles() throws IOException { + long[] arr = new long[blockSize]; + Arrays.setAll(arr, i -> i > 64 ? 
i % 4 : i % 3); + doTestOrdinals(arr, 33); + } + private void doTestOrdinals(long[] arr, long expectedNumBytes) throws IOException { long maxOrd = 0; for (long ord : arr) { maxOrd = Math.max(maxOrd, ord); } - final int bitsPerOrd = PackedInts.bitsRequired(maxOrd - 1); + final int bitsPerOrd = PackedInts.bitsRequired(maxOrd); final long[] expected = arr.clone(); try (Directory dir = newDirectory()) { try (IndexOutput out = dir.createOutput("tests.bin", IOContext.DEFAULT)) {