From a5e8bc817cee0033a75ebba8cd576e492d329b1d Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 8 Feb 2024 08:07:59 +0100 Subject: [PATCH 01/11] [TSDB] detect and efficiently encode cyclic ordinals This is beneficial for encoding dimensions that are multivalued, such as host.ip. --- .../codec/tsdb/ES87TSDBDocValuesEncoder.java | 70 ++++++++++++++++--- .../tsdb/ES87TSDBDocValuesEncoderTests.java | 43 +++++++++++- 2 files changed, 102 insertions(+), 11 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java index f293eb86141b6..486b0530ee431 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java @@ -187,23 +187,58 @@ void encode(long[] in, DataOutput out) throws IOException { *
  • byte 0: 1/2 bits header+6/7 bits data
  • *
  • byte 1..n: data
  • * - * The header (first 1 or 2 bits) describes how the data is encoded: + * The header (first 1-3 bits) describes how the data is encoded: * */ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOException { assert in.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; int numRuns = 1; + long firstValue = in[0]; + long previousValue = firstValue; + boolean cyclic = false; + int cycleLength = 0; for (int i = 1; i < in.length; ++i) { - if (in[i - 1] != in[i]) { + long currentValue = in[i]; + if (previousValue != currentValue) { numRuns++; } + if (currentValue == firstValue && cycleLength != -1) { + if (cycleLength == 0) { + // first candidate cycle detected + cycleLength = i; + } else if (i % cycleLength != 0) { + // this isn't a cycle if the index of the next occurrence of the first value + // isn't a multiple of the candidate cycle length + // we can stop looking for cycles + cycleLength = -1; + } + } + previousValue = currentValue; + } + if (numRuns > 2 && cycleLength > 1 && cycleLength < in.length >> 1) { + // check if the data cycles through the same values + cyclic = true; + outer: + for (int i = 0; i < cycleLength; i++) { + long v = in[i]; + for (int j = i + cycleLength; j < in.length; j+= cycleLength) { + if (v != in[j]) { + cyclic = false; + break outer; + } + } + } } if (numRuns == 1 && bitsPerOrd < 63) { long value = in[0]; @@ -221,6 +256,13 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio } out.writeVInt(firstRunLen); out.writeZLong(in[in.length - 1] - in[0]); + } else if (cyclic) { + // set first three bits to 111 to indicate the block cycles through the same values + long headerAndCycleLength = ((long) cycleLength << 3) | 0b111; + out.writeVLong(headerAndCycleLength); + for (int i = 0; i < cycleLength; i++) { + out.writeVLong(in[i]); + } } else { // set first two bits to 11 to indicate the block is bit-packed out.writeVLong(0b11); @@ -232,19 +274,27 @@ void decodeOrdinals(DataInput in, long[] 
out, int bitsPerOrd) throws IOException assert out.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE : out.length; long v1 = in.readVLong(); - int header = (int) (v1 & 0b11L); - if (header == 0b00 || header == 0b10) { - // first bit is zero -> single run + if ((v1 & 0b1L) == 0) { + // first bit is 0 -> single run Arrays.fill(out, v1 >>> 1); - } else if (header == 0b01) { + } else if ((v1 & 0b11L) == 0b01) { // first two bits are 01 -> two runs v1 = v1 >>> 2; int runLen = in.readVInt(); long v2 = v1 + in.readZLong(); Arrays.fill(out, 0, runLen, v1); Arrays.fill(out, runLen, out.length, v2); + } else if ((v1 & 0b111L) == 0b111L) { + // first three bits are 111 -> cycle + int cycleLength = (int) v1 >>> 3; + for (int i = 0; i < cycleLength; i++) { + out[i] = in.readVLong(); + } + for (int i = 0; i < out.length; i++) { + out[i] = out[i % cycleLength]; + } } else { - // first two bits are 11 -> bit-packed + // first three bits are 011 -> bit-packed forUtil.decode(bitsPerOrd, in, out); } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java index bf0737ebda47b..10ee8dfcf7ef3 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java @@ -260,12 +260,53 @@ public void testEncodeOrdinalsNoRepetitions() throws IOException { doTestOrdinals(arr, 113); } + public void testEncodeOrdinalsBitPack3Bits() throws IOException { + long[] arr = new long[blockSize]; + Arrays.fill(arr, 4); + for (int i = 0; i < 4; i++) { + arr[i] = i; + } + doTestOrdinals(arr, 49); + } + + public void testEncodeOrdinalsCycle() throws IOException { + long[] arr = new long[blockSize]; + Arrays.setAll(arr, i -> i % 3); + doTestOrdinals(arr, 4); + } + + public void testEncodeOrdinalsLongCycle() throws IOException { + long[] 
arr = new long[blockSize]; + Arrays.setAll(arr, i -> i % 63); + doTestOrdinals(arr, 65); + } + + public void testEncodeOrdinalsCycleTooLong() throws IOException { + long[] arr = new long[blockSize]; + Arrays.setAll(arr, i -> i % 64); + // the cycle is too long and the vales are bit-packed + doTestOrdinals(arr, 97); + } + + public void testEncodeOrdinalsAlmostCycle() throws IOException { + long[] arr = new long[blockSize]; + Arrays.setAll(arr, i -> i % 3); + arr[arr.length -1] = 4; + doTestOrdinals(arr, 49); + } + + public void testEncodeOrdinalsDifferentCycles() throws IOException { + long[] arr = new long[blockSize]; + Arrays.setAll(arr, i -> i > 64 ? i % 4 : i % 3); + doTestOrdinals(arr, 33); + } + private void doTestOrdinals(long[] arr, long expectedNumBytes) throws IOException { long maxOrd = 0; for (long ord : arr) { maxOrd = Math.max(maxOrd, ord); } - final int bitsPerOrd = PackedInts.bitsRequired(maxOrd - 1); + final int bitsPerOrd = PackedInts.bitsRequired(maxOrd); final long[] expected = arr.clone(); try (Directory dir = newDirectory()) { try (IndexOutput out = dir.createOutput("tests.bin", IOContext.DEFAULT)) { From 4a4b44ba0d0d904b70cd0b145bd432becb8128e6 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 8 Feb 2024 08:32:58 +0100 Subject: [PATCH 02/11] Apply spotless suggestions --- .../index/codec/tsdb/ES87TSDBDocValuesEncoder.java | 5 ++--- .../index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java index 486b0530ee431..f5ee64ada707f 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java @@ -229,10 +229,9 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) 
throws IOExceptio if (numRuns > 2 && cycleLength > 1 && cycleLength < in.length >> 1) { // check if the data cycles through the same values cyclic = true; - outer: - for (int i = 0; i < cycleLength; i++) { + outer: for (int i = 0; i < cycleLength; i++) { long v = in[i]; - for (int j = i + cycleLength; j < in.length; j+= cycleLength) { + for (int j = i + cycleLength; j < in.length; j += cycleLength) { if (v != in[j]) { cyclic = false; break outer; diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java index 10ee8dfcf7ef3..f711076cc9223 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java @@ -291,7 +291,7 @@ public void testEncodeOrdinalsCycleTooLong() throws IOException { public void testEncodeOrdinalsAlmostCycle() throws IOException { long[] arr = new long[blockSize]; Arrays.setAll(arr, i -> i % 3); - arr[arr.length -1] = 4; + arr[arr.length - 1] = 4; doTestOrdinals(arr, 49); } From ed8f3752288cf5539d9fc1a590da0829a2e96cfa Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 8 Feb 2024 09:01:55 +0100 Subject: [PATCH 03/11] Update docs/changelog/105271.yaml --- docs/changelog/105271.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/105271.yaml diff --git a/docs/changelog/105271.yaml b/docs/changelog/105271.yaml new file mode 100644 index 0000000000000..559eb2c5c1bfc --- /dev/null +++ b/docs/changelog/105271.yaml @@ -0,0 +1,5 @@ +pr: 105271 +summary: Efficiently encode multi-valued dimensions +area: TSDB +type: enhancement +issues: [] From a5fa75df5ac62078d3f1bce29f419819857f664e Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 8 Feb 2024 10:40:14 +0100 Subject: [PATCH 04/11] Delete docs/changelog/105271.yaml --- 
docs/changelog/105271.yaml | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 docs/changelog/105271.yaml diff --git a/docs/changelog/105271.yaml b/docs/changelog/105271.yaml deleted file mode 100644 index 559eb2c5c1bfc..0000000000000 --- a/docs/changelog/105271.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 105271 -summary: Efficiently encode multi-valued dimensions -area: TSDB -type: enhancement -issues: [] From 837f69c96498c308f28c17bc6665e6855fd94073 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 8 Feb 2024 10:28:37 +0100 Subject: [PATCH 05/11] Encoding is defined by the number of trailing ones --- .../codec/tsdb/ES87TSDBDocValuesEncoder.java | 52 ++++++++----------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java index f5ee64ada707f..317767108da93 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java @@ -182,23 +182,12 @@ void encode(long[] in, DataOutput out) throws IOException { * Optimizes for encoding sorted fields where we expect a block to mostly either be the same value * or to make a transition from one value to a second one. *

-     * Encodes blocks in the following format:
+     * The header is a vlong where the number of trailing ones defines the encoding strategy:
      * <ul>
-     *     <li>byte 0: 1/2 bits header+6/7 bits data</li>
-     *     <li>byte 1..n: data</li>
-     * </ul>
-     * The header (first 1-3 bits) describes how the data is encoded:
-     * <ul>
-     *     <li>?0: block has a single value (vlong), 2nd bit already contains data</li>
-     *     <li>
-     *         01: block has two runs, data contains value 1 (vlong), run-length (vint) of value 1,
-     *         and delta from first to second value (zlong)
-     *     </li>
-     *     <li>
-     *         111: block contains cyclic data, data contains cycle length (vlong),
-     *         and the values until the cycle repeats (encoded as vlongs)
-     *     </li>
-     *     <li>011: block is bit-packed</li>
+     *     <li>0: single run</li>
+     *     <li>1: two runs</li>
+     *     <li>2: cycle</li>
+     *     <li>7: bit-packed</li>
      * </ul>
    */ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOException { @@ -241,10 +230,10 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio } if (numRuns == 1 && bitsPerOrd < 63) { long value = in[0]; - // set first bit to 0 to indicate the block has a single run + // set first bit to 0 (0 trailing bits) to indicate the block has a single run out.writeVLong(value << 1); } else if (numRuns == 2 && bitsPerOrd < 62) { - // set first two bits to 01 to indicate the block has two runs + // set 1 trailing bit to indicate the block has two runs out.writeVLong((in[0] << 2) | 0b01); int firstRunLen = in.length; for (int i = 1; i < in.length; ++i) { @@ -256,15 +245,15 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio out.writeVInt(firstRunLen); out.writeZLong(in[in.length - 1] - in[0]); } else if (cyclic) { - // set first three bits to 111 to indicate the block cycles through the same values - long headerAndCycleLength = ((long) cycleLength << 3) | 0b111; + // set 2 trailing bits to indicate the block cycles through the same values + long headerAndCycleLength = ((long) cycleLength << 4) | 0b011; out.writeVLong(headerAndCycleLength); for (int i = 0; i < cycleLength; i++) { out.writeVLong(in[i]); } } else { - // set first two bits to 11 to indicate the block is bit-packed - out.writeVLong(0b11); + // set 7 trailing bits to indicate the block is bit-packed + out.writeVLong(0b1111111); forUtil.encode(in, bitsPerOrd, out); } } @@ -273,27 +262,28 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException assert out.length == ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE : out.length; long v1 = in.readVLong(); - if ((v1 & 0b1L) == 0) { - // first bit is 0 -> single run + int encoding = Long.numberOfTrailingZeros(~v1); + if (encoding == 0) { + // single run Arrays.fill(out, v1 >>> 1); - } else if ((v1 & 0b11L) == 0b01) { - // first two bits are 01 -> two runs + } else if 
(encoding == 1) { + // two runs v1 = v1 >>> 2; int runLen = in.readVInt(); long v2 = v1 + in.readZLong(); Arrays.fill(out, 0, runLen, v1); Arrays.fill(out, runLen, out.length, v2); - } else if ((v1 & 0b111L) == 0b111L) { - // first three bits are 111 -> cycle - int cycleLength = (int) v1 >>> 3; + } else if (encoding == 2) { + // cycle encoding + int cycleLength = (int) v1 >>> 4; for (int i = 0; i < cycleLength; i++) { out[i] = in.readVLong(); } for (int i = 0; i < out.length; i++) { out[i] = out[i % cycleLength]; } - } else { - // first three bits are 011 -> bit-packed + } else if (encoding == 7) { + // bit-packed forUtil.decode(bitsPerOrd, in, out); } } From 7ea5be2763808612ff6af77e83132bd30dcc248a Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 8 Feb 2024 11:05:36 +0100 Subject: [PATCH 06/11] Optimize loops --- .../codec/tsdb/ES87TSDBDocValuesEncoder.java | 29 ++++++++++--------- .../tsdb/ES87TSDBDocValuesEncoderTests.java | 14 ++++++--- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java index 317767108da93..227cf97554cad 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java @@ -206,8 +206,10 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio if (cycleLength == 0) { // first candidate cycle detected cycleLength = i; - } else if (i % cycleLength != 0) { - // this isn't a cycle if the index of the next occurrence of the first value + } else if (cycleLength == 1 || i % cycleLength != 0) { + // this isn't a cycle if the first two values are the same, + // because ordinals are a sorted set, it might be a run, though + // this also isn't a cycle if the index of the next occurrence of the first value // 
isn't a multiple of the candidate cycle length // we can stop looking for cycles cycleLength = -1; @@ -215,16 +217,14 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio } previousValue = currentValue; } - if (numRuns > 2 && cycleLength > 1 && cycleLength < in.length >> 1) { - // check if the data cycles through the same values + // if the cycle is too long, bit-packing may be more space efficient + int maxCycleLength = in.length / 4; + if (numRuns > 2 && cycleLength > 1 && cycleLength <= maxCycleLength) { cyclic = true; - outer: for (int i = 0; i < cycleLength; i++) { - long v = in[i]; - for (int j = i + cycleLength; j < in.length; j += cycleLength) { - if (v != in[j]) { - cyclic = false; - break outer; - } + for (int i = cycleLength; i < in.length; ++i) { + if (in[i] != in[i - cycleLength]) { + cyclic = false; + break; } } } @@ -279,8 +279,11 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException for (int i = 0; i < cycleLength; i++) { out[i] = in.readVLong(); } - for (int i = 0; i < out.length; i++) { - out[i] = out[i % cycleLength]; + int length = cycleLength; + while (length < out.length) { + int copyLength = Math.min(length, out.length - length); + System.arraycopy(out, 0, out, length, copyLength); + length += copyLength; } } else if (encoding == 7) { // bit-packed diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java index f711076cc9223..570b9789e04dd 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoderTests.java @@ -269,7 +269,13 @@ public void testEncodeOrdinalsBitPack3Bits() throws IOException { doTestOrdinals(arr, 49); } - public void testEncodeOrdinalsCycle() throws IOException { + public void 
testEncodeOrdinalsCycle2() throws IOException { + long[] arr = new long[blockSize]; + Arrays.setAll(arr, i -> i % 2); + doTestOrdinals(arr, 3); + } + + public void testEncodeOrdinalsCycle3() throws IOException { long[] arr = new long[blockSize]; Arrays.setAll(arr, i -> i % 3); doTestOrdinals(arr, 4); @@ -277,13 +283,13 @@ public void testEncodeOrdinalsCycle() throws IOException { public void testEncodeOrdinalsLongCycle() throws IOException { long[] arr = new long[blockSize]; - Arrays.setAll(arr, i -> i % 63); - doTestOrdinals(arr, 65); + Arrays.setAll(arr, i -> i % 32); + doTestOrdinals(arr, 34); } public void testEncodeOrdinalsCycleTooLong() throws IOException { long[] arr = new long[blockSize]; - Arrays.setAll(arr, i -> i % 64); + Arrays.setAll(arr, i -> i % 33); // the cycle is too long and the vales are bit-packed doTestOrdinals(arr, 97); } From c7a0f6541b204b8af90fd99f1911ce32f2412ebd Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 8 Feb 2024 11:08:02 +0100 Subject: [PATCH 07/11] Use TSDB codec for IP fields --- .../org/elasticsearch/index/codec/PerFieldMapperCodec.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java index 0e2b50257ae37..f87806debfc7e 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java @@ -23,6 +23,7 @@ import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat; import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.IpFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperService; @@ -125,6 +126,9 @@ boolean useTSDBDocValuesFormat(final String field) { if 
(mappingLookup.getMapper(field) instanceof TimeSeriesIdFieldMapper) { return true; } + if (mappingLookup.getMapper(field) instanceof IpFieldMapper) { + return true; + } } return false; } From b39bf1277b0f891276ff2daa6a1e74c99d5ba2c6 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 8 Feb 2024 11:21:37 +0100 Subject: [PATCH 08/11] Simplify comment --- .../index/codec/tsdb/ES87TSDBDocValuesEncoder.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java index 227cf97554cad..7de1137771bb9 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java @@ -207,11 +207,10 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio // first candidate cycle detected cycleLength = i; } else if (cycleLength == 1 || i % cycleLength != 0) { - // this isn't a cycle if the first two values are the same, - // because ordinals are a sorted set, it might be a run, though + // if the first two values are the same this isn't a cycle, it might be a run, though // this also isn't a cycle if the index of the next occurrence of the first value // isn't a multiple of the candidate cycle length - // we can stop looking for cycles + // we can stop looking for cycles now cycleLength = -1; } } From 7eae8ebe722bd898a39ad6740a0e904cf066145f Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 8 Feb 2024 11:36:56 +0100 Subject: [PATCH 09/11] Restore 2 trailing ones to mean bit-packed --- .../codec/tsdb/ES87TSDBDocValuesEncoder.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java 
b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java index 7de1137771bb9..c7fd4363fa769 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java @@ -186,8 +186,8 @@ void encode(long[] in, DataOutput out) throws IOException { *
<ul>
      *     <li>0: single run</li>
      *     <li>1: two runs</li>
-     *     <li>2: cycle</li>
-     *     <li>7: bit-packed</li>
+     *     <li>2: bit-packed</li>
+     *     <li>3: cycle</li>
      * </ul>
    */ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOException { @@ -244,15 +244,15 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio out.writeVInt(firstRunLen); out.writeZLong(in[in.length - 1] - in[0]); } else if (cyclic) { - // set 2 trailing bits to indicate the block cycles through the same values - long headerAndCycleLength = ((long) cycleLength << 4) | 0b011; + // set 3 trailing bits to indicate the block cycles through the same values + long headerAndCycleLength = ((long) cycleLength << 4) | 0b0111; out.writeVLong(headerAndCycleLength); for (int i = 0; i < cycleLength; i++) { out.writeVLong(in[i]); } } else { - // set 7 trailing bits to indicate the block is bit-packed - out.writeVLong(0b1111111); + // set 2 trailing bits to indicate the block is bit-packed + out.writeVLong(0b11); forUtil.encode(in, bitsPerOrd, out); } } @@ -273,6 +273,9 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException Arrays.fill(out, 0, runLen, v1); Arrays.fill(out, runLen, out.length, v2); } else if (encoding == 2) { + // bit-packed + forUtil.decode(bitsPerOrd, in, out); + } else if (encoding == 3) { // cycle encoding int cycleLength = (int) v1 >>> 4; for (int i = 0; i < cycleLength; i++) { @@ -284,9 +287,6 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException System.arraycopy(out, 0, out, length, copyLength); length += copyLength; } - } else if (encoding == 7) { - // bit-packed - forUtil.decode(bitsPerOrd, in, out); } } From c0b7fff612ae0a53a414267fa0c354b7ddca5e51 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 8 Feb 2024 14:30:46 +0100 Subject: [PATCH 10/11] Strip encoding from first value --- .../index/codec/tsdb/ES87TSDBDocValuesEncoder.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java 
b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java index c7fd4363fa769..685c57bf91165 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java @@ -262,12 +262,12 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException long v1 = in.readVLong(); int encoding = Long.numberOfTrailingZeros(~v1); + v1 >>>= encoding + 1; if (encoding == 0) { // single run - Arrays.fill(out, v1 >>> 1); + Arrays.fill(out, v1); } else if (encoding == 1) { // two runs - v1 = v1 >>> 2; int runLen = in.readVInt(); long v2 = v1 + in.readZLong(); Arrays.fill(out, 0, runLen, v1); @@ -277,7 +277,7 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException forUtil.decode(bitsPerOrd, in, out); } else if (encoding == 3) { // cycle encoding - int cycleLength = (int) v1 >>> 4; + int cycleLength = (int) v1; for (int i = 0; i < cycleLength; i++) { out[i] = in.readVLong(); } From 5961b554f555e108de8ae153c096286d76d71fe4 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Thu, 8 Feb 2024 14:32:40 +0100 Subject: [PATCH 11/11] Slight wording change in comment --- .../index/codec/tsdb/ES87TSDBDocValuesEncoder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java index 685c57bf91165..4fdf04f067d06 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java @@ -229,7 +229,7 @@ void encodeOrdinals(long[] in, DataOutput out, int bitsPerOrd) throws IOExceptio } if (numRuns == 1 && bitsPerOrd < 63) { long value = in[0]; - // set first bit to 0 (0 trailing bits) to indicate the 
block has a single run + // unset first bit (0 trailing ones) to indicate the block has a single run out.writeVLong(value << 1); } else if (numRuns == 2 && bitsPerOrd < 62) { // set 1 trailing bit to indicate the block has two runs