diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java index 641a1794c9d4b..71164e35ad557 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java @@ -39,7 +39,6 @@ import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.TearDown; import org.openjdk.jmh.annotations.Threads; import org.openjdk.jmh.annotations.Warmup; import org.openjdk.jmh.profile.AsyncProfiler; @@ -51,9 +50,8 @@ import java.io.IOException; import java.nio.file.Files; import java.util.Random; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; @BenchmarkMode(Mode.SingleShotTime) @OutputTimeUnit(TimeUnit.MILLISECONDS) @@ -71,23 +69,10 @@ public class TSDBDocValuesMergeBenchmark { LogConfigurator.setNodeName("test"); } - @Param("20431204") - private int nDocs; - - @Param("1000") - private int deltaTime; - - @Param("42") - private int seed; - private static final String TIMESTAMP_FIELD = "@timestamp"; private static final String HOSTNAME_FIELD = "host.name"; private static final long BASE_TIMESTAMP = 1704067200000L; - private IndexWriter indexWriterWithoutOptimizedMerge; - private IndexWriter indexWriterWithOptimizedMerge; - private ExecutorService executorService; - public static void main(String[] args) throws RunnerException { final Options options = new OptionsBuilder().include(TSDBDocValuesMergeBenchmark.class.getSimpleName()) .addProfiler(AsyncProfiler.class) @@ -96,78 +81,168 @@ public static void main(String[] args) throws RunnerException { new Runner(options).run(); } - @Setup(Level.Trial) - public void setup() throws IOException { - executorService = Executors.newSingleThreadExecutor(); + @State(Scope.Benchmark) + public static class StateDenseWithoutOptimizeMerge { - final Directory tempDirectoryWithoutDocValuesSkipper = FSDirectory.open(Files.createTempDirectory("temp1-")); - final Directory tempDirectoryWithDocValuesSkipper = FSDirectory.open(Files.createTempDirectory("temp2-")); + @Param("20431204") + private int nDocs; + + @Param("1000") + private int deltaTime; + + @Param("42") + private int seed; + + private Directory directory; + private final Supplier iwc = () -> createIndexWriterConfig(false); + + @Setup(Level.Trial) + public void setup() throws IOException { + directory = FSDirectory.open(Files.createTempDirectory("temp2-")); + createIndex(directory, iwc.get(), false, nDocs, deltaTime, seed); + } - indexWriterWithoutOptimizedMerge = createIndex(tempDirectoryWithoutDocValuesSkipper, false); - indexWriterWithOptimizedMerge = createIndex(tempDirectoryWithDocValuesSkipper, true); } - private IndexWriter createIndex(final Directory directory, final boolean optimizedMergeEnabled) throws IOException { - final var iwc = createIndexWriterConfig(optimizedMergeEnabled); - long counter1 = 0; - long counter2 = 10_000_000; - long[] gauge1Values = new long[] { 2, 4, 6, 8, 10, 12, 14, 16 }; - long[] gauge2Values = new long[] { -2, -4, -6, -8, -10, -12, -14, -16 }; - int numHosts = 1000; - String[] tags = new String[] { "tag_1", "tag_2", "tag_3", "tag_4", "tag_5", "tag_6", "tag_7", "tag_8" }; + @Benchmark + public void forceMergeDenseWithoutOptimizedMerge(StateDenseWithoutOptimizeMerge state) throws IOException { + forceMerge(state.directory, state.iwc.get()); + } - final Random random = new Random(seed); - IndexWriter indexWriter = new IndexWriter(directory, iwc); - for (int i = 0; i < nDocs; i++) { - final Document doc = new Document(); - - final int batchIndex = i / numHosts; - final String hostName = "host-" + batchIndex; - // Slightly vary the timestamp in each document - final long timestamp = BASE_TIMESTAMP + ((i % numHosts) * deltaTime) + random.nextInt(0, deltaTime); - - doc.add(new SortedDocValuesField(HOSTNAME_FIELD, new BytesRef(hostName))); - doc.add(new SortedNumericDocValuesField(TIMESTAMP_FIELD, timestamp)); - doc.add(new SortedNumericDocValuesField("counter_1", counter1++)); - doc.add(new SortedNumericDocValuesField("counter_2", counter2++)); - doc.add(new SortedNumericDocValuesField("gauge_1", gauge1Values[i % gauge1Values.length])); - doc.add(new SortedNumericDocValuesField("gauge_2", gauge2Values[i % gauge1Values.length])); - int numTags = tags.length % (i + 1); - for (int j = 0; j < numTags; j++) { - doc.add(new SortedSetDocValuesField("tags", new BytesRef(tags[j]))); - } + @State(Scope.Benchmark) + public static class StateDenseWithOptimizeMerge { + + @Param("20431204") + private int nDocs; + + @Param("1000") + private int deltaTime; + + @Param("42") + private int seed; + + private Directory directory; + private final Supplier iwc = () -> createIndexWriterConfig(true); + + @Setup(Level.Trial) + public void setup() throws IOException { + directory = FSDirectory.open(Files.createTempDirectory("temp1-")); + createIndex(directory, iwc.get(), false, nDocs, deltaTime, seed); + } + + } + + @Benchmark + public void forceMergeDenseWithOptimizedMerge(StateDenseWithOptimizeMerge state) throws IOException { + forceMerge(state.directory, state.iwc.get()); + } + + @State(Scope.Benchmark) + public static class StateSparseWithoutOptimizeMerge { - indexWriter.addDocument(doc); + @Param("20431204") + private int nDocs; + + @Param("1000") + private int deltaTime; + + @Param("42") + private int seed; + + private Directory directory; + private final Supplier iwc = () -> createIndexWriterConfig(false); + + @Setup(Level.Trial) + public void setup() throws IOException { + directory = FSDirectory.open(Files.createTempDirectory("temp4-")); + createIndex(directory, iwc.get(), true, nDocs, deltaTime, seed); } - indexWriter.commit(); - return indexWriter; + } @Benchmark - public void forceMergeWithoutOptimizedMerge() throws IOException { - forceMerge(indexWriterWithoutOptimizedMerge); + public void forceMergeSparseWithoutOptimizedMerge(StateSparseWithoutOptimizeMerge state) throws IOException { + forceMerge(state.directory, state.iwc.get()); + } + + @State(Scope.Benchmark) + public static class StateSparseWithOptimizeMerge { + + @Param("20431204") + private int nDocs; + + @Param("1000") + private int deltaTime; + + @Param("42") + private int seed; + + private Directory directory; + private final Supplier iwc = () -> createIndexWriterConfig(true); + + @Setup(Level.Trial) + public void setup() throws IOException { + directory = FSDirectory.open(Files.createTempDirectory("temp3-")); + createIndex(directory, iwc.get(), true, nDocs, deltaTime, seed); + } + } @Benchmark - public void forceMergeWithOptimizedMerge() throws IOException { - forceMerge(indexWriterWithOptimizedMerge); + public void forceMergeSparseWithOptimizedMerge(StateSparseWithOptimizeMerge state) throws IOException { + forceMerge(state.directory, state.iwc.get()); } - private void forceMerge(final IndexWriter indexWriter) throws IOException { - indexWriter.forceMerge(1); + private void forceMerge(Directory directory, IndexWriterConfig config) throws IOException { + try (var indexWriter = new IndexWriter(directory, config)) { + indexWriter.forceMerge(1); + } } - @TearDown(Level.Trial) - public void tearDown() { - if (executorService != null) { - executorService.shutdown(); - try { - if (executorService.awaitTermination(30, TimeUnit.SECONDS) == false) { - executorService.shutdownNow(); + static void createIndex(Directory directory, IndexWriterConfig iwc, boolean sparse, int nDocs, int deltaTime, int seed) + throws IOException { + long counter1 = 0; + long counter2 = 10_000_000; + long[] gauge1Values = new long[] { 2, 4, 6, 8, 10, 12, 14, 16 }; + long[] gauge2Values = new long[] { -2, -4, -6, -8, -10, -12, -14, -16 }; + int numHosts = 10000; + String[] tags = new String[] { "tag_1", "tag_2", "tag_3", "tag_4", "tag_5", "tag_6", "tag_7", "tag_8" }; + + final Random random = new Random(seed); + try (var indexWriter = new IndexWriter(directory, iwc)) { + for (int i = 0; i < nDocs; i++) { + final Document doc = new Document(); + + final int batchIndex = i % numHosts; + final String hostName = "host-" + batchIndex; + // Slightly vary the timestamp in each document + final long timestamp = BASE_TIMESTAMP + ((i % numHosts) * deltaTime) + random.nextInt(0, deltaTime); + + doc.add(new SortedDocValuesField(HOSTNAME_FIELD, new BytesRef(hostName))); + doc.add(new SortedNumericDocValuesField(TIMESTAMP_FIELD, timestamp)); + if (sparse == false || random.nextBoolean()) { + doc.add(new SortedNumericDocValuesField("counter_1", counter1++)); + } + if (sparse == false || random.nextBoolean()) { + doc.add(new SortedNumericDocValuesField("counter_2", counter2++)); + } + if (sparse == false || random.nextBoolean()) { + doc.add(new SortedNumericDocValuesField("gauge_1", gauge1Values[i % gauge1Values.length])); + } + if (sparse == false || random.nextBoolean()) { + doc.add(new SortedNumericDocValuesField("gauge_2", gauge2Values[i % gauge1Values.length])); + } + if (sparse == false || random.nextBoolean()) { + int numTags = tags.length % (i + 1); + for (int j = 0; j < numTags; j++) { + doc.add(new SortedSetDocValuesField("tags", new BytesRef(tags[j]))); + } + } + indexWriter.addDocument(doc); + + if (i % 10000 == 0) { + indexWriter.commit(); } - } catch (InterruptedException e) { - executorService.shutdownNow(); - Thread.currentThread().interrupt(); } } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/DISIAccumulator.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/DISIAccumulator.java new file mode 100644 index 0000000000000..cd85623cdc8fd --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/DISIAccumulator.java @@ -0,0 +1,196 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.tsdb.es819; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.IOUtils; +import org.elasticsearch.core.SuppressForbidden; + +import java.io.Closeable; +import java.io.IOException; + +/** + * Fork of {@link org.apache.lucene.codecs.lucene90.IndexedDISI#writeBitSet(DocIdSetIterator, IndexOutput)} but that allows + * building jump list iteratively by one docid at a time instead of relying on docidset iterator. + */ +final class DISIAccumulator implements Closeable { + + private static final int BLOCK_SIZE = 65536; // The number of docIDs that a single block represents + + private static final int DENSE_BLOCK_LONGS = BLOCK_SIZE / Long.SIZE; // 1024 + public static final byte DEFAULT_DENSE_RANK_POWER = 9; // Every 512 docIDs / 8 longs + + static final int MAX_ARRAY_LENGTH = (1 << 12) - 1; + + final Directory dir; + final IOContext context; + final String skipListTempFileName; + final IndexOutput disiTempOutput; + final byte denseRankPower; + final long origo; + + int totalCardinality = 0; + int blockCardinality = 0; + final FixedBitSet buffer = new FixedBitSet(1 << 16); + int[] jumps = new int[ArrayUtil.oversize(1, Integer.BYTES * 2)]; + int prevBlock = -1; + int jumpBlockIndex = 0; + + DISIAccumulator(Directory dir, IOContext context, IndexOutput data, byte denseRankPower) throws IOException { + this.dir = dir; + this.context = context; + this.denseRankPower = denseRankPower; + if ((denseRankPower < 7 || denseRankPower > 15) && denseRankPower != -1) { + throw new IllegalArgumentException( + "Acceptable values for denseRankPower are 7-15 (every 128-32768 docIDs). " + + "The provided power was " + + denseRankPower + + " (every " + + (int) Math.pow(2, denseRankPower) + + " docIDs)" + ); + } + this.disiTempOutput = dir.createTempOutput(data.getName(), "disi", context); + this.skipListTempFileName = disiTempOutput.getName(); + this.origo = disiTempOutput.getFilePointer(); // All jumps are relative to the origo + } + + void addDocId(int doc) throws IOException { + final int block = doc >>> 16; + if (prevBlock != -1 && block != prevBlock) { + // Track offset+index from previous block up to current + jumps = addJumps(jumps, disiTempOutput.getFilePointer() - origo, totalCardinality, jumpBlockIndex, prevBlock + 1); + jumpBlockIndex = prevBlock + 1; + // Flush block + flush(prevBlock, buffer, blockCardinality, denseRankPower, disiTempOutput); + // Reset for next block + buffer.clear(); + totalCardinality += blockCardinality; + blockCardinality = 0; + } + buffer.set(doc & 0xFFFF); + blockCardinality++; + prevBlock = block; + } + + short build(IndexOutput data) throws IOException { + if (blockCardinality > 0) { + jumps = addJumps(jumps, disiTempOutput.getFilePointer() - origo, totalCardinality, jumpBlockIndex, prevBlock + 1); + totalCardinality += blockCardinality; + flush(prevBlock, buffer, blockCardinality, denseRankPower, disiTempOutput); + buffer.clear(); + prevBlock++; + } + final int lastBlock = prevBlock == -1 ? 0 : prevBlock; // There will always be at least 1 block (NO_MORE_DOCS) + // Last entry is a SPARSE with blockIndex == 32767 and the single entry 65535, which becomes the + // docID NO_MORE_DOCS + // To avoid creating 65K jump-table entries, only a single entry is created pointing to the + // offset of the + // NO_MORE_DOCS block, with the jumpBlockIndex set to the logical EMPTY block after all real + // blocks. + jumps = addJumps(jumps, disiTempOutput.getFilePointer() - origo, totalCardinality, lastBlock, lastBlock + 1); + buffer.set(DocIdSetIterator.NO_MORE_DOCS & 0xFFFF); + flush(DocIdSetIterator.NO_MORE_DOCS >>> 16, buffer, 1, denseRankPower, disiTempOutput); + // offset+index jump-table stored at the end + short blockCount = flushBlockJumps(jumps, lastBlock + 1, disiTempOutput); + disiTempOutput.close(); + try (var addressDataInput = dir.openInput(skipListTempFileName, context)) { + data.copyBytes(addressDataInput, addressDataInput.length()); + } + return blockCount; + } + + // Adds entries to the offset & index jump-table for blocks + private static int[] addJumps(int[] jumps, long offset, int index, int startBlock, int endBlock) { + assert offset < Integer.MAX_VALUE : "Logically the offset should not exceed 2^30 but was >= Integer.MAX_VALUE"; + jumps = ArrayUtil.grow(jumps, (endBlock + 1) * 2); + for (int b = startBlock; b < endBlock; b++) { + jumps[b * 2] = index; + jumps[b * 2 + 1] = (int) offset; + } + return jumps; + } + + private static void flush(int block, FixedBitSet buffer, int cardinality, byte denseRankPower, IndexOutput out) throws IOException { + assert block >= 0 && block < BLOCK_SIZE; + out.writeShort((short) block); + assert cardinality > 0 && cardinality <= BLOCK_SIZE; + out.writeShort((short) (cardinality - 1)); + if (cardinality > MAX_ARRAY_LENGTH) { + if (cardinality != BLOCK_SIZE) { // all docs are set + if (denseRankPower != -1) { + final byte[] rank = createRank(buffer, denseRankPower); + out.writeBytes(rank, rank.length); + } + for (long word : buffer.getBits()) { + out.writeLong(word); + } + } + } else { + BitSetIterator it = new BitSetIterator(buffer, cardinality); + for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { + out.writeShort((short) doc); + } + } + } + + // Flushes the offset & index jump-table for blocks. This should be the last data written to out + // This method returns the blockCount for the blocks reachable for the jump_table or -1 for no + // jump-table + private static short flushBlockJumps(int[] jumps, int blockCount, IndexOutput out) throws IOException { + if (blockCount == 2) { // Jumps with a single real entry + NO_MORE_DOCS is just wasted space so we ignore + // that + blockCount = 0; + } + for (int i = 0; i < blockCount; i++) { + out.writeInt(jumps[i * 2]); // index + out.writeInt(jumps[i * 2 + 1]); // offset + } + // As there are at most 32k blocks, the count is a short + // The jumpTableOffset will be at lastPos - (blockCount * Long.BYTES) + return (short) blockCount; + } + + // Creates a DENSE rank-entry (the number of set bits up to a given point) for the buffer. + // One rank-entry for every {@code 2^denseRankPower} bits, with each rank-entry using 2 bytes. + // Represented as a byte[] for fast flushing and mirroring of the retrieval representation. + private static byte[] createRank(FixedBitSet buffer, byte denseRankPower) { + final int longsPerRank = 1 << (denseRankPower - 6); + final int rankMark = longsPerRank - 1; + final int rankIndexShift = denseRankPower - 7; // 6 for the long (2^6) + 1 for 2 bytes/entry + final byte[] rank = new byte[DENSE_BLOCK_LONGS >> rankIndexShift]; + final long[] bits = buffer.getBits(); + int bitCount = 0; + for (int word = 0; word < DENSE_BLOCK_LONGS; word++) { + if ((word & rankMark) == 0) { // Every longsPerRank longs + rank[word >> rankIndexShift] = (byte) (bitCount >> 8); + rank[(word >> rankIndexShift) + 1] = (byte) (bitCount & 0xFF); + } + bitCount += Long.bitCount(bits[word]); + } + return rank; + } + + @Override + @SuppressForbidden(reason = "require usage of Lucene's IOUtils#deleteFilesIgnoringExceptions(...)") + public void close() throws IOException { + IOUtils.close(disiTempOutput); + if (skipListTempFileName != null) { + IOUtils.deleteFilesIgnoringExceptions(dir, skipListTempFileName); + } + } + +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java index b860c0f5983c7..8b991f76db8a3 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesConsumer.java @@ -29,6 +29,8 @@ import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.store.ByteBuffersDataOutput; import org.apache.lucene.store.ByteBuffersIndexOutput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -54,6 +56,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { + final Directory dir; + final IOContext context; IndexOutput data, meta; final int maxDoc; private byte[] termsDictBuffer; @@ -70,6 +74,8 @@ final class ES819TSDBDocValuesConsumer extends XDocValuesConsumer { String metaExtension ) throws IOException { this.termsDictBuffer = new byte[1 << 14]; + this.dir = state.directory; + this.context = state.context; boolean success = false; try { final String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); @@ -138,84 +144,101 @@ private long[] writeField(FieldInfo field, TsdbDocValuesProducer valuesProducer, meta.writeLong(numValues); meta.writeInt(numDocsWithValue); - if (numValues > 0) { - // Special case for maxOrd of 1, signal -1 that no blocks will be written - meta.writeInt(maxOrd != 1 ? ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT : -1); - final ByteBuffersDataOutput indexOut = new ByteBuffersDataOutput(); - final DirectMonotonicWriter indexWriter = DirectMonotonicWriter.getInstance( - meta, - new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"), - 1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT), - ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT - ); - - final long valuesDataOffset = data.getFilePointer(); - // Special case for maxOrd of 1, skip writing the blocks - if (maxOrd != 1) { - final long[] buffer = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; - int bufferSize = 0; - final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE); - values = valuesProducer.getSortedNumeric(field); - final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1; - for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { - final int count = values.docValueCount(); - for (int i = 0; i < count; ++i) { - buffer[bufferSize++] = values.nextValue(); - if (bufferSize == ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE) { - indexWriter.add(data.getFilePointer() - valuesDataOffset); - if (maxOrd >= 0) { - encoder.encodeOrdinals(buffer, data, bitsPerOrd); - } else { - encoder.encode(buffer, data); + DISIAccumulator disiAccumulator = null; + try { + if (numValues > 0) { + assert numDocsWithValue > 0; + // Special case for maxOrd of 1, signal -1 that no blocks will be written + meta.writeInt(maxOrd != 1 ? ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT : -1); + final ByteBuffersDataOutput indexOut = new ByteBuffersDataOutput(); + final DirectMonotonicWriter indexWriter = DirectMonotonicWriter.getInstance( + meta, + new ByteBuffersIndexOutput(indexOut, "temp-dv-index", "temp-dv-index"), + 1L + ((numValues - 1) >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT), + ES819TSDBDocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT + ); + + final long valuesDataOffset = data.getFilePointer(); + // Special case for maxOrd of 1, skip writing the blocks + if (maxOrd != 1) { + final long[] buffer = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; + int bufferSize = 0; + final TSDBDocValuesEncoder encoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE); + values = valuesProducer.getSortedNumeric(field); + final int bitsPerOrd = maxOrd >= 0 ? PackedInts.bitsRequired(maxOrd - 1) : -1; + if (enableOptimizedMerge && numDocsWithValue < maxDoc) { + disiAccumulator = new DISIAccumulator(dir, context, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); + } + for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { + if (disiAccumulator != null) { + disiAccumulator.addDocId(doc); + } + final int count = values.docValueCount(); + for (int i = 0; i < count; ++i) { + buffer[bufferSize++] = values.nextValue(); + if (bufferSize == ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE) { + indexWriter.add(data.getFilePointer() - valuesDataOffset); + if (maxOrd >= 0) { + encoder.encodeOrdinals(buffer, data, bitsPerOrd); + } else { + encoder.encode(buffer, data); + } + bufferSize = 0; } - bufferSize = 0; } } - } - if (bufferSize > 0) { - indexWriter.add(data.getFilePointer() - valuesDataOffset); - // Fill unused slots in the block with zeroes rather than junk - Arrays.fill(buffer, bufferSize, ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE, 0L); - if (maxOrd >= 0) { - encoder.encodeOrdinals(buffer, data, bitsPerOrd); - } else { - encoder.encode(buffer, data); + if (bufferSize > 0) { + indexWriter.add(data.getFilePointer() - valuesDataOffset); + // Fill unused slots in the block with zeroes rather than junk + Arrays.fill(buffer, bufferSize, ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE, 0L); + if (maxOrd >= 0) { + encoder.encodeOrdinals(buffer, data, bitsPerOrd); + } else { + encoder.encode(buffer, data); + } } } - } - final long valuesDataLength = data.getFilePointer() - valuesDataOffset; - if (maxOrd != 1) { - // Special case for maxOrd of 1, indexWriter isn't really used, so no need to invoke finish() method. - indexWriter.finish(); - } - final long indexDataOffset = data.getFilePointer(); - data.copyBytes(indexOut.toDataInput(), indexOut.size()); - meta.writeLong(indexDataOffset); - meta.writeLong(data.getFilePointer() - indexDataOffset); + final long valuesDataLength = data.getFilePointer() - valuesDataOffset; + if (maxOrd != 1) { + // Special case for maxOrd of 1, indexWriter isn't really used, so no need to invoke finish() method. + indexWriter.finish(); + } + final long indexDataOffset = data.getFilePointer(); + data.copyBytes(indexOut.toDataInput(), indexOut.size()); + meta.writeLong(indexDataOffset); + meta.writeLong(data.getFilePointer() - indexDataOffset); - meta.writeLong(valuesDataOffset); - meta.writeLong(valuesDataLength); - } + meta.writeLong(valuesDataOffset); + meta.writeLong(valuesDataLength); + } - if (numDocsWithValue == 0) { // meta[-2, 0]: No documents with values - meta.writeLong(-2); // docsWithFieldOffset - meta.writeLong(0L); // docsWithFieldLength - meta.writeShort((short) -1); // jumpTableEntryCount - meta.writeByte((byte) -1); // denseRankPower - } else if (numDocsWithValue == maxDoc) { // meta[-1, 0]: All documents have values - meta.writeLong(-1); // docsWithFieldOffset - meta.writeLong(0L); // docsWithFieldLength - meta.writeShort((short) -1); // jumpTableEntryCount - meta.writeByte((byte) -1); // denseRankPower - } else { // meta[data.offset, data.length]: IndexedDISI structure for documents with values - long offset = data.getFilePointer(); - meta.writeLong(offset); // docsWithFieldOffset - values = valuesProducer.getSortedNumeric(field); - final short jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); - meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength - meta.writeShort(jumpTableEntryCount); - meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); + if (numDocsWithValue == 0) { // meta[-2, 0]: No documents with values + meta.writeLong(-2); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else if (numDocsWithValue == maxDoc) { // meta[-1, 0]: All documents have values + meta.writeLong(-1); // docsWithFieldOffset + meta.writeLong(0L); // docsWithFieldLength + meta.writeShort((short) -1); // jumpTableEntryCount + meta.writeByte((byte) -1); // denseRankPower + } else { // meta[data.offset, data.length]: IndexedDISI structure for documents with values + long offset = data.getFilePointer(); + meta.writeLong(offset); // docsWithFieldOffset + final short jumpTableEntryCount; + if (maxOrd != 1 && disiAccumulator != null) { + jumpTableEntryCount = disiAccumulator.build(data); + } else { + values = valuesProducer.getSortedNumeric(field); + jumpTableEntryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); + } + meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength + meta.writeShort(jumpTableEntryCount); + meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); + } + } finally { + IOUtils.close(disiAccumulator); } return new long[] { numDocsWithValue, numValues }; diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/DISIAccumulatorTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/DISIAccumulatorTests.java new file mode 100644 index 0000000000000..731b69c838de5 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/DISIAccumulatorTests.java @@ -0,0 +1,518 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ +package org.elasticsearch.index.codec.tsdb.es819; + +import org.apache.lucene.codecs.lucene90.IndexedDISI; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.util.BitSet; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.SparseFixedBitSet; +import org.elasticsearch.core.SuppressForbidden; + +import java.io.IOException; +import java.util.Random; + +// Copied from org.apache.lucene.codecs.lucene90.TestIndexedDISI and kept tests that we can run. +// The test suite has been modified to write jump table using writeJumpTable(...) in this class. +// (some original tests require access to package protected constructor of IndexedDISI and was removed) +public class DISIAccumulatorTests extends LuceneTestCase { + + public void testEmpty() throws IOException { + int maxDoc = TestUtil.nextInt(random(), 1, 100000); + BitSet set = new SparseFixedBitSet(maxDoc); + try (Directory dir = newDirectory()) { + doTest(set, dir); + } + } + + // EMPTY blocks are special with regard to jumps as they have size 0 + public void testEmptyBlocks() throws IOException { + final int B = 65536; + int maxDoc = B * 11; + BitSet set = new SparseFixedBitSet(maxDoc); + // block 0: EMPTY + set.set(B + 5); // block 1: SPARSE + // block 2: EMPTY + // block 3: EMPTY + set.set(B * 4 + 5); // block 4: SPARSE + + for (int i = 0; i < B; i++) { + set.set(B * 6 + i); // block 6: ALL + } + for (int i = 0; i < B; i += 3) { + set.set(B * 7 + i); // block 7: DENSE + } + for (int i = 0; i < B; i++) { + if (i != 32768) { + set.set(B * 8 + i); // block 8: DENSE (all-1) + } + } + // block 9-11: EMPTY + + try (Directory dir = newDirectory()) { + doTestAllSingleJump(set, dir); + } + + // Change the first block to DENSE to see if jump-tables sets to position 0 + set.set(0); + try (Directory dir = newDirectory()) { + doTestAllSingleJump(set, dir); + } + } + + // EMPTY blocks are special with regard to jumps as they have size 0 + public void testLastEmptyBlocks() throws IOException { + final int B = 65536; + int maxDoc = B * 3; + BitSet set = new SparseFixedBitSet(maxDoc); + for (int docID = 0; docID < B * 2; docID++) { // first 2 blocks are ALL + set.set(docID); + } + // Last block is EMPTY + + try (Directory dir = newDirectory()) { + doTestAllSingleJump(set, dir); + assertAdvanceBeyondEnd(set, dir); + } + } + + // Checks that advance after the end of the blocks has been reached has the correct behaviour + private void assertAdvanceBeyondEnd(BitSet set, Directory dir) throws IOException { + final int cardinality = set.cardinality(); + final byte denseRankPower = 9; // Not tested here so fixed to isolate factors + int jumpTableentryCount; + try (IndexOutput out = dir.createOutput("bar", IOContext.DEFAULT)) { + jumpTableentryCount = writeJumpTable(set, dir, out, denseRankPower); + } + + try (IndexInput in = dir.openInput("bar", IOContext.DEFAULT)) { + BitSetIterator disi2 = new BitSetIterator(set, cardinality); + int doc = disi2.docID(); + int index = 0; + while (doc < cardinality) { + doc = disi2.nextDoc(); + index++; + } + + IndexedDISI disi = new IndexedDISI(in, 0L, in.length(), jumpTableentryCount, denseRankPower, cardinality); + // Advance 1 docID beyond end + assertFalse("There should be no set bit beyond the valid docID range", disi.advanceExact(set.length())); + disi.advance(doc); // Should be the special docID signifyin NO_MORE_DOCS from the BitSetIterator + // disi.index()+1 as the while-loop also counts the NO_MORE_DOCS + assertEquals("The index when advancing beyond the last defined docID should be correct", index, disi.index() + 1); + } + } + + // TODO: can this be toned down? + public void testRandomBlocks() throws IOException { + final int BLOCKS = 5; + BitSet set = createSetWithRandomBlocks(BLOCKS); + try (Directory dir = newDirectory()) { + doTestAllSingleJump(set, dir); + } + } + + private BitSet createSetWithRandomBlocks(int blockCount) { + final int B = 65536; + BitSet set = new SparseFixedBitSet(blockCount * B); + for (int block = 0; block < blockCount; block++) { + switch (random().nextInt(4)) { + case 0: { // EMPTY + break; + } + case 1: { // ALL + for (int docID = block * B; docID < (block + 1) * B; docID++) { + set.set(docID); + } + break; + } + case 2: { // SPARSE ( < 4096 ) + for (int docID = block * B; docID < (block + 1) * B; docID += 101) { + set.set(docID); + } + break; + } + case 3: { // DENSE ( >= 4096 ) + for (int docID = block * B; docID < (block + 1) * B; docID += 3) { + set.set(docID); + } + break; + } + default: + throw new IllegalStateException("Modulo logic error: there should only be 4 possibilities"); + } + } + return set; + } + + private void doTestAllSingleJump(BitSet set, Directory dir) throws IOException { + final int cardinality = set.cardinality(); + final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7) + 7); // sane + chance of disable + long length; + int jumpTableentryCount; + try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) { + jumpTableentryCount = writeJumpTable(set, dir, out, denseRankPower); + length = out.getFilePointer(); + } + + try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) { + for (int i = 0; i < set.length(); i++) { + IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality); + assertEquals("The bit at " + i + " should be correct with advanceExact", set.get(i), disi.advanceExact(i)); + + IndexedDISI disi2 = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality); + disi2.advance(i); + // Proper sanity check with jump tables as an error could make them seek backwards + assertTrue("The docID should at least be " + i + " after advance(" + i + ") but was " + disi2.docID(), i <= disi2.docID()); + if (set.get(i)) { + assertEquals("The docID should be present with advance", i, disi2.docID()); + } else { + assertNotSame("The docID should not be present with advance", i, disi2.docID()); + } + } + } + } + + public void testOneDoc() throws IOException { + int maxDoc = TestUtil.nextInt(random(), 1, 100000); + BitSet set = new SparseFixedBitSet(maxDoc); + set.set(random().nextInt(maxDoc)); + try (Directory dir = newDirectory()) { + doTest(set, dir); + } + } + + public void testTwoDocs() throws IOException { + int maxDoc = TestUtil.nextInt(random(), 1, 100000); + BitSet set = new SparseFixedBitSet(maxDoc); + set.set(random().nextInt(maxDoc)); + set.set(random().nextInt(maxDoc)); + try (Directory dir = newDirectory()) { + doTest(set, dir); + } + } + + public void testAllDocs() throws IOException { + int maxDoc = TestUtil.nextInt(random(), 1, 100000); + FixedBitSet set = new FixedBitSet(maxDoc); + set.set(1, maxDoc); + try (Directory dir = newDirectory()) { + doTest(set, dir); + } + } + + public void testHalfFull() throws IOException { + int maxDoc = TestUtil.nextInt(random(), 1, 100000); + BitSet set = new SparseFixedBitSet(maxDoc); + for (int i = random().nextInt(2); i < maxDoc; i += TestUtil.nextInt(random(), 1, 3)) { + set.set(i); + } + try (Directory dir = newDirectory()) { + doTest(set, dir); + } + } + + public void testDocRange() throws IOException { + try (Directory dir = newDirectory()) { + for (int iter = 0; iter < 10; ++iter) { + int maxDoc = TestUtil.nextInt(random(), 1, 1000000); + FixedBitSet set = new FixedBitSet(maxDoc); + final int start = random().nextInt(maxDoc); + final int end = TestUtil.nextInt(random(), start + 1, maxDoc); + set.set(start, end); + doTest(set, dir); + } + } + } + + public void testSparseDenseBoundary() throws IOException, NoSuchFieldException, IllegalAccessException { + try (Directory dir = newDirectory()) { + FixedBitSet set = new FixedBitSet(200000); + int start = 65536 + random().nextInt(100); + final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7) + 7); // sane + chance of disable + + // we set MAX_ARRAY_LENGTH bits so the encoding will be sparse + set.set(start, start + DISIAccumulator.MAX_ARRAY_LENGTH); + long length; + int jumpTableEntryCount; + try (IndexOutput out = dir.createOutput("sparse", IOContext.DEFAULT)) { + jumpTableEntryCount = writeJumpTable(set, DISIAccumulator.MAX_ARRAY_LENGTH, dir, out, denseRankPower); + length = out.getFilePointer(); + } + try (IndexInput in = dir.openInput("sparse", IOContext.DEFAULT)) { + IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableEntryCount, denseRankPower, DISIAccumulator.MAX_ARRAY_LENGTH); + assertEquals(start, disi.nextDoc()); + if (System.getSecurityManager() == null) { + assertEquals("SPARSE", getMethodFromDISI(disi)); + } + } + doTest(set, dir); + + // now we set one more bit so the encoding will be dense + set.set(start + DISIAccumulator.MAX_ARRAY_LENGTH + random().nextInt(100)); + try (IndexOutput out = dir.createOutput("bar", IOContext.DEFAULT)) { + writeJumpTable(set, DISIAccumulator.MAX_ARRAY_LENGTH, dir, out, denseRankPower); + length = out.getFilePointer(); + } + try (IndexInput in = dir.openInput("bar", IOContext.DEFAULT)) { + IndexedDISI disi = new IndexedDISI( + in, + 0L, + length, + jumpTableEntryCount, + denseRankPower, + DISIAccumulator.MAX_ARRAY_LENGTH + 1 + ); + assertEquals(start, disi.nextDoc()); + if (System.getSecurityManager() == null) { + assertEquals("DENSE", getMethodFromDISI(disi)); + } + } + doTest(set, dir); + } + } + + @SuppressForbidden(reason = "access violation required in order to read private field for this test") + private static String getMethodFromDISI(Object o) throws NoSuchFieldException, IllegalAccessException { + var field = IndexedDISI.class.getDeclaredField("method"); + field.setAccessible(true); + return field.get(o).toString(); + } + + public void testOneDocMissing() throws IOException { + int maxDoc = TestUtil.nextInt(random(), 1, 1000000); + FixedBitSet set = new FixedBitSet(maxDoc); + set.set(0, maxDoc); + set.clear(random().nextInt(maxDoc)); + try (Directory dir = newDirectory()) { + doTest(set, dir); + } + } + + public void testFewMissingDocs() throws IOException { + try (Directory dir = newDirectory()) { + int numIters = atLeast(10); + for (int iter = 0; iter < numIters; ++iter) { + int maxDoc = TestUtil.nextInt(random(), 1, 100000); + FixedBitSet set = new FixedBitSet(maxDoc); + set.set(0, maxDoc); + final int numMissingDocs = TestUtil.nextInt(random(), 2, 1000); + for (int i = 0; i < numMissingDocs; ++i) { + set.clear(random().nextInt(maxDoc)); + } + doTest(set, dir); + } + } + } + + public void testDenseMultiBlock() throws IOException { + try (Directory dir = newDirectory()) { + int maxDoc = 10 * 65536; // 10 blocks + FixedBitSet set = new FixedBitSet(maxDoc); + for (int i = 0; i < maxDoc; i += 2) { // Set every other to ensure dense + set.set(i); + } + doTest(set, dir); + } + } + + public void testIllegalDenseRankPower() throws IOException { + + // Legal values + for (byte denseRankPower : new byte[] { -1, 7, 8, 9, 10, 11, 12, 13, 14, 15 }) { + createAndOpenDISI(denseRankPower, denseRankPower); + } + + // Illegal values + for (byte denseRankPower : new byte[] { -2, 0, 1, 6, 16 }) { + expectThrows(IllegalArgumentException.class, () -> { + createAndOpenDISI(denseRankPower, (byte) 8); // Illegal write, legal read (should not reach read) + }); + + expectThrows(IllegalArgumentException.class, () -> { + createAndOpenDISI((byte) 8, denseRankPower); // Legal write, illegal read (should reach read) + }); + } + } + + private void createAndOpenDISI(byte denseRankPowerWrite, byte denseRankPowerRead) throws IOException { + BitSet set = new FixedBitSet(10); + set.set(set.length() - 1); + try (Directory dir = newDirectory()) { + long length; + int jumpTableEntryCount = -1; + try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) { + jumpTableEntryCount = writeJumpTable(set, dir, out, denseRankPowerWrite); + length = out.getFilePointer(); + } + try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) { + new IndexedDISI(in, 0L, length, jumpTableEntryCount, denseRankPowerRead, set.cardinality()); + } + // This tests the legality of the denseRankPower only, so we don't do anything with the disi + } + } + + public void testOneDocMissingFixed() throws IOException { + int maxDoc = 9699; + final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7) + 7); // sane + chance of disable + FixedBitSet set = new FixedBitSet(maxDoc); + set.set(0, maxDoc); + set.clear(1345); + try (Directory dir = newDirectory()) { + + final int cardinality = set.cardinality(); + long length; + int jumpTableentryCount; + try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) { + jumpTableentryCount = writeJumpTable(set, dir, out, denseRankPower); + length = out.getFilePointer(); + } + + int step = 16000; + try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) { + IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality); + BitSetIterator disi2 = new BitSetIterator(set, cardinality); + assertAdvanceEquality(disi, disi2, step); + } + } + } + + public void testRandom() throws IOException { + try (Directory dir = newDirectory()) { + int numIters = atLeast(3); + for (int i = 0; i < numIters; ++i) { + doTestRandom(dir); + } + } + } + + private void doTestRandom(Directory dir) throws IOException { + Random random = random(); + final int maxStep = TestUtil.nextInt(random, 1, 1 << TestUtil.nextInt(random, 2, 20)); + final int numDocs = TestUtil.nextInt(random, 1, Math.min(100000, (Integer.MAX_VALUE - 1) / maxStep)); + BitSet docs = new SparseFixedBitSet(numDocs * maxStep + 1); + int lastDoc = -1; + for (int doc = -1, i = 0; i < numDocs; ++i) { + doc += TestUtil.nextInt(random, 1, maxStep); + docs.set(doc); + lastDoc = doc; + } + final int maxDoc = lastDoc + TestUtil.nextInt(random, 1, 100); + + BitSet set = BitSet.of(new BitSetIterator(docs, docs.approximateCardinality()), maxDoc); + doTest(set, dir); + } + + private void doTest(BitSet set, Directory dir) throws IOException { + final int cardinality = set.cardinality(); + final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7) + 7); // sane + chance of disable + long length; + int jumpTableentryCount; + try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) { + jumpTableentryCount = writeJumpTable(set, dir, out, denseRankPower); + length = out.getFilePointer(); + } + + try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) { + IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality); + BitSetIterator disi2 = new BitSetIterator(set, cardinality); + assertSingleStepEquality(disi, disi2); + } + + for (int step : new int[] { 1, 10, 100, 1000, 10000, 100000 }) { + try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) { + IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality); + BitSetIterator disi2 = new BitSetIterator(set, cardinality); + assertAdvanceEquality(disi, disi2, step); + } + } + + for (int step : new int[] { 10, 100, 1000, 10000, 100000 }) { + try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) { + IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality); + BitSetIterator disi2 = new BitSetIterator(set, cardinality); + int disi2length = set.length(); + assertAdvanceExactRandomized(disi, disi2, disi2length, step); + } + } + + dir.deleteFile("foo"); + } + + private void assertAdvanceExactRandomized(IndexedDISI disi, BitSetIterator disi2, int disi2length, int step) throws IOException { + int index = -1; + Random random = random(); + for (int target = 0; target < disi2length;) { + target += TestUtil.nextInt(random, 0, step); + int doc = disi2.docID(); + while (doc < target) { + doc = disi2.nextDoc(); + index++; + } + + boolean exists = disi.advanceExact(target); + assertEquals(doc == target, exists); + if (exists) { + assertEquals(index, disi.index()); + } else if (random.nextBoolean()) { + assertEquals(doc, disi.nextDoc()); + // This is a bit strange when doc == NO_MORE_DOCS as the index overcounts in the disi2 + // while-loop + assertEquals(index, disi.index()); + target = doc; + } + } + } + + private void assertSingleStepEquality(IndexedDISI disi, BitSetIterator disi2) throws IOException { + int i = 0; + for (int doc = disi2.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi2.nextDoc()) { + assertEquals(doc, disi.nextDoc()); + assertEquals(i++, disi.index()); + } + assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc()); + } + + private void assertAdvanceEquality(IndexedDISI disi, BitSetIterator disi2, int step) throws IOException { + int index = -1; + while (true) { + int target = disi2.docID() + step; + int doc; + do { + doc = disi2.nextDoc(); + index++; + } while (doc < target); + assertEquals(doc, disi.advance(target)); + if (doc == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + assertEquals("Expected equality using step " + step + " at docID " + doc, index, disi.index()); + } + } + + private static short writeJumpTable(BitSet set, Directory dir, IndexOutput out, byte denseRankPower) throws IOException { + return writeJumpTable(set, set.cardinality(), dir, out, denseRankPower); + } + + private static short writeJumpTable(BitSet set, long cost, Directory dir, IndexOutput out, byte denseRankPower) throws IOException { + var disiAccumulator = new DISIAccumulator(dir, IOContext.DEFAULT, out, denseRankPower); + var iterator = new BitSetIterator(set, cost); + for (int docId = iterator.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = iterator.nextDoc()) { + disiAccumulator.addDocId(docId); + } + return disiAccumulator.build(out); + } +}