From dc698c494b65257689af5114c416eeb79bae7d68 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 7 Jan 2026 09:19:43 +0000 Subject: [PATCH] Small speedups in `RecyclerBytesStreamOutput` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. No need to compute `vIntLength(i)` in the very common case that there are at least 5 bytes of space left on the current page. 2. No need to subtract the current offset from the return value of `putVInt()` only to add it back again: we can just return the updated offset directly. 3. No need to use `putVInt()` when we've already handled the single-byte case. 4. `writeVLong()` also shows up in profiling sometimes so deserves similar optimizations to `writeVInt()`. Microbenchmark results before change: 1. As committed Benchmark Mode Cnt Score Error Units RecyclerBytesStreamOutputBenchmark.writeVInt avgt 3 1734.806 ± 45.790 ns/op RecyclerBytesStreamOutputBenchmark.writeVLong avgt 3 6196.593 ± 405.058 ns/op 2. With arrays of length 16kiB+1 to overflow page Benchmark Mode Cnt Score Error Units RecyclerBytesStreamOutputBenchmark.writeVInt avgt 3 71214.075 ± 6742.644 ns/op RecyclerBytesStreamOutputBenchmark.writeVLong avgt 3 186032.816 ± 2546.869 ns/op Microbenchmark results after change: 1. As committed Benchmark Mode Cnt Score Error Units RecyclerBytesStreamOutputBenchmark.writeVInt avgt 3 1456.083 ± 8.507 ns/op RecyclerBytesStreamOutputBenchmark.writeVLong avgt 3 1699.816 ± 77.845 ns/op 2. 
With arrays of length 16kiB+1 to overflow page Benchmark Mode Cnt Score Error Units RecyclerBytesStreamOutputBenchmark.writeVInt avgt 3 64393.687 ± 3891.214 ns/op RecyclerBytesStreamOutputBenchmark.writeVLong avgt 3 74050.944 ± 7618.977 ns/op --- .../RecyclerBytesStreamOutputBenchmark.java | 56 ++++++++++++++----- .../io/stream/RecyclerBytesStreamOutput.java | 23 ++++++-- .../common/io/stream/StreamOutputHelper.java | 45 ++++++++++++--- 3 files changed, 97 insertions(+), 27 deletions(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/bytes/RecyclerBytesStreamOutputBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/bytes/RecyclerBytesStreamOutputBenchmark.java index 99ff455a360c4..c9119f0dbd992 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/bytes/RecyclerBytesStreamOutputBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/bytes/RecyclerBytesStreamOutputBenchmark.java @@ -25,6 +25,7 @@ import org.openjdk.jmh.annotations.Warmup; import java.io.IOException; +import java.util.Random; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; @@ -48,6 +49,7 @@ public class RecyclerBytesStreamOutputBenchmark { private byte[] bytes3; private byte[] multiPageBytes; private int[] vints; + private long[] vlongs; @Setup public void initResults() throws IOException { @@ -70,25 +72,43 @@ public void initResults() throws IOException { longString = UTF8StringBytesBenchmark.generateAsciiString(100); nonAsciiString = UTF8StringBytesBenchmark.generateUTF8String(200); veryLongString = UTF8StringBytesBenchmark.generateAsciiString(800); - // vint values for benchmarking + vints = new int[1000]; for (int i = 0; i < vints.length; i++) { + vints[i] = randomVInt(random); + } + vlongs = new long[1000]; + for (int i = 0; i < vlongs.length; i++) { + vlongs[i] = randomVLong(random); + } + } + + private static int randomVInt(Random random) { 
+ if (random.nextBoolean()) { + // 1-byte 50% of the time + return random.nextInt(1 << 7); + } + for (int maxBits = 14; maxBits < 32; maxBits += 7) { + // 2-byte 25% of the time, 3-byte 12.5% of the time etc. if (random.nextBoolean()) { - // 1-byte 50% of the time - vints[i] = random.nextInt(128); - } else if (random.nextBoolean()) { - // 2-byte 25% of the time - vints[i] = random.nextInt(128, 16384); - } else { - if (random.nextBoolean()) { - // 3-byte vints - vints[i] = random.nextInt(16384, 2097152); - } else { - // All vint variants - vints[i] = random.nextInt(); - } + return random.nextInt(1 << (maxBits - 7), 1 << maxBits); } } + return random.nextInt(); + } + + private static long randomVLong(Random random) { + if (random.nextBoolean()) { + // 1-byte 50% of the time + return random.nextLong(1L << 7); + } + for (int maxBits = 14; maxBits < 63; maxBits += 7) { + // 2-byte 25% of the time, 3-byte 12.5% of the time etc. + if (random.nextBoolean()) { + return random.nextLong(1L << (maxBits - 7), 1L << maxBits); + } + } + return random.nextLong() & 0x7FFF_FFFF_FFFF_FFFFL; } @Benchmark @@ -144,6 +164,14 @@ public void writeVInt() throws IOException { } } + @Benchmark + public void writeVLong() throws IOException { + streamOutput.seek(1); + for (long vlong : vlongs) { + streamOutput.writeVLong(vlong); + } + } + private record BenchmarkRecycler(AtomicReference bytesRef) implements Recycler { @Override diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/RecyclerBytesStreamOutput.java b/server/src/main/java/org/elasticsearch/common/io/stream/RecyclerBytesStreamOutput.java index c6e8a0bc798e3..eca2a35df970a 100644 --- a/server/src/main/java/org/elasticsearch/common/io/stream/RecyclerBytesStreamOutput.java +++ b/server/src/main/java/org/elasticsearch/common/io/stream/RecyclerBytesStreamOutput.java @@ -132,16 +132,15 @@ public void writeVInt(int i) throws IOException { return; } - int bytesNeeded = vIntLength(i); - if (bytesNeeded > remainingBytesInPage) { 
+ if (MAX_VINT_BYTES > remainingBytesInPage && vIntLength(i) > remainingBytesInPage) { super.writeVInt(i); } else { - BytesRef currentPage = currentBytesRef; - putVInt(i, bytesNeeded, currentPage.bytes, currentPage.offset + currentPageOffset); - this.currentPageOffset = currentPageOffset + bytesNeeded; + this.currentPageOffset = StreamOutputHelper.putMultiByteVInt(currentBytesRef.bytes, i, currentPageOffset); } } + private static final int MAX_VINT_BYTES = 5; + public static int vIntLength(int value) { int leadingZeros = Integer.numberOfLeadingZeros(value); if (leadingZeros >= 25) { @@ -164,6 +163,20 @@ private void putVInt(int i, int bytesNeeded, byte[] page, int offset) { } } + private static final int MAX_VLONG_BYTES = 9; + + @Override + void writeVLongNoCheck(long i) throws IOException { + final int currentPageOffset = this.currentPageOffset; + final int remainingBytesInPage = pageSize - currentPageOffset; + + if (MAX_VLONG_BYTES < remainingBytesInPage) { + this.currentPageOffset = StreamOutputHelper.putVLong(currentBytesRef.bytes, i, currentPageOffset); + } else { + super.writeVLongNoCheck(i); + } + } + @Override public void writeInt(int i) throws IOException { final int currentPageOffset = this.currentPageOffset; diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutputHelper.java b/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutputHelper.java index 6bcfcc90910e2..132183e9fc1bf 100644 --- a/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutputHelper.java +++ b/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutputHelper.java @@ -62,7 +62,7 @@ public static void writeString(String str, OutputStream outputStream) throws IOE */ public static void writeString(String str, byte[] buffer, int prefixLength, OutputStream outputStream) throws IOException { final int charCount = str.length(); - int offset = prefixLength + putVInt(buffer, charCount, prefixLength); + int offset = putVInt(buffer, 
charCount, prefixLength); for (int i = 0; i < charCount; i++) { final int c = str.charAt(i); if (c <= 0x007F) { @@ -130,12 +130,12 @@ public static void writeGenericString(String value, OutputStream outputStream) t * Put the integer {@code i} into the given {@code buffer} starting at the given {@code offset}, formatted as per * {@link StreamOutput#writeVInt}. Performs no bounds checks: callers must verify that there is enough space in {@code buffer} first. * - * @return number of bytes written. + * @return updated offset. */ public static int putVInt(byte[] buffer, int i, int offset) { if (Integer.numberOfLeadingZeros(i) >= 25) { buffer[offset] = (byte) i; - return 1; + return offset + 1; } return putMultiByteVInt(buffer, i, offset); } @@ -144,17 +144,46 @@ public static int putVInt(byte[] buffer, int i, int offset) { * Put the integer {@code i} into the given {@code buffer} starting at the given {@code offset}, formatted as per * {@link StreamOutput#writeVInt}. Performs no bounds checks: callers must verify that there is enough space in {@code buffer} first. * - * @return number of bytes written. + * @return updated offset. */ // extracted from putVInt() to allow the hot single-byte path to be inlined public static int putMultiByteVInt(byte[] buffer, int i, int offset) { - int index = offset; do { - buffer[index++] = ((byte) ((i & 0x7f) | 0x80)); + buffer[offset++] = ((byte) ((i & 0x7f) | 0x80)); i >>>= 7; } while ((i & ~0x7F) != 0); - buffer[index++] = (byte) i; - return index - offset; + buffer[offset++] = (byte) i; + return offset; } + /** + * Put the long {@code l} into the given {@code buffer} starting at the given {@code offset}, formatted as per + * {@link StreamOutput#writeVLong}. Performs no bounds checks: callers must verify that there is enough space in {@code buffer} first + * and that {@code l} is non-negative. + * + * @return updated offset. 
+ */ + public static int putVLong(byte[] buffer, long l, int offset) { + if (Long.numberOfLeadingZeros(l) >= 57) { + buffer[offset] = (byte) l; + return offset + 1; + } + return putMultiByteVLong(buffer, l, offset); + } + + /** + * Put the long {@code l} into the given {@code buffer} starting at the given {@code offset}, formatted as per + * {@link StreamOutput#writeVLong}. Performs no bounds checks: callers must verify that there is enough space in {@code buffer} first + * and that {@code l} is non-negative. + * + * @return updated offset. + */ + public static int putMultiByteVLong(byte[] buffer, long l, int offset) { + do { + buffer[offset++] = ((byte) ((l & 0x7f) | 0x80)); + l >>>= 7; + } while ((l & 0xFFFF_FFFF_FFFF_FF80L) != 0); + buffer[offset++] = (byte) l; + return offset; + } }